##### The sections below correspond to those in the Methods part and provide the analytic details and step-by-step scripts (provided in the "script" folder). Date: 20200730; edited by Ming-Jung Liu

###Section list:
1.Sequencing data processing
2.Identification of in vivo translation initiation sites
3.Determination of translation efficiency and tRNA abundance
4.Calculation of PWM scores for the flanking regions of TISs
5.Conservation of alternative TISs in orthologous gene pairs

###note:
1. The Solanum lycopersicum genome sequences and gene models were based on the genome versions SL3.0 and ITAG3.2 (https://solgenomics.net). The gene models of the non-coding rRNA, tRNA, snoRNA and snRNA genes were retrieved from the SL2.5 assembly in Ensembl Plants (https://plants.ensembl.org).
2. To visualise the results, figures were generated based on the raw data generated below and plotted by Prism software.

-------------------------------------------------------------------------------------------------------------------
####1.Sequencing data processing

###1 align the reads to genomes
# PATHtoFILE: the input FASTQ file; every intermediate file is named by
# appending a suffix to it. RealName: label used for the final BED file.
# (Both values must be closed with a plain ASCII double quote.)
PATHtoFILE="specify_sequencing_datasets.fastq"
RealName="specify_filename"
##packages required: Fastx-Toolkit, FastQC, bowtie2, STAR, bedtools

##1.1 clean the reads
# Quality filter: keep reads with >= 85% of bases at quality >= 20 (Phred+33).
fastq_quality_filter -q20 -p85 -Q33 -i "${PATHtoFILE}" -o "${PATHtoFILE}.filtered"
# Remove the 3' adapter sequence.
fastx_clipper -a CTGTAGGCACCATCAAT -Q33 -i "${PATHtoFILE}.filtered" -o "${PATHtoFILE}.filtered.triAda"
# Keep bases from position 2 onwards, i.e. drop the first nucleotide.
fastx_trimmer -Q33 -f 2 -i "${PATHtoFILE}.filtered.triAda" -o "${PATHtoFILE}.filtered.triAda.trim1stnt"

##1.2 map reads to non-coding RNAs
# Reads are aligned against the rRNA, tRNA, snoRNA and snRNA indexes in turn;
# the reads left unaligned at each step (--un) are the input of the next step.
# Each line continuation needs whitespace before the trailing backslash,
# otherwise the shell glues the --un file name and "-S" into a single word.
cleaned_reads="${PATHtoFILE}.filtered.triAda.trim1stnt"

mkdir -p "rRNA_alignment"
bowtie2 -t -x Ensembl/SL2.5/SL2.5_rRNA_bowtie2_index/SL_rRNA \
  -U "${cleaned_reads}" \
  --un "${cleaned_reads}.DrRNA" \
  -S "rRNA_alignment/${PATHtoFILE}.rRNA_alignment.sam" \
  2> "rRNA_alignment/${PATHtoFILE}.rRNA_alignment_summary.txt"

mkdir -p "tRNA_alignment"
bowtie2 -t -x Ensembl/SL2.5/SL2.5_tRNA_bowtie2_index/tRNA \
  -U "${cleaned_reads}.DrRNA" \
  --un "${cleaned_reads}.DrRNA.DtRNA" \
  -S "tRNA_alignment/${PATHtoFILE}.tRNA_alignment.sam" \
  2> "tRNA_alignment/${PATHtoFILE}.tRNA_alignment_summary.txt"

mkdir -p "snoRNA_alignment"
bowtie2 -t -x Ensembl/SL2.5/SL2.5_snoRNA_bowtie2_index/snoRNA \
  -U "${cleaned_reads}.DrRNA.DtRNA" \
  --un "${cleaned_reads}.DrRNA.DtRNA.DsnoRNA" \
  -S snoRNA_alignment/RIBOseq_snoRNA_alignment.sam \
  2> snoRNA_alignment/bowtie2_snoRNA_alignment_summary.txt

mkdir -p "snRNA_alignment"
bowtie2 -t -x Ensembl/SL2.5/SL2.5_snRNA_bowtie2_index/snRNA \
  -U "${cleaned_reads}.DrRNA.DtRNA.DsnoRNA" \
  --un "${cleaned_reads}.DrRNA.DtRNA.DsnoRNA.DsnRNA" \
  -S snRNA_alignment/RIBOseq_snRNA_alignment.sam \
  2> snRNA_alignment/bowtie2_snRNA_alignment_summary.txt

##1.3 map reads to genomes
mkdir -p STAR_out/
# STAR is told to read gzip-compressed input (--readFilesCommand zcat), but
# bowtie2 --un writes plain text, so compress the depleted reads first.
depleted_reads="${PATHtoFILE}.filtered.triAda.trim1stnt.DrRNA.DtRNA.DsnoRNA.DsnRNA"
gzip -c "${depleted_reads}" > "${depleted_reads}.gz"
STAR --genomeDir SGN/ITAG3.2/STAR_index_genome_SL3.2 \
  --readFilesIn "${depleted_reads}.gz" \
  --readFilesCommand zcat \
  --alignIntronMin 20 \
  --alignIntronMax 6756 \
  --outFilterMultimapNmax 20 \
  --outFilterMismatchNmax 3 \
  --outFileNamePrefix ./STAR_out/ \
  --outSAMtype BAM SortedByCoordinate \
  --runThreadN 1 \
  --outSAMmultNmax 1 \
  --outMultimapperOrder Random \
  --alignEndsType EndToEnd \
  --quantMode GeneCounts \
  --outSAMattributes All
# Convert the sorted alignments to BED (keeping the CIGAR string) for the
# downstream 5'-end / P-site assignment scripts.
bedtools bamtobed -cigar -i STAR_out/Aligned.sortedByCoord.out.bam > "STAR_out/${RealName}.bed"

##1.4 get the 5'end positions of mapped reads on genes
# Read-length range (nt) that is analysed, one length at a time.
PRF_len_min="25"
PRF_len_max="31"
# Shared inputs of the loops below.
genome_fai="SGN/SL3.0/S_lycopersicum_chromosomes.3.00.fa.fai"
reads_dir="5PrimeEnd_Reads"

# 5'end assignment
# The loops below read the per-length files "${reads_dir}/<len>_nt_5terminus.bed",
# presumably written by this script -- confirm against R1_5Prime_nt_assignment.py.
python R1_5Prime_nt_assignment.py "STAR_out/${RealName}.bed" "${PRF_len_min}" "${PRF_len_max}" second

# make bedgraph file (one per strand and read length)
for ((i = PRF_len_min; i <= PRF_len_max; i++)); do
  genomeCoverageBed -bg -strand - -g "${genome_fai}" -i "${reads_dir}/${i}_nt_5terminus.bed" > "${reads_dir}/${i}_nt_5terminus.bed_minus.bedgraph"
  genomeCoverageBed -bg -strand + -g "${genome_fai}" -i "${reads_dir}/${i}_nt_5terminus.bed" > "${reads_dir}/${i}_nt_5terminus.bed_plus.bedgraph"
done

# make gene coverage from bedgraph
for ((i = PRF_len_min; i <= PRF_len_max; i++)); do
  python R4_make_single_gene_coverage.py SGN/ITAG3.2/ITAG3.2_gene_models.gff "${reads_dir}/${i}_nt_5terminus.bed_plus.bedgraph" "${reads_dir}/${i}_nt_5terminus.bed_minus.bedgraph"
done

# Calculate the meta-coverage of each site; output raw read count of each position
for ((i = PRF_len_min; i <= PRF_len_max; i++)); do
  python R5_calculate_coverage.py "${reads_dir}/${i}_nt_5terminus.bed_plus.bedgraph_minus_BedGraph.GeneCovN" 20 1 RawReadCount
done

# Output the pdf of metaplot to reveal the OffsetAssignment
for ((i = PRF_len_min; i <= PRF_len_max; i++)); do
  Rscript R6_making_metaplot_toTIS.R "${reads_dir}/${i}_nt_5terminus.bed_plus.bedgraph_minus_BedGraph.GeneCovN.RawReadCount_COV_toTIS.txt" 2
done

###2 get the number of P-site assigned reads along transcripts of a given gene based on gene models
# Re-assignment of P-site
# Comma-separated "<read length>:<offset>" pairs, chosen from the metaplots
# produced in step 1.4.
OffsetAssignment="25:9,26:10,27:11,28:11,29:12,30:13,31:13"
python R7_Psite_assignment.py "STAR_out/${RealName}.bed" "${OffsetAssignment}" second merge

# make bedgraph file (one per strand)
# Merge_PsiteAssigned.bed is presumably the output of R7_Psite_assignment.py -- confirm.
genome_fai="SGN/SL3.0/S_lycopersicum_chromosomes.3.00.fa.fai"
psite_bed="Merge_PsiteAssigned.bed"
genomeCoverageBed -bg -strand + -g "${genome_fai}" -i "${psite_bed}" > "${psite_bed}_plus.bedgraph"
genomeCoverageBed -bg -strand - -g "${genome_fai}" -i "${psite_bed}" > "${psite_bed}_minus.bedgraph"

# make gene coverage from bedgraph
python R4_make_single_gene_coverage.py SGN/ITAG3.2/ITAG3.2_gene_models.gff "${psite_bed}_plus.bedgraph" "${psite_bed}_minus.bedgraph"
# creates "GeneCovN" files -> read count per position for each gene

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

####2.Identification of in vivo translation initiation sites

###1 identify the TIS position by comparing the LTM and CHX signals on genes
## LTM.GeneCovN, CHX.GeneCovN --> read count per gene generated from section#1
# File-name stems shared by the steps of this section; each step's output name
# extends the previous one.
ltm_cov="LTM.GeneCovN"
chx_cov="CHX.GeneCovN"
tis_raw="${ltm_cov}.TIS.CHX"
tis_filtered="${tis_raw}.TIS_ATG_nearReadCount_frame.filtered"
tis_cds="${tis_filtered}.TIS_gene_CDS1.3"

python 1_Identification_TIS_within_genes_by_LTM_CHX.py "${ltm_cov}" "${chx_cov}"
# creates an output file -> LTM.GeneCovN.TIS.CHX

### 2 get/adjust the TIS position on genes
## ITAG3.2_cDNA.fasta -> downloaded from the genome version ITAG3.2 (https://solgenomics.net)
## SGN/SL3.0/ITAG3.2_cDNA.fasta_seq -> each row = gene_id\t cdna_sequences\n
python 2_agjust_TIS.py "${tis_raw}" SGN/SL3.0/ITAG3.2_cDNA.fasta_seq "${ltm_cov}" "${chx_cov}"

### 3 get the codon/positions of TISs for each gene
python 3_get_TIS_codon.py "${tis_filtered}"

### 4 get the read counts for each TIS in the LTM, CHX and RNA samples
python 5_get_TIS_readcount.py "${tis_cds}" "${ltm_cov}" "${chx_cov}" RNA.GeneCovN

### 5 get the codon composition on the CDS of annotated genes
## ITAG3.2_CDS.fasta_seq -> each row = gene_id\t cds_sequences\n
python get_codon_on_CDS.py ITAG3.2_CDS.fasta_seq.txt ITAG3.2_CDS.fasta_seq annotated_gene_list

### 6 get the sequences and ORF types of the alternative TIS-associated ORFs
python Get_TIS_position.py "${tis_cds}" ITAG3.2_cDNA.fasta_seq
python get_TIS_ORF_type.py "${tis_cds}_seq_v2.txt"
# creates an output file -> LTM.GeneCovN.TIS.CHX.TIS_ATG_nearReadCount_frame.filtered.TIS_gene_CDS1.3_seq_v2.txt.features

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
####3.Determination of translation efficiency and tRNA abundance

###1 For the determination of translation efficiency, the read counts of the CDS in the CHX/total RNA samples, of a given TIS (a 5-nt window flanking the TIS) in the LTM sample and of the transcribed regions of a gene in mRNA sample were generated from step#4 of Section#2 and then used to determine translation efficiency by Excel as described in Methods.

###2 get the tRNA annotation
# Runs the annotation script on the tRNA FASTA file described below.
python tRNA_annotation.py Ensembl/Solanum_lycopersicum.SL2.50.ncrna.fa_tRNA.fasta

# Input: Solanum_lycopersicum.SL2.50.ncrna.fa_tRNA.fasta -> from the SL2.5 version of the reference genome in Ensembl Plants (https://plants.ensembl.org)
# Output: creates the file -> Solanum_lycopersicum.SL2.50.ncrna.fa_tRNA.fasta_annotation.txt

###3 get the tRNA expression for each tRNA gene based on the sequencing datasets

# PATHtoFILE: the input FASTQ file; every intermediate file is named by
# appending a suffix to it. (Values must be closed with a plain ASCII quote.)
PATHtoFILE="specify_sequencing_datasets.fastq"
RealName="specify_filename"
#packages required: Fastx-Toolkit, FastQC, bowtie2, samtools, bedtools

##3.1 clean the reads
fastq_quality_filter -q20 -p85 -Q33 -i "${PATHtoFILE}" -o "${PATHtoFILE}.filtered"
fastx_clipper -a CTGTAGGCACCATCAAT -Q33 -i "${PATHtoFILE}.filtered" -o "${PATHtoFILE}.filtered.triAda"
fastx_trimmer -Q33 -f 2 -i "${PATHtoFILE}.filtered.triAda" -o "${PATHtoFILE}.filtered.triAda.trim1stnt"

##3.2 map reads to tRNAs
# Each line continuation needs whitespace before the trailing backslash,
# otherwise the shell glues the --un file name and "-S" into a single word.
cleaned_reads="${PATHtoFILE}.filtered.triAda.trim1stnt"

# rRNA depletion
mkdir -p "rRNA_alignment"
bowtie2 -t -x Ensembl/SL2.5/SL2.5_rRNA_bowtie2_index/SL_rRNA \
  -U "${cleaned_reads}" \
  --un "${cleaned_reads}.DrRNA" \
  -S "rRNA_alignment/${PATHtoFILE}.rRNA_alignment.sam" \
  2> "rRNA_alignment/${PATHtoFILE}.rRNA_alignment_summary.txt"

# tRNA alignment
mkdir -p "tRNA_alignment"
bowtie2 -t -x Ensembl/SL2.5/SL2.5_tRNA_bowtie2_index/tRNA \
  -U "${cleaned_reads}.DrRNA" \
  --un "${cleaned_reads}.DrRNA.DtRNA" \
  -S "tRNA_alignment/${PATHtoFILE}.tRNA_alignment.sam" \
  2> "tRNA_alignment/${PATHtoFILE}.tRNA_alignment_summary.txt"

# generate the tRNA.bed files
cd tRNA_alignment
# bedtools bamtobed needs BAM input, so convert the bowtie2 SAM output first.
samtools view -b -o "${PATHtoFILE}.tRNA_alignment.sam.bam" "${PATHtoFILE}.tRNA_alignment.sam"
bedtools bamtobed -cigar -i "${PATHtoFILE}.tRNA_alignment.sam.bam" > "${PATHtoFILE}.tRNA_alignment.sam.bam.bed"
# NOTE(review): the .fai index and, below, the annotation file are looked up
# relative to tRNA_alignment/ -- confirm they are placed there.
genomeCoverageBed -d -5 -strand + -g Solanum_lycopersicum.SL2.50.ncrna.fa_tRNA.fasta.fai -i "${PATHtoFILE}.tRNA_alignment.sam.bam.bed" > tRNA.bed
# Creates the file -> tRNA.bed

#3.3 get the tRNA reads based on tRNA annotation file
python calculate_tRNA_abuandce.py tRNA.bed Solanum_lycopersicum.SL2.50.ncrna.fa_tRNA.fasta_annotation.txt

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
####4.Calculation of PWM scores for the flanking regions of TISs

###1 calculate the nucleotide frequency of the entire 5' UTR regions for genes with annotated TISs.
## ITAG3.2_genemodel_info.txt --> gene_id\t position of CDS_start\t position of CDS_end on the cDNA of a gene\n
# Inputs shared by steps 1 and 2.
cdna_seq="ITAG3.2_cDNA.fasta_seq"
gene_list="interested_genes.txt"

python Kozac_seq_analyses_background_5UTR.py "${cdna_seq}" ITAG3.2_genemodel_info.txt "${gene_list}"
# creates an output file -> TIS_flankingsequences.atcg.background

###2: calculate the nucleotide frequency for a 13-nt window flanking the annotated TISs.

python Kozac_seq_analyses.py "${cdna_seq}" "${gene_list}"
# creates an output file -> "TIS_flankingsequences.atcg".

###3: calculate the normalized nucleotide frequency at aTISs.
## Based on the output files from steps 1 and 2, determine (via Excel) the log2 ratio between the nucleotide frequency of all annotated TISs and the background (i.e., the nucleotide frequency of the entire 5' UTR regions of all annotated genes), and then create an output file -> "normalized_aTIS_atcg.txt"

###4: calculate PWM scores for alternative TISs
## alternative_TISs.flankingsequences --> the sequences of the 13-nt windows flanking the alternative TISs of interest, in FASTA format.

python 4.calculate_ATG_score.py normalized_aTIS_atcg.txt alternative_TISs.flankingsequences

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
####5.Conservation of alternative TISs in orthologous gene pairs

### 1 Get Between Species Matches
## 1 All vs. All BLAST
# Concatenate the ITAG3.2 and Araport11 protein (and CDS) FASTA files so that
# both species are contained in one file.
cat ITAG3.2_protein.fasta Araport11_protein.fasta >SL3.2_AT11_protein.fasta
cat ITAG3.2_cds.fasta Araport11_cds.fasta >SL3.2_AT11_cds.fasta
# Submit the BLAST job; presumably it produces SL3.2_AT11_protein.out, which
# the next step reads -- TODO confirm against sbatch_blastAll.sh.
sbatch sbatch_blastAll.sh 

## 2 Get Reciprocal Best Matches & Cross-Species Matches
# The file names below indicate that each ParseBlast.py call appends its mode
# (.xspecies / .recip) to the name of its input.
python ParseBlast.py -f xspecies -blast SL3.2_AT11_protein.out
python ParseBlast.py -f get_reciprocal -blast SL3.2_AT11_protein.out.xspecies -target bitscore
python ParseBlast.py -f xspecies -blast SL3.2_AT11_protein.out.recip

## 3. Get Best vs. Non-Best Matches
python 1_FilterReciprocalHits.py SL3.2_AT11_protein.out.xspecies.recip SL3.2_AT11_protein.out.recip.xspecies

# Creates the following files ("blast.out" stands for the BLAST output name)
# - blast.out.BestMatches    --> Between-species matches without a better within-species match
# - blast.out.NonBestMatches --> Between-species matches with a better within-species match

# NOTE(review): the first command below looks like the generic template of the
# second one (no "blast.out" file is produced by the steps above) -- confirm
# whether it should be run at all.
python 2_GetBetterMatches.py blast.out.NonBestMatches blast.out
python 2_GetBetterMatches.py SL3.2_AT11_protein.out.NonBestMatches SL3.2_AT11_protein.out

# Creates the following files
# - blast.out.NonBestMatches.within_species.better_matches          --> Within-species matches for your non-best matches
# - blast.out.NonBestMatches.within_species.better_matches.groups   --> Each row = tab-separated group of genes (non-best orthologs + all within-species better matches)

### 2 Convert Protein to Nucleotide Alignment
# NOTE(review): this sub-section changes directory (cd 3_MAFFT) twice without
# returning, and several paths below are written relative to different
# directories. Check the working directory before running each step.

## 2.1a Create protein fasta files for each group of genes
python 1_MakeGroupFasta.py SL3.2_AT11_protein.fasta SL3.2_AT11_protein.out.BestMatches
mkdir 3_MAFFT
mv *Group_* 3_MAFFT/
mkdir Best_ProteinAlignment_SL3.2_AT11
# NOTE(review): the group files were already moved to 3_MAFFT/ above, and the
# later steps refer to them as SL3.2_AT11_protein.out.BestMatches.Group_*
# rather than blast.out.BestMatches.Group_* -- confirm the intended source of
# this copy and of the clean-up that follows.
cp blast.out.BestMatches.Group_* Best_ProteinAlignment_SL3.2_AT11/
rm *protein.out.BestMatches.Group_*

## 2.2a Align protein sequences
# Write the list of protein group FASTA files, generate the MAFFT commands
# from that list, then run them.
ls Best_ProteinAlignment_SL3.2_AT11/SL3.2_AT11_protein.out.BestMatches.Group_* > Best_ProteinFASTAFiles.txt
python 2_WriteMAFFTAlignCommands.py Best_ProteinFASTAFiles.txt
module load mafft
sh MAFFTAlign.cc

## 2.3a Create nucleotide files for each group of genes
# Same grouping as 2.1a, but built from the CDS FASTA file.
python 1_MakeGroupFasta.py SL3.2_AT11_cds.fasta SL3.2_AT11_protein.out.BestMatches
mkdir Best_NucleotideAlignment_SL3.2_AT11
cp SL3.2_AT11_protein.out.BestMatches.Group_* Best_NucleotideAlignment_SL3.2_AT11

## 2.4a Back align nucleotide sequences to protein alignments
# NOTE(review): the two directories passed below were created before this cd,
# i.e. next to 3_MAFFT rather than inside it -- confirm the expected layout.
cd 3_MAFFT
python 3_WriteBackAlignCommands.py Best_ProteinAlignment_SL3.2_AT11 Best_NucleotideAlignment_SL3.2_AT11
sh BackAlign.cc

### Repeat within NonBest Alignments
## 2.1b Create protein fasta files for each group of genes
# NOTE(review): no 1_MakeGroupFasta.py call for the NonBest protein groups is
# shown here, and the directory is created as NonBest_ProteinAlignment while
# the copy targets 3_MAFFT/NonBest_ProteinAlignment -- confirm.
mkdir NonBest_ProteinAlignment
cp SL3.2_AT11_protein.out.NonBestMatches.Group* 3_MAFFT/NonBest_ProteinAlignment
rm SL3.2_AT11_protein.out.NonBestMatches.Group_*

## 2.2b Align protein sequences
ls NonBest_ProteinAlignment/SL3.2_AT11_protein.out.NonBestMatches.Group* > NonBest_ProteinFASTAFiles.txt
python 2_WriteMAFFTAlignCommands.py NonBest_ProteinFASTAFiles.txt
sh MAFFTAlign.cc

## 2.3b Create nucleotide files for each group of genes
# NOTE(review): the directory is created without the 3_MAFFT/ prefix that the
# copy on the following line uses -- confirm.
python 1_MakeGroupFasta.py SL3.2_AT11_cds.fasta SL3.2_AT11_protein.out.NonBestMatches
mkdir NonBest_NucleotideAlignment_SL3.2_AT11
cp SL3.2_AT11_protein.out.NonBestMatches.Group_* 3_MAFFT/NonBest_NucleotideAlignment_SL3.2_AT11

## 2.4b Back align nucleotide sequences to protein alignments
cd 3_MAFFT
python 3_WriteBackAlignCommands.py NonBest_ProteinAlignment NonBest_NucleotideAlignment_SL3.2_AT11
sh BackAlign.cc

### 3 Get Ks of Matches ###
## 3.1 Make a list of nucleotide alignment files
# mkdir -p: also creates 4_PAML/ when it does not exist yet and does not fail
# when the directories are already there (re-runs).
mkdir -p 4_PAML/NonBest_NucleotideAlignment_SL3.2_AT11_aligned
cp 3_MAFFT/NonBest_NucleotideAlignment_SL3.2_AT11/SL3.2_AT11_protein.out.NonBestMatches.Group_*.aligned.fa 4_PAML/NonBest_NucleotideAlignment_SL3.2_AT11_aligned
# NOTE(review): this call passes the directory without the 4_PAML/ prefix,
# while the Best call below includes it -- confirm which form is intended.
python 1b_RenameFiles.py NonBest_NucleotideAlignment_SL3.2_AT11_aligned 4 .

mkdir -p 4_PAML/Best_NucleotideAlignment_SL3.2_AT11_aligned
cp 3_MAFFT/Best_NucleotideAlignment_SL3.2_AT11/SL3.2_AT11_protein.out.BestMatches.Group_*.aligned.fa 4_PAML/Best_NucleotideAlignment_SL3.2_AT11_aligned
python 1b_RenameFiles.py 4_PAML/Best_NucleotideAlignment_SL3.2_AT11_aligned 4 .

# NOTE(review): these lists use paths relative to 4_PAML/ but are written
# before "cd 4_PAML" below -- confirm the intended working directory.
ls Best_NucleotideAlignment_SL3.2_AT11_aligned/*.aligned.fa > Best_NucAlignFiles.txt
ls NonBest_NucleotideAlignment_SL3.2_AT11_aligned/*.aligned.fa > NonBest_NucAlignFiles.txt

## 3.2 Write PAML Alignment commands
# Use YN00 for BestMatches
module load paml
cd 4_PAML
python 2a_WriteYN00Commands_Paired.py Best_NucAlignFiles.txt
sh yn00_Commands.cc

# Use Codeml for NonBestMatches
python 2b_WriteCodemlCommandsPairwise.py NonBest_NucAlignFiles.txt
sh CodemlCommands_Pairwise.cc

## 3.3 Get and process results
# yn00
ls Best_NucleotideAlignment_SL3.2_AT11_aligned/*_Results > yn00_Results_List.txt
python 3a_ProcessYN00_Results.py yn00_Results_List.txt
# will create/output a file named Combined_yn00_Results.txt

# rename the output file (Combined_yn00_Results.txt to another name)
mv Combined_yn00_Results.txt Combined_yn00_Results_SL3.2_AT11.txt

# Codeml
ls NonBest_NucleotideAlignment_SL3.2_AT11_aligned/*_Results > codeml_Results_List.txt
python 3b_ParseCodeml_Results.py codeml_Results_List.txt
cat NonBest_NucleotideAlignment_SL3.2_AT11_aligned/*.parse > CombinedCodemlParsed.txt

# Make a list of NonBest pairs and extract between-species matches from the Codeml results
cut -f 1,2 SL3.2_AT11_protein.out.NonBestMatches > blast_SL3.2_AT11_protein.out.NonBestMatches.pairs
python 3c_FilterForMultiSpecies.py CombinedCodemlParsed.txt blast_SL3.2_AT11_protein.out.NonBestMatches.pairs
# Creates a file named CombinedCodemlParsed.txt.filtered

### 4 get orthologous gene pairs passing the threshold
# NOTE(review): the meaning of the two trailing numeric arguments is defined
# inside filter_OGs.py and is not visible here -- confirm against the script.
python filter_OGs.py Combined_yn00_Results_SL3.2_AT11.txt 5 6
python filter_OGs.py Ortho_SL_AT/CombinedCodemlParsed.txt.filtered 5 3

## then combine the output lists to get the orthologous gene pairs ("AT11_SL_OG_pair.txt").

###5 MUSCLE alignment of 5UTR and CDS of orthologous gene pairs
##5UTR alignment
# Araport11_genes.5UTR/CDS, ITAG3.2_gene.5UTR/CDS --> 5UTR/CDS sequences per gene
module load muscle
# mkdir -p: also creates the parent 5_muscle/ when it does not exist yet and
# does not fail when the directory is already there (re-runs).
mkdir -p 5_muscle/UTR_5/
python 1_OG_muscle_AT_SL.py Araport11_genes.5UTR ITAG3.2_gene.5UTR AT11_SL_OG_pair.txt 5_muscle/UTR_5/
python 1.2_mask_OG_alignedseq.py 5_muscle/UTR_5/

##CDS alignment
mkdir -p 5_muscle/CDS/
python 1_OG_muscle_AT_SL.py Araport11_genes.CDS ITAG3.2_gene.CDS AT11_SL_OG_pair.txt 5_muscle/CDS/
python 1.2_mask_OG_alignedseq.py 5_muscle/CDS/


###6 reveal the presence of TISs and sequence similarity on OG pairs
# SL_alternative_TIS_5UTR/CDS, Arabidopsis_alternative_TIS_5UTR/CDS -> gene/position info of alternative TISs located in the 5UTR/CDS
og_pairs="AT11_SL_OG_pair.txt"

##5UTR
python OG_TIS_conservation_5utr.py "${og_pairs}" SL_alternative_TIS_5UTR Arabidopsis_alternative_TIS_5UTR
# Creates an output file ->  SL_alternative_TIS_5UTR.conservation

##CDS
python OG_TIS_conservation_CDS.py "${og_pairs}" SL_alternative_TIS_CDS Arabidopsis_alternative_TIS_CDS
# Creates an output file ->  SL_alternative_TIS_CDS.conservation

###7 reveal the conservation of TIS-associated ORF types on OG pairs
# The SL_/Ara_ feature files named below were generated in step#6 of Section#2.
# NOTE(review): the last argument, [NCC/ATG], is presumably a placeholder for
# choosing either NCC or ATG -- confirm against OG_TIS_conservation_type.py.
features_name="LTM.GeneCovN.TIS.CHX.TIS_ATG_nearReadCount_frame.filtered.TIS_gene_CDS1.3_seq_v2.txt.features"
python OG_TIS_conservation_type.py "SL_${features_name}" "Ara_${features_name}" SL_alternative_TIS_5UTR.conservation UTR_5 [NCC/ATG]
