Durand et al. 2019
The turnover of ribosome-associated transcripts from de novo ORFs produces gene-like characteristics available for de novo gene emergence in wild yeast populations. 
#########################################################################
# Supplemental Code: description of the folder content 
#########################################################################

/00_figures_script/ # Scripts to draw figures 
/00_tables/ # Additional tables

#=============================================================================================
/01orf_annotation/ # identification of microsyntenic regions and ORF annotation

	1.MakePseudo.py # Make pseudomolecules from IDBA scaffolds with the ABACAS software
	2.Run_augustus_pseudo.py #Gene annotation with augustus
	3.parse_AugToFasta.py # Parse augustus output to prepare multifasta file with genes sequences for SynCHRO 
	4.runSynchro.txt #link to run SynCHRO
	5.parseRBH_tosilixx.py #Parse SynCHRO reciprocal best hit results and run SiLiX clustering
	5b.annotgff_RBH.py #gene annotation based on reference S288C
	6.parse_silixx.py #parse SiLiX output
	7.extract_syntpairs.py #select conserved genes and extract genomic coordinates of syntenic intergenic regions   
	8.align_intergenic.py #extract intergenic sequences and muscle alignment (4 strains)
	9.run_historian.py # run ancestral reconstruction with historian
	10alignSIDwithrecons.py #script to align all syntenic intergenic regions (24 HC strains + YPS128 + S288C + ancestral reconstructions)
	11.runRM.py #script to run RepeatMasker on katak 
	11b.align_db.py #prepare file for masking and db alignments for masking
	12.annotORF.py #ORF annotation
	13.make_orf_matrix.R #Script to make a presence/absence table of each annotated ORF per haplotype and add an ORF number as a unique ID based on position conservation
	14.Mask_RM_db.py #Remove ORFs overlapping a masked region 
	15.mask_SIDfasta.py #Mask Syntenic intergenic regions to check their characteristics
	16.checkconservation.R #Final filter: keep ORF families with no overlap with masked regions in all strains

#=============================================================================================
/02orf_analysis/ # ORF analysis and figures 
	01orfconservation.R #Make conservation groups
	02conservation_figures.R #Make conservation figures

#=============================================================================================
/03plastid/ #detection of translated ORFs
	01.gfftogtf_genes.py #prepare gtf file for plastid : genes
	02.gfftogtf_orfs.py #idem for orfs
	03.metagene_analysis_genes.py #metagene analysis with genes
	04.metagene_analysis_orf.py #metagene analysis with ORFs
	05.merge_density.py #merge gene or orf densities in one file per haplo
	06.parse_tabledensity.py #parse table density for analysis
	06.phasing.R #script to detect initiation peak and phasing from RPF read density
	
#=============================================================================================
/04diff_expression/ #expression analysis
	0_filtgff_sizecons.py  #prepare gff with ORFs >= 60 nt
	1_run_multicov.py  #Run bedtools multicov -> genes and ORFs read coverage
	2.mergecounts_all.R #Merge coverage results in one file: one line per feature 
	3_norm_counts.R  #Normalize read counts with DESeq2
	4.extract_TE_ratio.R #TE analysis with DESeq2
	5_select_diffexp_candidates.R #Script to identify lineage specific expression increase or decrease


#=============================================================================================
/07_candidates/ and /DHFR-phenotype-analysis/ #Analysis of differentially expressed tORFs 

#=============================================================================================
/09structure_long/ #Disorder with iupred
	
	00make_fastaorf.py #prepare data for iupred 
	00make_fastagene.py #prepare data for iupred 
	1.run_iupred_orf.py #run iupred on ORFs
	1b.run_iupred_genes.py #run iupred on genes 
	2.parse_iupred.R #parse iupred results 
	2.score_evolution_orf.R #check disorder evolution 
#=============================================================================================
/11_prop/ and /19_proximity/ # Analysis of tORF sequence properties 

#=============================================================================================

/links/ # contains links, directories and some correspondences for analysis  

