#!/usr/bin/env python


"""Prepare start-window GFF tables for expression (coverage) analysis.

For each input annotation file, every feature that is at least ``minsize``
nucleotides long is kept, and its coordinates are reduced to the first
``startsize`` nucleotides counted from the feature's start in the
orientation of its strand (the lowest coordinates for "+", the highest for
"-").

Only the length filter is applied here.  Earlier comments in this script
mentioned a conservation-table / status / synteny check, but no such check
is implemented in this file.
"""

# Minimum feature length (nt) for a feature to be kept.
minsize = 60
# Length (nt) of the window kept at the start of each feature.
startsize = 60

chaplo = ["S288C"]

# haplo = "SA03"
haplo = "S288C"

dir_notfilt = "../../13_TE_Cer/03_annotations/"
dir_genes = "../../13_TE_Cer/03_annotations/"
dir_filt = "../../13_TE_Cer/04_coverage_start/"


def start_window(fields, window):
    """Return a copy of GFF *fields* restricted to the feature's start window.

    *fields* is the list of tab-separated columns of one GFF feature line
    (column 4 = start, column 5 = end, column 7 = strand; 1-based inclusive
    coordinates).  *window* is the number of nucleotides to keep.

    For "+" features the end is moved to ``start + window - 1``; for "-"
    features the start is moved to ``end - window + 1``.  The window never
    extends past the original feature bounds.  Features with any other
    strand value are returned unchanged.  The input list is not modified.
    """
    start = int(fields[3])
    end = int(fields[4])
    strand = fields[6]

    trimmed = list(fields)
    if strand == "+":
        # Inclusive coordinates: a window of N nt starting at `start` ends at
        # start + N - 1 (start + N would cover N + 1 nt).
        trimmed[4] = str(min(end, start + window - 1))
    elif strand == "-":
        # The start of a minus-strand feature is its highest coordinate.
        trimmed[3] = str(max(start, end - window + 1))
    return trimmed


def iter_start_windows(gff_lines, min_len, window):
    """Yield trimmed GFF records (without newline) for sufficiently long features.

    *gff_lines* is any iterable of GFF text lines.  Blank lines and lines
    starting with "#" (comments / directives) are skipped, and parsing stops
    at a ``##FASTA`` directive since what follows is sequence, not features.
    Features shorter than *min_len* nt are dropped.

    Raises ValueError for a non-comment line with fewer than 7 columns, or
    with non-integer coordinates.
    """
    for lineno, raw in enumerate(gff_lines, 1):
        line = raw.rstrip()
        if line.startswith("##FASTA"):
            break
        if not line or line.startswith("#"):
            continue

        fields = line.split("\t")
        if len(fields) < 7:
            raise ValueError(
                "line %d: expected at least 7 tab-separated GFF columns, got %d"
                % (lineno, len(fields))
            )

        # Feature length with 1-based inclusive coordinates.
        size = int(fields[4]) - int(fields[3]) + 1
        if size >= min_len:
            yield "\t".join(start_window(fields, window))


def filter_gff(path_in, path_out, min_len, window):
    """Write to *path_out* the start windows of the features read from *path_in*."""
    with open(path_out, "w") as out:
        with open(path_in) as gff:
            for record in iter_start_windows(gff, min_len, window):
                out.write(record + "\n")


def main():
    """Build the start-window tables for the ORF and the gene annotations."""
    # ORF annotation.
    # NOTE(review): unlike the genes output below, this file name has no "_"
    # between the haplotype and "orf" ("S288Corf_..."); kept as-is because
    # downstream steps may rely on the existing name -- confirm before renaming.
    filter_gff(
        dir_notfilt + "orf_scer_genomic.gff",
        dir_filt + haplo + "orf_scer_genomic_start" + str(startsize) + ".gff",
        minsize,
        startsize,
    )

    # Gene annotation.
    filter_gff(
        dir_genes + "S288C_Genes.gff",
        dir_filt + haplo + "_Genes_start" + str(startsize) + ".gff",
        minsize,
        startsize,
    )


if __name__ == "__main__":
    main()

