#!/usr/bin/env python


# Prepare GFF tables for expression analysis.
# Keep ORFs of at least 60 nt that are present in at least one spar haplotype
# (according to the conservation table).
# Minimum feature length (nt) required to keep a feature.
minsize = 60
# Offset (nt) added to / subtracted from the start coordinate to build the
# start window; also embedded in the output file names.
startsize = 60

# Conservation table: first column is the ORF identifier, fifth column the
# conservation group.
dir_cons = "../tables_out/02conservation/conservation_table_spar.txt"

# Haplotypes to process.
chaplo = ["SA03", "SD06", "SD01", "Y128"]


def load_conservation(path):
    """Read the conservation table and return {orf_id: conservation_group}.

    The ORF identifier is taken from the first tab-separated column and the
    conservation group from the fifth one. Blank lines are skipped.
    """
    groups = {}
    with open(path) as table:
        for line in table:
            line = line.rstrip()
            if not line:
                continue
            fields = line.split("\t")
            groups[fields[0]] = fields[4]
    return groups


def start_window(fields, window):
    """Return a copy of GFF ``fields`` restricted to the feature start.

    On the "+" strand the end (column 5) becomes ``start + window``; on the
    "-" strand the start (column 4) becomes ``end - window``. Any other
    strand value leaves the coordinates untouched. ``fields`` itself is not
    modified.
    """
    # NOTE(review): with inclusive coordinates (as implied by the
    # "end - start + 1" size computation) this window spans window + 1
    # positions, and for a feature of exactly `window` nt it extends one
    # position past the feature. Kept as is to preserve existing output;
    # confirm whether "- 1" is wanted.
    trimmed = list(fields)
    strand = fields[6]
    if strand == "+":
        trimmed[4] = str(int(fields[3]) + window)
    elif strand == "-":
        trimmed[3] = str(int(fields[4]) - window)
    return trimmed


def is_conserved_gene(fields):
    """Tell whether the third ';'-separated attribute is "conserved=TRUE".

    A feature whose attribute column (column 9) holds fewer than three
    entries is reported as not conserved instead of raising IndexError.
    """
    attributes = fields[8].split(";")
    return len(attributes) > 2 and attributes[2] == "conserved=TRUE"


def filter_gff(src, dst, keep, min_len=minsize, window=startsize):
    """Copy the features of ``src`` that pass the filters to ``dst``.

    A feature is written when its length (end - start + 1) is at least
    ``min_len`` and ``keep(fields)`` is true; its coordinates are first
    reduced to the start window (see ``start_window``). Blank lines and
    lines starting with "#" are skipped.

    src     -- path of the input GFF file
    dst     -- path of the output GFF file (overwritten)
    keep    -- callable taking the list of tab-separated fields
    min_len -- minimum feature length
    window  -- offset passed to ``start_window``
    """
    # The output is opened first, as before, so it is created (empty) even
    # when the input turns out to be missing.
    with open(dst, "w") as out, open(src) as gff:
        for line in gff:
            line = line.rstrip()
            if not line or line.startswith("#"):
                continue
            fields = line.split("\t")
            size = int(fields[4]) - int(fields[3]) + 1
            if size >= min_len and keep(fields):
                out.write("\t".join(start_window(fields, window)) + "\n")


def main():
    """Build the start-window GFF files for every haplotype."""
    conservation = load_conservation(dir_cons)

    def in_conservation_table(fields):
        # The third column is the key looked up in the conservation table.
        # Membership test rather than a "not" default, so that a table value
        # can never be mistaken for an absent ORF.
        return fields[2] in conservation

    suffix = "start" + str(startsize) + ".gff"

    # ORFs with status = 1
    # =======================================================================
    dir_notfilt = "../../03synt_intergenic_orf/02_ORF_tables/03per_haplo_stat1_ps/"
    dir_filt = "../../05diff_exp_start50/01_gff_st1_60nt/"
    for haplo in chaplo:
        filter_gff(
            dir_notfilt + "table_orf_" + haplo + "_stat1.gff",
            dir_filt + haplo + "_orf_stat1_spar_60nt_" + suffix,
            in_conservation_table,
        )

    # ORFs with status = 1 or 0
    # =======================================================================
    dir_notfilt = "../../03synt_intergenic_orf/02_ORF_tables/02per_haplo_ps/"
    dir_filt = "../../05diff_exp_start50/01_gff_stall_60nt/"
    for haplo in chaplo:
        filter_gff(
            dir_notfilt + "table_orf_" + haplo + ".gff",
            dir_filt + haplo + "_orf_statall_spar_" + suffix,
            in_conservation_table,
        )

    # Genes with conserved annotation
    # =======================================================================
    dir_genes = "../../01annot_gene/04gff_annotcons/conserved_synt/"
    dir_filt = "../../05diff_exp_start50/01_gff_st1_60nt/"
    for haplo in chaplo:
        # The gene annotation files use a different haplotype naming scheme.
        haplo2 = haplo.replace("SA", "A").replace("SD", "D").replace("Y", "YPS")
        filter_gff(
            dir_genes + haplo2 + "_IDBA_aug_cons.gff",
            dir_filt + haplo + "_genes_stat1_spar_60nt_" + suffix,
            is_conserved_gene,
        )


if __name__ == "__main__":
    main()

