# Script to compute, for each haplotype, the distance from every ORF to its closest annotated gene (bedtools closest)
import os
import subprocess
# Haplotypes to process; each one has its own ORF table and gene annotation.
chaplo = ["SA03", "SD01", "SD06", "Y128"]


def iter_records(lines):
    """Yield the tab-separated columns of every data line in *lines*.

    Trailing whitespace is stripped. Blank lines and lines starting with
    ``#`` (GFF comments / directives) are skipped so that they do not
    crash the column indexing done by the callers.
    """
    for line in lines:
        line = line.rstrip()
        if not line or line.startswith("#"):
            continue
        yield line.split("\t")


def gene_bed_row(fields):
    """Return ``[chrom, start, end, name]`` for one gene annotation record.

    The chromosome is the part of column 1 before the first ``.``; the
    name is the second ``;``-separated entry of column 9 (an IndexError is
    raised if column 9 holds fewer than two entries).
    """
    chrom = fields[0].split(".")[0]
    return [chrom, fields[3], fields[4], fields[8].split(";")[1]]


def orf_bed_row(fields):
    """Return ``[chrom, start, end, name]`` for one ORF table record.

    The chromosome is the part of column 1 before the first ``|``; column 3
    is used as the BED name (presumably the ORF identifier in this table --
    confirm against the input files).
    """
    chrom = fields[0].split("|")[0]
    return [chrom, fields[3], fields[4], fields[2]]


def distance_row(fields, haplo):
    """Return ``[orf_name, haplo, distance]`` from one ``closest.bed`` record.

    Both BED inputs have four columns, so column 4 is the ORF name and
    column 9 is the value appended by ``bedtools closest -d``.
    """
    return [fields[3], haplo, fields[8]]


def convert_table(src_path, dst_path, make_row):
    """Write ``make_row(fields)`` as a tab-separated line for each record of *src_path*."""
    with open(dst_path, "w") as dst, open(src_path) as src:
        for fields in iter_records(src):
            dst.write("\t".join(make_row(fields)) + "\n")


def sort_bed(bed_path):
    """Sort *bed_path* by chromosome then numeric start; return the sorted file's path.

    Raises subprocess.CalledProcessError if ``sort`` fails, instead of
    silently leaving an empty or stale file behind.
    """
    sorted_path = bed_path.replace(".bed", "_sorted.bed")
    with open(sorted_path, "w") as out:
        subprocess.run(
            ["sort", "-k1,1", "-k2,2n", bed_path], stdout=out, check=True
        )
    return sorted_path


def process_haplotype(haplo):
    """Compute ORF-to-closest-gene distances for one haplotype.

    Intermediate files (gene.bed, orf.bed, their sorted versions and
    closest.bed) are written to the current directory and overwritten on
    every call; the result goes to ../../19_proximity/Dist_orf_<haplo>.txt.
    """
    dir_orf = "../../03synt_intergenic_orf/03_ORF_cons_spar/03per_haplo_stat1_ps/table_orf_" + haplo + "_stat1.gff"
    dir_genes = "../../01annot_gene/04gff_annotcons/all_annotations/" + haplo + "_IDBA_aug.gff"

    # Genes: make a BED file, then sort it.
    dir_genes_bed = "gene.bed"
    convert_table(dir_genes, dir_genes_bed, gene_bed_row)
    genes_sorted = sort_bed(dir_genes_bed)

    # ORFs: make a BED file, then sort it.
    dir_orf_bed = "orf.bed"
    convert_table(dir_orf, dir_orf_bed, orf_bed_row)
    orf_sorted = sort_bed(dir_orf_bed)

    # Closest gene for every ORF; -d appends the distance as a last column.
    # check=True aborts on failure so a stale closest.bed from a previous
    # haplotype is never parsed as if it belonged to this one.
    with open("closest.bed", "w") as out:
        subprocess.run(
            ["bedtools", "closest", "-d", "-a", orf_sorted, "-b", genes_sorted],
            stdout=out,
            check=True,
        )

    # Parse output: keep ORF name, haplotype and distance.
    dir_out = "../../19_proximity/Dist_orf_" + haplo + ".txt"
    with open(dir_out, "w") as out, open("closest.bed") as filedist:
        for fields in iter_records(filedist):
            out.write("\t".join(distance_row(fields, haplo)) + "\n")


def main():
    """Run the distance computation for every haplotype in ``chaplo``."""
    for haplo in chaplo:
        process_haplotype(haplo)


if __name__ == "__main__":
    main()
