# Script to merge gene and ORF density profiles: one merged table per feature
# type (gene, orf), then a single summary table covering all features.

import numpy

import os
# Merge the per-feature density files into one table per feature type.
#
# For each feature type ("gene", "orf"), every regular file found in
# density_per_<type>/ becomes one row of table_density_per_<type>.txt:
#     <name> TAB <type> TAB <v1>-<v2>-...-<vn>-
# <name> is the file name up to its first "."; each <v> is one line of the
# density file truncated at its first "." (so "12.0" is stored as "12").
# The trailing "-" after the last value is part of the format.
for type_feat in ["gene", "orf"]:
    dir_density = "../../13_TE_Cer/03_annotations/density_per_" + type_feat + "/"
    merge_file = "../../13_TE_Cer/03_annotations/table_density_per_" + type_feat + ".txt"

    # List the directory from Python rather than parsing the text output of a
    # shell `ls`: an empty directory then yields an empty list (instead of a
    # single "" entry that cannot be opened), sub-directories are skipped, and
    # unusual characters in file names cannot be misread.
    # Hidden files are ignored, as the shell pattern "*" does.
    # os.listdir() returns entries in arbitrary order, so sort them; rows are
    # written in plain string order of the file names.
    cfiles = sorted(
        os.path.join(dir_density, entry)
        for entry in os.listdir(dir_density)
        if not entry.startswith(".")
        and os.path.isfile(os.path.join(dir_density, entry))
    )

    with open(merge_file, "w") as out:
        for file_density in cfiles:
            with open(file_density) as GENE:
                # Keep only the part before the first "." of each line.
                values = [line.rstrip().split(".")[0] for line in GENE]

            # Every value is followed by "-", including the last one.
            count = "".join(value + "-" for value in values)

            gene_name = os.path.basename(file_density).split(".")[0]
            out.write("\t".join([gene_name, type_feat, count]) + "\n")

# Read the merged density tables (table_density_per_gene.txt and
# table_density_per_orf.txt) and write one summary table for all features.
#
# Each output row contains: the feature name, the haplotype label ("S288C"),
# the feature type read from the table, the table it came from ("cons"),
# the three highest counts found from index `begin_at` onwards, and the first
# 100 counts joined with "-" (for metagene analysis and phasing).

dir_density = "../../13_TE_Cer/03_annotations/"
dir_bilan = "../../13_TE_Cer/03_annotations/table_density_all.txt"

begin_at = 45  # 0-based index at which the peak search starts
nt_phas = 50   # not referenced by the code below; kept unchanged

# Peaks are searched from index 45 to the end of the profile.
# NOTE(review): the original note reads "start -5 ... = 45", which presumably
# means the profiles begin 50 nt upstream of the start codon -- TODO confirm;
# the original author also flagged this position as still to be checked.

with open(dir_bilan, "w") as out:
    header = "\t".join(["name", "haplo", "type", "cons",
                        "max1", "max2", "max3", "counts"])
    out.write(header + "\n")

    for typei in ["gene", "orf"]:
        file_gene = "table_density_per_" + typei + ".txt"

        with open(dir_density + file_gene) as density:
            for line in density:
                array = line.rstrip().split("\t")
                name = array[0]
                type_feat = array[1]
                cons = typei

                # Counts are stored as "v1-v2-...-vn-": drop the trailing "-"
                # before splitting, then convert every value to int.
                count = [int(i) for i in array[2].rstrip("-").split("-")]

                # Three highest values from begin_at onwards; sort only once.
                peaks = sorted(count[begin_at:], reverse=True)[:3]
                if len(peaks) < 3:
                    # Fail with the offending feature named instead of a bare
                    # "list index out of range".
                    raise ValueError(
                        "%s (%s): profile has %d values, at least %d are "
                        "needed to report three peaks from index %d"
                        % (name, file_gene, len(count), begin_at + 3, begin_at)
                    )
                m1, m2, m3 = peaks  # highest, second highest, third highest

                # Keep the first 100 positions of the profile.
                meta = "-".join(str(i) for i in count[0:100])

                newline = "\t".join([name, "S288C", type_feat, cons,
                                     str(m1), str(m2), str(m3), meta])
                out.write(newline + "\n")
