import numpy

# Read the density files
# and extract the coverage of the first 50 nt into a new table

# Offsets into the per-position count list of each feature.
# begin_at: index from which the peak search starts.
# nt_phas: not referenced in the visible part of this script
#          (presumably a phasing window size -- TODO confirm).
begin_at, nt_phas = 45, 51

# Output table written by this script.
dir_bilan = "../../04plastid/03_density_analysis/table_density_all.txt"
# Folder holding the "<haplo>_table_density_per_<type>.txt" input files.
dir_density = "../../04plastid/02_metagene_results/"

# First read the conservation table and store the conservation group of
# each ORF in a dictionary (ORF name -> conservation group).
dir_cons="../tables_outrevised/02conservation/conservation_table_spar.txt"
dico_cons={}
# The file handle gets its own name so that the loop body never rebinds it
# (and no stray module-level group string is left behind after the loop).
with open(dir_cons) as cons_table:
    for line in cons_table:
        line = line.rstrip()
        if not line:
            # Tolerate blank lines (e.g. an empty last line).
            continue
        fields = line.split("\t")
        # Column 0 is used as the ORF name, column 4 as its conservation
        # group; a non-blank line with fewer than 5 columns still raises
        # IndexError so that a malformed table is not silently accepted.
        dico_cons[fields[0]] = fields[4]

# Max peak between start -5 and ORF end = 45:
# count 53:100 # check that the positions are correct

chaplo=["SA03", "SD01", "SD06","Y128"]

# Number of leading positions copied verbatim into the "counts" column.
n_meta = 101


def _three_highest(values, label):
    """Return the three largest entries of *values*, highest first.

    values: list of integer read counts.
    label:  text identifying the row, used only in the error message.

    Raises ValueError when fewer than three values are available, instead
    of failing with a bare IndexError.
    """
    ranked = sorted(values, reverse=True)  # sort once instead of three times
    if len(ranked) < 3:
        raise ValueError(
            "{}: need at least 3 positions to rank peaks, got {}".format(
                label, len(ranked)))
    return ranked[0], ranked[1], ranked[2]


def _conservation_group(name, type_feat, table, label):
    """Return the value written in the "cons" column for one feature.

    Genes get the fixed label "0_gene"; ORFs get their entry from *table*
    (KeyError if the ORF is absent from the table).  Any other feature
    type raises ValueError: silently reusing the group of a previous row
    would write wrong data.
    """
    if type_feat == "gene":
        return "0_gene"
    if type_feat == "orf":
        return table[name]
    raise ValueError(
        "{}: unexpected feature type {!r}".format(label, type_feat))


with open(dir_bilan, "w") as out:
    header = "\t".join(["name","haplo", "type", "cons", "max1", "max2", "max3",
                        "counts"])
    out.write(header + "\n")

    for haplo in chaplo:
        for typei in ["gene", "orf"]:
            file_gene = haplo + "_table_density_per_" + typei + ".txt"

            with open(dir_density + file_gene) as density:
                for line in density:
                    line = line.rstrip()
                    if not line:
                        # Tolerate blank lines (e.g. an empty last line).
                        continue
                    array = line.split("\t")
                    name = array[0]
                    type_feat = array[1]

                    # Keep every row of the "gene" files; from the "orf"
                    # files keep only features listed in the conservation
                    # table.  Filtering first avoids parsing rows that are
                    # dropped anyway.
                    testi = dico_cons.get(name, "not")
                    if testi == "not" and typei != "gene":
                        continue

                    label = file_gene + ":" + name
                    group = _conservation_group(name, type_feat, dico_cons,
                                                label)

                    # Column 2 holds the reads per position, joined by "-"
                    # (a trailing "-" is dropped before splitting).
                    count = [int(i) for i in array[2].rstrip("-").split("-")]

                    # Values of the 3 highest peaks from index begin_at on.
                    m1, m2, m3 = _three_highest(count[begin_at:], label)

                    # Leading positions kept for the metagene analysis and
                    # phasing, re-joined with "-".
                    meta = "-".join(str(i) for i in count[:n_meta])

                    newline = "\t".join([name, haplo, type_feat, group,
                                         str(m1), str(m2), str(m3), meta])
                    out.write(newline + "\n")

