import sys, time
def coverage_gene(inp_coverage):
    """Parse a tab-separated per-gene coverage table.

    Lines that start with "#" and blank (whitespace-only) lines are skipped.
    For every remaining line:

    * column 1 is used as the gene key,
    * columns 4 and 5 are joined as "<col4>_<col5>" (the script's output
      header labels these two values CDS_start and CDS_end),
    * column 7 is evaluated as a Python expression to obtain the coverage
      values (presumably a list/tuple of per-position counts -- confirm
      against the program that writes these files).

    If a gene key occurs more than once, the last occurrence wins.

    Args:
        inp_coverage: iterable of text lines, e.g. an open file object.

    Returns:
        A ``(coverage_read, cds_po)`` tuple of two dicts keyed by gene:
        the evaluated coverage and the "<col4>_<col5>" position string.

    Raises:
        IndexError: if a data line has fewer than seven tab-separated columns.
    """
    coverage_read = {}
    cds_po = {}
    for line in inp_coverage:
        # Skip comments and empty lines; an empty line would otherwise fail
        # with IndexError when column 7 is accessed.
        if line.startswith("#") or not line.strip():
            continue
        # Strip both LF and CR so files with Windows line endings do not
        # leave a stray "\r" in the last column.
        fields = line.rstrip("\r\n").split("\t")
        gene = fields[0]
        # SECURITY NOTE(review): eval() executes arbitrary Python found in
        # the input file. Only run this on trusted coverage files; if column 7
        # is always a plain literal, ast.literal_eval would be a safer choice.
        coverage_read[gene] = eval(fields[6])
        cds_po[gene] = fields[3] + "_" + fields[4]
    return coverage_read, cds_po

def TIS_identification(ge, LTM_read, CHX_read, thre):
    """Return 1-based positions of candidate TIS peaks for one gene.

    A position ``p`` (0-based) is reported as ``p + 1`` when all of the
    following hold:

    * ``LTM_read[p] >= 10``;
    * ``LTM_read[p]`` equals the maximum of the LTM values in the window
      ``[p - 15, p + 15]`` (clipped at the start of the sequence);
    * ``10 * LTM_read[p] / sum(LTM_read) - 10 * CHX'[p] / sum(CHX_read)``
      is at least ``thre``, where ``CHX'[p]`` is ``CHX_read[p]`` with a value
      of 0 replaced by a pseudocount of 1 (the sum is taken over the
      unmodified values), and ``CHX'[p] >= 1``.

    Only positions ``0 .. max(15, len(LTM_read) - 15) - 1`` are examined, so
    the final 15 positions of a sequence are never reported. Nothing is
    reported unless ``LTM_read`` has more than 28 values and both sums are
    positive.

    LTM/CHX presumably refer to the two ribosome-profiling treatments whose
    coverage files the script reads -- confirm with the pipeline docs.

    Args:
        ge: gene identifier; accepted for interface compatibility, not used.
        LTM_read: iterable of numeric per-position LTM coverage.
        CHX_read: iterable of numeric per-position CHX coverage; expected to
            be at least as long as the examined part of ``LTM_read``.
        thre: minimum normalised LTM-minus-CHX difference for a peak.

    Returns:
        List of 1-based peak positions in increasing order (possibly empty).
    """
    flank = 15        # half-width of the local-maximum window
    min_ltm = 10      # minimum LTM value for a position to be considered

    # Materialise the inputs: this accepts any iterable (e.g. a Python 3
    # ``map`` object) and guarantees the caller's sequences are not modified.
    LTM_read = list(LTM_read)
    CHX_read = list(CHX_read)

    LTM_sum = sum(LTM_read)
    CHX_sum = sum(CHX_read)
    TIS_po = []
    if not (len(LTM_read) > 28 and LTM_sum > 0 and CHX_sum > 0):
        return TIS_po

    # One pass replaces the former "first 15 positions" / "remaining
    # positions" loops. The max() keeps positions 0..14 covered even for the
    # shortest accepted sequences.
    last = max(flank, len(LTM_read) - flank)
    for p_i in range(0, last):
        ltm = LTM_read[p_i]
        chx = CHX_read[p_i]
        if chx == 0:
            chx = 1  # pseudocount, applied to a local copy only

        if not (ltm >= min_ltm and chx >= 1):
            continue

        window = LTM_read[max(0, p_i - flank):p_i + flank + 1]
        if ltm == max(window):
            # 10.0 forces true division even for integer input on Python 2.
            RLTM = 10.0 * ltm / LTM_sum
            RCHX = 10.0 * chx / CHX_sum
            # The caller's threshold is applied to every position; the first
            # 15 positions previously used a hard-coded 0.05 instead.
            if RLTM - RCHX >= thre:
                TIS_po.append(p_i + 1)
    return TIS_po

# Command-line driver.
#
# Arguments (read from sys.argv):
#   1: peak threshold (float) passed to TIS_identification
#   2: LTM coverage table (parsed by coverage_gene)
#   3: CHX coverage table (parsed by coverage_gene)
#   4: optional tab-separated gene list; only genes named in its first
#      column are processed. Without it every gene in the LTM table is used.
#
# Output is written to "<argv[2]>.TIS.CHX_pseudocount.<argv[1]>".
Without_repre_list = False
list_repre_protein_genes = []
if len(sys.argv) > 4:
    with open(sys.argv[4], 'r') as inp_repre:
        for l_3 in inp_repre:
            # Skip comment lines and empty lines in the gene list.
            if not l_3.startswith("#") and l_3 != "\n":
                list_repre_protein_genes.append(l_3.strip('\n').strip('\r').split("\t")[0])
else:
    Without_repre_list = True
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Without gene list given")

# Set copy of the gene list so the per-gene membership test below is O(1).
repre_gene_set = set(list_repre_protein_genes)

with open(sys.argv[2], "r") as inp_coverage:
    LTM_gene, cds_posi = coverage_gene(inp_coverage)

with open(sys.argv[3], "r") as inp_coverage:
    # cds_po (positions from the CHX table) is not used below; the positions
    # written to the output come from the LTM table.
    CHX_gene, cds_po = coverage_gene(inp_coverage)

criteria_peak = float(sys.argv[1])

with open(sys.argv[2] + ".TIS.CHX_pseudocount." + sys.argv[1], "w") as output:
    # Record the invocation and the column header as comment lines.
    output.write("#python %s\n" % (" ".join(sys.argv)))
    output.write("#%s\t%s\t%s\t%s\n" % ("Gene", "CDS_start", "CDS_end", "TIS sites"))
    for ge in LTM_gene:
        if not (Without_repre_list or ge in repre_gene_set):
            continue
        # Real lists (not map objects) so len() and indexing work on
        # Python 3 as well as Python 2.
        LTM_read = [float(v) for v in LTM_gene[ge]]
        # NOTE: raises KeyError if the gene is missing from the CHX table.
        CHX_read = [float(v) for v in CHX_gene[ge]]
        TIS_potential = TIS_identification(ge, LTM_read, CHX_read, criteria_peak)
        cds_fields = cds_posi[ge].split("_")
        output.write("%s\t%s\t%s\t" % (ge, cds_fields[0], cds_fields[1]))
        if TIS_potential:
            # Written as the list's repr, e.g. "[21, 135]".
            output.write("%s\n" % (TIS_potential,))
            print("%s %s" % (ge, len(TIS_potential)))
        else:
            output.write("NA\n")
