
import sys, time
from ast import literal_eval

def get_TIS(inp_coverage):
    """Parse a tab-separated TIS table into per-TIS records.

    Lines starting with "#" and blank lines are skipped.  On every other
    line, columns 4-7 (indices 3-6) must each hold a Python list literal,
    optionally wrapped in double quotes, giving the TIS entries found in
    the 5'UTR, annotated-TIS, downstream and 3'UTR categories
    respectively.

    Args:
        inp_coverage: iterable of text lines (e.g. an open file).

    Returns:
        A ``(tis_i, ge_list)`` tuple where

        * ``tis_i`` maps ``"<column 1>_<entry>"`` to the list
          ``[column 2, column 3, label, count]``; ``label`` is one of
          "5UTR", "aTIS", "dTIS", "3UTR" and ``count`` is the total number
          of TIS entries on that line, as a string.
        * ``ge_list`` holds column 1 of every parsed line, in file order.

    A TIS id seen more than once keeps its first record; the repeat is
    reported on stdout.
    """
    # (column index, label stored in the record, message printed on repeats).
    # The messages are kept exactly as the script has always emitted them.
    tis_columns = (
        (3, "5UTR", "ERROR: duplictes of TIS:5UTR"),
        (4, "aTIS", "ERROR: duplictes of TIS"),
        (5, "dTIS", "ERROR: duplictes of TIS"),
        (6, "3UTR", "ERROR: duplictes of TIS"),
    )

    tis_i = {}
    ge_list = []
    for l1 in inp_coverage:
        if l1.startswith("#"):
            continue
        stripped = l1.strip()
        if not stripped:
            # A blank line carries no columns; skip it instead of failing
            # with an IndexError below.
            continue
        data = stripped.split("\t")

        # Parse all four columns up front: the per-line total is stored in
        # every record, so it must be known before any record is created.
        parsed = [
            (literal_eval(data[col].strip('"')), label, dup_msg)
            for col, label, dup_msg in tis_columns
        ]
        ge_list.append(data[0])
        num_ge_tis = str(sum(len(entries) for entries, _, _ in parsed))

        for entries, label, dup_msg in parsed:
            for i_t in entries:
                tis_id = "%s_%s" % (data[0], i_t)
                # Test the dict directly: under Python 2 ``.keys()`` builds
                # a list, which made every check linear in the dict size.
                if tis_id in tis_i:
                    # Single-argument form prints the same text on
                    # Python 2 and Python 3.
                    print("%s %s" % (dup_msg, tis_id))
                else:
                    tis_i[tis_id] = [data[1], data[2], label, num_ge_tis]
    return tis_i, ge_list

def get_TIS_read(inp_coverage, all_TIS_re, all_TIS_po, gene_inte):
    """Append read counts from one coverage file to every TIS record.

    Args:
        inp_coverage: iterable of tab-separated text lines.  Lines starting
            with "#" are skipped.  Column 1 is the ID and column 7
            (index 6) is a Python list literal of per-position values.
        all_TIS_re: dict mapping TIS id to a list of counts.  It is
            updated in place, so repeated calls accumulate three more
            values per TIS for each coverage file.
        all_TIS_po: dict mapping ``"<ID>_<position>"`` to a sequence whose
            first two items are the start and end used for the CDS sum.
        gene_inte: collection of IDs whose coverage should be loaded.

    Returns:
        ``all_TIS_re``, with ``(TIS_read, trans_read, cds_read)`` appended
        to the list of every key in ``all_TIS_po``.

    Raises:
        KeyError: if a TIS refers to an ID that has no coverage line.
    """
    # Build the lookup set once; testing a list per input line is linear.
    wanted = set(gene_inte)

    coverage_read = {}
    for line in inp_coverage:
        if line.startswith("#"):
            continue
        i = line.strip("\n").split("\t")
        if i[0] in wanted:
            # A real list (not a lazy ``map``) so it can be sliced and
            # summed repeatedly on both Python 2 and Python 3.
            coverage_read[i[0]] = [float(v) for v in literal_eval(i[6])]

    for e_TIS in all_TIS_po:
        # Split on the LAST underscore only: the ID part may itself contain
        # underscores (e.g. "NM_001234_57"), which a plain split() breaks.
        gene_id, pos_text = e_TIS.rsplit("_", 1)
        ge_cove = coverage_read[gene_id]
        tis_pp = int(pos_text)

        trans_read = sum(ge_cove)

        # Start/end are used as 1-based inclusive positions by this slice.
        cds_s = int(all_TIS_po[e_TIS][0])
        cds_t = int(all_TIS_po[e_TIS][1])
        cds_read = sum(ge_cove[cds_s - 1:cds_t])

        # Window runs from one position before the TIS to three after it,
        # clamped at the start of the list so a negative index never wraps
        # around to the end.
        window_start = max(tis_pp - 2, 0)
        TIS_read = sum(ge_cove[window_start:tis_pp + 3])

        all_TIS_re.setdefault(e_TIS, []).extend(
            (TIS_read, trans_read, cds_read))

    return all_TIS_re



# Driver: argv[1] is the TIS table, argv[2:] are coverage files.  The result
# is written next to the TIS table as "<argv[1]>.readcount".
if len(sys.argv) < 2:
    # Fail with a usage message instead of a bare IndexError on argv[1].
    sys.exit("usage: %s <TIS_table> [<coverage_file> ...]" % sys.argv[0])

with open(sys.argv[1], "r") as inp_coverage:
    # Single-argument print gives the same text on Python 2 and Python 3.
    print("%s Loading LTM-TIS..." % time.strftime("%H:%M:%S"))
    all_TIS, gene_in = get_TIS(inp_coverage)


file_in = []  # not used in this section; kept in case other code reads it
file_list = []
all_TIS_read = {}
for coverage_path in sys.argv[2:]:
    with open(coverage_path, "r") as inp_coverage:
        # Column prefix: the file's base name up to the first ".G".
        file_name = coverage_path.split("/")[-1].split(".G")[0]
        file_list.extend((file_name+"__readcount", file_name+"_transcript_readcount", file_name+"_CDS_readcount"))
        all_TIS_read = get_TIS_read(inp_coverage, all_TIS_read, all_TIS, gene_in)

with open(sys.argv[1]+".readcount", "w") as output:
    output.write("#python %s\n" %(" ".join(sys.argv)))
    output.write("#TIS_ID\tCDS_start\tCDS_eds\tTIS_location\tTIS_number_onGene\t%s\n"%("\t".join(file_list)))
    for tis_ii in all_TIS:
        # With no coverage files there are no counts for any TIS; write the
        # TIS columns alone rather than failing with a KeyError.
        counts = [str(value) for value in all_TIS_read.get(tis_ii, [])]
        fields = [tis_ii] + list(all_TIS[tis_ii]) + counts
        output.write("\t".join(fields) + "\n")
