import numpy as np
import scipy.stats as stats
import sys

# Tests each gene for tissue-dependent isoform usage: isoform values are
# summed per tissue, converted to per-tissue percentages, and compared across
# tissues with a chi-square contingency test.  Genes whose p-value multiplied
# by the number of tested genes is <= ALPHA are written to the output file.
#
# Usage: python <script> <output.tsv>

QUANT_PATH = '/mnt/holocron1/nanopore_data/mouse_transcriptome/mando_all/Isoforms_geneNames.filtered.clean.quant'

# (tissue, first value column, one-past-last value column).  Indices are
# relative to the first value column, i.e. the column after the isoform
# identifier.  Every tissue spans 12 columns except salivaryGland, which
# spans 4.
TISSUE_COLUMNS = (
    ('brain', 0, 12),
    ('eye', 12, 24),
    ('heart', 24, 36),
    ('liver', 36, 48),
    ('lung', 48, 60),
    ('kidney', 60, 72),
    ('salivaryGland', 72, 76),
    ('smoothMuscle', 76, 88),
    ('spinalCord', 88, 100),
    ('spleen', 100, 112),
    ('stomach', 112, 124),
    ('testis', 124, 136),
)
TISSUES = tuple(entry[0] for entry in TISSUE_COLUMNS)
N_VALUE_COLUMNS = TISSUE_COLUMNS[-1][2]

# A tissue is only used for a gene when the summed value of all of the
# gene's isoforms in that tissue is strictly greater than this.
MIN_TISSUE_TOTAL = 50

# Threshold applied to p-value * number of tested genes.
ALPHA = 0.01


def gene_key(isoform_id):
    """Return the grouping key: the first five '_'-separated fields of the id."""
    return '_'.join(isoform_id.split('_')[:5])


def tissue_totals(values):
    """Sum the value columns of one isoform into one total per tissue.

    values: sequence of at least N_VALUE_COLUMNS numbers, laid out as
        described by TISSUE_COLUMNS.
    Returns a list of totals in TISSUES order.
    Raises ValueError when the row is too short (slicing would otherwise
    silently produce partial sums).
    """
    if len(values) < N_VALUE_COLUMNS:
        raise ValueError(
            'expected at least %d value columns, found %d'
            % (N_VALUE_COLUMNS, len(values))
        )
    # The builtin sum adds left to right, the same order as the explicit
    # values[i] + values[i + 1] + ... chain this replaces.
    return [sum(values[first:stop]) for _, first, stop in TISSUE_COLUMNS]


def load_isoform_totals(lines):
    """Group per-isoform tissue totals by gene key.

    lines: iterable of tab-separated data lines (header already consumed);
        the first field is the isoform identifier, the rest are values.
        Blank lines are skipped.
    Returns a dict mapping gene key -> list of per-isoform total lists.
    """
    genes = {}
    for line in lines:
        fields = line.strip().split('\t')
        if fields == ['']:
            continue
        values = np.array(fields[1:], dtype=float)
        genes.setdefault(gene_key(fields[0]), []).append(tissue_totals(values))
    return genes


def gene_p_value(isoforms):
    """Chi-square test of isoform percentages across tissues for one gene.

    isoforms: list of per-isoform total lists (TISSUES order).
    Returns (p_value, tissues_used), or None when the gene has fewer than two
    isoforms or fewer than two tissues exceed MIN_TISSUE_TOTAL.
    """
    if len(isoforms) < 2:
        return None

    rows = []
    used = set()
    for index, tissue in enumerate(TISSUES):
        counts = np.array([isoform[index] for isoform in isoforms])
        total = np.sum(counts)
        if total > MIN_TISSUE_TOTAL:
            # Each row holds the isoforms' share of the tissue total, as
            # percentages rounded to whole numbers.
            rows.append(np.around((counts / total) * 100))
            used.add(tissue)

    if len(rows) < 2:
        return None

    table = np.array(rows)
    # Drop isoform columns that are zero in every tested tissue; they would
    # give zero expected frequencies, which chi2_contingency rejects.
    table = table[:, (table != 0).any(axis=0)]
    return stats.chi2_contingency(table)[1], used


def report_significant(results, outfile):
    """Write significant genes to outfile and tally per-tissue counts.

    results: list of (p_value, gene_key, tissues_used) tuples.
    outfile: object with a write(str) method.
    A gene is written when p_value * len(results) <= ALPHA.  Each line holds
    fields 0, 2, 3 and 4 of the gene key around the p-value, tab-separated.
    Returns (tested_by_tissue, positive_by_tissue, positive_count).
    """
    tested = dict.fromkeys(TISSUES, 0)
    positives = dict.fromkeys(TISSUES, 0)
    n_tests = len(results)
    positive = 0

    for p_value, name, used in sorted(results, key=lambda r: (r[0], r[1])):
        parts = name.split('_')
        gene, chrom, start, end = parts[0], parts[2], parts[3], parts[4]

        for tissue in TISSUES:
            if tissue in used:
                tested[tissue] += 1

        if p_value * n_tests <= ALPHA:
            outfile.write('\t'.join([gene, str(p_value), chrom, start, end]) + '\n')
            positive += 1
            for tissue in TISSUES:
                if tissue in used:
                    positives[tissue] += 1

    return tested, positives, positive


def main():
    """Run the analysis on QUANT_PATH and write results to sys.argv[1]."""
    # Check the argument up front instead of after the whole computation.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <outfile>' % sys.argv[0])

    with open(QUANT_PATH, 'r') as quantified:
        quantified.readline()  # skip the header line
        genes = load_isoform_totals(quantified)

    results = []
    for name, isoforms in genes.items():
        outcome = gene_p_value(isoforms)
        if outcome is not None:
            results.append((outcome[0], name, outcome[1]))

    with open(sys.argv[1], 'w') as outfile:
        tested, positives, positive = report_significant(results, outfile)

    print(len(results))
    print(tested)
    print(positives)
    print(positive)


if __name__ == '__main__':
    main()
