import os
import shutil
import sys

import editdistance as ld
import mappy as mm

# Command line: <script> <sample sheet> <read folder>
sampleSheet, readFolder = sys.argv[1], sys.argv[2]

def _closest_unambiguous_index(seq, candidates):
    """Return the candidate that uniquely best matches *seq*, or ''.

    Each candidate is compared with ``ld.eval`` (edit distance) against the
    window of ``seq`` that starts at every position of ``seq``; windows that
    run past the end of ``seq`` are simply shorter than the candidate. A
    candidate's score is the smallest distance over all of its windows.

    The best-scoring candidate is accepted only when its score is below 2
    and, if there is more than one candidate, the runner-up scores more than
    1 worse. In every other case, including an empty ``candidates``, the
    empty string is returned.

    Args:
        seq: Read fragment to search for an index.
        candidates: Iterable of candidate index sequences (iterating a dict
            yields its keys).

    Returns:
        The accepted candidate sequence, or '' when nothing qualifies.
    """
    scored = []
    for candidate in candidates:
        width = len(candidate)
        best = min(
            ld.eval(seq[start:start + width], candidate)
            for start in range(len(seq))
        )
        scored.append((candidate, best))
    if not scored:
        # Nothing to compare against: no match rather than an IndexError.
        return ''

    # Stable sort: candidates with equal scores keep their original order.
    scored.sort(key=lambda pair: pair[1])
    winner, winner_dist = scored[0]
    if winner_dist >= 2:
        return ''
    if len(scored) > 1 and scored[1][1] - winner_dist <= 1:
        # The runner-up is too close, so the assignment would be ambiguous.
        return ''
    return winner


def match_index(seq5, seq7, indexes):
    """Look up the library whose i5/i7 index pair matches a read's ends.

    Args:
        seq5: Fragment expected to contain the i5 index; must be 16 long.
        seq7: Fragment expected to contain the i7 index; must be 16 long.
        indexes: Nested mapping ``{i5 sequence: {i7 sequence: library name}}``.

    Returns:
        ``indexes[i5][i7]`` for the matched pair, or ``'Undetermined'`` when
        either fragment is not exactly 16 long, or when the i5 or the i7
        index cannot be assigned unambiguously (see
        ``_closest_unambiguous_index``). The i7 index is only searched among
        the i7 sequences registered under the matched i5 sequence.
    """
    if len(seq5) != 16 or len(seq7) != 16:
        return 'Undetermined'

    match5 = _closest_unambiguous_index(seq5, indexes)
    if not match5:
        return 'Undetermined'

    match7 = _closest_unambiguous_index(seq7, indexes[match5])
    if not match7:
        return 'Undetermined'

    return indexes[match5][match7]

# Start from an empty output directory: remove whatever a previous run left
# behind, then recreate it. This uses os/shutil instead of shell commands so
# that paths containing spaces or shell metacharacters are handled safely and
# failures raise instead of being silently ignored.
outDir = os.path.join(readFolder, 'demultiplexed')
if os.path.islink(outDir) or os.path.isfile(outDir):
    os.remove(outDir)
elif os.path.isdir(outDir):
    shutil.rmtree(outDir)
os.mkdir(outDir)

# Read counters: every read seen ('All'), reads that could not be assigned
# ('Undetermined'), plus one entry per library added while parsing the sheet.
countDict = {'All': 0, 'Undetermined': 0}

# indexDict[splint][i5 sequence][i7 sequence] -> library name
indexDict = {}
with open(sampleSheet) as sheet:
    for lineNumber, line in enumerate(sheet, 1):
        line = line.strip()
        if not line:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        a = line.split('\t')
        if len(a) < 4:
            raise ValueError(
                '%s line %d: expected 4 tab-separated columns '
                '(splint, i5 index, i7 index, library name), got %d'
                % (sampleSheet, lineNumber, len(a))
            )
        Splint, i5IndexSequence, i7IndexSequence, libraryName = a[:4]
        i7Dict = indexDict.setdefault(Splint, {}).setdefault(i5IndexSequence, {})
        i7Dict[i7IndexSequence] = libraryName
        # Create (or truncate) the library's output file so that it exists
        # even when no read ends up being assigned to it.
        with open(outDir + '/' + libraryName + '.fasta', 'w'):
            pass
        countDict[libraryName] = 0

# print(indexDict)
# Only sub-folders of readFolder whose name is a splint from the sample sheet
# are processed; each must hold the consensus read file named below.
for folder in os.listdir(readFolder):
    folderPath = os.path.join(readFolder, folder)
    if not os.path.isdir(folderPath) or folder not in indexDict:
        continue
    readFile = os.path.join(folderPath, 'R2C2_full_length_consensus_reads.fasta')
    for name, seq, q in mm.fastx_read(readFile):
        countDict['All'] += 1
        # if countDict['All']%100000==0:
            # print(countDict)

        # The indexes are searched in the first and last 16 bases of the read.
        index5 = seq[:16]
        index7 = seq[-16:]
        libraryName = match_index(index5, index7, indexDict[folder])
        with open(outDir + '/' + libraryName + '.fasta', 'a') as out:
            if not q:
                out.write('>%s\n%s\n' % (name, seq))
            else:
                # NOTE: records with qualities are written in FASTQ layout
                # even though the output file carries a .fasta extension.
                out.write('@%s\n%s\n+\n%s\n' % (name, seq, q))
        countDict[libraryName] += 1


# print(countDict)
