#!/usr/bin/env python3
# Name: Bryan Thornlow
# Date: 4/10/2018
# findSegDups.py

import sys
import os
import time
import random
import numpy
import gzip
import math

##########################
##### MAIN FUNCTIONS #####
##########################

def extendBed(abb, extend1, extend2):
    """
    Write a widened copy of <ABB>/tRNAHiConf.bed.

    Each BED interval is extended in a strand-aware way: on the '+' strand
    extend1 is subtracted from the start and extend2 is added to the end;
    on any other strand the two amounts are swapped.  Intervals that would
    start below 0 or end past the chromosome length are printed to stdout
    and left out of the output file.

    The result is written to <ABB>/tRNAHiConf_<extend1>_<extend2>.bed,
    where <ABB> is abb.upper().

    Args:
        abb: species abbreviation; used as given in the genome directory
            path and upper-cased for the local BED directory.
        extend1: number of bases added before the start on '+' features
            (after the end otherwise); anything int() accepts.
        extend2: number of bases added after the end on '+' features
            (before the start otherwise); anything int() accepts.

    Raises:
        KeyError: if a BED chromosome is missing from the lengths file.
    """
    lengthsPath = ('/public/groups/corbettlab/tRNA/classifier/JOEL/'
                   + abb + '/' + abb + '_hal_chrom_lengths.txt')
    chromToLen = {}
    with open(lengthsPath) as lengthsFile:
        for line in lengthsFile:
            splitLine = (line.strip()).split('\t')
            # The length is read from the third column, so require three
            # fields; shorter lines (e.g. blank ones) are skipped.
            if len(splitLine) > 2:
                chromToLen[splitLine[0]] = int(splitLine[2])

    # Loop-invariant conversions, done once.
    upstream = int(extend1)
    downstream = int(extend2)

    myBed = abb.upper()+'/tRNAHiConf.bed'
    keptLines = []
    with open(myBed) as bedFile:
        for line in bedFile:
            splitLine = (line.strip()).split('\t')
            if splitLine[5] == '+':
                splitLine[1] = int(splitLine[1]) - upstream
                splitLine[2] = int(splitLine[2]) + downstream
            else:
                splitLine[1] = int(splitLine[1]) - downstream
                splitLine[2] = int(splitLine[2]) + upstream
            chromLen = chromToLen[splitLine[0]]
            if splitLine[1] < 0 or splitLine[2] > chromLen:
                # Extended window runs off the chromosome: report and drop.
                print(abb, splitLine, chromLen)
            else:
                keptLines.append(joiner(splitLine)+'\n')

    outPath = myBed.split('.bed')[0]+'_'+str(extend1)+'_'+str(extend2)+'.bed'
    with open(outPath, 'w') as outFile:
        outFile.write(''.join(keptLines))

def _collecttRNASeqs(seqChunks, mytRNAs, tRNAToSeq):
    """
    Cut every tRNA window out of one chromosome and store it by name.

    seqChunks holds the chromosome's sequence lines in file order; nothing
    is stored when they join to an empty string.  Windows on the '+' strand
    are stored as-is, all others are reverse-complemented with revComp.
    """
    chromSeq = ''.join(seqChunks)
    if not chromSeq:
        return
    for myStart, myEnd, myName, myStrand in mytRNAs:
        window = chromSeq[myStart:myEnd]
        if myStrand != '+':
            window = revComp(window)
        tRNAToSeq[str(myName)] = window


def tRNAFasta(abb, extend1, extend2):
    """
    Extract the sequence of every extended tRNA interval and flag tRNAs
    whose extracted sequences are identical.

    Reads <ABB>/tRNAHiConf_<extend1>_<extend2>.bed (<ABB> is abb.upper())
    and the genome FASTA under the corbettlab JOEL directory for abb, then
    writes two files:

      * <ABB>/TEST_<extend1>_<extend2>.fasta -- one "name<TAB>sequence"
        line per tRNA, sorted by name (tab-delimited despite the extension).
      * <ABB>/segDups<extend1><extend2>.txt -- sorted names of all tRNAs
        that share their sequence with at least one other tRNA.

    Each duplicate pair is also printed to stdout, and a "Finished" line is
    written to stderr.

    Args:
        abb: species abbreviation used to locate the input files.
        extend1: first extension amount; only used to build file names.
        extend2: second extension amount; only used to build file names.
    """
    myOutFasta = abb.upper()+'/TEST_'+str(extend1)+'_'+str(extend2)+'.fasta'

    # Group the BED intervals by chromosome.
    chromTotRNACoords = {}
    with open(abb.upper()+'/tRNAHiConf_'+str(extend1)+'_'+str(extend2)+'.bed') as bedFile:
        for line in bedFile:
            splitLine = (line.strip()).split('\t')
            myEntry = [int(splitLine[1]), int(splitLine[2]),
                       str(splitLine[3]), str(splitLine[5])]
            chromTotRNACoords.setdefault(splitLine[0], []).append(myEntry)

    # Stream the genome, keeping sequence only for chromosomes with tRNAs.
    tRNAToSeq = {}
    mytRNAs = []      # initialised so text before the first header is ignored
    seqChunks = []
    with open('/public/groups/corbettlab/tRNA/classifier/JOEL/'+abb+'/'+abb+'.fasta') as fastaFile:
        for line in fastaFile:
            stripLine = line.strip()
            if stripLine.startswith('>'):
                # A new record starts: flush the chromosome just finished.
                _collecttRNASeqs(seqChunks, mytRNAs, tRNAToSeq)
                seqChunks = []
                # The whole header after '>' is used as the chromosome name.
                mytRNAs = sorted(chromTotRNACoords.get(stripLine[1:], []),
                                 key=lambda x: x[1])
            elif mytRNAs:
                seqChunks.append(stripLine.upper())
    # Flush the last chromosome in the file.
    _collecttRNASeqs(seqChunks, mytRNAs, tRNAToSeq)

    # Walk the tRNAs in name order; the first tRNA seen with a sequence
    # becomes that sequence's representative for later matches.
    seqTotRNA = {}
    mySegDups = set()
    outLines = []
    for tRNA in sorted(tRNAToSeq):
        mySeq = tRNAToSeq[tRNA]
        outLines.append(tRNA+'\t'+mySeq+'\n')
        if mySeq not in seqTotRNA:
            seqTotRNA[mySeq] = tRNA
        else:
            print(abb, tRNA, seqTotRNA[mySeq], mySeq)
            mySegDups.add(tRNA)
            mySegDups.add(seqTotRNA[mySeq])
    sys.stderr.write("Finished "+abb+'\n')
    with open(myOutFasta, 'w') as outFile:
        outFile.write(''.join(outLines))

    with open(abb.upper()+'/segDups'+str(extend1)+str(extend2)+'.txt', 'w') as dupFile:
        dupFile.write(''.join(k+'\n' for k in sorted(mySegDups)))

def checkAlignment():
    """
    Print, to stderr, a count for every tRNA in each identical-sequence group.

    Two files in the working directory are read:

      * oneToOneMapAllSpeciesAugmentMacaque.txt -- tab-delimited rows; rows
        whose first field is 'Ananc2' are skipped.  Every non-'N/A' field of
        a row is mapped to (number of fields - number of 'N/A' fields - 1).
        NOTE(review): presumably this is the number of other species with an
        ortholog -- confirm against the file's layout.
      * tRNASegDups10BothSidesNoScaffolds.txt -- tab-delimited rows of
        abbreviation, tRNA 1, tRNA 2, sequence.  tRNAs are grouped by
        sequence under the name "<abbreviation>-<tRNA>".

    One stderr line is written per sequence, holding "name: count" entries
    separated by tabs.

    Raises:
        KeyError: if a grouped tRNA name never appeared in the map file.
    """
    tRNAToOrthCount = {}
    with open('oneToOneMapAllSpeciesAugmentMacaque.txt') as mapFile:
        for line in mapFile:
            splitLine = (line.strip()).split('\t')
            if splitLine[0] == 'Ananc2':
                continue
            myCount = len(splitLine) - splitLine.count('N/A') - 1
            for k in splitLine:
                if k != 'N/A':
                    tRNAToOrthCount[k] = myCount

    seqTotRNAs = {}
    with open('tRNASegDups10BothSidesNoScaffolds.txt') as dupFile:
        for line in dupFile:
            splitLine = (line.strip()).split('\t')
            abb = splitLine[0]
            mytRNA1 = abb+'-'+splitLine[1]
            mytRNA2 = abb+'-'+splitLine[2]
            # Lists (not sets) keep the first-seen order within each group.
            myGroup = seqTotRNAs.setdefault(splitLine[3], [])
            for tRNA in (mytRNA1, mytRNA2):
                if tRNA not in myGroup:
                    myGroup.append(tRNA)

    for myGroup in seqTotRNAs.values():
        for tRNA in myGroup:
            sys.stderr.write(str(tRNA)+': '+str(tRNAToOrthCount[tRNA])+'\t')
        sys.stderr.write('\n')
    

# Complement of every upper-case IUPAC nucleotide code.  Built once at import
# time instead of on every revComp call.
_COMPLEMENT = {
    'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N',
    'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
    'S': 'S', 'W': 'W',
    'B': 'V', 'V': 'B', 'D': 'H', 'H': 'D',
}


def revComp(seq):
    """
    Return the reverse complement of an upper-case DNA sequence.

    Besides A, C, G, T and N, the IUPAC ambiguity codes (R, Y, K, M, S, W,
    B, V, D, H) are complemented, so a window containing one of them no
    longer aborts the run.

    Args:
        seq: DNA string made of upper-case IUPAC nucleotide codes.

    Returns:
        The complemented sequence, read in the opposite direction.

    Raises:
        KeyError: if seq contains any other character (including
            lower-case letters).
    """
    return ''.join(_COMPLEMENT[base] for base in reversed(seq))


def joiner(entry):
    """Return the items of entry, each converted with str(), joined by tabs."""
    return '\t'.join(map(str, entry))

def main():
    """
    Run extendBed and then tRNAFasta, with extensions 80 and 40, for each
    species abbreviation in the hard-coded list below.
    """
    mySpeciesAbbrevs = [
        'Hsapi38', 'Ptrog5', 'Ggori5', 'Ppygm3', 'Mmula8',
        'Ananc2', 'Mmuri3', 'Jjacu1', 'Mochr1', 'Mmusc10',
        'Rnorv6', 'Hglab2', 'Cporc3', 'Clani1', 'Odegu1',
        'Mmarm2', 'Ocuni2', 'Sscro11', 'Oorca1', 'Btaur8',
        'Chirc1', 'Ecaba2', 'Cfami3', 'Mputo1', 'Palec1',
        'Efusc1', 'Eeuro2', 'Casia1', 'Dnove3',
    ]

    for abb in mySpeciesAbbrevs:
        extendBed(abb, 80, 40)
        tRNAFasta(abb, 80, 40)
    # checkAlignment() is defined in this file but is not invoked here.


if __name__ == "__main__":
    # Script entry point: run main() when executed directly, then exit.
    main()
    raise SystemExit