#! /usr/bin/env python3.5

import os
import subprocess

class Seq:
    """Minimal record pairing an identifier with its sequence.

    Attributes:
        id: the value passed as ``_id``.
        seq: the value passed as ``_seq``.
    """

    def __init__(self, _id, _seq):
        self.id, self.seq = _id, _seq

def parseFasta(lines):
    """Parse FASTA-formatted lines into a dict of ``Seq`` records.

    A line starting with '>' opens a new record whose id is the rest of
    that line (whitespace-stripped).  All following non-header lines are
    upper-cased and concatenated to form the record's sequence.

    Blank (or whitespace-only) lines are skipped.  Sequence lines that
    appear before the first header, or under a header with an empty id,
    are discarded.  If the same id occurs twice, the later record wins.

    Args:
        lines: iterable of text lines (e.g. a file object or a list).

    Returns:
        dict mapping record id -> Seq(id, sequence).
    """
    seqsMap = {}
    currentId = ''
    chunks = []  # pieces of the current sequence, joined once per record
    for rawLine in lines:
        line = rawLine.strip()
        if not line:
            # Blank lines (commonly at end of file) carry no data; indexing
            # line[0] on them would raise IndexError.
            continue
        if line.startswith('>'):
            if currentId:
                seqsMap[currentId] = Seq(currentId, ''.join(chunks))
            currentId = line[1:].strip()
            chunks = []
        else:
            chunks.append(line.upper())
    # Flush the final record, which has no following header to trigger it.
    if currentId:
        seqsMap[currentId] = Seq(currentId, ''.join(chunks))
    return seqsMap

def readFasta(fname):
    """Read the FASTA file at ``fname`` and return its parsed records.

    Args:
        fname: path of the FASTA file, opened in text mode.

    Returns:
        The dict produced by ``parseFasta`` for the file's lines.
    """
    # The context manager guarantees the handle is closed even if parsing
    # raises part-way through.
    with open(fname, 'rt') as ifh:
        return parseFasta(ifh.readlines())

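# Example phenotype file, as read by readPhenFile below. Columns are
# whitespace-separated: the first column is the sequence id and each
# remaining column is one phenotype (1 = case, 0 = control).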
# chr_start-stop  K562_H3k4me1    K562_H3k27ac    K562_MAFK       motif
# chr10_59575924-59576130 0       0       0       0
# chr4_90025918-90026134  1       0       0       0
# chr11_101627272-101627424       0       0       0       0
# chr12_94891658-94891927 0       0       0       0
# chr4_90029626-90029704  1       0       0       0
# chr11_21832616-21832829 0       0       0       0
# chr11_21836087-21836298 0       0       0       0
# chr2_53717712-53717918  0       0       0       0
# chr4_28012466-28012685  0       0       0       0
# chr11_101626917-101627207       0       0       0       0
# chr13_88991532-88991805 0       0       0       0

def readPhenFile(fname):
    """Read a phenotype table and group sequence ids into cases/controls.

    The first line is a whitespace-separated header; its first field labels
    the id column and every further field names a phenotype.  Each later
    line holds a sequence id followed by one flag per phenotype: '1' adds
    the id to that phenotype's cases, '0' adds it to its controls, and any
    other value is ignored.

    Args:
        fname: path of the phenotype file, opened in text mode.

    Returns:
        dict mapping phenotype name ->
        ``{'cases': set_of_ids, 'controls': set_of_ids}``.

    Raises:
        IndexError: if a row has a '0'/'1' flag in a column beyond the
            header's width.
    """
    phens = {}
    # ``with`` closes the file even when a malformed row raises.
    with open(fname, 'rt') as ifh:
        header = ifh.readline().strip().split()
        for pname in header[1:]:
            phens[pname] = {'cases': set(), 'controls': set()}
        for line in ifh:
            fields = line.split()
            # Lines with no flag columns (blank lines, or a bare
            # 'consensus' marker line) contribute nothing.
            if len(fields) < 2:
                continue
            seqId = fields[0]
            for col, flag in enumerate(fields[1:], 1):
                if flag == '1':
                    phens[header[col]]['cases'].add(seqId)
                elif flag == '0':
                    phens[header[col]]['controls'].add(seqId)
    return phens

def _getConsensusCLI(seqsList):
    """Build a consensus sequence by shelling out to ``mafft`` and ``cons``.

    The sequences are written to a FASTA file, aligned with ``mafft``, and
    the alignment is passed to ``cons`` (presumably the EMBOSS tool -- confirm
    against the deployment environment).  The first record of the ``cons``
    output is returned.

    All intermediate files live in a private temporary directory, so
    concurrent runs cannot clobber each other, nothing in the caller's
    working directory is overwritten, and the files are removed even when a
    step fails.

    Args:
        seqsList: iterable of objects exposing ``id`` and ``seq`` attributes.

    Returns:
        The consensus sequence string, as parsed by ``parseFasta``.

    Raises:
        subprocess.CalledProcessError: if ``mafft`` or ``cons`` exits with a
            non-zero status.
        IndexError: if the ``cons`` output contains no records.
    """
    import tempfile  # function-scope import keeps this block self-contained

    with tempfile.TemporaryDirectory() as workDir:
        with open(os.path.join(workDir, 'tmp.fa'), 'wt') as ofh:
            for seq in seqsList:
                ofh.write('>%s\n%s\n' % (seq.id, seq.seq))

        # Both tools run with cwd=workDir so their command-line arguments
        # stay the same short relative names as before.
        with open(os.path.join(workDir, 'tmp.aln'), 'wb') as tmpAln:
            subprocess.run(['mafft', 'tmp.fa'], cwd=workDir,
                           stdout=tmpAln, stderr=subprocess.DEVNULL,
                           check=True)

        subprocess.run(['cons', '-sequence', 'tmp.aln',
                        '-outseq', 'tmp.cons.fa'], cwd=workDir,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                       check=True)

        with open(os.path.join(workDir, 'tmp.cons.fa'), 'rt') as ifh:
            records = parseFasta(ifh.readlines())

    return list(records.values())[0].seq

def getConsensus(ids, seqsMap):
    """Return the consensus sequence for the records named by ``ids``.

    Args:
        ids: sized collection of keys into ``seqsMap``.
        seqsMap: mapping from id to an object with a ``seq`` attribute.

    Returns:
        '-' when ``ids`` is empty; the single record's own sequence when
        there is exactly one id; otherwise the result of
        ``_getConsensusCLI`` applied to the selected records.
    """
    count = len(ids)
    if count == 0:
        return '-'
    if count == 1:
        # A lone sequence is its own consensus; no alignment needed.
        onlyId = next(iter(ids))
        return seqsMap[onlyId].seq
    return _getConsensusCLI([seqsMap[key] for key in ids])

def main(fastaFile, phenFile):
    """Write case and control consensus sequences for every phenotype.

    Reads all sequences from ``fastaFile`` and the phenotype table from
    ``phenFile``, then writes ``consensus.fa`` in the current directory.
    For each phenotype it emits two FASTA records, named
    ``<phen>.case.consensus`` and ``<phen>.control.consensus``.

    Args:
        fastaFile: path of the FASTA file holding all sequences.
        phenFile: path of the phenotype table.
    """
    seqsMap = readFasta(fastaFile)
    phens = readPhenFile(phenFile)
    # ``with`` ensures the output is flushed and closed even if building a
    # consensus raises part-way through.
    with open('consensus.fa', 'wt') as ofh:
        for phen, groups in phens.items():
            ctrl = getConsensus(groups['controls'], seqsMap)
            case = getConsensus(groups['cases'], seqsMap)
            ofh.write('>%s.case.consensus\n%s\n>%s.control.consensus\n%s\n'
                      % (phen, case, phen, ctrl))

if __name__ == '__main__':
    # Script entry point: two positional paths are read from the command
    # line and handed to main().
    import argparse

    cli = argparse.ArgumentParser()
    for argName, argHelp in (
            ('fastaFile', 'All sequences file.'),
            ('phenFile', 'File containing phenotypes.')):
        cli.add_argument(argName, help=argHelp)
    opts = cli.parse_args()
    main(opts.fastaFile, opts.phenFile)

