#! /usr/bin/env python3.5
# Edited by Alan 05/03/2019: the consensus filter now skips any line that contains the string 'consensus'.

import os
import subprocess

# Alignment-mode selectors, passed around as the `type` argument of
# alignFasta() and main(). In the code visible here they are only ever
# assigned and compared for equality, so the numeric values just need to
# be distinct from each other.
__LOCAL = 235
__GLOBAL = -532

def readFasta(fname) :
    """Read a FASTA file into a dict mapping record ID to sequence.

    The ID is the whole header line after '>' with surrounding whitespace
    removed. Sequence lines are upper-cased and concatenated. Blank lines
    are ignored. Records without any sequence are not included in the
    result. Sequence lines appearing before the first header are stored
    under the empty-string ID.

    Parameters:
        fname: path of the FASTA file to read.

    Returns:
        dict of {ID: sequence}; a later record with the same ID replaces
        an earlier one.
    """
    seqs = {}
    ID = ''
    # Collect the pieces of the current record and join once per record
    # instead of growing a string line by line.
    chunks = []
    with open(fname, 'rt') as fh :
        for line in fh :
            line = line.strip()
            if not line :
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if line.startswith('>') :
                if chunks :
                    seqs[ID] = ''.join(chunks)
                ID = line[1:].strip()
                chunks = []
            else :
                chunks.append(line.upper())
    # Flush the last record, which has no following header to trigger it.
    if chunks :
        seqs[ID] = ''.join(chunks)
    return seqs

def readPhenotypes(phenFile) :
    """Read a whitespace-delimited phenotype table.

    The first line is a header; its first field labels the ID column and
    every further field names a phenotype. In each following row the
    first field is a sample ID and field i holds that sample's value for
    phenotype i: '1' adds the ID to the phenotype's cases, '0' adds it to
    its controls, and any other value is ignored. Rows containing the
    string 'consensus' anywhere, and blank rows, are skipped.

    Parameters:
        phenFile: path of the phenotype table.

    Returns:
        dict of {phenotype: {'cases': [IDs], 'controls': [IDs]}}.

    Raises:
        IndexError: if a data row has fewer fields than the header.
    """
    phens = {}
    with open(phenFile, 'rt') as ifh :
        header = ifh.readline().strip().split()
        names = header[1:]
        for name in names :
            phens[name] = {
                'cases': [],
                'controls': []
            }
        # Data rows start on line 2; the number is only used in the error text.
        for lineNo, line in enumerate(ifh, 2) :
            if 'consensus' in line :
                continue
            fields = line.strip().split()
            if not fields :
                # Blank line: nothing to record.
                continue
            if len(fields) < len(header) :
                # Columns cannot be matched to phenotypes reliably, so fail
                # loudly (same exception type as plain indexing would give).
                raise IndexError('%s line %d: expected %d fields, found %d'
                                 %(phenFile, lineNo, len(header), len(fields)))
            ID = fields[0]
            for name, value in zip(names, fields[1:]) :
                if value == '1' :
                    phens[name]['cases'].append(ID)
                elif value == '0' :
                    phens[name]['controls'].append(ID)
    return phens

def alignFasta(ids, seqsMap, ofname, type) :
    """Align a subset of sequences with the external `mafft` program.

    The sequences of all `ids` that are present in `seqsMap` are written
    to a scratch FASTA file, mafft is run on that file, and mafft's
    standard output is written to `ofname`. mafft's standard error is
    discarded and its exit status is not checked.

    Parameters:
        ids: iterable of sequence IDs to align; IDs missing from
            `seqsMap` are silently skipped.
        seqsMap: dict of {ID: sequence}.
        ofname: path of the alignment file to create (overwritten).
        type: __LOCAL to run mafft with --localpair, __GLOBAL to run it
            with no extra options. For any other value mafft is not run
            and `ofname` is left empty.
    """
    import tempfile

    # A uniquely named scratch file avoids clobbering when several runs
    # share a working directory; delete=False because mafft must be able
    # to reopen it by name after we close it.
    tmpFH = tempfile.NamedTemporaryFile('wt', suffix='.fasta', delete=False)
    tmpName = tmpFH.name
    try :
        with tmpFH :
            for seqID in ids :
                if seqID in seqsMap :
                    tmpFH.write('>%s\n%s\n' %(seqID, seqsMap[seqID]))
        with open(ofname, 'wb') as alnOFH :
            if type == __LOCAL :
                #subprocess.run(['mafft', '--maxiterate', '1000', '--localpair', tmpName], stderr=devnull, stdout=alnOFH)
                subprocess.run(['mafft', '--localpair', tmpName], stderr=subprocess.DEVNULL, stdout=alnOFH)
            elif type == __GLOBAL :
                #subprocess.run(['mafft', '--maxiterate', '1000', '--globalpair', tmpName], stderr=devnull, stdout=alnOFH)
                subprocess.run(['mafft', tmpName], stderr=subprocess.DEVNULL, stdout=alnOFH)
    finally :
        # Remove the scratch file even if mafft is missing or a write fails.
        os.remove(tmpName)

def main(phenFile, fullFasta, type) :
    """Align the case and control sequences of every phenotype.

    Reads the phenotype table and the FASTA file, then for each phenotype
    calls alignFasta() once for its cases and once for its controls. The
    results go to '<phenotype>.<subtype>.cases.aln.fa' and
    '<phenotype>.<subtype>.controls.aln.fa', where <subtype> is 'global'
    when `type` equals __GLOBAL and 'local' otherwise.

    Parameters:
        phenFile: path of the phenotype table.
        fullFasta: path of the FASTA file holding all sequences.
        type: alignment-mode selector forwarded to alignFasta().
    """
    phenotypes = readPhenotypes(phenFile)
    sequences = readFasta(fullFasta)
    subtype = 'global' if type == __GLOBAL else 'local'
    # (group key, output-name template), in the order they are aligned.
    outputs = (
        ('cases', '%s.%s.cases.aln.fa'),
        ('controls', '%s.%s.controls.aln.fa'),
    )
    for name, groups in phenotypes.items() :
        print('Aligning %s' %name)
        for group, template in outputs :
            alignFasta(groups[group], sequences, template %(name, subtype), type)
        # Combined cases + controls alignment is currently switched off:
        # alignFasta(phens[phen]['cases'] + phens[phen]['controls'], seqs, '%s.%s.all.aln.fa' %(phen, subtype), type)

if __name__ == '__main__' :
    from argparse import ArgumentParser

    # Command line: two positional paths plus the optional -g switch.
    cli = ArgumentParser()
    cli.add_argument('phenFile', help='file with phenotypes')
    cli.add_argument('fullFasta', help='file with sequences')
    cli.add_argument('-g', action="store_true", help='global, not local')
    opts = cli.parse_args()

    # Local alignment unless -g was given.
    alnMode = __GLOBAL if opts.g else __LOCAL
    if opts.g :
        print('using global alignment, not local')
    main(opts.phenFile, opts.fullFasta, alnMode)

