#! /usr/bin/env python3.5

# FID IID PAT MAT SEX AgeQ1 AgeQ2 AgeQ3 MJ5_phe IntvSex MJ2A MJ4_phe

def readNames(fasta):
    """Collect the record names of a FASTA file, in file order.

    Only header lines (those whose first character is '>') are used; all
    other lines are ignored.

    Args:
        fasta: Path to the FASTA file.

    Returns:
        A tuple ``(idToLines, linesInOrder)``:
        ``idToLines`` maps each record name to its initial output row,
        ``'<name> <name> 0 0 0 0'``; ``linesInOrder`` lists the names in the
        order they appear in the file (repeated names are listed repeatedly).
    """
    idToLines = {}
    linesInOrder = []
    # The context manager closes the file even if reading raises part-way.
    with open(fasta, 'rt') as ifh:
        for line in ifh:
            if not line.startswith('>'):
                continue
            # Every '>' on the header line is dropped, not only the leading one.
            name = line.replace('>', '').strip()
            idToLines[name] = '%s %s 0 0 0 0' % (name, name)
            linesInOrder.append(name)
    return idToLines, linesInOrder

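# Example of the whitespace-separated file read by readVarFile
# (header row first; the first column of each row is the sequence ID):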
# chr_start-stop  K562_H3k4me1    K562_H3k27ac    K562_MAFK       motif
# chr10_59575924-59576130 0       0       0       0
# chr4_90025918-90026134  1       0       0       0
# chr11_101627272-101627424       0       0       0       0
# chr12_94891658-94891927 0       0       0       0
# chr4_90029626-90029704  1       0       0       0
# chr11_21832616-21832829 0       0       0       0
# chr11_21836087-21836298 0       0       0       0
# chr2_53717712-53717918  0       0       0       0
# chr4_28012466-28012685  0       0       0       0
# chr11_101626917-101627207       0       0       0       0
# chr13_88991532-88991805 0       0       0       0

def readVarFile(idToLines, header, fname):
    """Append recoded variable columns from a table file to the output rows.

    The first line of ``fname`` is a header; its first field labels the ID
    column and the remaining fields are appended to ``header``. For every
    later row whose first field is a key of ``idToLines``, each value is
    recoded ('1' becomes 2, anything else becomes 1) and appended to that
    key's row. NOTE(review): this looks like PLINK-style case/control coding
    (2 = case, 1 = control) -- confirm.

    Blank lines and lines consisting solely of 'consensus' are skipped. Rows
    whose ID is not in ``idToLines`` are ignored.

    Args:
        idToLines: Dict mapping ID -> output row string; updated in place.
        header: Header string to extend with the variable names.
        fname: Path to the whitespace-separated variable file.

    Returns:
        A tuple ``(idToLines, header)`` with the updated dict and header.

    Raises:
        IndexError: If a data row has fewer fields than the header.
    """
    with open(fname, 'rt') as ifh:
        varHeader = ifh.readline().strip().split()
        header = header + ' ' + ' '.join(varHeader[1:])
        numFields = len(varHeader)
        # Data starts on line 2; the number is only used for error messages.
        for lineNum, rawLine in enumerate(ifh, 2):
            stripped = rawLine.strip()
            if not stripped or stripped == 'consensus':
                continue
            fields = stripped.split()
            if len(fields) < numFields:
                raise IndexError(
                    '%s line %d: expected %d fields, found %d'
                    % (fname, lineNum, numFields, len(fields)))
            seqId = fields[0]
            if seqId not in idToLines:
                continue
            # Fields beyond the header's width are ignored.
            codes = ['2' if val == '1' else '1' for val in fields[1:numFields]]
            if codes:
                idToLines[seqId] += ' ' + ' '.join(codes)
    return idToLines, header

# def readVarFile(rvLines, header, fname) :
#     ifh = open(fname, 'rt')
#     i = 0
#     header = header + ' %s' %fname[:-4].split('/')[-1]
#     for line in ifh :
#         val = line.strip()
#         if val == '1' :
#             val = 2
#         else :
#             val = 1
#         rvLines[i] = rvLines[i] + ' %d' %val
#         i += 1
#     ifh.close()
#     return rvLines, header

def writeCovar(lines, ofname):
    """Write each entry of ``lines`` to ``ofname``, one per line.

    Args:
        lines: Iterable of row strings (without trailing newlines).
        ofname: Path of the output file; it is created or overwritten.
    """
    # The context manager guarantees the handle is closed (and buffered data
    # flushed) even if a write fails part-way through.
    with open(ofname, 'wt') as ofh:
        ofh.writelines('%s\n' % line for line in lines)

def main(fasta, phenFile, outName) :
    # Build the output table: one row per FASTA record (in FASTA order),
    # extended with the recoded columns from phenFile, preceded by a header
    # row, and write it to outName.
    baseHeader = 'FID IID PAT MAT SEX PHEN'
    rowsById, fastaOrder = readNames(fasta)
    rowsById, fullHeader = readVarFile(rowsById, baseHeader, phenFile)
    # Header first, then the rows in the order the names appeared in the FASTA.
    outputRows = [fullHeader] + [rowsById[name] for name in fastaOrder]
    writeCovar(outputRows, outName)

if __name__ == '__main__' :
    # Script entry point: parse the three positional arguments and hand
    # them to main().
    import argparse
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'fasta',
        help='Really, all we need are the IDs, but they must be in the order of the fasta.',
    )
    cli.add_argument(
        'phen',
        help='File containing phenotype data.',
    )
    cli.add_argument('outName')
    opts = cli.parse_args()
    main(opts.fasta, opts.phen, opts.outName)

