#! /usr/bin/env python3.5

def readFasta(fname) :
    """Parse a FASTA file into a dict mapping header text to sequence.

    Each record starts with a line beginning with '>'; the text after the
    '>' (whitespace-stripped) becomes the dict key. All following lines up to
    the next header are stripped, upper-cased and concatenated to form the
    value.

    Blank lines are ignored. A header that is not followed by any sequence
    characters produces no entry. Sequence lines appearing before the first
    header are stored under the empty-string key. If a header occurs more
    than once, the last record with that header wins.

    Args:
        fname: Path of the FASTA file to read.

    Returns:
        dict of {header: upper-cased sequence string}.
    """
    seqs = {}
    header = ''
    chunks = []
    # 'with' guarantees the handle is closed even if parsing raises.
    with open(fname, 'rt') as fh :
        for line in fh :
            line = line.strip()
            if not line :
                # Blank (or whitespace-only) lines carry no data; skipping
                # them avoids indexing into an empty string.
                continue
            if line.startswith('>') :
                if chunks :
                    seqs[header] = ''.join(chunks)
                header = line[1:]
                chunks = []
            else :
                # Collect pieces and join once per record instead of
                # repeatedly concatenating strings.
                chunks.append(line.upper())
    # Flush the final record, which has no following header to trigger it.
    if chunks :
        seqs[header] = ''.join(chunks)
    return seqs

'''
chr_start-stop  K562_H3k4me1    K562_H3k27ac    K562_MAFK       motif
chr10_59575924-59576130 0       0       0       0
chr4_90025918-90026134  1       0       0       0
chr11_101627272-101627424       0       0       0       0
chr12_94891658-94891927 0       0       0       0
chr4_90029626-90029704  1       0       0       0
chr11_21832616-21832829 0       0       0       0
chr11_21836087-21836298 0       0       0       0
chr2_53717712-53717918  0       0       0       0
chr4_28012466-28012685  0       0       0       0
chr11_101626917-101627207       0       0       0       0
chr13_88991532-88991805 0       0       0       0
'''

def reportPhen(phenFile, seqs, pos) :
    """Print per-phenotype case/control allele counts at one alignment column.

    The phenotype file is a whitespace-delimited table: the first line is a
    header whose first field labels the sequence-name column and whose next
    (up to) four fields name the phenotypes. Every following row holds a
    sequence name followed by a '1' (case) or '0' (control) flag for each
    phenotype. Blank rows and a row consisting solely of the word
    'consensus' are skipped.

    For each row the character at column `pos` of the named sequence is
    tallied under 'cases' or 'controls' for every phenotype. The parsed
    header list is printed first, then for each phenotype a small table of
    A/T/G/C counts. Gap ('-') and any other characters are tallied but not
    printed.

    Args:
        phenFile: Path of the phenotype annotation table.
        seqs: dict of {sequence name: aligned sequence}; must contain every
            name listed in the table plus a 'consensus' entry.
        pos: 1-indexed alignment column to report.

    Raises:
        ValueError: if `pos` is smaller than 1.
        KeyError: if a sequence name (or 'consensus') is missing from `seqs`,
            or a phenotype flag is neither '0' nor '1'.
        IndexError: if `pos` is beyond the end of a sequence, or a row has
            fewer flag columns than the header has phenotypes.
    """
    if pos < 1 :
        # pos - 1 would become a negative index and silently read from the
        # end of the sequence instead of failing.
        raise ValueError('pos must be a 1-indexed position >= 1, got %r' % (pos,))
    col = pos - 1
    numToCaseControl = { '1': 'cases', '0': 'controls' }
    with open(phenFile, 'rt') as ifh :
        header = ifh.readline().strip().split()
        print(header)
        # Columns 1-4 of the header are the phenotypes that get reported.
        phenNames = header[1:5]
        phens = {}
        for name in phenNames :
            phens[name] = {
                group: dict.fromkeys('ATCG-', 0)
                for group in ('cases', 'controls')
            }
        for line in ifh :
            fields = line.strip().split()
            if not fields :
                # Blank rows (e.g. a trailing newline) carry no data.
                continue
            if len(fields) == 1 and fields[0] == 'consensus' :
                continue
            allele = seqs[fields[0]][col]
            for i, name in enumerate(phenNames, 1) :
                counts = phens[name][numToCaseControl[fields[i]]]
                # .get() lets characters outside A/T/C/G/- (such as 'N') be
                # tallied rather than raising KeyError.
                counts[allele] = counts.get(allele, 0) + 1
    consensusAllele = seqs['consensus'][col]
    for name in phenNames :
        cases = phens[name]['cases']
        controls = phens[name]['controls']
        print('For location %d, phenotype %s (consensus: %s)' %(pos, name, consensusAllele))
        print('  CASES     CONTROLS')
        for base in 'ATGC' :
            print('%s   %d           %d' %(base, cases[base], controls[base]))

def main(pos,
         fullFasta='/disk32/home2/njensen/compute/epigenetics/LTR18A_sequence/hg19_ltr18a_sequences.aln.fa',
         phenFile='/disk32/home2/njensen/compute/epigenetics/LTR18A_epigenome/LTR18A_epigenetic_annotations.txt') :
    """Load the aligned sequences and print the allele report for one column.

    Args:
        pos: 1-indexed alignment column to report.
        fullFasta: Path of the aligned FASTA file passed to readFasta().
            Defaults to the original hard-coded LTR18A alignment path.
        phenFile: Path of the phenotype annotation table passed to
            reportPhen(). Defaults to the original hard-coded annotation
            path.
    """
    seqs = readFasta(fullFasta)
    reportPhen(phenFile, seqs, pos)

if __name__ == '__main__' :
    # Command-line entry point: parse the column position and run the report.
    import argparse
    parser = argparse.ArgumentParser()
    # required=True: without it a missing -p yields None, which only fails
    # later with an unhelpful TypeError when the position is used as an index.
    parser.add_argument('-p', type=int, required=True, help='Position number to retrieve (1-indexed).')
    args = parser.parse_args()
    if args.p < 1 :
        # Positions are 1-indexed; 0 or negative values would wrap around to
        # the end of the sequence when converted to an index.
        parser.error('-p must be a position number >= 1')
    main(args.p)

