import sys, gzip
import pprint as pp


# Read the map file.
#
# Each line of the gzip-compressed file is comma-separated.  Field 3
# (zero-based) becomes the dictionary key and field 0 is stored as the
# gene value; the key is later used to build per-gene input file names.

mapfile = "gene.names.txt.gz"


# field 3 -> field 0 (gene); if a key repeats, the last line seen wins.
mapdct = {}

with gzip.open(mapfile, 'rt') as fp:
    for line in fp:
        # Skip blank lines (e.g. a trailing empty line at end of file);
        # otherwise the column lookups below would raise IndexError.
        if not line.strip():
            continue
        llst = line.replace('\n', '').split(',')
        k = llst[3]
        gene = llst[0]
        mapdct[k] = gene

# Per-gene rows collected from the "../combined/<key>.cds.div.txt" files.
# Each dict maps gene -> list of the whitespace-separated fields that
# follow the row label on the matching line.
afr = {}  # rows labelled 'hprc-afr'
eas = {}  # rows labelled 'cpc'
btw = {}  # rows labelled 'hprc-afrVScpc'

# Row label (first field of a line) -> dictionary that stores the row.
_tables_by_label = {
    'hprc-afr': afr,
    'cpc': eas,
    'hprc-afrVScpc': btw,
}

for k in mapdct:
    gene = mapdct[k]
    inpfile = "../combined/" + k + ".cds.div.txt"
    try:
        with open(inpfile, 'r') as fp:
            for line in fp:
                llst = line.split()
                # A blank or whitespace-only line splits to an empty list;
                # skip it instead of failing on llst[0].
                if not llst:
                    continue
                table = _tables_by_label.get(llst[0])
                if table is not None:
                    # A later matching line for the same gene overwrites
                    # the earlier one.
                    table[gene] = llst[1:]
    except FileNotFoundError:
        # Keys without a per-gene input file are skipped.
        continue


# Write a tab-separated table to stdout: one header line, then every
# 'afr' row, every 'eas' row and every 'btw' row, in that order.
print('category\tgene\tob\tsim_ave\tsim_var\tleft\tmid\tright')
for prefix, table in (('afr\t', afr), ('eas\t', eas), ('btw\t', btw)):
    for name, fields in table.items():
        print(prefix + name + '\t' + '\t'.join(fields))

