import pandas as pd
import numpy as np
import math
import sys

# Chromosome names processed by this script: chr1 .. chr22.
chros = ['chr' + str(i) for i in range(1, 23)]
# Bin width used to turn a 'bin' coordinate into a bin index (matches the
# '*_10kb.bed' files read below).
res = 10000

# Header of the output column holding the joined SNP labels.
SNP_COLUMN = 'SNP (reported gene if any)'


def snp_label(snp_id, gene):
    """Return the display label for one SNP.

    The label is the SNP id, followed by ' (<gene>)' when a reported gene
    is present. A missing gene (NaN/None) yields the bare id.
    """
    if pd.isna(gene):
        return str(snp_id)
    return '{} ({})'.format(snp_id, gene)


def bin_snps(snps, offset, res):
    """Group SNPs by bin index and join their labels.

    Parameters
    ----------
    snps : pandas.DataFrame
        Rows for a single chromosome; the columns 'bin', 'id' and
        'reported_gene' are read.
    offset : int
        Coordinate subtracted from 'bin' before dividing by ``res``.
    res : int
        Bin width.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'idx' and ``SNP_COLUMN``, one row per bin index in
        ascending order. Labels of SNPs sharing a bin are joined with
        ', ' in their input order. An input without usable rows gives an
        empty frame with the same two columns.
    """
    labelled = pd.DataFrame({
        # Floor division keeps the index integral, so every SNP that falls
        # inside the same bin lands in the same group even when 'bin' is
        # not an exact multiple of ``res`` above ``offset``.
        'idx': ((snps['bin'] - offset) // res).values,
        SNP_COLUMN: [
            snp_label(snp_id, gene)
            for snp_id, gene in zip(snps['id'], snps['reported_gene'])
        ],
    })
    # Rows without a bin cannot be placed; groupby would drop them anyway.
    labelled = labelled.dropna(subset=['idx'])
    if labelled.empty:
        # Be explicit rather than relying on what groupby/apply returns
        # for an empty frame.
        return pd.DataFrame(columns=['idx', SNP_COLUMN])
    return labelled.groupby('idx')[SNP_COLUMN].apply(', '.join).reset_index()


def main(argv):
    """Write one SNP-per-bin table per chromosome for the term in ``argv[1]``.

    Reads 'snps/snps_by_parent_term/<term>.txt' (tab-separated) and, for
    each chromosome in ``chros``, 'bed/<chro>_10kb.bed'. Writes
    'map_snps_to_bins/<term>/<chro>_snp_and_gene.txt' (tab-separated).
    """
    if len(argv) < 2:
        sys.exit('usage: {} TERM'.format(argv[0]))
    term = argv[1]
    snps = pd.read_csv('snps/snps_by_parent_term/{}.txt'.format(term), sep="\t")

    for chro in chros:
        # Only the first 'start' value is used, so read a single row
        # instead of parsing the whole BED file.
        bed = pd.read_csv(
            'bed/{}_10kb.bed'.format(chro),
            header=None,
            names=['chro', 'start', 'end', 'idx'],
            index_col=False,
            sep="\t",
            nrows=1,
        )
        offset = bed['start'].values[0]
        binned = bin_snps(snps.loc[snps['chro'] == chro], offset, res)
        # float_format is kept so that an index that ends up float (e.g. a
        # float 'bin' column) is still written as an integer.
        binned.to_csv(
            'map_snps_to_bins/{}/{}_snp_and_gene.txt'.format(term, chro),
            index=False,
            float_format="%d",
            sep="\t",
        )


if __name__ == "__main__":
    main(sys.argv)
	
