import pandas as pd
import numpy as np
import sys

# Condition labels; the loop further down compares every unordered pair of
# them, in this order.
timepoints = ["ES", "NPC", "CN"]
T = len(timepoints)

# Background gene universe: every distinct value in the 'Genenam' column of
# the tab-separated gene-to-GO table. It is written out one gene per line
# (no header, no index) and reused below to restrict the per-pair gene lists.
go_term = pd.read_csv('gene_to_go.txt', sep="\t", index_col=False)
genes_unique = go_term['Genenam'].unique()
background = pd.DataFrame({'gene_list': genes_unique})
background.to_csv('bg_gene_list.txt', index=False, header=False)

# For every unordered pair of timepoints, assign each background gene a
# cluster id and write one tab-separated line per cluster:
#     <cid>\t<gene1>#<gene2>#...
# where cid = 2 * near_diff_boundary + diff_exp, i.e.
#     0: neither, 1: differentially expressed only,
#     2: near a boundary only, 3: both.
for i, first in enumerate(timepoints):
    for second in timepoints[i + 1:]:
        pair = '{}vs{}'.format(first, second)
        bed_cols = ['chrom', 'start', 'end', 'name']

        # Per-gene adjusted p-values for this comparison (headerless BED-like file).
        df = pd.read_csv(
            'preliminary_files/genes_de_pval_{}.bed'.format(pair),
            sep="\t", index_col=False, header=None,
            names=bed_cols + ['padj'],
        )
        # A missing padj compares False, so such genes count as not
        # differentially expressed.
        df['diff_exp'] = df['padj'] < 0.05

        # Genes listed in the boundary file; only the first four columns are used.
        db = pd.read_csv(
            'preliminary_files/genes_within_100kb_of_sig_db_{}.bed'.format(pair),
            sep="\t", index_col=False, header=None,
            usecols=[0, 1, 2, 3], names=bed_cols,
        )
        # The merge below is only meant to flag genes. A gene listed several
        # times in the boundary file would otherwise be duplicated in df and
        # appear repeatedly in the output cluster.
        db = db.drop_duplicates()
        db['near_diff_boundary'] = True

        df = pd.merge(df, db, how='left', on=bed_cols)
        # Unmatched genes come back as NaN from the left merge. Assign an
        # explicit boolean column instead of calling fillna(inplace=True) on a
        # column selection, which does not update df under Copy-on-Write.
        df['near_diff_boundary'] = df['near_diff_boundary'].notna()

        # Restrict to the background gene set; copy so the assignments below
        # operate on an independent frame rather than a slice of the merge.
        df = df.loc[df['name'].isin(genes_unique)].copy()
        df['cid'] = 2 * df['near_diff_boundary'] + df['diff_exp']
        df['name'] = df['name'].astype(str)

        clusters = df.groupby('cid')['name'].apply('#'.join).reset_index()
        clusters.to_csv(
            'cluster_files/{}.txt'.format(pair),
            sep="\t", index=False, header=False,
        )
