import pandas as pd
import numpy as np
from scipy.stats import hypergeom

days = [0, 2, 5, 7, 15, 80]
chroms = ['chr' + str(i) for i in range(1, 23)]

# Column names given to every two-column table read by this script.
KEY_COLUMNS = ['chro', 'start']


def read_bins(path):
    """Read the first two columns of a headerless, tab-separated file.

    Args:
        path: Location of the file to read.

    Returns:
        A DataFrame with columns ``chro`` and ``start``.
    """
    return pd.read_csv(path, sep="\t", index_col=False, header=None,
                       names=KEY_COLUMNS, usecols=[0, 1])


def count_overlaps(bins, peaks, boundaries):
    """Count how many rows of *bins* are flagged by *peaks* and *boundaries*.

    A row of *bins* is flagged when its ``(chro, start)`` pair also occurs in
    the corresponding table.

    Args:
        bins: DataFrame with ``chro`` and ``start`` columns (the population).
        peaks: DataFrame with ``chro`` and ``start`` columns.
        boundaries: DataFrame with ``chro`` and ``start`` columns.

    Returns:
        Tuple ``(n, expected, samples, successes)`` of plain ints: number of
        bin rows, rows with a peak, rows with a boundary, rows with both.
    """
    # A key repeated in either table would duplicate bin rows in the left
    # merge and inflate every count, so reduce both tables to unique keys.
    peak_keys = peaks[KEY_COLUMNS].drop_duplicates().assign(hasPeak=True)
    boundary_keys = boundaries[KEY_COLUMNS].drop_duplicates().assign(boundary=True)

    merged = bins.merge(peak_keys, how='left', on=KEY_COLUMNS)
    merged = merged.merge(boundary_keys, how='left', on=KEY_COLUMNS)

    # Unmatched rows carry NaN in the flag columns; notna() gives clean
    # boolean masks without filling object columns with mixed True/0 values.
    has_peak = merged['hasPeak'].notna()
    is_boundary = merged['boundary'].notna()

    return (
        len(merged),
        int(has_peak.sum()),
        int(is_boundary.sum()),
        int((has_peak & is_boundary).sum()),
    )


def enrichment_stats(n, expected, samples, successes):
    """Compute fold enrichment and the hypergeometric enrichment p-value.

    Args:
        n: Population size (total number of bins).
        expected: Number of bins in the population carrying a peak.
        samples: Number of bins drawn (bins called as boundaries).
        successes: Number of drawn bins that also carry a peak.

    Returns:
        Tuple ``(fold_enrichment, pval)``. ``fold_enrichment`` is
        ``(successes / samples) / (expected / n)``, or NaN when ``samples`` or
        ``expected`` is zero. ``pval`` is P(X >= successes) under the
        hypergeometric distribution.
    """
    if samples and expected:
        fold_enrichment = (successes / samples) / (expected / n)
    else:
        # Both ratios degenerate to 0/0 here; report NaN explicitly.
        fold_enrichment = float('nan')

    # sf(k) is P(X > k); the probability of observing at least `successes`
    # overlaps is therefore sf(successes - 1).
    pval = float(hypergeom.sf(successes - 1, n, expected, samples))
    return fold_enrichment, pval


def main():
    """Write per-day peak/boundary enrichment statistics to ctcf_enrichment.txt.

    For each day in ``days`` the counts are accumulated over every chromosome
    in ``chroms`` and one tab-separated row is written to the output file.
    """
    # The per-chromosome bin files do not depend on the day: load them once.
    bins_by_chrom = {}
    for chrom in chroms:
        bed = read_bins('../bed/{:}_10kb.bed'.format(chrom))
        # Prefix the chromosome column with 'chr' so it can be matched against
        # the other tables (presumably they use 'chrN' names -- confirm).
        bed['chro'] = 'chr' + bed['chro'].astype(str)
        bins_by_chrom[chrom] = bed

    with open('ctcf_enrichment.txt', 'w') as out:
        out.write('Day\tMethod\tn\texpected\tsamples\tsuccesses\tfold enrichment\tp-value\n')
        for day in days:
            day_tag = str(day).zfill(2)
            ctcf = read_bins('../ctcf_peaks/day{:}.bed'.format(day_tag))

            n = expected = samples = successes = 0
            for chrom in chroms:
                bd = read_bins('output/day{:}_{:}_10kb.txt.bed'.format(day_tag, chrom))
                c_n, c_expected, c_samples, c_successes = count_overlaps(
                    bins_by_chrom[chrom], ctcf, bd)
                n += c_n
                expected += c_expected
                samples += c_samples
                successes += c_successes

            fold_enrichment, pval = enrichment_stats(n, expected, samples, successes)
            out.write('day{:d}\tTopDom\t{:d}\t{:d}\t{:d}\t{:d}\t{:.3f}\t{:.3e}\n'.format(
                day, n, expected, samples, successes, fold_enrichment, pval))


if __name__ == '__main__':
    main()
