import numpy as np
import pandas as pd
import sys
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import ttest_ind,mannwhitneyu
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

# Labels stored in the 'category' column; they also appear on the boxplot x-axis.
WITHIN_LABEL = 'within sig dc'
NOT_WITHIN_LABEL = 'NOT within sig dc'
FOLD_CHANGE_COL = '|log(FoldChange)|'


def load_atac(h1_path, endoderm_path):
    """Load H1 and endoderm ATAC-seq coverage and compute |log(H1/endoderm)|.

    Both inputs are headerless, tab-separated files with the columns
    chr, start, end, <signal>.

    Returns a DataFrame with columns chr, start, end, H1, endoderm and
    '|log(FoldChange)|'. Infinite values (from zero coverage in either
    sample) are replaced by NaN.
    """
    atac = pd.read_csv(h1_path, sep='\t', header=None,
                       names=['chr', 'start', 'end', 'H1'])
    # NOTE(review): presumably shifts the end coordinate to the convention used
    # by the differential-compartment region files, so that the exact-match
    # merge on (chr, start, end) can succeed -- confirm against those files.
    atac['end'] += 1
    atac_endoderm = pd.read_csv(endoderm_path, sep="\t", header=None,
                                names=['chr', 'start', 'end', 'endoderm'])
    # Rows are paired by index (row position in each file).
    # NOTE(review): assumes both files list the same regions in the same order.
    atac['endoderm'] = atac_endoderm.endoderm
    atac[FOLD_CHANGE_COL] = np.abs(np.log(atac.H1 / atac_endoderm.endoderm))
    atac.replace([np.inf, -np.inf], np.nan, inplace=True)
    return atac


def load_sig_dc_regions(chromosomes=range(1, 23)):
    """Read the significantly differential compartmental regions per chromosome.

    chromosomes: iterable of integer chromosome numbers; each one is read from
    'output/chr<N>/H1_vs_endoderm_significantly_differential_compartmental_regions.txt'.

    Returns one DataFrame with columns start, end, chr covering all of them.
    """
    frames = []
    for c in chromosomes:
        regions = pd.read_csv(
            'output/chr{:d}/H1_vs_endoderm_significantly_differential_compartmental_regions.txt'.format(c),
            sep="\t", header=None, usecols=[1, 2], names=['start', 'end'])
        regions['chr'] = 'chr' + str(c)
        frames.append(regions)
    return pd.concat(frames, ignore_index=True)


def label_sig_dc_regions(atac, sigdiff):
    """Label every ATAC region as within / NOT within a sig-dc region.

    atac:    DataFrame with at least chr, start, end columns.
    sigdiff: DataFrame with chr, start, end columns for ALL chromosomes.

    Returns a copy of ``atac`` with an added 'category' column, with rows that
    contain any NaN removed. Each input row appears at most once.
    """
    # De-duplicate the keys so the left merge cannot multiply ATAC rows.
    keys = sigdiff[['chr', 'start', 'end']].drop_duplicates()
    keys = keys.assign(category=WITHIN_LABEL)
    # Merge ONCE against the regions of all chromosomes. Merging the full ATAC
    # table separately per chromosome and concatenating the results would
    # repeat every region once per chromosome and label true "within" regions
    # as "NOT within" in all the other iterations.
    df = pd.merge(atac, keys, how='left', on=['chr', 'start', 'end'])
    # Assign back instead of a chained inplace fillna, which does not update
    # the frame under pandas copy-on-write.
    df['category'] = df['category'].fillna(NOT_WITHIN_LABEL)
    # NOTE(review): regions on chromosomes without a sig-dc file (e.g. chrX)
    # stay in the "NOT within" group, as before -- confirm this is intended.
    return df.dropna()


def write_mannwhitney_result(within, without, path):
    """Write the one-sided Mann-Whitney U p-value (within > without) to ``path``."""
    test = mannwhitneyu(within[FOLD_CHANGE_COL], without[FOLD_CHANGE_COL],
                        alternative='greater')
    with open(path, 'w') as o:
        o.write('test\tMWpval\n')
        o.write('within sig dc > not within sig dc\t{:.3e}\n'.format(test.pvalue))


def main():
    """Compare |log fold change| of ATAC signal inside vs outside sig-dc regions."""
    atac = load_atac('../atacseq_chipseq_reads_coverage/H1_atacseq.bed',
                     '../atacseq_chipseq_reads_coverage/endoderm_atacseq_qn.bed')
    df = label_sig_dc_regions(atac, load_sig_dc_regions())

    within = df.loc[df.category == WITHIN_LABEL]
    without = df.loc[df.category == NOT_WITHIN_LABEL]
    write_mannwhitney_result(within, without, 'compare_within_vs_not.txt')

    sns.boxplot(data=df, x='category', y=FOLD_CHANGE_COL, showfliers=False)
    plt.savefig('within_sig_dc_vs_not.pdf')


if __name__ == '__main__':
    main()

	
