import pandas as pd
from scipy.stats import ttest_ind
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import ttest_ind,mannwhitneyu
# Use font type 42 for both the PDF and PostScript backends. Per matplotlib's
# rcParams documentation this selects TrueType output, which keeps text in the
# saved figures editable instead of the default Type 3 fonts.
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Methods to compare; one input file 'genes_within_<method>_sig_dc.bed' is
# read per entry.
methods = ['dchic','tgif']
# method name -> NumPy array of absolute log fold changes (input to the t-tests)
vals = {}
# Per-method tables are collected here and concatenated once after the loop.
# Growing a DataFrame with pd.concat inside the loop re-copies all earlier rows
# on every iteration, and concatenating with an empty DataFrame is deprecated
# in recent pandas releases.
frames = []
for m in methods:
	# Tab-separated, no header row; only file columns 0, 1, 2 and 4 are loaded
	# and named chrom/start/end/logFoldChange.
	tmp = pd.read_csv('genes_within_{}_sig_dc.bed'.format(m),sep="\t",index_col=False,header=None,names=['chrom','start','end','logFoldChange'],usecols=[0,1,2,4])
	tmp['method'] = m
	# Infinite values are converted to NaN so that the dropna below removes
	# those rows together with any row that already has a missing value.
	tmp.replace([np.inf,-np.inf],np.nan,inplace=True)
	tmp.dropna(inplace=True)
	# Magnitude of the fold change, ignoring its direction.
	tmp['|log(H1/endoderm)|'] = np.absolute(tmp.logFoldChange)
	vals[m] = tmp['|log(H1/endoderm)|'].values
	frames.append(tmp)

# Long-format table with one row per retained record and a 'method' label
# column; each per-file row index is preserved, as with the incremental concat.
df = pd.concat(frames)

# One box per method showing the distribution of absolute log fold change;
# outlier points are not drawn (showfliers=False). The figure that owns the
# returned Axes is written to a PDF.
box_ax = sns.boxplot(x='method', y='|log(H1/endoderm)|', data=df, showfliers=False)
box_ax.figure.savefig('tgif_vs_dchic.pdf')

# Unequal-variance (equal_var=False) t-tests of TGIF against dcHiC on the
# absolute log fold changes. Each entry pairs the scipy `alternative` argument
# with the output row template for that test; 'two-sided' is scipy's default.
welch_tests = (
	('two-sided', 'TGIF=dcHiC\t{:.3f}\t{:e}\n'),
	('less', 'TGIF<dcHiC\t{:.3f}\t{:e}\n'),
	('greater', 'TGIF>dcHiC\t{:.3f}\t{:e}\n'),
)

with open('ttest.txt','w') as o:
	# Header row followed by one tab-separated row per test.
	o.write('test\tstatistic\tpval\n')
	for alt, row_fmt in welch_tests:
		# TGIF is always the first sample, so 'less'/'greater' describe the
		# TGIF mean relative to the dcHiC mean.
		res = ttest_ind(vals['tgif'],vals['dchic'],equal_var=False,alternative=alt,nan_policy='omit')
		o.write(row_fmt.format(res.statistic,res.pvalue))
