import numpy as np
import pandas as pd
import sys
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import ttest_ind,mannwhitneyu
# Use font type 42 for both the PDF and PostScript backends so every figure
# saved by this script is written with the same font setting.
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Subset labels; each one is substituted into the input and output file names.
subsets = ['dchic', 'tgif']

# Column names assigned to the headerless, tab-separated input files.
_GENE_COLUMNS = ['chrom', 'start', 'end', 'name', 'logFoldChange']


def _load_genes(path, category):
    """Load one gene table and annotate it for the comparison.

    Reads the headerless tab-separated file at ``path``, adds a
    ``'|log2(FoldChange)|'`` column holding the absolute value of the
    ``logFoldChange`` column, and adds a ``'category'`` column filled with
    ``category`` (used as the x-axis grouping in the box plot).

    Returns the annotated ``pandas.DataFrame``.
    """
    genes = pd.read_csv(
        path,
        header=None,
        names=_GENE_COLUMNS,
        index_col=False,
        sep="\t",
    )
    genes['|log2(FoldChange)|'] = np.absolute(genes.logFoldChange)
    genes['category'] = category
    return genes


# The context manager guarantees the report is flushed and closed even if a
# read, test, or plotting step raises part-way through the loop.
with open('compare_within_vs_not.txt', 'w') as o:
    o.write('test\tMWpval\n')
    for s in subsets:
        within = _load_genes(
            'genes_within_{}_sig_dc.bed'.format(s), 'within sig dc'
        )
        without = _load_genes(
            'genes_NOT_within_{}_sig_dc.bed'.format(s), 'NOT within sig dc'
        )

        # One-sided Mann-Whitney U test: are |log2FC| values in the "within"
        # table greater than those in the "NOT within" table?
        test = mannwhitneyu(
            within['|log2(FoldChange)|'],
            without['|log2(FoldChange)|'],
            alternative='greater',
        )
        o.write(
            'within {} sig dc > not within {} sig dc\t{:.3e}\n'.format(
                s, s, test.pvalue
            )
        )

        # Stack both tables so seaborn can group the boxes by 'category'.
        df = pd.concat((within, without))
        print(df)
        sns.boxplot(
            data=df,
            x='category',
            y='|log2(FoldChange)|',
            showfliers=False,
        )
        plt.savefig('{}_within_sig_dc_vs_not.pdf'.format(s))
        # Clear the current figure so the next subset starts from a blank canvas.
        plt.clf()
	
