import pandas as pd
import numpy as np
import seaborn as sns
import string
import matplotlib.pyplot as plt
import matplotlib as mpl
# Global matplotlib settings, applied in one call. mpl.rcParams and
# plt.rcParams are the same object, so a single update covers all three keys.
mpl.rcParams.update({
    # fonttype 42 embeds TrueType fonts (instead of the default Type 3) in
    # PDF/PS output.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    # Default figure size as (width, height) in inches.
    "figure.figsize": (8, 5),
})

# Plot -log(q) of a selected list of terms across every pairwise day
# comparison as a dot plot, and write it to select_terms.pdf.

# Selected terms, one per line, no header. '1term' is the merge key; it also
# sorts before the 'DE ...' value columns when the columns are sorted below.
base = pd.read_csv('select_terms.txt', index_col=False, header=None, names=['1term'])

# Cluster ids kept from the details files, mapped to the label used in the
# generated column names.
clustermap = {1: 'DE not near DB', 3: 'DE near DB'}
days = ['00', '02', '05', '07', '15', '80']

# Visit every unordered pair (d1, d2) with d1 earlier than d2 in `days`.
# The bounds are derived from `days` itself so the list can be edited without
# touching the loops.
for i, d1 in enumerate(days[:-1]):
    for d2 in days[i + 1:]:
        print(d1, d2)
        # Columns 0, 1 and 3 of the tab-separated, header-less details file
        # are read as cluster, term and q-value.
        df = pd.read_csv(
            '../enrichment_vs_all_genes/extended_day{}_vs_day{}_details.txt'.format(d1, d2),
            sep="\t",
            header=None,
            index_col=False,
            usecols=[0, 1, 3],
            names=['cluster', 'term', 'qval'],
        )
        # np.log is the natural logarithm.
        df['-log(q)'] = -np.log(df['qval'])

        for c in clustermap:
            subdf = df.loc[df['cluster'] == c, ['term', '-log(q)']].copy()
            subdf.columns = ['1term', '{}(Day{}vs{})'.format(clustermap[c], d1, d2)]
            # Left merge keeps every selected term; a term missing from this
            # comparison gets NaN in the new column.
            base = pd.merge(base, subdf, how='left', on='1term')

# Order the columns alphabetically; NaN values are deliberately left unfilled.
base = base.reindex(sorted(base.columns), axis=1)

# Long format: one row per (term, comparison), with the comparison label in
# 'variable' and its -log(q) in 'value'.
base = base.melt(id_vars=['1term'])

# Both dot colour and dot size encode the -log(q) value.
sns.scatterplot(data=base, x='variable', y='1term', palette='Reds', hue='value', size='value')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig('select_terms.pdf')


