import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sys
from sklearn.cluster import KMeans
import matplotlib as mpl
# Font type 42 asks matplotlib's PDF and PostScript backends to embed fonts as
# TrueType (Type 42) rather than the default Type 3 -- see matplotlib's rcParams docs.
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Histone marks, in the column order used for clustering and for the heatmap
# x-axis labels. (An alphabetical ordering was used previously:
# H3K27ac H3K27me3 H3K36me3 H3K4me1 H3K4me3 H3K9me3.)
marks = [
	'H3K27ac',
	'H3K4me1',
	'H3K36me3',
	'H3K4me3',
	'H3K27me3',
	'H3K9me3',
]
# Chromosome names chr1 through chr19.
chros = ['chr{}'.format(number) for number in range(1, 20)]

# Assemble `ab`: one row per 100kb bin that appears in the per-chromosome list of
# significantly differential compartmental regions (NPC vs CN), with one column
# per histone mark holding log2((NPC + 1) / (CN + 1)) of that mark's per-bin signal.
bin_cols = ['chro','start','end']
sub_abs = []

for chro in chros:
	# Bin coordinates for this chromosome. The signal files loaded below are
	# assumed to hold one value per row of this BED file, in the same order
	# -- TODO confirm against whatever produces them.
	df = pd.read_csv(
		'bed/{}_100kb.bed'.format(chro),
		sep="\t",
		index_col=False,
		header=None,
		names=bin_cols,
		usecols=[0,1,2],
	)
	for mark in marks:
		# +1 pseudocount on both conditions keeps the ratio finite for bins
		# with zero signal (assuming the signal is non-negative).
		npc = np.loadtxt('histone_{}/{}/{}_100kb.txt'.format('NPC',mark,chro)) +1
		cn = np.loadtxt('histone_{}/{}/{}_100kb.txt'.format('CN',mark,chro)) +1
		fold  = np.log2(npc/cn)
		# Replace any remaining NaN/inf in place (NaN -> 0, inf -> large finite value).
		np.nan_to_num(fold,copy=False)
		df[mark] = fold
	sigdiff = pd.read_csv(
		'dc_output/{}/NPC_vs_CN_significantly_differential_compartmental_regions.txt'.format(chro),
		index_col=False,
		header=None,
		usecols=[0,1,2],
		names=bin_cols,
		sep="\t",
	)
	# Inner merge keeps only bins present in both tables.
	sub_ab = pd.merge(sigdiff,df,how='inner',on=bin_cols)
	sub_abs.append(sub_ab)

# Concatenate once at the end: growing a DataFrame with pd.concat inside the loop
# re-copies all accumulated rows on every iteration, and seeding it with an empty
# DataFrame relies on concat behaviour that recent pandas versions deprecate.
ab = pd.concat(sub_abs)

# Cluster the bins into 5 groups by their histone-mark fold-change profiles
# (fixed random_state for reproducibility), then order the rows so that bins
# sharing a cluster label are contiguous, highest label first.
mark_matrix = ab[marks].values
kmeans = KMeans(n_clusters=5,random_state=66)
kmeans.fit(mark_matrix)
idx = np.argsort(kmeans.labels_)
ab_sorted = mark_matrix[idx[::-1]]

# Heatmap of the cluster-ordered log2 fold changes (rows = bins, columns = marks).
# Use the pyplot factory plt.figure(), not the plt.Figure class: a bare Figure
# instance is not registered with pyplot, so the heatmap would be drawn on an
# implicitly created default-size figure and figsize would be ignored.
fig = plt.figure(figsize=(10,5))
# Colour scale is clipped symmetrically around 0 at +/-1.8.
ax = sns.heatmap(ab_sorted,cmap='RdBu_r',vmin=-1.8,vmax=1.8)
ax.set_xticklabels(marks)
plt.tight_layout()
plt.savefig('log_fold_diff')
# clf() empties the figure but leaves it as pyplot's current figure.
plt.clf()

# Single-column heatmap of the cluster label of each bin, with rows taken in
# the idx[::-1] order (descending cluster label).
row_order = idx[::-1]
# Turn the 1-D label vector into an (n_bins, 1) column for sns.heatmap.
cids = kmeans.labels_[row_order][:, np.newaxis]
sns.heatmap(cids,cmap='Set2')
plt.savefig('mark_clusters')

