import pandas as pd
import numpy as np
from scipy import stats
from scipy.spatial import distance
from sklearn.metrics.cluster import rand_score

# Pairwise similarity of compartment calls between methods.
#
# For every chromosome in `chroms`, the per-method tables are inner-joined on
# the bin coordinates, and for every unordered pair of methods three numbers
# are written to 'compartment_similarity.txt':
#   * sign-invariant Pearson correlation of the two score columns,
#   * sign-invariant cosine similarity of the two score columns,
#   * Rand index between the two cluster-label columns.

# Methods being compared; each one has a directory of per-chromosome tables
# named '<method>/H1_chr<N>_100kb.txt'.
methods = ['cscore','dchic','pca','tgif']
M = len(methods)
chroms = range(1,23)
# Columns that identify a genomic bin; used as the join key across methods.
idx_cols = ['chrom','start','end']


def _load_method_table(method, chrom):
    """Read one method's table for one chromosome and rename its columns.

    The file '<method>/H1_chr<chrom>_100kb.txt' is read as tab-separated text
    and its columns are renamed to ``idx_cols`` followed by
    '<method>_score' and '<method>_cluster'.

    NOTE(review): the rename assumes the file has exactly five columns in the
    order chrom, start, end, score, cluster -- pandas raises if the column
    count differs; confirm against the upstream file format.
    """
    table = pd.read_csv('{}/H1_chr{:d}_100kb.txt'.format(method, chrom),sep="\t",index_col=False)
    table.columns = idx_cols + [method+'_score', method+'_cluster']
    return table


# The context manager guarantees the output is flushed and closed even when a
# chromosome fails part-way through (missing input file, too few rows, ...).
with open('compartment_similarity.txt','w') as o:
    o.write('chrom\tone\ttwo\tpearsonr\tcosine sim\trand index\n')
    for c in chroms:
        # Start from the first method and inner-join the remaining ones so
        # only bins present in every method's table are kept.
        df = _load_method_table(methods[0], c)
        for m in methods[1:]:
            df = pd.merge(df,_load_method_table(m, c),how='inner',on=idx_cols)
        # Drop bins where any column of any method is missing.
        df = df.dropna()
        print(df)
        # Visit each unordered pair of methods exactly once.
        for i, m1 in enumerate(methods):
            m1_score = df[m1+'_score']
            for m2 in methods[i+1:]:
                m2_score = df[m2+'_score']
                # Negating one vector only flips the sign of both statistics,
                # so the best value over the two orientations is the
                # magnitude of a single computation.
                corr = abs(stats.pearsonr(m1_score,m2_score)[0])
                cos = abs(1-distance.cosine(m1_score,m2_score))
                rand = rand_score(df[m1+'_cluster'],df[m2+'_cluster'])
                o.write('chr{:d}\t{}\t{}\t{:.3f}\t{:.3f}\t{:.3f}\n'.format(c,m1,m2,corr,cos,rand))
