import pandas as pd
import numpy as np
from sklearn import metrics

# Score the per-chromosome cluster assignments against the H1 O/E feature
# matrix with three internal clustering metrics, writing one tab-separated
# row per chromosome (chr1..chr22) to compartment_quality_oe.txt.
#
# The `with` block guarantees the output file is flushed and closed even if
# an input file is missing or a metric raises part-way through the loop.
with open('compartment_quality_oe.txt', 'w') as o:
    o.write('method\tchrom\tSI\tCH\tDBI\n')

    for c in range(1, 23):
        na_mask = np.loadtxt('output/H1_chr{:d}_100kb_nonzero_mask.txt'.format(c))

        cluster = pd.read_csv(
            'output/H1_chr{:d}_100kb_cluster.txt'.format(c),
            header=None,
            names=['cid'],
        )
        cluster['tgif_mask'] = pd.read_csv(
            '../tgif/output/chr{:d}/cluster_assignment.txt'.format(c),
            sep="\t",
        )['H1'].values
        cluster['na_mask'] = na_mask

        # Build ONE full-length row selector: a row is kept only if none of
        # its columns is NaN and its na_mask entry is positive. The previous
        # code dropped NaN rows from `cluster` first and then used the
        # shortened mask to index the full-length feature matrix, which
        # raises an IndexError as soon as any row is actually dropped.
        keep = (cluster.notna().all(axis=1) & (cluster['na_mask'] > 0)).values

        features = np.loadtxt('../oe_matrices/chr{:d}/H1_oe.txt'.format(c))[keep]
        clus = cluster['cid'].values[keep]

        si = metrics.silhouette_score(features, clus)
        ch = metrics.calinski_harabasz_score(features, clus)
        # NOTE: the value written under the "DBI" column is the reciprocal
        # of the Davies-Bouldin score, not the raw score.
        db = 1 / metrics.davies_bouldin_score(features, clus)

        o.write('PCA\tchr{:d}\t{:.3f}\t{:.3f}\t{:.3f}\n'.format(c, si, ch, db))

