import pandas as pd
import numpy as np
from sklearn import metrics

# Per-bin ATAC-seq signal: headerless, tab-separated BED with a fourth value column.
atac = pd.read_csv(
    '../atacseq_chipseq_reads_coverage/H1_atacseq.bed',
    sep='\t',
    header=None,
    names=['chrom', 'start', 'end', 'accessibility'],
)
# Shift the ATAC end coordinate by +1 (and the bin start by -1 below) so the
# (chrom, start, end) keys of the two tables can be matched in the merge.
# NOTE(review): presumably reconciles differing coordinate conventions between
# the two BED files -- confirm against how each file was generated.
atac['end'] = atac['end'] + 1

# Genomic bins, one row per bin, in the same order as the rows of all_cscore.txt.
cs = pd.read_csv(
    'hg38_100kb.bed',
    sep='\t',
    header=None,
    names=['chrom', 'start', 'end'],
)
cs['start'] = cs['start'] - 1
# The score is taken from the second column of all_cscore.txt and assigned
# positionally, so that file must have exactly one row per bin.
cs['score'] = np.loadtxt('all_cscore.txt', usecols=[1])
# Two-way labelling by score sign: 1 where score > 0, otherwise 0.
cs['cluster'] = (cs['score'].values > 0).astype(int)

# Attach accessibility to every bin, then discard rows with any missing value
# (bins without an ATAC match, or bins whose score is NaN).
df = cs.merge(atac, how='left', on=['chrom', 'start', 'end'])
df = df.dropna()
# For each autosome (chr1..chr22), measure how well the two score-sign clusters
# separate the bins by accessibility, and write one tab-separated row per
# chromosome. The context manager guarantees the report is flushed and closed
# even if a metric computation raises part-way through the loop.
with open('compartment_quality_accessibility.txt','w') as o:
    # NOTE: the column headed 'DBI' holds the reciprocal of the Davies-Bouldin
    # index (see below), not the raw index.
    o.write('method\tchrom\tSI\tCH\tDBI\n')

    for c in range(1,23):
        subdf = df.loc[df.chrom == ('chr'+str(c))]

        # scikit-learn expects a 2-D (n_samples, n_features) array; the single
        # feature here is the accessibility value of each bin.
        features = subdf.accessibility.values.reshape(-1,1)
        clus = subdf.cluster.values.astype(int)

        # The clustering metrics are only defined for 2 <= n_labels <= n_samples - 1
        # and raise ValueError otherwise. A chromosome that is missing from the
        # merged table, or whose bins all share one label, is reported as NaN
        # instead of aborting the whole report.
        n_labels = np.unique(clus).size
        if 1 < n_labels < len(clus):
            si = metrics.silhouette_score(features,clus)
            ch = metrics.calinski_harabasz_score(features,clus)
            dbi = metrics.davies_bouldin_score(features,clus)
            # Reciprocal of the Davies-Bouldin index (presumably so that larger
            # means better, like SI and CH). An index of exactly 0 would make
            # the division fail, so it is reported as inf.
            db = 1/dbi if dbi > 0 else np.inf
        else:
            si = ch = db = np.nan

        o.write('Cscore\tchr{:d}\t{:.3f}\t{:.3f}\t{:.3f}\n'.format(c,si,ch,db))

