import numpy as np
import pandas as pd

# Chromosome labels chr1 .. chr22 (presumably the human autosomes).
chrom = ['chr{}'.format(number) for number in range(1, 23)]
# Cell types to process; each name is also used to build input/output paths.
cells = ['H1', 'endoderm']

# Per-cell-type ATAC table, keyed by cell name. Each input is a headerless,
# tab-separated file; only columns 0, 1 and 3 are kept and are labelled
# 'chrom', 'start' and 'atac' respectively.
atac = {
	cell: pd.read_csv(
		'atacseq_chipseq_reads_coverage/{}_atacseq.bed'.format(cell),
		sep="\t",
		header=None,
		usecols=[0, 1, 3],
		names=['chrom', 'start', 'atac'],
		index_col=False,
	)
	for cell in cells
}

# Build `df`: one row per genomic bin with its cluster assignments, stacked
# over every chromosome in `chrom`.
#
# The per-chromosome frames are collected in a list and concatenated once at
# the end. Growing `df` with pd.concat inside the loop re-copies everything
# accumulated so far on each iteration, and seeding it with an empty
# DataFrame triggers the pandas >= 2.1 deprecation warning about
# concatenating empty entries.
per_chrom_frames = []
for chrom_name in chrom:
	# Bin coordinates: first two columns of a headerless, tab-separated file.
	bed = pd.read_csv(
		'bed/H1_{}_100kb.bed'.format(chrom_name),
		sep="\t",
		header=None,
		usecols=[0, 1],
		names=['chrom', 'start'],
		index_col=False,
	)
	# Prefix the chromosome value with 'chr'
	# (presumably the bed files store bare chromosome numbers -- TODO confirm).
	bed['chrom'] = 'chr' + bed['chrom'].astype(str)
	# Cluster assignments for this chromosome; this file is read with its own
	# header row. Later code selects columns named after the entries of `cells`.
	clusters = pd.read_csv(
		'tgif_output/{}/cluster_assignment.txt'.format(chrom_name),
		sep="\t",
		index_col=False,
	)
	# Side-by-side join: rows are paired through their default positional
	# index, so row i of the bed file is matched with row i of the clusters.
	per_chrom_frames.append(pd.concat((bed, clusters), axis=1))

df = pd.concat(per_chrom_frames)

# For each cell type: average the ATAC signal per (chromosome, cluster) and
# write a chromosome x cluster table to mean_atac_per_cluster/<cell>.txt.
for cell in cells:
	# Attach the ATAC value to each bin by (chrom, start); rows with any
	# missing value (no ATAC match or no cluster label) are discarded.
	bins = df[['chrom', 'start', cell]]
	labelled = bins.merge(atac[cell], how='left', on=['chrom', 'start'])
	labelled = labelled.dropna()

	# Mean ATAC signal for every (chromosome, cluster) pair.
	means = labelled.groupby(['chrom', cell])['atac'].mean().reset_index()
	# Turn the numeric cluster id into a label such as 'cluster3'.
	means[cell] = 'cluster' + means[cell].astype(int).astype(str)

	# Reshape to wide form: one row per chromosome, one column per cluster.
	wide = means.pivot(index='chrom', columns=cell, values='atac').reset_index()
	# Strip the 'chr' prefix so rows sort numerically rather than as text.
	wide['chrom'] = wide['chrom'].str.removeprefix('chr').astype(int)
	wide = wide.sort_values(by='chrom')

	# Keep the final table in `atac` and write it out tab-separated.
	atac[cell] = wide
	wide.to_csv('mean_atac_per_cluster/{}.txt'.format(cell), sep="\t", index=False)


