import numpy as np
import pandas as pd

# Chromosome labels processed by this script: 'chr1' through 'chr19'.
chrom = ['chr{}'.format(number) for number in range(1, 20)]
# Cell-type labels; each is used both as an input file prefix and as a column name.
cells = ['ES', 'NPC', 'CN']

# Load the per-bin nucleotide profile. Only six of the file's columns are kept
# and they are renamed positionally; the first line of the file is skipped
# (presumably a header row -- TODO confirm against the file).
gc = pd.read_csv(
    'nucleotide_profile.txt',
    sep="\t",
    header=None,
    skiprows=1,
    usecols=[0, 1, 4, 6, 7, 11],
    names=['chrom', 'start', 'GC%', 'C', 'G', 'N'],
    # Never promote the first column to the index.
    index_col=False,
)

# Build one annotated table per chromosome (the 100 kb bins plus one cluster
# column per cell type), write it to 'annotated/', and stack all chromosomes
# into `df` for the per-cell summary further down.
#
# The per-chromosome tables are collected in a list and concatenated once at
# the end: growing `df` with pd.concat inside the loop re-copies every
# previously accumulated row on each iteration, and concatenating onto an
# empty DataFrame is deprecated in recent pandas releases.
_annotated_frames = []
for c in chrom:
    bed = pd.read_csv('bed/{}_100kb.bed'.format(c),sep="\t",header=None,usecols=[0,1],names=['chrom','start'],index_col=False)
    # A 'chr' prefix is added to every chromosome value; presumably the BED
    # files store bare names such as '1' -- TODO confirm.
    bed['chrom'] = 'chr' + bed['chrom'].astype(str)
    # Iterate over the shared `cells` list instead of repeating its literal.
    for cell in cells:
        print(c,cell)
        # One value per line, read into a single column named after the cell
        # type; a literal "" field is treated as missing.
        cid = pd.read_csv('pca_output/{}_{}_100kb.txt'.format(cell,c),na_values=['\"\"'],header=None,names=[cell],index_col=False)
        # Column-wise concat aligns on the row index, i.e. by line position
        # in the two files.
        bed = pd.concat((bed,cid),axis=1)
    # float_format="%d" writes float columns without a decimal part.
    bed.to_csv('annotated/{}_100kb.bed'.format(c),sep="\t",index=False,float_format="%d")
    _annotated_frames.append(bed)

# pd.concat raises on an empty list, so fall back to an empty frame, which is
# what the original accumulation produced when there were no chromosomes.
df = pd.concat(_annotated_frames) if _annotated_frames else pd.DataFrame()

# For every cell type, write a chromosome x cluster table of mean GC%.
for cell in cells:
    # Attach the nucleotide profile to each bin, then discard every row that
    # has a missing value in any column (no cluster label or no profile match).
    labelled_bins = (
        df[['chrom', 'start', cell]]
        .merge(gc, how='left', on=['chrom', 'start'])
        .dropna()
    )

    # Mean GC% for each (chromosome, cluster) pair, in long format.
    mean_gc = labelled_bins.groupby(['chrom', cell])['GC%'].mean().reset_index()

    # Turn the numeric cluster ids into column-friendly names: 'cluster<id>'.
    mean_gc[cell] = 'cluster' + mean_gc[cell].astype(int).astype(str)

    # Reshape to wide format: one row per chromosome, one column per cluster.
    # NOTE: chromosomes are compared as strings, so rows come out as
    # chr1, chr10, chr11, ... rather than in numeric order.
    gc_table = mean_gc.pivot(index='chrom', columns=cell, values='GC%').reset_index()

    gc_table.to_csv('mean_gc_per_cluster/{}.txt'.format(cell), sep="\t", index=False)


