import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sys
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42


# Column names assigned to the headerless, tab-separated input table.
# NOTE(review): judging by the names, each row pairs a gene (with its
# expression log fold change) with its closest significant "DB" region and
# the distance between them -- confirm against whatever produces the file.
COLUMNS = [
    'gene_chrom', 'gene_start', 'gene_end', 'logFoldChange',
    'db_chrom', 'db_start', 'db_end', 'scoreDiff', 'distance',
]

df = pd.read_csv(
    'closest_sig_db_to_each_gene.bed',
    sep="\t",
    index_col=False,
    header=None,
    names=COLUMNS,
)


def _save_scatter(frame, y, out_path, log_y=False):
    """Scatter ``logFoldChange`` against column *y* and save it to *out_path*.

    Each plot is drawn on its own explicitly created figure, which is closed
    after saving so that no state leaks from one plot into the next.

    Args:
        frame: DataFrame holding a ``logFoldChange`` column and column *y*.
        y: Name of the column plotted on the y axis.
        out_path: Path of the image file to write.
        log_y: When true, use a logarithmic y axis.
    """
    fig, ax = plt.subplots()
    sns.scatterplot(data=frame, x='logFoldChange', y=y, ax=ax)
    if log_y:
        # NOTE(review): zero or negative values cannot be drawn on a log
        # axis, so such rows (if any exist) will not appear in the figure.
        ax.set_yscale("log")
    fig.tight_layout()
    fig.savefig(out_path)
    plt.close(fig)


def _pearson_r(frame, x, y):
    """Return the Pearson correlation coefficient between columns *x* and *y*.

    Rows with a missing value in either column are dropped first; otherwise
    a single NaN would turn the whole coefficient into NaN.
    """
    complete = frame[[x, y]].dropna()
    return np.corrcoef(complete[x], complete[y])[0, 1]


_save_scatter(df, 'distance', 'logFoldChange_vs_distance.png', log_y=True)
_save_scatter(df, 'scoreDiff', 'logFoldChange_vs_scoreDiff.png')

with open('correlation.txt', 'w') as o:
    # NOTE(review): 'pearonr' looks like a typo for 'pearsonr'; it is kept
    # unchanged because anything parsing this file may rely on the header.
    o.write('x\ty\tpearonr\n')
    o.write('gene expression log fold change\tdistance to closest sig DB\t{:.3f}\n'.format(
        _pearson_r(df, 'logFoldChange', 'distance')))
    o.write('gene expression log fold change\tboundary score diff in closest sig DB\t{:.3f}\n'.format(
        _pearson_r(df, 'logFoldChange', 'scoreDiff')))
