# compare2ObservedExpected.py
# Jonathan M Galazka
#
# Calculates the ratio of two observed/expected heatmaps. Saves an array with these values
# and also plots a heatmap of the result
#
# Usage: python ./compare2ObservedExpected.py


import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable


# ---------------------------------------------------------------------------
# Settings (edit these before running)
# ---------------------------------------------------------------------------
dataset1 = 'dataset1' # Reference to compare to (denominator of the ratio)
dataset2 = 'dataset2' # Dataset compared against the reference (numerator)
resolution = 50 # res. in kb
lg = 1 # chromosome (zero-based) to plot

# get chromosome starts and stops
# The directory names embed the resolution multiplied by 1000
# (presumably kb -> bp).
resolution_string = str(resolution * 1000)
chr_starts_path = '/Volumes/HD/HiC/' + dataset1 + '/' + dataset1 + '-ic-hm-' + resolution_string + '-txt/chromosomeStarts'
chr_starts_array = np.loadtxt(chr_starts_path, delimiter=' ')
# Entries 0..6 are used as start indices and entries 1..7 as the matching
# end indices, i.e. the slicing is hard-coded for 7 chromosomes.
# NOTE(review): assumes the file holds at least 8 cumulative bin boundaries
# -- confirm against the file that produces chromosomeStarts.
chr_starts = np.transpose(chr_starts_array.astype(int)[0:7])
chr_ends = np.transpose(chr_starts_array.astype(int)[1:8])

# Compare fold change from expected heatmaps
observed_expected_1 = '/Volumes/HD/HiC/' + dataset1 + '/' + dataset1 + '-ic-hm-' + resolution_string + '-txt/obs-exp.txt'
observed_expected_2 = '/Volumes/HD/HiC/' + dataset2 + '/' + dataset2 + '-ic-hm-' + resolution_string + '-txt/obs-exp.txt'

array3 = np.loadtxt(observed_expected_1, delimiter=' ') # reference (dataset1)
array4 = np.loadtxt(observed_expected_2, delimiter=' ') # comparison (dataset2)

# Element-wise ratio dataset2 / dataset1. Both arrays must have the same
# shape; zeros in the reference produce inf/nan entries (NumPy emits a
# RuntimeWarning rather than raising).
fold_of_reference = array4 / array3

# save the entire comparison array
savename2 = '/Users/galazkaj/Desktop/' + dataset2 + '-' + dataset1 + '-obs_exp_compare.txt'
# Fixed: the original passed the undefined name `savename`, which raised a
# NameError before anything was written or plotted.
np.savetxt(savename2, fold_of_reference, fmt='%.10f', delimiter=' ')


# only plot one chromosome
# Take the square sub-matrix of chromosome `lg` against itself.
to_plot = fold_of_reference[chr_starts[lg]:chr_ends[lg], chr_starts[lg]:chr_ends[lg]]

f, ax = plt.subplots(1)

# Show the ratio on a log2 scale so that gains and losses are symmetric
# around 0; the colour range is clipped to +/-2 (4-fold in either direction).
heat = ax.imshow(np.log2(to_plot), interpolation='nearest', cmap=cm.coolwarm)
heat.set_clim(-2,2)
ax.set_title(dataset2 + ' / ' + dataset1)
f.colorbar(heat)

# Save before show(): figure is written to disk first, then displayed.
plt.savefig('/Users/galazkaj/Desktop/' + dataset2 + '_' + dataset1 + '_comparison.pdf')
plt.show()



