# measureCenCore.py
# Jonathan M. Galazka, Andrew D. Klocko
# 2015
# Usage: python ./measureCenCore.py
#
# Use to produce plots that estimate the location of the structural core of centromeres.


import matplotlib.pyplot as plt
import numpy as np

def getChrStarts(resolution, chr_starts_path=None):
	''' Get the locations of chromosome starts and ends for a given resolution.

	resolution: value inserted into the default file path (presumably the
	bin size in bp used to build the Hi-C matrix -- confirm against the data).
	chr_starts_path: optional path to a ChromosomeStarts text file. When None,
	the NMF39 file for the given resolution is used.

	Returns (chr_starts, chr_ends) as integer numpy arrays. chr_starts holds
	entries 0-6 of the file and chr_ends holds entries 1-7, so chr_ends[i] is
	the file entry that immediately follows chr_starts[i].
	'''
	if chr_starts_path is None:
		chr_starts_path = '/Volumes/HD/HiC2/NMF39/NMF39_ic-' + str(resolution) + '-diag-txt/ChromosomeStarts'

	# Convert to int once; both slices below are views of the same array.
	boundaries = np.loadtxt(chr_starts_path, delimiter=' ').astype(int)
	chr_starts = np.transpose(boundaries[0:7])
	chr_ends = np.transpose(boundaries[1:8])

	return chr_starts, chr_ends
	
def movingaverage(interval, window_size):
	''' Calculate the sliding average of the array, interval, using a window of
	window_size points.

	Returns an array with the same length as interval (when interval is at
	least as long as the window). The average is computed by convolving with
	a flat window in 'same' mode, which treats values beyond the ends of
	interval as zero, so points within half a window of either edge are
	pulled toward zero.
	'''
	# Flat kernel whose weights sum to 1, i.e. an unweighted mean.
	window = np.ones(int(window_size))/float(window_size)
	return np.convolve(interval, window, 'same')


datasets = ['NMF39','N3944','N5580'] # datasets to analyze
colors = ['black','blue','red'] # line colors, paired by position with datasets above
resolution = 10000 # resolution
# chromosome to analyze, used as a 0-based index into chr_starts/chr_ends,
# so 1 selects the second entry.
# NOTE(review): confirm this is the intended chromosome.
lg = 1
# Rows of the chromosome matrix to extract as line profiles. Per the original
# notes, these run just outside the centromere and capture the signal for
# insulation of the centromere.
row_indices = (100, 150)
window_size = 10 # sliding-average window, in bins
chr_starts, chr_ends = getChrStarts(resolution) # get chr starts and ends

# The same bin range selects both rows and columns of the chromosome block.
lg_slice = slice(chr_starts[lg], chr_ends[lg])

# set-up plot for heatmaps: one panel per dataset. squeeze=False keeps axes
# a 2-D array even when there is only a single dataset.
f1, axes = plt.subplots(ncols=len(datasets), nrows=1, squeeze=False)
ax_array = axes.reshape(-1)

# set-up plot for line plot
f2, ax1 = plt.subplots(1, figsize=(7,2.5))

# loop through datasets
for heat_ax, dataset, color in zip(ax_array, datasets, colors):

	# load dataset and get chromosome array
	datasetpath = '/Volumes/HD/HiC2/' + dataset + '/' + dataset + '_ic-' + str(resolution) + '-diag-txt/obs-exp.txt'
	array = np.loadtxt(datasetpath)
	lg_array = array[lg_slice, lg_slice]

	# plot heatmap of the log2 values, clipped to [-2, 2]
	heat = heat_ax.imshow(np.log2(lg_array), cmap='coolwarm', interpolation='nearest')
	heat.set_clim(-2,2)

	# plot the smoothed profile of each selected row in this dataset's color
	for row in row_indices:
		row_av = movingaverage(lg_array[row,:], window_size)
		ax1.plot(row_av, color=color, alpha=0.7, lw=2)

# restrict the line plot to bins 55-200 (set once, after all lines are drawn)
ax1.set_xlim(55,200)


#f2.savefig('/Users/galazkaj/Desktop/wiggle.pdf')
plt.show()

