# plotCentromeres.py
# Jonathan M. Galazka, Andrew D. Klocko
# 2015
#
# Usage: python ./plotCentromeres.py
#
# Will plot the average HiC signal at all 7 centromeres by stacking them.
# Will also plot entire genome and add boxes showing LG and centromere extent.
# Will also plot all 7 centromeres individually.
# Will also calculate the signal for interaction between pericentromeres and print to terminal.


import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.patches import Rectangle

def drawLgBoxes(chr_starts, chr_ends, axis):
	''' Draw an unfilled box around every chromosome (linkage group) in a HiC map.

	chr_starts -- sequence of chromosome start positions
	chr_ends   -- sequence of chromosome end positions, paired with chr_starts by index
	axis       -- matplotlib axis to which the boxes are added

	One box is drawn per (start, end) pair, so any number of chromosomes is
	handled; if the two sequences differ in length the extra entries of the
	longer one are ignored. Empty input draws nothing.
	'''
	for start, end in zip(chr_starts, chr_ends):
		width = end - start
		# The box is anchored at (start, end) and given a negative height so that it
		# spans start..end on both axes. The 0.5 offset presumably places the box
		# edges on pixel boundaries of an imshow heatmap rather than on pixel centres.
		axis.add_patch(Rectangle((start - 0.5, end - 0.5), width, -width, fill=False, alpha=1, lw=1.0))
				
def drawCenBox(cen_start, cen_end, axis):
	''' Outline one centromere on a HiC map.

	cen_start -- start position of the centromere
	cen_end   -- end position of the centromere
	axis      -- matplotlib axis that receives the outline

	Adds a single unfilled square patch covering cen_start..cen_end on both
	axes of the map.
	'''
	span = cen_end - cen_start
	# anchor at the (start, end) corner, shifted by half a unit; the negative
	# height makes the square extend back from cen_end to cen_start on the y axis
	anchor = (cen_start - 0.5, cen_end - 0.5)
	outline = Rectangle(anchor, span, -span, fill=False, alpha=1, lw=1.0)
	axis.add_patch(outline)
		 	


# hic dataset resolution, and the hic dataset(s) to process
resolution = 40000
datasets = ['NMF39_1']

# get chromosome starts and stops from the chromosomeStarts file of the first dataset
resolution_string = str(resolution)
chr_starts_path = ''.join([
	'/Volumes/HD/HiC2/', datasets[0], '/', datasets[0],
	'_ic-', resolution_string, '-diag-txt/chromosomeStarts',
])
chr_starts_array = np.loadtxt(chr_starts_path, delimiter=' ')
# entry i is used as the start of chromosome i and entry i + 1 as its end
chr_bins = chr_starts_array.astype(int)
chr_starts = chr_bins[0:7].T
chr_ends = chr_bins[1:8].T

# centromere start and stop bins for the 7 chromosomes, produced by manual inspection
c_starts = np.array([81, 261, 364, 499, 652, 860, 949])
c_ends = np.array([111, 291, 394, 529, 682, 890, 979])

# Keyword arguments that hide every tick mark and tick label on an axis.
# tick_params expects booleans here; the string 'off' is a non-empty (truthy)
# value, so passing it leaves the ticks visible on current matplotlib.
hide_ticks = dict(axis='both', which='both',
	bottom=False, top=False, left=False, right=False,
	labelbottom=False, labeltop=False, labelleft=False, labelright=False)

# loop through all datasets
# NOTE(review): chr_starts/chr_ends are loaded from datasets[0] only; confirm they apply to every dataset.
for dataset in datasets:

	# set-up plots: one panel per centromere, plus one whole-genome map
	n_cen = len(c_starts)
	# squeeze=False always returns a 2-D grid of axes; row 0 holds the centromere panels
	f1, axes_grid = plt.subplots(nrows=1, ncols=n_cen, squeeze=False)
	axes1 = axes_grid[0]
	f1.suptitle(dataset + ' 7 centromeres')
	f, ax = plt.subplots(1)
	f.suptitle(dataset + ' location of centromeres')

	# load dataset
	datasetpath = '/Volumes/HD/HiC2/' + dataset + '/' + dataset + '_ic-' + resolution_string + '-diag-txt/' + dataset + '_observed_expected_3_40000.txt'
	array = np.loadtxt(datasetpath, delimiter=' ')
	array[array == 0.0000000000] = np.nan # convert 0.0000 to nan so that they don't mess up analysis. zeros are dropped bins, not real.

	# plot hic data on a log2 scale
	heat = ax.imshow(np.log2(array), interpolation='nearest', cmap=cm.coolwarm)
	heat.set_clim(-3,3)

	drawLgBoxes(chr_starts, chr_ends, ax) # draw boxes around chromosomes

	# pull the square sub-matrix of each centromere out of the map, box it on the
	# whole-genome plot, show it in its own panel, and keep it for averaging
	cen_arrays = list()
	for i, (cen_start, cen_end) in enumerate(zip(c_starts, c_ends)):

		centromere = array[cen_start:cen_end, cen_start:cen_end]

		drawCenBox(cen_start, cen_end, ax)

		heatmap = axes1[i].imshow(np.log2(centromere), cmap=cm.coolwarm, interpolation='nearest')
		heatmap.set_clim(-3,3)
		axes1[i].tick_params(**hide_ticks)
		axes1[i].set_title('LG ' + str(i + 1))

		cen_arrays.append(centromere)

	# average the centromeres position by position, ignoring nan (dropped) bins.
	# np.stack requires every centromere sub-matrix to have the same shape.
	mean_array = np.nanmean(np.stack(cen_arrays), axis=0)

	# plot average centromere
	f2, ax2 = plt.subplots(1)
	f2.suptitle(dataset + ' average centromere')
	heat = ax2.imshow(np.log2(mean_array), interpolation='nearest', cmap=cm.coolwarm)
	heat.set_clim(-2,2)
	f2.colorbar(heat)
	ax2.tick_params(**hide_ticks)

	# pericentromere region of the averaged map; the same bounds are used for the
	# box drawn on the plot and for the value printed below, so the two always agree
	peri_rows = slice(17, 21)
	peri_cols = slice(8, 12)

	# add box around pericentromere (anchored at the lower-left corner, negative height)
	box_width = peri_cols.stop - peri_cols.start
	box_height = -(peri_rows.stop - peri_rows.start)
	ax2.add_patch(Rectangle((peri_cols.start - 0.5, peri_rows.stop - 0.5), box_width, box_height, fill=False, alpha=1, lw=1.0))

	# Uncomment to save figure
	#savename = '/Users/galazkaj/Desktop/' + dataset + '_ave_cen.pdf'
	#f2.savefig(savename)

	# calculate value at pericentromere
	# NOTE(review): np.mean returns nan if any averaged bin in this region is nan.
	pericen = mean_array[peri_rows, peri_cols]

	print(dataset)
	print('Pericen contacts: ' + str(np.mean(pericen)))

plt.show()
		
		
