# Jonathan M. Galazka
#
# Take a HiC observed/expected file and convert to a Circos links file.
#
# Usage: python ./matrix2Circos.py > circosLinksFile.txt

import sys
import numpy as np

# ---- Run configuration ----------------------------------------------------
dataset = 'dataset1'   # dataset name; used as both directory and file prefix
resolution = 50000     # width of one heatmap bin, in genomic coordinates

# All inputs for this dataset/resolution live in one directory.
res_string = str(resolution)
data_dir = '/Volumes/HD/HiC/' + dataset + '/' + dataset + '-ic-hm-' + res_string + '-txt/'

# ---- Chromosome bin boundaries --------------------------------------------
# Entries 0..6 of ChromosomeStarts are used as the first bin of each of the
# seven linkage groups; entries 1..7 are used as the matching (exclusive)
# end bins.
chr_starts_path = data_dir + 'ChromosomeStarts'
chr_starts_array = np.loadtxt(chr_starts_path, delimiter=' ')
chr_bins = chr_starts_array.astype(int)
chr_starts = chr_bins[0:7].T
chr_ends = chr_bins[1:8].T

# ---- Observed/expected heatmap --------------------------------------------
dataset_path = data_dir + 'obs-exp.txt'
array = np.loadtxt(dataset_path, delimiter=' ')

# Keep only the square sub-matrix spanning the seven linkage groups.
first_bin = chr_starts[0]
last_bin = chr_ends[6]
lg_array = array[first_bin:last_bin, first_bin:last_bin]

array_x_dim, array_y_dim = lg_array.shape

# Linkage-group (chromosome) names, in the same order as the bin ranges held
# in chr_starts / chr_ends.
LG_NAMES = ('LGI', 'LGII', 'LGIII', 'LGIV', 'LGV', 'LGVI', 'LGVII')


def _locate_bin(bin_index, starts, ends, bin_size, names=LG_NAMES):
    """Map an absolute heatmap bin index to (name, start, end), or None.

    bin_index -- index of the bin in the full (unsliced) heatmap
    starts    -- first bin of each chromosome (inclusive)
    ends      -- end bin of each chromosome (exclusive)
    bin_size  -- width of one bin in genomic coordinates
    names     -- chromosome labels, parallel to starts/ends

    Returns None when the bin lies outside every [start, end) range.
    """
    for name, first, stop in zip(names, starts, ends):
        if first <= bin_index < stop:
            begin = int(bin_index - first) * bin_size
            return name, begin, begin + bin_size
    return None


def circos_links(matrix, starts, ends, bin_size, offset=0, cutoff=3.0,
                 thickness=1, color='black', inter_only=True,
                 names=LG_NAMES):
    """Yield one Circos link line per heatmap cell that passes the cut-off.

    matrix     -- 2-D observed/expected heatmap (or a slice of it)
    starts     -- first bin of each chromosome in the full heatmap
    ends       -- exclusive end bin of each chromosome in the full heatmap
    bin_size   -- width of one bin in genomic coordinates
    offset     -- index in the full heatmap of matrix[0, 0]; needed because
                  starts/ends are absolute while matrix may be a slice
    cutoff     -- minimum log2(observed/expected) for a cell to be reported
    thickness  -- Circos line thickness, written as '<thickness>p'
    color      -- Circos line color
    inter_only -- True: only links between different chromosomes;
                  False: only links within one chromosome
    names      -- chromosome labels, parallel to starts/ends

    Cells that are NaN, infinite, zero or negative are skipped (log2 is
    undefined for them).  Lines are produced in row-major order.

    NOTE(review): every qualifying cell is reported on its own, so if the
    heatmap is symmetric each contact appears twice (once per orientation).
    """
    values = np.asarray(matrix, dtype=float)
    n_rows, n_cols = values.shape

    # Resolve each row/column index once instead of once per matrix cell.
    row_bins = [_locate_bin(r + offset, starts, ends, bin_size, names)
                for r in range(n_rows)]
    col_bins = [_locate_bin(c + offset, starts, ends, bin_size, names)
                for c in range(n_cols)]

    # log2 only where it is defined; everything else can never pass cutoff.
    usable = np.isfinite(values) & (values > 0)
    log_values = np.full(values.shape, -np.inf)
    log_values[usable] = np.log2(values[usable])

    options = 'thickness=' + str(thickness) + 'p,color=' + str(color)

    for r, c in np.argwhere(log_values >= cutoff):
        first, second = row_bins[r], col_bins[c]
        if first is None or second is None:
            # Bin is not part of any listed chromosome: nothing to report.
            continue

        is_inter = first[0] != second[0]
        if is_inter != inter_only:
            continue

        fields = [first[0], str(first[1]), str(first[2]),
                  second[0], str(second[1]), str(second[2]),
                  options]
        yield '\t'.join(fields)


# Print the Circos links file to stdout when run as a script.
if __name__ == '__main__':
    # lg_array starts at bin chr_starts[0] of the full heatmap, so pass that
    # as the offset to keep bin indices and chromosome ranges in one frame.
    for link in circos_links(lg_array, chr_starts, chr_ends, resolution,
                             offset=chr_starts[0]):
        print(link)
					
				

