# K9RandomizeInter.py
# Jonathan M. Galazka, Andrew D. Klocko
# 2015
# Usage: python ./K9RandomizeInter.py


import numpy as np
import random as random
import matplotlib.pyplot as plt

def getChrStarts(resolution):
	''' Load the chromosome boundary table of the WT (NMF39) data set for the
	given resolution and split it into per-chromosome starts and ends.

	resolution: value inserted (via str()) into the path of the
		space-delimited ChromosomeStarts file that is read.

	Returns a tuple (chr_starts, chr_ends) of integer arrays: chr_starts holds
	entries 0-6 of the table and chr_ends holds entries 1-7, so the end of
	each chromosome is the start of the next one.
	NOTE(review): the values are presumably bin indices at the given
	resolution -- confirm against the file format.
	'''
	starts_file = '/Volumes/HD/HiC2/NMF39/NMF39_ic-' + str(resolution) + '-diag-txt/ChromosomeStarts'

	# Cast once, then take the two overlapping windows of seven boundaries.
	boundaries = np.loadtxt(starts_file, delimiter=' ').astype(int)
	first_seven = boundaries[0:7]
	next_seven = boundaries[1:8]

	return np.transpose(first_seven), np.transpose(next_seven)

def getLG(position, resolution, chr_starts, chr_ends):
	''' Get the chromosome (linkage group) that contains a position.

	position: index to look up; it is compared directly against the entries
		of chr_starts and chr_ends.
	resolution: accepted so existing callers keep working; it is not needed
		for the lookup.
	chr_starts, chr_ends: indexable sequences holding, for each of the seven
		linkage groups in order, the inclusive start and the exclusive end.

	Returns the name ('LGI' ... 'LGVII') of the first linkage group whose
	half-open interval [start, end) contains position, or the string 'None'
	(not the None object) when no interval matches.
	'''
	lg_names = ('LGI', 'LGII', 'LGIII', 'LGIV', 'LGV', 'LGVI', 'LGVII')

	# Check the intervals in linkage-group order and stop at the first hit.
	for index, lg_name in enumerate(lg_names):
		if chr_starts[index] <= position < chr_ends[index]:
			return lg_name

	# Callers compare the returned strings, so "no match" stays a string too.
	return 'None'
	
resolution = 10000
chr_starts, chr_ends = getChrStarts(resolution)

# Get K9me3 ChIP data and calculate an enrichment threshold of 2x the median value
k9_path = '/Volumes/HD/HiC2/ChIP_tracks/h3k9_klocko/k9_all_chr_10k_array.txt'
k9_array = np.transpose(np.loadtxt(k9_path))
k9_median = np.median(k9_array)
k9_median_plus = k9_median + (k9_median * 1)

# Binarize the k9me3 array: values below 2x the median become 0, every other
# value becomes 1. The test is kept as "< threshold -> 0" so that anything
# that does not compare as below the threshold (NaN included) maps to 1.
k9_array = np.where(k9_array < k9_median_plus, 0.0, 1.0)

# make an array of all positions where k9me3 is enriched
k9_true_array = np.flatnonzero(k9_array == 1)

# Repeat n_iterations times: select n_links random starting positions from the
# positions where k9me3 is enriched, and pair each one with a randomly selected
# ending position that lies on a different chromosome.
n_iterations = 10000
n_links = 3227
ratios = list()
for i in range(0, n_iterations):

	# choose the random starting positions (with replacement)
	random_k9_array_inter = np.random.choice(k9_true_array, size=n_links, replace=True)

	# pick random endings
	random_inter_endings = list()
	for start_position in random_k9_array_inter:
		# The starting LG does not change between retries, so look it up once.
		start_lg = getLG(start_position, resolution, chr_starts, chr_ends)

		# Redraw until the ending lands on a different chromosome than the start.
		while True:
			random_ending = random.randint(0, k9_array.size - 1) # both bounds inclusive
			end_lg = getLG(random_ending, resolution, chr_starts, chr_ends)
			if(start_lg != end_lg):
				random_inter_endings.append(random_ending)
				break

	# go through the randomly generated links and determine whether they
	# terminate in an h3k9me3 enriched region.
	inter_k9_to_k9 = int(np.count_nonzero(k9_array[random_inter_endings] == 1))
	inter_k9_to_other = len(random_inter_endings) - inter_k9_to_k9

	if(inter_k9_to_other == 0):
		# Every link ended in an enriched region; record an infinite ratio
		# instead of aborting the whole run with a ZeroDivisionError.
		ratio = float('inf')
	else:
		ratio = float(inter_k9_to_k9) / float(inter_k9_to_other)
	ratios.append(ratio)

for ratio in ratios:
	print(ratio)


