# binsWithoutDataRes.py
# Jonathan M. Galazka, Andrew D. Klocko
# 2015
# Usage: python ./binsWithoutDataRes.py
#
# Calculates the percentage of heatmap cells without data (zero-valued cells)
# for each dataset as the resolution is increased.

import matplotlib.pyplot as plt
import numpy as np
import subprocess
from mirnylib import genome
from mirnylib import h5dict
from mirnylib import plotting
from hiclib import binnedData
from hiclib.binnedData import binnedDataAnalysis

# Percentage of zero-valued heatmap cells, one entry appended per processed
# heatmap (every dataset at every resolution).
zeros = []

# Names of the datasets to loop through.
datasets = [
    'NMF39_1', 'NMF39_2',
    'N3944_1', 'N3944_2',
    'N5580_1', 'N5580_2',
]
# Counter advanced once per processed heatmap; starts at 1.
counter = 1

# Loop through the datasets and, for each one, through a series of decreasing
# bin sizes (i.e. increasing resolution). For every heatmap: run the filtering
# and iterative-correction steps, export the corrected heatmap, unpack it to
# text, and record the percentage of cells that are zero.

# Bin sizes to process; identical for every dataset, so defined once.
resolutions = [110000, 100000, 90000, 80000, 70000, 60000, 50000, 40000, 20000]

for dataset in datasets:
    # All files belonging to one dataset live in the same folder
    dataset_dir = '/Volumes/HD/HiC2/' + dataset + '/'

    for resolution in resolutions:

        # Set genome path.
        # NOTE(review): rebuilt for every heatmap because the object is handed
        # to binnedData together with a resolution; it could probably be
        # hoisted out of the loops if binnedData keeps no per-resolution state
        # on it -- confirm before changing.
        genome_db = genome.Genome('/Users/galazkaj/genome/Chromosomes_complete_fixedinversion', readChrms=['#'])

        # Set raw heatmap path using the current dataset
        heatmap_path = dataset_dir + dataset + '-hm-' + str(resolution) + '.hdf5'

        # Set export path using the current dataset
        export_path = dataset_dir + dataset + '_ic-hm-' + str(resolution) + '-diag.hdf5'

        # Folder that receives the corrected heatmap unpacked as text files
        unpacked_folder = dataset_dir + dataset + '_ic-' + str(resolution) + '-diag-txt'

        # Usual heatmap processing. The resolution stored in the file is kept
        # in its own variable instead of overwriting the loop variable, so all
        # three paths above are always named from the same value.
        raw_heatmap = h5dict.h5dict(heatmap_path, mode='r')
        heatmap_resolution = int(raw_heatmap['resolution'])
        BD = binnedData.binnedData(heatmap_resolution, genome_db)
        BD.simpleLoad(heatmap_path, 'HindIII_GM_1')
        BD.removeBySequencedCount(0.5)
        BD.removePoorRegions(cutoff=1)
        BD.truncTrans(high=0.0005)
        BD.iterativeCorrectWithoutSS(M=50)

        # Use this to save the iteratively corrected heatmap of entire genome.
        BD.export('HindIII_GM_1', export_path)

        # Save heatmap as .txt file to folder. shell=True is kept because the
        # script path relies on the shell expanding '~'. check_call raises
        # CalledProcessError on a non-zero exit status, so a failed unpack
        # stops the run instead of letting a stale or missing text file be
        # read below.
        h5dict_unpack_command = 'python ~/mirnylib/mirnylib/h5dictUtils/h5dictToTxt.py ' + export_path + ' ' + unpacked_folder
        subprocess.check_call(h5dict_unpack_command, shell=True)

        # Open heatmap
        heatmap_file = unpacked_folder + '/heatmap'
        array = np.loadtxt(heatmap_file, delimiter=' ')

        # Total number of cells. ndarray.size works for any number of
        # dimensions, whereas unpacking .shape into two names fails for the
        # 1-D/0-D arrays loadtxt returns for single-row or single-value files.
        num_cells = array.size
        if num_cells == 0:
            raise ValueError('Heatmap contains no cells: ' + heatmap_file)

        nonzeros = np.count_nonzero(array)  # number of non-zero cells
        # float() keeps this a true division under Python 2 as well
        nonzeros_ratio = nonzeros / float(num_cells)  # ratio of non-zero cells
        percent_zeros = 100.0 - (100.0 * nonzeros_ratio)  # percent zero cells
        zeros.append(percent_zeros)

        # Not read anywhere else in the visible script; kept so the
        # module-level state stays as before.
        counter += 1

# Output: one percentage per line, in the order the heatmaps were processed
for percent_zero in zeros:
    print(percent_zero)







