# binsWithoutDataReads.py
# Jonathan M. Galazka, Andrew D. Klocko
# 2015
# Usage: binsWithoutDataReads.py
#
# Use to calculate the fraction of heatmap cells without data (zero entries)
# as read number is decreased.

import os
import subprocess

import matplotlib.pyplot as plt
import numpy as np
from mirnylib import genome
from mirnylib import h5dict
from mirnylib import plotting
from hiclib import binnedData
from hiclib.binnedData import binnedDataAnalysis

# Datasets to loop through
datasets = ['5M', '10M', '20M', '30M', '40M', '50M']

# Heatmap resolutions processed for every dataset
resolutions = [10000, 20000, 30000, 40000, 50000]

# Hard-coded locations used by this analysis
GENOME_PATH = '/Users/galazkaj/genome/Chromosomes'
DATA_ROOT = '/Volumes/HD/HiC2/NMF39/'
UNPACK_SCRIPT = '~/mirnylib/mirnylib/h5dictUtils/h5dictToTxt.py'

# Name under which the heatmap is loaded into and exported from binnedData
HEATMAP_NAME = 'HindIII_GM_1'


def heatmap_paths(dataset, resolution):
    """Return the (raw heatmap, export file, unpacked folder) paths.

    dataset    -- dataset name, e.g. '5M'; also the sub-directory of DATA_ROOT
    resolution -- resolution as it appears in the file names
    """
    prefix = os.path.join(DATA_ROOT, dataset, dataset)
    corrected = '{0}-ic-hm-diag-{1}'.format(prefix, resolution)
    return (
        '{0}-hm-{1}.hdf5'.format(prefix, resolution),
        corrected + '.hdf5',
        corrected + '-txt',
    )


def correct_heatmap(heatmap_path, export_path):
    """Load a raw heatmap, filter and iteratively correct it, then export it.

    heatmap_path -- h5dict file holding the raw heatmap and a 'resolution' key
    export_path  -- file the corrected heatmap is exported to
    """
    # A fresh Genome is built for every heatmap, exactly as before.
    # NOTE(review): it can probably be shared across iterations -- confirm
    # that binnedData keeps no per-resolution state on it before hoisting.
    genome_db = genome.Genome(GENOME_PATH, readChrms=['#'])

    # Bin at the resolution stored in the file itself, not the one that is
    # encoded in the file name.
    resolution = int(h5dict.h5dict(heatmap_path, mode='r')['resolution'])

    # Usual heatmap processing
    BD = binnedData.binnedData(resolution, genome_db)
    BD.simpleLoad(heatmap_path, HEATMAP_NAME)
    BD.removeBySequencedCount(0.5)
    BD.removePoorRegions(cutoff=1)
    BD.truncTrans(high=0.0005)
    BD.iterativeCorrectWithoutSS(M=50)

    # Save the iteratively corrected heatmap of the entire genome.
    BD.export(HEATMAP_NAME, export_path)


def unpack_heatmap(export_path, unpacked_folder):
    """Unpack an exported h5dict to text and return its heatmap as an array.

    Raises subprocess.CalledProcessError if the unpack script fails.
    """
    command = [
        'python',
        # No shell is involved any more, so '~' has to be expanded here.
        os.path.expanduser(UNPACK_SCRIPT),
        export_path,
        unpacked_folder,
    ]
    # check_call (instead of call) stops the run when unpacking fails, so a
    # missing or stale text file is never read below.
    subprocess.check_call(command)
    return np.loadtxt(os.path.join(unpacked_folder, 'heatmap'), delimiter=' ')


def fraction_of_zeros(matrix):
    """Return the fraction of entries of *matrix* that are equal to zero.

    matrix -- array-like of numbers, of any dimensionality

    Raises ValueError if *matrix* has no entries.
    """
    matrix = np.asarray(matrix)
    if matrix.size == 0:
        raise ValueError('cannot compute the zero fraction of an empty heatmap')
    # Vectorised count; equivalent to testing every cell with `x == 0`.
    return int(np.count_nonzero(matrix == 0)) / float(matrix.size)


def main():
    """Process every dataset/resolution pair and print the zero fractions.

    The fractions are printed one per line after all heatmaps have been
    processed, in dataset-major, resolution-minor order; the list is also
    returned.
    """
    zeros = []

    # Loop through the datasets
    for dataset in datasets:
        for resolution in resolutions:
            heatmap_path, export_path, unpacked_folder = heatmap_paths(
                dataset, resolution)
            correct_heatmap(heatmap_path, export_path)
            heatmap = unpack_heatmap(export_path, unpacked_folder)
            zeros.append(fraction_of_zeros(heatmap))

    for zero in zeros:
        print(zero)
    return zeros


if __name__ == '__main__':
    main()

