# 02_filterBatch.py
# Jonathan M Galazka, Andrew D Klocko
# 2015
#
# A simple modification of the Mirny Lab 02_fragment_filtering.py script available
# at https://bitbucket.org/mirnylab/hiclib
#
# Requires Mirny Lab scripts available at https://bitbucket.org/mirnylab/hiclib and all of their dependencies.
# 
# Usage: python ./02_filterBatch.py
# Use after 01_mapBatch.py to filter datasets and produce uncorrected heatmaps.
# Be sure to use the same directory structure as in 01_mapBatch.py.


from mirnylib import genome
from hiclib import fragmentHiC
import subprocess

# Names of the datasets to process. Each name is also the name of the
# sub-directory under DATA_ROOT that holds that dataset's files.
datasets = ['dataset1','dataset2']

# Root directory containing one sub-directory per dataset.
DATA_ROOT = '/Volumes/HD/HiC/'

# Directory handed to genome.Genome as the location of the chromosome files.
GENOME_DIR = '/Users/galazkaj/genome/chromosomes/'

# Values passed as `resolution` to saveHeatmap, one heatmap file per value
# (presumably bin sizes in bp -- confirm against the hiclib documentation).
resolutions = [50000, 40000, 20000, 10000]

# The genome arguments do not depend on the dataset, so the object is built
# once here instead of being reloaded on every pass through the loop.
# NOTE(review): this shares one Genome instance across all HiCdataset objects;
# the upstream hiclib example scripts appear to do the same -- confirm.
genome_db = genome.Genome(GENOME_DIR, readChrms=['#'])

for dataset in datasets:

    # Every input and output file of this dataset lives in this directory.
    dataset_dir = DATA_ROOT + dataset + '/'

    fragment_dataset_path = dataset_dir + 'fragment_dataset.hdf5'
    mapped_reads_path = dataset_dir + 'mapped_reads.hdf5'

    # mode='w' presumably starts a fresh fragment dataset file, replacing any
    # existing one at that path -- confirm against hiclib.
    fragments = fragmentHiC.HiCdataset(
        filename=fragment_dataset_path,
        genome=genome_db,
        maximumMoleculeLength=1000,
        mode='w')

    # Load the mapped reads written by 01_mapBatch.py.
    fragments.parseInputData(dictLike=mapped_reads_path, enzymeToFillRsites='HindIII')

    # Filtering steps, applied in this order; see the hiclib documentation
    # for the exact meaning of each filter and its parameters.
    fragments.filterRsiteStart(offset=5)
    fragments.filterDuplicates()
    fragments.filterLarge()
    fragments.filterExtreme(cutH=0.005, cutL=0.005)

    # Record the filtering statistics and save the metadata next to the data.
    fragments.writeFilteringStats()
    fragments.printMetadata(saveTo=dataset_dir + dataset + '-filtering-metadata.txt')

    # Save one heatmap per resolution as
    # <dataset_dir>/<dataset>-hm-<resolution>.hdf5
    for resolution in resolutions:

        heatmap_path = dataset_dir + dataset + '-hm-' + str(resolution) + '.hdf5'

        fragments.saveHeatmap(
            heatmap_path,
            resolution=resolution,
            countDiagonalReads="Twice",
            useWeights=False,
            useFragmentOverlap=True,
            maxBinSpawn=10)