# Jonathan Galazka
# Usage: python ./tadbitModel.py
# Be sure to have the following directory structure in place before starting:
# Root modeling folder: /Volumes/HD/HiC/modeling/
# 50 kb modeling folder: /Volumes/HD/HiC/modeling/LG_modeling_50k/
# Dataset folder for each dataset: /Volumes/HD/HiC/modeling/LG_modeling_50k/dataset1
# LG folder for each LG: /Volumes/HD/HiC/modeling/LG_modeling_50k/dataset1/LGI
# Use with single-chromosome Hi-C datasets, one file per dataset and LG, placed at e.g.: '/Volumes/HD/HiC/modeling/LG_modeling_50k/dataset1_LGI_50k.txt'

import os
from datetime import datetime

from pytadbit import Chromosome
from pytadbit.imp.impoptimizer import IMPoptimizer

# Dataset names to process. Each name is used to build the input matrix file
# name and the output folder path.
datasets = [
    'dataset1',
]

# LG identifiers (Roman numerals) modeled for every dataset.
lgs = [
    'I', 'II', 'III', 'IV', 'V', 'VI', 'VII',
]

# Modeling parameters passed to model_region() through its `config` argument.
# These need to be determined empirically for your dataset.
# NOTE(review): the meaning and units of each key are defined by TADbit, not
# by this script -- confirm against the documentation of the installed version.
optpar = dict(
    kforce=5,
    lowfreq=-0.8,
    upfreq=0.2,
    maxdist=1000,
    scale=0.01,
    reference='WT',
)

# Root folder that holds the per-LG input matrices and the per-dataset output
# folders. Defined once so that input and output paths cannot drift apart.
modeling_root = '/Volumes/HD/HiC/modeling/LG_modeling_50k'

# Build and save 3D models for every LG of every dataset.
for dataset in datasets:

    for lg in lgs:

        # Input matrix for this dataset/LG: <root>/<dataset>_LG<lg>_50k.txt
        dataset_path = os.path.join(modeling_root, dataset + '_LG' + lg + '_50k.txt')

        # Output locations for this dataset/LG: <root>/<dataset>/LG<lg>[/50k.models]
        cmm_directory = os.path.join(modeling_root, dataset, 'LG' + lg)
        save_name = os.path.join(cmm_directory, '50k.models')

        # Create the output folder *before* modeling. The results are only
        # written after the long model_region() run, so a missing folder
        # would otherwise surface only once all of the modeling work is done.
        # (isdir + makedirs instead of exist_ok=True keeps this runnable on
        # Python 2 as well as Python 3.)
        if not os.path.isdir(cmm_directory):
            os.makedirs(cmm_directory)

        # Load the Hi-C matrix as the only experiment of a fresh Chromosome
        # object and normalize it. The variable is named `chrom` rather than
        # `chr` so the builtin chr() is not shadowed.
        chrom = Chromosome(lg)
        exp_name = 'WT'
        chrom.add_experiment(exp_name, exp_type='Hi-C', identifier=exp_name,
                             resolution=50000, hic_data=dataset_path)
        exp = chrom.experiments[0]
        exp.normalize_hic()

        print(lg + ' modeling started at: ')
        print(datetime.now())

        # Request 500 models and keep 100 of them (n_models / n_keep), using
        # 4 CPUs and the empirically chosen parameters in `optpar`.
        models = exp.model_region(start=1, end=None, n_models=500, n_keep=100,
                                  n_cpus=4, keep_all=False, config=optpar)

        print(lg + ' modeling finished at: ')
        print(datetime.now())

        # Persist the model set, then write model number 0 as a .cmm file
        # into the same folder.
        models.save_models(save_name)
        models.write_cmm(directory=cmm_directory, model_num=0)
		


