import os
# Load the workflow settings from config.json; they are read below through the
# global `config` mapping (SAMPLESDIR, SAMPLES, EXPSDIR, EXPS, ASSEMBLY, ...).
configfile: "config.json"
# Run every `shell:` command of this workflow with bash.
shell.executable("/bin/bash")

# Files requested by `rule all`. The list is filled in below depending on
# which inputs the config supplies (H3K27ac samples and/or expression gene
# lists) and on the selected genome assembly.
TARGETS = []

if config["SAMPLESDIR"] and config["SAMPLES"]:
	# Samples were supplied: request the per-sample outputs for the assembly
	# and let the downstream rules read the HDF5 files built from the samples.
	if config["ASSEMBLY"] in ('hg38', 'mm10'):
		TARGETS.extend(expand(
			[
				"margeoutput/relativeRP/{sample}_all_relativeRP.txt",
				"margeoutput/HDF5/H3K27ac_RP.h5",
				"margeoutput/HDF5/H3K27ac_UDHS.h5",
				"margeoutput/k27acUDHScounts/{sample}_Strength.txt",
			],
			sample=config["SAMPLES"].strip().split(),
		))
	elif config["ASSEMBLY"] in ('hg19', 'mm9'):
		# Only the plain RP tables are requested for these assemblies.
		TARGETS.extend(expand(
			["margeoutput/RP/{sample}_all_RP.txt"],
			sample=config["SAMPLES"].strip().split(),
		))
	newRPHDF = "margeoutput/HDF5/H3K27ac_RP.h5"
	newDHSHDF = "margeoutput/HDF5/H3K27ac_UDHS.h5"
else:
	# No samples: the regression / enhancer rules get the placeholder HDF5
	# files written by the emptyrphdf5 / emptyudhshdf5 rules instead.
	newRPHDF = "margeoutput/HDF5/H3K27ac_RP_Empty.h5"
	newDHSHDF = "margeoutput/HDF5/H3K27ac_UDHS_Empty.h5"

if config["EXPSDIR"] and config["EXPS"] and config["ASSEMBLY"] in ('hg38', 'mm10'):
	# Expression gene lists were supplied: request the regression and
	# enhancer-prediction outputs for each of them.
	TARGETS.extend(expand(
		[
			"margeoutput/regression/{exp}_target_regressionInfo.txt",
			"margeoutput/cisRegions/{exp}_enhancer_prediction.txt",
		],
		exp=config["EXPS"].strip().split(),
	))


# Per-assembly reference data, resolved under config["REFdir"].
#   GENOME        - value passed to `macs2 callpeak -g` ("hs" or "mm")
#   CHROMESIZE    - <assembly>.len, given to bedClip / bedGraphToBigWig / RPCalc
#   GENEREFERENCE - <assembly>_refseq_TSS.bed, given to the scripts as TSS/gene reference
#   RPHDF, K27acHDF, MEDIANRP, ANNOTATION, UDHS
#                 - precomputed reference tables; only shipped for hg38 and mm10
# For hg19 / mm9 the precomputed tables are set to '' (only the RP targets are
# requested for those assemblies). UDHS is set as well so that every name a
# rule's shell command interpolates is defined for every supported assembly.
if config["ASSEMBLY"] == 'hg38':
	GENOME="hs"
	CHROMESIZE = os.path.join(config["REFdir"],'hg38.len')
	GENEREFERENCE = os.path.join(config["REFdir"],'hg38_refseq_TSS.bed')
	RPHDF = os.path.join(config["REFdir"],'hg38_RP.h5')
	K27acHDF = os.path.join(config["REFdir"],'hg38_UDHS_H3K27ac.h5')
	MEDIANRP = os.path.join(config["REFdir"],'hg38_Medians.txt')
	ANNOTATION = os.path.join(config["REFdir"],'SupTable1_HumanH3K27ac_Description.txt')
	UDHS = os.path.join(config["REFdir"],'hg38_UDHS.bed')

elif config["ASSEMBLY"] == 'hg19':
	GENOME="hs"
	CHROMESIZE = os.path.join(config["REFdir"],'hg19.len')
	GENEREFERENCE = os.path.join(config["REFdir"],'hg19_refseq_TSS.bed')
	RPHDF = ''
	K27acHDF = ''
	MEDIANRP = ''
	ANNOTATION = ''
	UDHS = ''

elif config["ASSEMBLY"] == 'mm10':
	GENOME="mm"
	CHROMESIZE = os.path.join(config["REFdir"],'mm10.len')
	GENEREFERENCE = os.path.join(config["REFdir"],'mm10_refseq_TSS.bed')
	RPHDF = os.path.join(config["REFdir"],'mm10_RP.h5')
	K27acHDF = os.path.join(config["REFdir"],'mm10_UDHS_H3K27ac.h5')
	MEDIANRP = os.path.join(config["REFdir"],'mm10_Medians.txt')
	ANNOTATION = os.path.join(config["REFdir"],'SupTable2_mouseH3K27ac_Description.txt')
	UDHS = os.path.join(config["REFdir"],'mm10_UDHS.bed')

elif config["ASSEMBLY"] == 'mm9':
	GENOME="mm"
	CHROMESIZE = os.path.join(config["REFdir"],'mm9.len')
	GENEREFERENCE = os.path.join(config["REFdir"],'mm9_refseq_TSS.bed')
	RPHDF = ''
	K27acHDF = ''
	MEDIANRP = ''
	ANNOTATION = ''
	UDHS = ''

else:
	# Fail here with a clear message instead of letting the workflow die later
	# with a NameError on CHROMESIZE / GENOME while the rules are being parsed.
	raise ValueError(
		"Unsupported ASSEMBLY {!r} in config; expected one of: hg38, hg19, mm10, mm9".format(config["ASSEMBLY"])
	)

# Extra flag interpolated into the rpregress.py command line of the regression
# rule: '--gname2' when config["ID"] is 'GeneSymbol', otherwise nothing.
GNAME2 = '--gname2' if config["ID"] == 'GeneSymbol' else ''

def _marge_script(filename):
	"""Return the path of *filename* inside the configured MARGE directory."""
	return os.path.join(config["MARGEdir"], filename)


# Helper scripts that the rules below invoke with python3.
RPCALC = _marge_script('RPCalc.py')
RELATIVERPCACL = _marge_script('relativeRPCalc.py')
ReadsCountInUDHS = _marge_script('H3K27ac_readsCount.py')
RPREGRESS = _marge_script('rpregress.py')
ENHANCERIDENTIFIER = _marge_script('enhancerIdentifier.py')
createHDF5 = _marge_script('createHDF5.py')
createEmptyHDF5 = _marge_script('createEmptyHDF5.py')

# Default target rule: requests every file collected in TARGETS.
rule all:
	input: TARGETS

# Run MACS2 callpeak on one sample's BAM file (with -B, so bedGraph output is
# written) and keep the treatment pileup track. MACS2's stderr goes to the log.
rule bam2bdg:
	input:
		bam = os.path.join(config["SAMPLESDIR"], "{sample}.bam")
	output:
		"margeoutput/profile/{sample}_treat_pileup.bdg"
	log:
		"margeoutput/logs/{sample}_bam2bdg.log"
	params:
		macs = config["tools"]["MACS2"],
		dir = "margeoutput/profile",
		name = '{sample}'
	shell:
		"{params.macs} callpeak -t {input} -f BAM --SPMR -B -q 0.01 "
		"--keep-dup 1 --extsize 146 --nomodel -g {GENOME} "
		"--outdir {params.dir} -n {params.name} 2> {log}"

# Run bedClip on the sample's pileup bedGraph, using the assembly's
# CHROMESIZE (<assembly>.len) file as its second argument.
rule bdgclip:
	input:
		bdg = "margeoutput/profile/{sample}_treat_pileup.bdg",
		chromsizes = CHROMESIZE
	output:
		"margeoutput/profile/{sample}_treat_pileup.clip.bdg"
	params:
		bedclip = config["tools"]["bedClip"]
	shell:
		"{params.bedclip} {input[0]} {input[1]} {output}"

# Convert the clipped pileup bedGraph to bigWig with bedGraphToBigWig,
# using the assembly's CHROMESIZE file.
rule bdg2bw:
	input:
		clipped = "margeoutput/profile/{sample}_treat_pileup.clip.bdg",
		chromsizes = CHROMESIZE
	output:
		"margeoutput/profile/{sample}_treat_pileup.bw"
	params:
		bdg2bw = config["tools"]["bedGraphToBigWig"]
	shell:
		"{params.bdg2bw} {input[0]} {input[1]} {output}"

# Run RPCalc.py on a sample's bigWig to produce <sample>_all_RP.txt.
# The bigWigSummary executable is declared as an input and handed to the
# script via --bwsum; the script's stderr goes to the log.
rule rp:
	input:
		bw = "margeoutput/profile/{sample}_treat_pileup.bw",
		chromsizes = CHROMESIZE,
		tss = GENEREFERENCE,
		bwsum = config["tools"]["bigWigSummary"]
	output:
		"margeoutput/RP/{sample}_all_RP.txt"
	log:
		"margeoutput/logs/{sample}_rp.log"
	params:
		prefix = "margeoutput/RP/{sample}",
		assembly = config["ASSEMBLY"]
	shell:
		"python3 {RPCALC} -g {params.assembly} -n {params.prefix} "
		"-b {input[0]} --cs {input[1]} --tss {input[2]} --bwsum {input[3]} "
		"2> {log}"

# Run relativeRPCalc.py on a sample's RP table together with the assembly's
# MEDIANRP file to write the relative-RP table.
rule relrp:
	input:
		rp = "margeoutput/RP/{sample}_all_RP.txt"
	output:
		"margeoutput/relativeRP/{sample}_all_relativeRP.txt"
	shell:
		"python3 {RELATIVERPCACL} {input[0]} {MEDIANRP} {output}"

# Run H3K27ac_readsCount.py with the assembly's UDHS BED file and the sample's
# bigWig to write <sample>_Strength.txt. The bigWigAverageOverBed executable
# is declared as an input and passed to the script as its last argument.
rule k27acInUDHS:
	input:
		bw = "margeoutput/profile/{sample}_treat_pileup.bw",
		bwavg = config["tools"]["bigWigAverageOverBed"]
	output:
		"margeoutput/k27acUDHScounts/{sample}_Strength.txt"
	params:
		prefix = "margeoutput/k27acUDHScounts/{sample}"
	shell:
		"python3 {ReadsCountInUDHS} {UDHS} {params.prefix} {input[0]} {input[1]}"


# Pack the RP tables of all configured samples into one HDF5 file with
# createHDF5.py (called with -i 4).
rule rphdf5generation:
	input:
		expand(
			"margeoutput/RP/{sample}_all_RP.txt",
			sample=config["SAMPLES"].strip().split(),
		)
	output: "margeoutput/HDF5/H3K27ac_RP.h5"
	shell: "python3 {createHDF5} -i 4 -f {input} -n {output}"

# Pack the UDHS strength tables of all configured samples into one HDF5 file
# with createHDF5.py (called with -i 1).
rule udhshdf5generation:
	input:
		expand(
			"margeoutput/k27acUDHScounts/{sample}_Strength.txt",
			sample=config["SAMPLES"].strip().split(),
		)
	output: "margeoutput/HDF5/H3K27ac_UDHS.h5"
	shell: "python3 {createHDF5} -i 1 -f {input} -n {output}"

# Write the placeholder RP HDF5 file with createEmptyHDF5.py; this is the file
# newRPHDF points to when no samples are configured.
rule emptyrphdf5:
	output: "margeoutput/HDF5/H3K27ac_RP_Empty.h5"
	shell: "python3 {createEmptyHDF5} -n {output}"

# Write the placeholder UDHS HDF5 file with createEmptyHDF5.py; this is the
# file newDHSHDF points to when no samples are configured.
rule emptyudhshdf5:
	output: "margeoutput/HDF5/H3K27ac_UDHS_Empty.h5"
	shell: "python3 {createEmptyHDF5} -n {output}"

# Run rpregress.py for one expression gene list ({exp}.txt), giving it the
# reference RP HDF5 plus the user-derived (or placeholder) RP HDF5 as a
# comma-separated pair. The script's stderr goes to the log.
rule regression:
	input:
		genes = os.path.join(config["EXPSDIR"], "{exp}.txt"),
		user_rp = newRPHDF
	output:
		"margeoutput/regression/{exp}_target_regressionInfo.txt"
	log:
		"margeoutput/logs/{exp}_regression.log"
	params:
		prefix = "margeoutput/regression/{exp}",
		assembly = config["ASSEMBLY"],
		exptype = config["EXPTYPE"]
	shell:
		"python3 {RPREGRESS} -e {input[0]} -r {RPHDF},{input[1]} "
		"-n {params.prefix} -g {params.assembly} --exptype {params.exptype} "
		"{GNAME2} -s -a {ANNOTATION} -m {GENEREFERENCE} 2> {log}"

# Run enhancerIdentifier.py for one expression gene list, using the output of
# the regression rule and the reference + user-derived (or placeholder) RP and
# H3K27ac/UDHS HDF5 files, each passed as a comma-separated pair.
# The script's stderr goes to the Snakemake log under margeoutput/logs.
rule enhancerPrediction:
	input:
		genes = os.path.join(config["EXPSDIR"], "{exp}.txt"),
		regression = "margeoutput/regression/{exp}_target_regressionInfo.txt",
		user_rp = newRPHDF,
		user_udhs = newDHSHDF
	output:
		"margeoutput/cisRegions/{exp}_enhancer_prediction.log",
		"margeoutput/cisRegions/{exp}_enhancer_prediction.txt"
	log:
		"margeoutput/logs/{exp}_cisregions.log"
	params:
		prefix= "margeoutput/cisRegions/{exp}",
		assembly = config["ASSEMBLY"]
	shell:
		"python3 {ENHANCERIDENTIFIER} -i {input[0]} -s {input[1]} -r {GENEREFERENCE} "
		"--rp {RPHDF},{input[2]} --k27ac {K27acHDF},{input[3]} "
		"-n {params.prefix} -g {params.assembly} 2> {log}"
		
