# WDL wrapper to run Paragraph (idxdepth + multigrmpy.py) on a single CRAM
version 1.0

# Single-call workflow: forwards every input to the `paragraph` task and
# re-exports the files multigrmpy.py writes (variants/genotypes as VCF and
# JSON, plus the grmpy log).
workflow paragraph_workflow {
	input {
		# Variant file handed to multigrmpy.py via -i.
		File vcf
		# Alignments for one sample and their index.
		File cram
		File crai
		# Reference FASTA and its index.
		File reference
		File reference_index
		# Thread count passed to multigrmpy.py via -t.
		Int thread = 4
		# Extra GB added on top of the task's computed disk size.
		Int additional_disk_Gb = 0
	}

	call paragraph {
		input:
			vcf                = vcf,
			cram               = cram,
			crai               = crai,
			reference          = reference,
			reference_index    = reference_index,
			thread             = thread,
			additional_disk_Gb = additional_disk_Gb
	}

	output {
		File variants_vcf   = paragraph.variants_vcf
		File genotypes_vcf  = paragraph.genotypes_vcf
		File variants_json  = paragraph.variants_json
		File genotypes_json = paragraph.genotypes_json
		# The task names this output `log`; it is the grmpy.log file.
		File runtime_log    = paragraph.log
	}
}
# Runs Paragraph on one CRAM:
#   1. idxdepth estimates depth / read length and writes <sample_id>.json
#   2. an inline Python script turns that JSON into a two-line manifest
#   3. multigrmpy.py genotypes `vcf` using the manifest, with -M = 20 * depth
#
# Inputs:
#   vcf                 variants passed to multigrmpy.py -i
#   cram / crai         alignments and index (crai is not referenced in the
#                       command; presumably declared so it is localized next
#                       to the CRAM -- confirm for your backend)
#   reference /         reference FASTA and index (reference_index is likewise
#   reference_index     only declared, not referenced in the command)
#   thread              value for multigrmpy.py -t; the VM gets thread + 4 CPUs
#   additional_disk_Gb  GB added on top of the computed disk size
#
# Outputs: idxdepth JSON, manifest, and the variants/genotypes VCF + JSON and
# grmpy.log that multigrmpy.py writes into the <sample_id>/ directory.
task paragraph {
	input {
		File vcf
		File cram
		File crai
		File reference
		File reference_index
		Int thread
		Int additional_disk_Gb
	}

	# File name of the CRAM without its ".cram" suffix; used for every output name.
	String sample_id = basename("~{cram}", ".cram")

	# Scratch space: 7x the CRAM plus the other localized inputs (reference and
	# VCF), so a small CRAM with a large reference still gets enough disk.
	Int diskGb = ceil(7.0 * size(cram, "G") + size(reference, "G") + size(vcf, "G")) + additional_disk_Gb
	Int ncpu = thread + 4
	Int memory = ncpu * 2

	command <<<
	# Stop at the first failing step instead of running later steps on missing or empty inputs.
	set -euo pipefail
	/opt/paragraph-build/bin/idxdepth -b "~{cram}" -r "~{reference}" -o "~{sample_id}.json"
	# Parse the idxdepth result into the manifest file.
	# The delimiter is quoted so the shell never expands anything inside the Python source.
	python <<'CODE'
	import json
	with open("~{sample_id}.json","r") as info_json:
		data = json.load(info_json)
		cram_path = str(data["bam_path"])
		# Manifest id is the file name up to its first dot; this can differ from
		# the WDL sample_id when the file name contains extra dots.
		sample_id = cram_path.split("/")[-1].split(".")[0]
		depth = str(int(data["autosome"]["depth"]))
		read_length = str(int(data["read_length"]))
		line1 = "\t".join(["id","path","depth","read length"]) + "\n"
		line2 = "\t".join([sample_id,cram_path,depth,read_length]) + "\n"
	to_print = line1 + line2
	# write the manifest
	with open("~{sample_id}.manifest.txt","w") as idx_out:
		idx_out.write(to_print)
	CODE
	# Depth is the third tab-separated column of the manifest's data row.
	# Splitting on tabs only keeps the column stable if the path contains spaces.
	depth=$(awk -F'\t' 'NR == 2 {print $3}' "~{sample_id}.manifest.txt")
	# -M is set to 20x the depth; shell arithmetic avoids expr's non-zero exit on a 0 result.
	M=$((depth * 20))
	# This command runs on a single subject at a time.
	python3 /opt/paragraph-build/bin/multigrmpy.py -i "~{vcf}" -m "~{sample_id}.manifest.txt" -r "~{reference}" -o "~{sample_id}" -M "$M" -t ~{thread}
	>>>

	output {
		# idxdepth JSON characterizing the alignment file
		File idxdepth_json = "~{sample_id}.json"
		File manifest = "~{sample_id}.manifest.txt"
		File variants_vcf = sample_id + "/variants.vcf.gz"
		File genotypes_vcf = sample_id + "/genotypes.vcf.gz"
		File variants_json = sample_id + "/variants.json.gz"
		File genotypes_json = sample_id + "/genotypes.json.gz"
		File log = sample_id + "/grmpy.log"
	}

	runtime {
		# Use this container, pulled from DockerHub
		docker: "bni1/paragraph:2.4a"
		memory: "~{memory}G"
		cpu: ncpu
		disks: "local-disk ~{diskGb} SSD"
		preemptible: 1
	}
}
