import sys

# Sample sheet first, then the tool/reference paths file resolved relative to
# this Snakefile via srcdir(). Both populate the global `config` dict.
configfile: 'sample_config.yaml'
configfile: srcdir('path_config.yaml')

# Tool executables / jar paths, interpolated into the shell commands below.
BWA = config['BWA']
SAMTOOLS = config['SAMTOOLS']
PICARD = config['PICARD']
JAVA = config['JAVA']
GATK = config['GATK']

# NOTE(review): nothing in view reads this name; the assembly that is actually
# used is config['assembly'] below. Kept in case another file relies on it.
assembly = 'GRCh38'

# Pick chromosome naming and reference resources for the requested assembly.
# Every supported branch must define REFERENCE, KNOWNINDEL and DBSNP, because
# the GATK rules interpolate all three.
if config['assembly'] == 'GRCh38':
    chromosomes = ['chr' + str(i) for i in list(range(1, 23)) + ['X', 'Y']]
    REFERENCE = config['GRCh38_REFERENCE']
    KNOWNINDEL = config['GRCh38_KNOWNINDEL']
    # Key name follows the GRCh37_* / GRCh38_* pattern used above.
    # TODO confirm that path_config.yaml defines it.
    DBSNP = config['GRCh38_DBSNP']
elif config['assembly'] == 'GRCh37':
    chromosomes = [str(i) for i in list(range(1, 23)) + ['X', 'Y']]
    REFERENCE = config['GRCh37_REFERENCE']
    KNOWNINDEL = config['GRCh37_KNOWNINDEL']
    DBSNP = config['GRCh37_DBSNP']
else:
    # Diagnostics go to stderr so stdout stays clean for piped Snakemake
    # output (e.g. `snakemake --dag | dot`).
    print('{} not supported, exiting...'.format(config['assembly']), file=sys.stderr)
    sys.exit(1)

# Sequencing type; used as a prefix in the read-group fields of bwa_align.
# Fail here instead of with a NameError when the first alignment job starts.
if config['type'] in ('WGS', 'WES'):
    TYPE = config['type']
else:
    print('{} not supported, exiting...'.format(config['type']), file=sys.stderr)
    sys.exit(1)

# Echo the configured samples for the operator (stderr, see note above).
print(config['sample'], file=sys.stderr)

# Default target: the final BAM (output of printreads) for every configured sample.
rule all:
    input:
        expand(
            'dna_bam/{sample}.s.mark.indel.base.bam',
            sample=config['sample'],
        )
#        expand("tmp_bam/{sample}.s.bam", sample=config['sample']), 

# Align the paired FASTQ files of one sample with `bwa mem`, pipe the result
# into `samtools sort`, and index the sorted BAM.
rule bwa_align:
    input:
        fq1 = lambda x: config['sample'][x.sample]['fq1'],
        fq2 = lambda x: config['sample'][x.sample]['fq2']
    output:
        bam = "tmp_bam/{sample}.s.bam",
    params:
        # Per-sample name from the config, used in the @RG ID/LB/SM fields.
        name = lambda x: config['sample'][x.sample]['name'],
#        rg = '@RG\tID:{TYPE}_{sample}\tLB:{TYPE}_{sample}\tSM:{TYPE}_{sample}\tPL:ILLUMINA'
    log:
        "logs/{sample}.bwa.log"
    threads: 4
    # The whole pipeline runs in one subshell with a single `&>` redirect so
    # that stderr of bwa and samtools is captured (stdout is empty here: it is
    # either piped or written via -o) and the index step no longer truncates
    # the log written by the alignment step.
    shell:
        "({BWA} mem -M -t {threads} -R '@RG\\tID:{TYPE}_{params.name}\\tLB:{TYPE}_{params.name}\\tSM:{TYPE}_{params.name}\\tPL:ILLUMINA' {REFERENCE} {input.fq1} {input.fq2} "
        "| {SAMTOOLS} sort -@ {threads} -o {output.bam} && "
        "{SAMTOOLS} index -@ {threads} {output.bam}) &> {log}"
#        "{SAMTOOLS} view -Sb -@ {threads} ",
#        "{SAMTOOLS} index "
#        "{SAMTOOLS} sort -@ {threads} -o {output.bam} "
#        "{SAMTOOLS} index {output.bam}"



# Run Picard MarkDuplicates on the sorted BAM (duplicates are removed, not
# just flagged) and index the result with samtools.
rule markduplicate:
    input:
        bam = 'tmp_bam/{sample}.s.bam'
    output:
        bam = 'tmp_bam/{sample}.s.mark.bam'
    params:
        # Per-sample name from the config; only used for the metrics file name.
        name = lambda x: config['sample'][x.sample]['name']
    log:
        'logs/{sample}.picard.log'
    threads: 4
    # The metrics file (M=...) is written under tmp_bam/ but is not declared
    # as an output of this rule.
    shell:
        "({JAVA} -Xmx16g -jar {PICARD} MarkDuplicates "
        "REMOVE_DUPLICATES=true REMOVE_SEQUENCING_DUPLICATES=true "
        "I={input.bam} O={output.bam} "
        "M=tmp_bam/{params.name}.matrics.txt "
        "VALIDATION_STRINGENCY=LENIENT ; "
        "{SAMTOOLS} index {output.bam}) &>{log}"


# Run GATK RealignerTargetCreator on the duplicate-marked BAM, using the
# configured known-indel resource, and write the intervals file.
rule indel_targetcreator:
    input:
        bam = 'tmp_bam/{sample}.s.mark.bam',
    output:
        intervals = 'realign_intervals/{sample}.intervals'
    log:
        'logs/{sample}.indel_targetcreator.log'
    threads: 1
    shell:
        "({JAVA} -Djava.io.tmpdir=tmp/  -jar {GATK} -T RealignerTargetCreator "
        "-R {REFERENCE} -known {KNOWNINDEL} "
        "-I {input.bam} -o {output.intervals}) &> {log}"

# Run GATK IndelRealigner over the intervals produced by indel_targetcreator,
# then index the realigned BAM with samtools.
rule realign:
    input:
        bam = 'tmp_bam/{sample}.s.mark.bam',
        intervals = 'realign_intervals/{sample}.intervals'
    output:
        bam = 'tmp_bam/{sample}.s.mark.indel.bam'
    log:
        'logs/{sample}.realign.log'
    threads: 1
    shell:
        "({JAVA} -Xmx8G -Djava.io.tmpdir=tmp/ -jar {GATK} -T IndelRealigner "
        "-R {REFERENCE} -targetIntervals {input.intervals} -known {KNOWNINDEL} "
        "-I {input.bam} -o {output.bam}; "
        "{SAMTOOLS} index  {output.bam}) &> {log}"

# Run GATK BaseRecalibrator on the realigned BAM to produce the recalibration
# table; both KNOWNINDEL and DBSNP are passed as --knownSites.
rule baserecal:
    input:
        bam = 'tmp_bam/{sample}.s.mark.indel.bam'
    output:
        table = 'baserecal/{sample}.table'
    log:
        'logs/{sample}.base_recal.log'
    threads: 1
    shell:
        "({JAVA} -Xmx8g -Djava.io.tmpdir=tmp/ -jar {GATK} -T BaseRecalibrator "
        "-R {REFERENCE} --knownSites {KNOWNINDEL} --knownSites {DBSNP} "
        "-I {input.bam} -o {output.table} ) &> {log}"

# Run GATK PrintReads with the recalibration table (-BQSR) to write the final
# BAM under dna_bam/.
rule printreads:
    input:
        bam = 'tmp_bam/{sample}.s.mark.indel.bam',
        table = 'baserecal/{sample}.table'
    output:
        bam = 'dna_bam/{sample}.s.mark.indel.base.bam'
    log:
        'logs/{sample}.pr.log'
    threads: 1
    shell:
        "({JAVA} -Xmx8g -Djava.io.tmpdir=tmp/ -jar {GATK} -T PrintReads "
        "-R {REFERENCE} -BQSR {input.table} "
        "-I {input.bam} -o {output.bam}) &>{log}"
