## Edit process
## Before: merge -> vaf_info.v2.txt
## Now: calculate VAF before the merge, then merge
# Configuration is read from two files: the per-sample table and the
# tool/script path table. srcdir() resolves the second one relative to the
# directory of this Snakefile.
configfile: "sample_config.yaml"
#configfile: srcdir('path_config.yaml')
configfile: srcdir("path_config.botseq_snv_anno.v6.yaml")

# Interpreters and external programs.
PYTHON = config["PYTHON2"]
RSCRIPT = config["RSCRIPT"]
BEDTOOLS = config["BEDTOOLS"]

# Static resources referenced by the rules.
HEADER = config["HEADER"]
EXCLUDE_BED = config["EXCLUDE_BED"]
HEK_bam = config["HEK_bam"]

# Script paths, one per processing step.
SOMATIC = config["SOMATIC"]
SPLIT = config["SPLIT"]
READINFO = config["READINFO"]
RASM = config["RASM"]
DEPTH = config["DEPTH"]
READC = config["READC"]
READLOC = config["READLOC"]
BASEQ = config["BASEQ"]
CONTEXT = config["CONTEXT"]
MERGE = config["MERGE"]
FILTER = config["FILTER"]
SPLIT_CALL = config["SPLIT_CALL"]

# Loaded but not referenced by any rule visible in this file.
CONTFILTER = config["CONTFILTER"]
FILTER2 = config["FILTER2"]
DISTFILTER = config["DISTFILTER"]
#print(config['sample'])
#print(lambda x: config['sample'][x.sample]['vcf'])
# Two-digit, zero-padded labels "01" .. "24"; rule merge expands its
# per-chromosome inputs over this list.
chromosomes = [f"{number:02d}" for number in range(1, 25)]


# Global regex constraints for the wildcards used in file patterns.
wildcard_constraints:
    # Raw strings so the backslash reaches the regex engine instead of being
    # treated as a (invalid) Python string escape.
    chrom=r"\d+",
    # "+" added: without a quantifier each class matched exactly one
    # character and could never match the multi-character names listed in
    # seq1_list / seq2_list below. The commas inside the original classes
    # were literal commas, not separators, and have been removed.
    seq=r"[A-Za-z_]+",
    seq2=r"[A-Za-z0-9_]+"

# Candidate values for {seq} and {seq2}; in this file they are only used by
# commented-out targets.
seq1_list = ["DS_mut", "only_SS_lib_mut", "SS_mut"]
seq2_list = ["DS_mut_2nd", "only_SS_lib_mut_2nd", "SS_mut_2nd"]
# Default target rule: requests the final DS_mut VCF for every key of
# config['sample'].
rule all:
#    params:
#        seq1=seq1_list,
#        seq2=seq2_list
    input:
        # Earlier target sets, kept for reference:
#        expand("annotation/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.DS_mut.v5.150.filtered.vcf", sample=config['sample'])
#        expand("annotation/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.{seq}.v5.150.filtered.vcf.cont_filtered.vcf.{seq2}.v5.150.filtered.vcf.dist_filtered.vcf", sample=config['sample'],seq=seq1_list,seq2=seq2_list)
#        expand("annotation/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.{seq}.v5.150.filtered.vcf", sample=config['sample'],seq=seq1_list)
        expand(
            "annotation/{sample}.snp.fin.DS_mut.vcf",
            sample=config["sample"]
        )
        
# Remove records overlapping EXCLUDE_BED from the sample's input VCF with
# `bedtools subtract`, then prepend the HEADER file with `cat` to produce the
# re-headered VCF.
rule excluding:
    input:
        # Path of the raw VCF is looked up in the sample table.
        vcf = lambda wildcards: config['sample'][wildcards.sample]['vcf']
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.vcf"
    params:
        # The intermediate (header-less) file is named after the configured
        # sample name, not the {sample} wildcard.
        name = lambda wildcards: config['sample'][wildcards.sample]['name']
    log:
        "logs/{sample}.excluding.log"
    shell:
        # stdout of both commands is the data stream, so only stderr goes to
        # the log; previously the declared log file was never written.
        "{BEDTOOLS} subtract -a {input.vcf} -b {EXCLUDE_BED} >tmp_vcf/{params.name}.snp.exclude.vcf 2> {log} ;"
        "cat {HEADER} tmp_vcf/{params.name}.snp.exclude.vcf > {output.vcf} 2>> {log}"
#print (config['sample'])

# Run the SOMATIC script on the re-headered VCF; its stdout is captured in
# the log. The declared output path is not passed on the command line, so the
# script is presumably responsible for creating it (NOTE(review): confirm).
rule somatic:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.vcf"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.vcf"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name']
    log:
        # Dedicated log file: this rule used to point at
        # logs/{sample}.excluding.log, the log path of rule excluding.
        "logs/{sample}.somatic.log"
    shell:
        "{PYTHON} {SOMATIC} {input.vcf}>{log};"
# Run the SPLIT script on the somatic VCF to obtain a per-chromosome VCF;
# stdout is captured in the log.
# NOTE(review): {chrom} is not passed to the command, so SPLIT presumably
# writes every per-chromosome file in a single run; if so, each {chrom} job
# repeats the same work -- confirm against the script.
rule split:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.vcf"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf"
#        vcf = expand("tmp_vcf/{{sample}}.snp.exclude.reheader.somatic.{chrom}.vcf",chrom=chromosomes),
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        chrom = "{chrom}",
        sample = "{sample}"
    log:
        "logs/{sample}.split.{chrom}.log"
    shell:
        "{PYTHON} {SPLIT} {input.vcf} > {log};"

# Run the READINFO script on one per-chromosome VCF together with the
# sample's s_bam; stdout is captured in the log. The output path is not on
# the command line (presumably derived by the script from its input).
rule read_info:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam'],
        chrom = "{chrom}"
    log:
        "logs/{sample}.read_info.{chrom}.log"
    shell:
        "{PYTHON} {READINFO} {input.vcf} {params.s_bam} > {log};"

# Run the RASM script on the read-info table, passing the sample's s_bam
# followed by its c_bam; stdout is captured in the log.
rule rasm:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        chrom = "{chrom}",
        c_bam = lambda wildcards: config['sample'][wildcards.sample]['c_bam'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam']
    log:
        "logs/{sample}.v4.rasm.{chrom}.log"
    shell:
        "{PYTHON} {RASM} {input.vcf} {params.s_bam} {params.c_bam} > {log};"

# Run the DEPTH script on the rasm table with the sample's s_bam to produce
# the vaf_info.v2 table; stdout is captured in the log.
rule depth:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam'],
        chrom = "{chrom}"
    log:
        "logs/{sample}.{chrom}.depth.log"
    shell:
        "{PYTHON} {DEPTH} {input.vcf} {params.s_bam} > {log}"

# Run the READC script on the vaf_info table against the shared HEK_bam,
# with the literal label "HEK293T" as third argument; stdout is captured in
# the log.
rule cont:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        chrom = "{chrom}"
    log:
        "logs/{sample}.{chrom}.cont.log"
    shell:
        "{PYTHON} {READC} {input.vcf} {HEK_bam} HEK293T > {log}"

# Run the READLOC script on the readc table with the sample's s_bam;
# stdout is captured in the log.
rule read_loc:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam'],
        chrom = "{chrom}"
    log:
        "logs/{sample}.{chrom}.read_loc.log"
    shell:
        "{PYTHON} {READLOC} {input.vcf} {params.s_bam} > {log}"

# Run the BASEQ script on the read_loc table with the sample's s_bam;
# stdout is captured in the log.
rule baseq:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam'],
        chrom = "{chrom}"
    log:
        "logs/{sample}.{chrom}.baseq.log"
    shell:
        "{PYTHON} {BASEQ} {input.vcf} {params.s_bam} > {log}"


# Run the CONTEXT R script (via RSCRIPT) on the baseq table, passed with -I;
# stdout is captured in the log. params.s_bam is defined but not used by the
# command.
rule context:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam'],
        chrom = "{chrom}"
    log:
        "logs/{sample}.{chrom}.context.log"
    shell:
        "{RSCRIPT} {CONTEXT} -I {input.vcf} > {log}"

# Run the READC script a second time, now on the context table against the
# sample's c_bam with the literal label "norm"; stdout is captured in the
# log.
rule norm:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont"
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        # Named after the config key it reads (it was called s_bam although
        # it held the c_bam path).
        c_bam = lambda wildcards: config['sample'][wildcards.sample]['c_bam'],
        chrom = "{chrom}"
    log:
        # Dedicated log file: this rule used to point at
        # logs/{sample}.{chrom}.cont.log, the log path of rule cont, and so
        # overwrote that rule's log for the same sample and chromosome.
        "logs/{sample}.{chrom}.norm.log"
    shell:
        "{PYTHON} {READC} {input.vcf} {params.c_bam} norm > {log}"

# Collect the per-chromosome tables of one sample: write their absolute paths
# (readlink -f) to a list file named after the configured sample name, then
# run the MERGE script on that list; the script's stdout is captured in the
# log.
rule merge:
    input:
        # One table per entry of `chromosomes`; {sample} stays a wildcard.
        vcf = expand("tmp_vcf/{{sample}}.snp.exclude.reheader.somatic.{chrom}.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc",chrom=chromosomes)
    output:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        # Space-separated input paths. Joining the list directly avoids the
        # former str() -> split() -> join() round trip, which would have cut
        # any path containing whitespace into pieces.
        vcf_input = lambda wildcards, input: " ".join(input.vcf)
    log:
        "logs/{sample}.merge.log"
    shell:
        "readlink -f {params.vcf_input} > tmp_vcf/vcf_list.{params.name}.txt;"
        "{PYTHON} {MERGE} tmp_vcf/vcf_list.{params.name}.txt > {log};"

# Run the FILTER script on the merged table (stdout captured in the log) and
# move the resulting *.read_type.vcf from tmp_vcf/ into annotation/.
# NOTE(review): the mv source path is built from params.name while the
# declared output uses the {sample} wildcard; these only line up when the
# configured name equals the sample key -- confirm.
rule filter:
    input:
        vcf = "tmp_vcf/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc"
    output:
        vcf = "annotation/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc.read_type.vcf"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        # s_bam and seq are defined but not used by the command below.
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam'],
        seq = seq1_list
    log:
        "logs/{sample}.filter.log"
    shell:
        "{PYTHON} {FILTER} {input.vcf} > {log};"
        "mv tmp_vcf/{params.name}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc.read_type.vcf annotation/;"

# Run the SPLIT_CALL script on the read_type VCF to obtain the final DS_mut
# VCF requested by rule all. The output path is not on the command line
# (presumably derived by the script), and the declared log is not written by
# the command.
rule split_call:
    input:
        vcf = "annotation/{sample}.snp.exclude.reheader.somatic.merged.vcf.v13.info.txt.v4.rasm.vaf_info.v2.txt.readc.read_loc.info.txt.baseq.txt.cont.readc.read_type.vcf"
    output:
        vcf = "annotation/{sample}.snp.fin.DS_mut.vcf"
    params:
        name = lambda wildcards: config['sample'][wildcards.sample]['name'],
        s_bam = lambda wildcards: config['sample'][wildcards.sample]['s_bam']
    log:
        "logs/{sample}.split_call.log"
    shell:
        "{PYTHON} {SPLIT_CALL} {input.vcf};"
#        "logs/{sample}.filter.log"

