##################################################
# rules related to mash genomic distance estimation

##########
# auxiliary functions

def get_first_fasta(wildcards):
    """Return the renamed FASTA path of the first species of a graph.

    The species list is read from ``config["graphs"][wildcards.graph]``;
    only its first entry is used.

    Args:
        wildcards: object exposing a ``graph`` attribute (the graph name).

    Returns:
        The string ``dir/<graph>/<first species>.renamed.fa``.
    """
    graph_species = config["graphs"][wildcards.graph]
    bb_species = graph_species[0]
    return f"dir/{wildcards.graph}/{bb_species}.renamed.fa"

def get_other_fasta(wildcards):
    """Return the renamed FASTA paths of every species but the first.

    The species list is read from ``config["graphs"][wildcards.graph]``;
    its first entry is skipped and the remaining ones keep their order.

    Args:
        wildcards: object exposing a ``graph`` attribute (the graph name).

    Returns:
        A list of ``dir/<graph>/<species>.renamed.fa`` strings; empty when
        the graph lists fewer than two species.
    """
    graph_species = config["graphs"][wildcards.graph]
    return [
        f"dir/{wildcards.graph}/{x}.renamed.fa"
        for x in graph_species[1:]
    ]

##########
# main rules

# Build a Mash sketch from the first FASTA of a graph.
#   input:  the single path returned by get_first_fasta for {graph}
#   output: output/mash/{graph}_bb.msh
#   log:    stderr of the mash command
#   params: mash   - extra command-line options taken from config["mash_params"]
#           output - value handed to `mash sketch -o`: the declared output
#                    path without its ".msh" suffix (mash is expected to
#                    append the suffix itself - confirm against the Mash docs)
rule mash_sketch_bb:
    input:
        get_first_fasta
    output:
        "output/mash/{graph}_bb.msh"
    log:
        "log/mash/{graph}-sketch_bb.log"
    params:
        mash = config["mash_params"],
        # Derived from the declared output instead of repeating the path, so
        # the prefix passed to mash can never drift from the file Snakemake
        # expects to be created.
        output = lambda wildcards, output: output[0][:-len(".msh")]
    shell:
        "mash sketch {params.mash} -o {params.output} {input} 2> {log}"

# Build one Mash sketch file from all remaining FASTA files of a graph.
#   input:  the list of paths returned by get_other_fasta for {graph}
#   output: output/mash/{graph}_other.msh
#   log:    stderr of the mash command
#   params: mash   - extra command-line options taken from config["mash_params"]
#           output - value handed to `mash sketch -o`: the declared output
#                    path without its ".msh" suffix (mash is expected to
#                    append the suffix itself - confirm against the Mash docs)
rule mash_sketch_other:
    input:
        get_other_fasta
    output:
        "output/mash/{graph}_other.msh"
    log:
        "log/mash/{graph}-sketch_other.log"
    params:
        mash = config["mash_params"],
        # Derived from the declared output instead of repeating the path, so
        # the prefix passed to mash can never drift from the file Snakemake
        # expects to be created.
        output = lambda wildcards, output: output[0][:-len(".msh")]
    shell:
        "mash sketch {params.mash} -o {params.output} {input} 2> {log}"

# Run `mash dist` on the two sketches of a graph.
#   input:  bb    - output/mash/{graph}_bb.msh
#           query - output/mash/{graph}_other.msh
#   output: output/mash/{graph}.dist (stdout of mash)
#   log:    stderr of the mash command
# NOTE(review): the command passes {input.query} first and {input.bb} second;
# Mash's usage is `mash dist <reference> <query> ...`, so the "query" sketch
# appears to sit in the reference position - confirm this order is intended.
rule mash_dist:
    input:
        bb = "output/mash/{graph}_bb.msh",
        query = "output/mash/{graph}_other.msh"
    log: "log/mash/{graph}-dist.log"
    output: "output/mash/{graph}.dist"
    shell:
        "mash dist {input.query} {input.bb} > {output} 2> {log}"