##################################################
# rules related to coverage calculation

##########
# auxiliary functions

def get_mem_mb(wildcards, attempt):
    """Return the memory request (in MB) for a given attempt number.

    Attempt 1 yields 64000; every further attempt adds another 64000.
    ``wildcards`` is accepted for the resource-callable signature but unused.
    """
    step_mb = 64000
    extra_attempts = attempt - 1
    return step_mb + (extra_attempts * step_mb)

def get_stepwise_covstats_for_graph(wildcards):
    """List the per-step ``.covstat`` paths for ``wildcards.graph``.

    One path is produced for each step index from 1 up to, but not
    including, the number of entries in ``config["graphs"][graph]``.
    """
    graph = wildcards.graph
    entry_count = len(config["graphs"][graph])
    covstat_paths = []
    for step in range(1, entry_count):
        covstat_paths.append(f"output/graph/{graph}-s{step}.covstat")
    return covstat_paths

def get_jth_fasta_for_graph(wildcards):
    """Return the renamed FASTA path of the j-th entry configured for a graph.

    ``wildcards.j`` is converted to an integer and used as an index into
    ``config["graphs"][wildcards.graph]``.
    """
    graph = wildcards.graph
    configured_samples = config["graphs"][graph]
    position = int(wildcards.j)
    chosen = configured_samples[position]
    return f"dir/{graph}/{chosen}.renamed.fa"

def get_coverage_tmp_files(wildcards):
    """List the temporary coverage files for step ``wildcards.i`` of a graph.

    One ``.cov.tmp`` path is produced for every index from 0 through
    ``int(wildcards.i)`` inclusive. The step wildcard is inserted into the
    path exactly as given, without normalization.
    """
    graph = wildcards.graph
    step = wildcards.i
    last_index = int(step)
    tmp_paths = []
    for sample_index in range(last_index + 1):
        tmp_paths.append(
            f"output/coverage/{graph}/s{step}_a{sample_index}.cov.tmp"
        )
    return tmp_paths

def get_coverage_w_all_samples_tmp_files(wildcards):
    """List the per-step ``.cov.infer.tmp.all`` paths for ``wildcards.graph``.

    One path is produced for each step index from 1 up to, but not
    including, the number of entries in ``config["graphs"][graph]``.
    """
    graph = wildcards.graph
    entry_count = len(config["graphs"][graph])
    steps = range(1, entry_count)
    return [f"output/growth/{graph}-s{step}.cov.infer.tmp.all" for step in steps]

##########
# main rules

# Run minigraph with the step-i graph and the j-th sample's FASTA as inputs.
# minigraph's stdout is written to the output file; its stderr goes to the log.
# Memory is requested through get_mem_mb.
rule align_sample_to_stepwise_graph:
    input:
        gfa="output/graph/{graph}-s{i}.gfa",
        seq=get_jth_fasta_for_graph,
    output:
        "output/coverage/{graph}/s{i}_a{j}.gaf",
    log:
        "log/align_query_to_graph/{graph}-s{i}_a{j}.log",
    threads: 24
    resources:
        mem_mb=get_mem_mb,
    shell:
        "(minigraph -xasm -c --cov -t{threads} {input.gfa} {input.seq} > {output}) "
        "2> {log}"

# Build a one-column temporary file for sample j at step i:
#   line 1      -> the value of the j wildcard (acts as the column header)
#   other lines -> for each line of the input starting with 'S', the third
#                  ':'-separated part of its 8th tab-separated field
rule get_segment_coverage_for_sample:
    input:
        "output/coverage/{graph}/s{i}_a{j}.gaf",
    output:
        temp("output/coverage/{graph}/s{i}_a{j}.cov.tmp"),
    shell:
        "cat <(printf '{wildcards.j}\n') "
        "<(grep '^S' {input} | cut -f8 | cut -d: -f3) "
        "> {output}"

# Join, side by side and tab-separated, the first comma-separated field of the
# segments file with every per-sample coverage column for step i.
rule collate_segment_coverage_across_samples:
    input:
        segments="output/graph/{graph}-s{i}.segments",
        # produced by: get_segment_coverage_for_sample
        cov_files=get_coverage_tmp_files,
    output:
        "output/coverage/{graph}/summary/s{i}-coverage_values.txt",
    shell:
        "paste -d'\t' "
        "<(cut -f1 -d, {input.segments}) "
        "{input.cov_files} > {output}"

# Hand the segments file and the collated coverage table of step i to the
# external script, which writes a temporary .covstat.tmp file.
# Input order is kept as-is in case the script accesses inputs by position.
rule calculate_cov_related_stats:
    input:
        segments="output/graph/{graph}-s{i}.segments",
        coverage="output/coverage/{graph}/summary/s{i}-coverage_values.txt",
    output:
        temp("output/graph/{graph}-s{i}.covstat.tmp"),
    script:
        "../script/calculate_cov_related_stats.py"

# Prefix the temporary covstat table with two comma-separated columns:
# "graph" (value: the graph wildcard) and "sample" (value: the i wildcard).
rule add_graph_name_to_graph_covstats_file:
    input:
        "output/graph/{graph}-s{i}.covstat.tmp",
    output:
        "output/graph/{graph}-s{i}.covstat",
    shell:
        "paste -d, "
        "<(printf 'graph\n{wildcards.graph}') "
        "<(printf 'sample\n{wildcards.i}') "
        "{input} > {output}"

# Concatenate the per-step .covstat files of one graph into one growth table.
# The header is emitted once (first line read); the first line of every input
# file is then skipped by position (FNR>1) rather than by content. Data rows
# start with the graph name, so filtering on a '^graph' prefix would also drop
# every data row of a graph whose name begins with "graph".
rule collate_graph_covstats_across_steps:
    input:
        get_stepwise_covstats_for_graph   # from: add_graph_name_to_graph_covstats_file
    output:
        "output/growth/default_order/{graph}.covstat.growth"
    shell:
        "awk 'NR==1 || FNR>1' {input} > {output}"

# Merge the growth tables of all graphs in graph_list into one summary file.
# The header is emitted once (first line read); the first line of every input
# file is then skipped by position (FNR>1) rather than by content. Data rows
# start with the graph name, so filtering on a '^graph' prefix would also drop
# every data row of a graph whose name begins with "graph".
rule collate_covstat_growth_for_all_graphs:
    input:
        expand("output/growth/default_order/{graph}.covstat.growth", graph=graph_list)
    output:
        "results/allgraphs.covstat.growth.summary"
    shell:
        "awk 'NR==1 || FNR>1' {input} > {output}"

# Reduce a graph's growth table to two lines: its first line (the header)
# followed by its last line (the final step).
rule extract_graph_covstats_at_final_step:
    input:
        "output/growth/default_order/{graph}.covstat.growth",
    output:
        temp("output/growth/default_order/{graph}.covstat.finalvalue.tmp"),
    shell:
        "cat <(head -n1 {input}) "
        "<(tail -n1 {input}) > {output}"

# Merge the final-step tables of all graphs in graph_list into one summary.
# The header is emitted once (first line read); the first line of every input
# file is then skipped by position (FNR>1) rather than by content. Data rows
# start with the graph name, so filtering on a '^graph' prefix would also drop
# the row of any graph whose name begins with "graph".
rule collate_final_step_graph_covstats:
    input:
        expand("output/growth/default_order/{graph}.covstat.finalvalue.tmp", graph=graph_list)
    output:
        "results/allgraphs.covstat.finalvalue.summary"
    shell:
        "awk 'NR==1 || FNR>1' {input} > {output}"
