##################################################
# rules for graph construction and for extracting graph properties and bubbles

##########
# auxiliary functions

def get_mem_mb(wildcards, attempt):
    """Return the memory request in MB for the given attempt number.

    The first attempt asks for 128000 MB; every further attempt adds
    another 64000 MB. ``wildcards`` is accepted but not used.
    """
    base_mb = 128000  # higher for graph construction
    increment_mb = 64000
    retries_so_far = attempt - 1
    return base_mb + retries_so_far * increment_mb

def get_disk_mb(wildcards, input, attempt):
    """Return the disk request in MB, scaled by input size and attempt.

    The request is ``(offset + attempt) * input.size_mb`` with an offset of
    3 for the 'great_ape' graph and 1 for every other graph, and is never
    smaller than 1000.
    """
    offset = 3 if wildcards.graph == 'great_ape' else 1
    scaled_mb = (offset + attempt) * input.size_mb
    return max(scaled_mb, 1000)

def get_threads(wildcards, input):
    """Return the thread count: 64 for the 'great_ape' graph, 24 otherwise.

    ``input`` is accepted but not used.
    """
    return 64 if wildcards.graph == 'great_ape' else 24

def get_first_fasta(wildcards):
    """Return the renamed FASTA path of the first species of the graph.

    The species is the first entry of ``config["graphs"][wildcards.graph]``.
    """
    graph = wildcards.graph
    backbone = config["graphs"][graph][0]
    return f"dir/{graph}/{backbone}.renamed.fa"

def get_other_fasta_up_to_ith_entry(wildcards):
    """Return renamed FASTA paths for entries 1..i of the graph's species list.

    Entry 0 of ``config["graphs"][wildcards.graph]`` is skipped. ``wildcards.i``
    is converted with ``int``; because a slice is used, an ``i`` beyond the end
    of the list simply yields all remaining entries.
    """
    graph = wildcards.graph
    stop = int(wildcards.i) + 1
    later_species = config["graphs"][graph][1:stop]
    return [f"dir/{graph}/{name}.renamed.fa" for name in later_species]

##########
# main rules

# Build the graph "{graph}-s{i}.gfa" with minigraph from the first FASTA of the
# graph plus the FASTAs of entries 1..i of its species list.
# Threads, memory and disk are computed per job by the helper callables.
rule build_stepwise_graph:
    input:
        bb = get_first_fasta,
        other = get_other_fasta_up_to_ith_entry
    output:
        "output/graph/{graph}-s{i}.gfa"
    log:
        "log/build_graph/{graph}-s{i}.log"
    threads: get_threads
    resources:
        mem_mb = get_mem_mb,
        disk_mb = get_disk_mb
    # {input} expands to all input files (bb first, then other); minigraph's
    # stdout becomes the graph file and its stderr goes to the log.
    shell:
        "(minigraph -xggs -c -t{threads} -L50 {input} > {output}) 2> {log}"

# Run `gfatools stat` on a stepwise graph and capture its stdout.
# The result is declared temp().
rule gfatools_stat:
    input:
        "output/graph/{graph}-s{i}.gfa"
    output:
        temp("output/graph/{graph}-s{i}.gfastat")
    shell:
        "gfatools stat {input} > {output}"

# Run `gfatools bubble` on a stepwise graph and capture its stdout in a
# temporary ".bubble.tmp" table (consumed by get_segments_by_bubble).
rule gfatools_bubble:
    input:
        "output/graph/{graph}-s{i}.gfa"
    output:
        temp("output/graph/{graph}-s{i}.bubble.tmp")
    shell:
        "gfatools bubble {input} > {output}"

# Keep only the 12th tab-separated column of the `gfatools bubble` table.
# NOTE(review): presumably that column lists the segments belonging to each
# bubble -- confirm against the gfatools output format.
rule get_segments_by_bubble:
    input:
        "output/graph/{graph}-s{i}.bubble.tmp"
    output:
        temp("output/graph/{graph}-s{i}.bubble")
    shell:
        "cut -f12 {input} > {output}"

# Build a four-column table from the graph lines that start with "S":
#   1. field 2 as is
#   2. third ':'-separated part of field 4
#   3. third ':'-separated part of field 7
#   4. third ':'-separated part of field 5
# NOTE(review): presumably these are segment name, LN (length), SR (rank) and
# SN (stable sequence name) of minigraph's rGFA S-lines -- confirm the tag order.
# The graph file is scanned once per column; paste joins the four streams.
rule extract_segment_information:
    input:
        "output/graph/{graph}-s{i}.gfa"
    output:
        temp("output/graph/{graph}-s{i}.segments.tmp")
    shell:
        "paste "
        "<(grep '^S' {input} | cut -f2) "
        "<(grep '^S' {input} | cut -f4 | cut -d: -f3) "
        "<(grep '^S' {input} | cut -f7 | cut -d: -f3) "
        "<(grep '^S' {input} | cut -f5 | cut -d: -f3) > {output}"

# Combine the per-segment table with the per-bubble segment lists into the
# final ".segments" file. The work is done by the external script
# add_bubble_info_to_segments.py, which is not part of this file.
rule add_bubble_info_to_segments:
    input:
        segments = "output/graph/{graph}-s{i}.segments.tmp",
        bubble   = "output/graph/{graph}-s{i}.bubble"
    output:
        "output/graph/{graph}-s{i}.segments" 
    script:
        "../script/add_bubble_info_to_segments.py"