"""
Snakemake workflow module of the nmrdmr pipeline.

Step 1: get reproducible peaks across ChIP-seq replicates.
"""

import os
import itertools
from collections import OrderedDict

# Build the intersection-file targets for every ordered pair of replicates in a group.
def get_replicate_pairs(samples, replicate_group):
    """
    List the pairwise intersection files needed for one replicate group.

    A sample belongs to the group when its annotation mapping contains every
    key/value pair of ``replicate_group`` (it may carry additional keys).

    Args:
        samples: mapping of sample name -> mapping of annotations.
        replicate_group: mapping of annotation key -> required value.

    Returns:
        A list of ``"<a>-<b>_intersection.bed"`` names, one per ordered pair of
        distinct group members. Both ``a-b`` and ``b-a`` are listed; the list is
        empty when fewer than two samples match.
    """
    members = []
    for sample_name in samples:
        # Items-view superset test: all required (key, value) pairs are present.
        if samples[sample_name].items() >= replicate_group.items():
            members.append(sample_name)

    targets = []
    for ordered_pair in itertools.permutations(members, 2):
        targets.append('-'.join(ordered_pair) + '_intersection.bed')
    return targets

rule intersect_replicates:
    """
    Intersect one ordered pair of replicate peak files with bedtools.

    Reports the original pair0 peaks (-wa) that overlap a pair1 peak by at least the
    configured fraction of the pair0 peak length (-f; config key "overlap", default 0.5).
    The result is a temporary file.
    """
    input:
        f1 = "{pair0}.narrowPeak",
        f2 = "{pair1}.narrowPeak"
    output:
        temp("{pair0}-{pair1}_intersection.bed")
    conda:
        "envs/bedtools_py.yaml"
    params:
        overlap = config.get("overlap", 0.5)
    shell:
        "bedtools intersect -f {params.overlap} -wa -a {input.f1} -b {input.f2} > {output}"


rule merge_into_consensus:
    """
    Build the consensus peak file for one species/tissue/mark combination.

    Concatenates the pairwise intersection files returned by get_replicate_pairs for the
    matching samples, sorts the records and merges them into a single BED file.
    """
    # NOTE(review): the wildcard values are paired positionally with COLUMNS, so this
    # assumes COLUMNS is ordered (tissue, species, mark) -- confirm where COLUMNS is defined.
    # NOTE(review): if fewer than two samples match, the input list is empty and `cat`
    # receives no file arguments -- verify this cannot happen for requested targets.
    input:
        lambda wildcards: get_replicate_pairs(
            samples,
            dict(zip(COLUMNS, (wildcards.tissue, wildcards.species, wildcards.mark)))
        )
    output:
        CONSENSUS_DIR+"/Consensus_peaks-{species}-{tissue}-{mark}.bed"
    conda:
        "envs/bedtools_py.yaml"
    shell:
        "cat {input} | bedtools sort | bedtools merge > {output}"