import os
import sys
import re
import yaml
#import glob3
import pandas as pd
from snakemake.utils import min_version
# Enforce a minimum Snakemake version
min_version("4.7")

onstart:
    print("##########################################\n")
    print("# STARTING PIPELINE\n")
    print("##########‰################################\n")
#    print ("Running ChIP-seq pre-processing workflow for the following samples:\n " + ' \n '.join(map(str, SAMPLES)))

# Handler executed once after the whole workflow finishes successfully.
onsuccess:
    print("##########################################\n")
    print("# PIPELINE ENDED SUCCESSFULLY \n")
    print("##########################################\n")

# Banner string prepended to every rule's `message` directive below.
ruleDisplayMessage = """\n\n####### START RULE EXECUTION ##########\n"""

# NOTE(review): not referenced by any rule in this file (all rules use
# `threads: 1`) — confirm it is still needed.
threadsMax = 16

# NOTE(review): the bare string below is a no-op note; no SPP /
# phantompeakqualtools rule appears in this file — confirm it still belongs here.
"""

Cross-correlation analysis using SPP:
https://github.com/kundajelab/phantompeakqualtools
"""


################################################################################
######################## PATHS AND DATA ########################################
################################################################################

### FOLDERS ON TIER1 ###
# Top-level project layout, all read from the workflow config.
PROJECT_DIR = config["global"]["projectdir"]
DATA_DIR  = PROJECT_DIR + "/" + config["project_structure"]["datadir"]
LOG_DIR = PROJECT_DIR + "/" + config["project_structure"]["logdir"]
EXEC_DIR = PROJECT_DIR + "/" + config["project_structure"]["utils"]

# Input BAMs (alignments keeping indels) and output bigwig signal tracks.
BAM_DIR = DATA_DIR + "/ChIPseq/Alignments/with_indels"
BW_DIR = DATA_DIR + "/ChIPseq/Signal/with_indels"

### TOOLS ###
# Absolute tool paths taken from the config.
# NOTE(review): SAMTOOLS is not used by any rule in this file.
SAMTOOLS = config["tools"]["by_path"]["samtools"]

# deepTools executables used by the signal-generation rules below.
bamCoverage = config["tools"]["by_path"]["bamCoverage"]
bamCompare = config["tools"]["by_path"]["bamCompare"]
bigwigCompare = config["tools"]["by_path"]["bigwigCompare"]
# bigWigMerge and bedGraphToBigWig is installed in conda environment:
# conda install -c bioconda ucsc-bigwigmerge
# conda install -c bioconda ucsc-bedgraphtobigwig

### SAMPLES ###
# IP samples
individuals_path = config['data']['samples']['individual_ids']
# Use a context manager so the sample-sheet file handle is closed after reading.
with open(individuals_path, 'r') as fh:
    individuals = fh.read().splitlines() # with replicate numbers
# One entry per fly line: strip the replicate suffix and de-duplicate while
# preserving order (dict.fromkeys).  Duplicates here would make expand() list
# the same bigwig twice, double-counting it in the bigWigMerge sums below.
lines = list(dict.fromkeys("_".join(x.split("_")[0:2]) for x in individuals))
# input samples
# samples_path_input = config['data']['samples']['individual_ids_input']
# samples_input = open(samples_path_input, 'r').read().splitlines()

# Antibody.timepoint combinations covered by the IP datasets.
ab_tp_list  = ["mef2.68", "mef2.1012", "bin.68", "bin.1012", "ctcf.68", "zld.24", "twi.24"]
# Same list plus input controls.  Fixed: "input1012" was missing its dot
# separator, inconsistent with "input.24" / "input.68".
ab_tp_list_with_input  = ["mef2.68", "mef2.1012", "bin.68", "bin.1012", "ctcf.68", "zld.24", "twi.24", "input.24", "input.68", "input.1012"]
timepoints = ["24", "68", "1012"]

# Chromosome sizes file (dm6) required by bedGraphToBigWig / wigToBigWig.
chrom_sizes = config["data"]["genome"]["dm6"]["chrom_sizes"]

################################################################################
####################### Pipeline ###############################################
################################################################################


# The aggregation target does no real work — run it locally, never on a cluster node.
localrules: all


# Aggregation target.  Currently restricted to the twi.24 / vgn_307 subset;
# the commented-out expand() lines are the full target sets for other runs —
# uncomment the relevant ones to regenerate everything.
rule all:
    input:
        # expand(BW_DIR + "/rpgc/{ab_tp}.{individual}.bw", ab_tp = ab_tp_list, individual = individuals),
        # expand(BW_DIR + "/rpgc/input.{tp}.{line}_1.bw", tp = timepoints, line = lines),
        # expand(BW_DIR + "/rpgc_input_subtr/{ab_tp}.{line}_{rep}.bw", ab_tp = ab_tp_list, line = lines, rep = [1,2]),
        # expand(BW_DIR + "/rpgc_input_subtr/twi.24.vgn_307_{rep}.bw", rep = [1,2]),
        # expand(BW_DIR + "/rpgc_sum/by_dataset/{ab_tp}.avg.bw", ab_tp = ab_tp_list),
        # expand(BW_DIR + "/rpgc_sum/by_line/{ab_tp}.{line}.avg.bw", ab_tp = ab_tp_list, line = lines),
        # expand(BW_DIR + "/rpgc_sum_input_subtr/by_dataset/{ab_tp}.avg.bw", ab_tp = ab_tp_list),
        # expand(BW_DIR + "/rpgc_sum_input_subtr/by_line/{ab_tp}.{line}.avg.bw", ab_tp = ab_tp_list, line = lines),
        expand(BW_DIR + "/rpgc_sum_input_subtr/by_line/{ab_tp}.{line}.avg.bw", ab_tp = "twi.24", line = "vgn_307"),
        # expand(BW_DIR + "/rpgc_mean_input_subtr/by_dataset/{ab_tp}.mean.input_subtr.bw", ab_tp = ab_tp_list)
        expand(BW_DIR + "/rpgc_mean_input_subtr/by_dataset/{ab_tp}.mean.input_subtr.bw", ab_tp = "twi.24")
        # #expand(BW_DIR + "/normalized/{ab_tp}.{individual}_{rep}.rmdup.bw", ab_tp = ab_tp_list, individual = individuals, rep = ["1", "2"]),
        #expand(BW_DIR + "/normalized/{ab_tp}.{individual}_{rep}.rmdup.input_adj.bw", ab_tp = ab_tp_list, individual = individuals, rep = ["1", "2"])
        # expand(BAM_DIR + "/merged_samples/input.{tp}.rmdup.sort.merged.bam", tp = timepoints)



# need to remove line vgn_307 at 2-4h from here!
rule generate_bigwig:
    input: BAM_DIR + "/{ab}.{tp}.{individual}.filtered.rmdup_withUMI.bam",
    output: BW_DIR + "/rpgc/{ab}.{tp}.{individual}.bw"
    threads: 1
    message: "{ruleDisplayMessage}Generating bigwig files for {wildcards.individual}, {wildcards.ab} at {wildcards.tp} ..."
    log: LOG_DIR + "/Peaks_and_signal/bigwig/{ab}.{tp}.{individual}.rpcg.log"
    params:
        effectiveGenomeSize = config["data"]["genome"]["genome_size"]["dm6"]
    shell: """
           {bamCoverage} -b {input} -o {output} \
            --binSize 10 \
            --normalizeUsing RPGC \
            --effectiveGenomeSize {params.effectiveGenomeSize} \
            --ignoreForNormalization chrX chrM chrY \
            --extendReads --centerReads\
            2> {log}
           """

# rule generate_bigwig_merged_input:
#     input: BAM_DIR + "/merged_samples/input.{tp}.filtered.rmdup_withUMI.bam",
#     output: BW_DIR + "/rpgc/merged_input.{tp}.bw"
#     threads: 1
#     message: "{ruleDisplayMessage}Generating bigwig files for merged input at {wildcards.tp} ..."
#     log: LOG_DIR + "/Peaks_and_signal/bigwig/merged_input.{tp}.rpcg.log"
#     params:
#         effectiveGenomeSize = config["data"]["genome"]["genome_size"]["dm6"]
#     shell: """
#            {bamCoverage} -b {input} -o {output} \
#             --binSize 10 \
#             --normalizeUsing RPGC \
#             --effectiveGenomeSize {params.effectiveGenomeSize} \
#             --ignoreForNormalization chrX chrM chrY \
#             --extendReads --centerReads\
#             2> {log}
#            """

# need to remove line vgn_307 at 2-4h from here! (for now in filters out line vgn_307 from all conditions!)
rule generate_bigwig_input_adj:
    input:
        IP = BW_DIR + "/rpgc/{ab}.{tp}.{line}_{rep}.bw",
        input = BW_DIR + "/rpgc/input.{tp}.{line}_1.bw"
    output: BW_DIR + "/rpgc_input_subtr/{ab}.{tp}.{line}_{rep}.bw",
    threads: 1
    message: "{ruleDisplayMessage}Generating input-corrected bigwig files for {wildcards.line}_{wildcards.rep}, {wildcards.ab} at {wildcards.tp} ..."
    log: LOG_DIR + "/Peaks_and_signal/bigwig/{ab}.{tp}.{line}_{rep}.rpgc.input_subtr.log"
    wildcard_constraints:
        line = "(?!vgn307)"
    shell: """
           {bigwigCompare} -b1 {input.IP} -b2 {input.input} -o {output} \
            --binSize 10 \
            --operation subtract \
            -p {threads} \
            2> {log}
           """

# this is the line where input failed - replace by maternal line
# Special case: the input sample for line vgn_307 at 2-4h failed, so a
# substitute input track ("input.24.vgn_vgn_1.bw") is subtracted instead.
# NOTE(review): presumably "vgn_vgn" is the maternal line's input — confirm
# this is the intended substitute track.
rule generate_bigwig_input_adj_vgn307_24:
    input:
        IP = BW_DIR + "/rpgc/twi.24.vgn_307_{rep}.bw",
        input = BW_DIR + "/rpgc/input.24.vgn_vgn_1.bw"
    output: BW_DIR + "/rpgc_input_subtr/twi.24.vgn_307_{rep}.bw",
    threads: 1
    message: "{ruleDisplayMessage}Generating input-corrected bigwig files for vgn_307 line, twi at 2-4h ..."
    log: LOG_DIR + "/Peaks_and_signal/bigwig/twi.24.vgn_307_{rep}.rpgc.input_subtr.log"
    shell: """
           {bigwigCompare} -b1 {input.IP} -b2 {input.input} -o {output} \
            --binSize 10 \
            --operation subtract \
            -p {threads} \
            2> {log}
           """


# rule generate_bigwig_merged_input_adj:
#     input:
#         IP = BW_DIR + "/rpgc/{ab}.{tp}.{line}_{rep}.bw",
#         input = BW_DIR + "/rpgc/merged_input.{tp}.bw"
#     output: BW_DIR + "/rpgc_merged_input_subtr/{ab}.{tp}.{line}_{rep}.bw",
#     threads: 1
#     message: "{ruleDisplayMessage}Generating merged input-corrected bigwig files for {wildcards.line}_{wildcards.rep}, {wildcards.ab} at {wildcards.tp} ..."
#     log: LOG_DIR + "/Peaks_and_signal/bigwig/{ab}.{tp}.{line}_{rep}.rpgc.merged_input_subtr.log"
#     shell: """
#            {bigwigCompare} -b1 {input.IP} -b2 {input.input} -o {output} \
#             --binSize 10 \
#             --operation subtract \
#             -p {threads} \
#             2> {log}
#            """


# Combine all replicates of all lines for one antibody/timepoint into a single
# track.  NOTE(review): bigWigMerge SUMS overlapping values — it does not
# average — hence the "rpgc_sum" output directory, although the message (and
# the ".avg" filename) say "average".  Confirm which is intended.
rule merge_signal_per_condition:
    input: expand(BW_DIR + "/rpgc/{{ab}}.{{tp}}.{line}_{rep}.bw", line = lines, rep = [1,2])
    output:
        # intermediate bedGraph, deleted by snakemake once the bigwig exists
        bedgraph = temp(BW_DIR + "/rpgc_sum/by_dataset/{ab}.{tp}.avg.bedGraph"),
        bigwig = BW_DIR + "/rpgc_sum/by_dataset/{ab}.{tp}.avg.bw",
    threads: 1
    params:
        chrom_sizes = chrom_sizes
    message: "{ruleDisplayMessage}Generating average bigwig files for {wildcards.ab} at {wildcards.tp} ..."
    shell: """
           bigWigMerge {input} {output.bedgraph};
           bedGraphToBigWig {output.bedgraph} {params.chrom_sizes} {output.bigwig}
           """


# Same as merge_signal_per_condition but starting from the input-subtracted
# tracks.  NOTE(review): bigWigMerge sums, it does not average, despite the
# ".avg" filename and the message text.
rule merge_signal_per_condition_input_adj:
    input: expand(BW_DIR + "/rpgc_input_subtr/{{ab}}.{{tp}}.{line}_{rep}.bw", line = lines, rep = [1,2])
    output:
        # intermediate bedGraph, deleted by snakemake once the bigwig exists
        bedgraph = temp(BW_DIR + "/rpgc_sum_input_subtr/by_dataset/{ab}.{tp}.avg.bedGraph"),
        bigwig = BW_DIR + "/rpgc_sum_input_subtr/by_dataset/{ab}.{tp}.avg.bw",
    threads: 1
    params:
        chrom_sizes = chrom_sizes
    message: "{ruleDisplayMessage}Generating average bigwig files for {wildcards.ab} at {wildcards.tp} ..."
    shell: """
           bigWigMerge {input} {output.bedgraph};
           bedGraphToBigWig {output.bedgraph} {params.chrom_sizes} {output.bigwig}
           """

# Combine the two replicates of one fly line for one antibody/timepoint.
# NOTE(review): bigWigMerge sums, it does not average, despite the ".avg"
# filename and the message text.
rule merge_signal_per_line:
    input: expand(BW_DIR + "/rpgc/{{ab}}.{{tp}}.{{line}}_{rep}.bw", rep = [1,2])
    output:
        # intermediate bedGraph, deleted by snakemake once the bigwig exists
        bedgraph = temp(BW_DIR + "/rpgc_sum/by_line/{ab}.{tp}.{line}.avg.bedGraph"),
        bigwig = BW_DIR + "/rpgc_sum/by_line/{ab}.{tp}.{line}.avg.bw",
    threads: 1
    params:
        chrom_sizes = chrom_sizes
    message: "{ruleDisplayMessage}Generating average bigwig files for {wildcards.line}, {wildcards.ab} at {wildcards.tp} ..."
    shell: """
           bigWigMerge {input} {output.bedgraph};
           bedGraphToBigWig {output.bedgraph} {params.chrom_sizes} {output.bigwig}
           """

# Same as merge_signal_per_line but starting from the input-subtracted tracks.
# NOTE(review): bigWigMerge sums, it does not average, despite the ".avg"
# filename and the message text.
rule merge_signal_per_line_input_adj:
    input: expand(BW_DIR + "/rpgc_input_subtr/{{ab}}.{{tp}}.{{line}}_{rep}.bw", rep = [1,2])
    output:
        # intermediate bedGraph, deleted by snakemake once the bigwig exists
        bedgraph = temp(BW_DIR + "/rpgc_sum_input_subtr/by_line/{ab}.{tp}.{line}.avg.bedGraph"),
        bigwig = BW_DIR + "/rpgc_sum_input_subtr/by_line/{ab}.{tp}.{line}.avg.bw",
    threads: 1
    params:
        chrom_sizes = chrom_sizes
    message: "{ruleDisplayMessage}Generating average bigwig files for {wildcards.line}, {wildcards.ab} at {wildcards.tp} ..."
    shell: """
           bigWigMerge {input} {output.bedgraph};
           bedGraphToBigWig {output.bedgraph} {params.chrom_sizes} {output.bigwig}
           """


# True per-bin MEAN (via wiggletools) across all replicates of all lines for
# one antibody/timepoint, from the input-subtracted tracks — unlike the
# bigWigMerge rules above, which sum.
rule mean_signal_per_condition_input_adj:
    input: expand(BW_DIR + "/rpgc_input_subtr/{{ab}}.{{tp}}.{line}_{rep}.bw", line = lines, rep = [1,2])
    output:
        # intermediate wig, deleted by snakemake once the bigwig exists
        wig = temp(BW_DIR + "/rpgc_mean_input_subtr/by_dataset/{ab}.{tp}.mean.input_subtr.wig"),
        bigwig = BW_DIR + "/rpgc_mean_input_subtr/by_dataset/{ab}.{tp}.mean.input_subtr.bw",
    threads: 1
    params:
        chrom_sizes = chrom_sizes
    message: "{ruleDisplayMessage}Generating average bigwig files for {wildcards.ab} at {wildcards.tp} ..."
    shell: """
           wiggletools mean {input} > {output.wig};
           wigToBigWig {output.wig} {params.chrom_sizes} {output.bigwig}
           """
