'''
https://github.com/nanoporetech/tombo
conda install -c bioconda ont-tombo


Documentation

snakemake \
    --configfile config.json \
    --resources resquiggle_limit=1 \
    --cores 4 \
    --printshellcmds 

How to structure a workflow repository for distribution, see:
http://snakemake.readthedocs.io/en/latest/project_info/faq.html#id7


# log: "logs/abc.log"
# https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#log-files

# message: "Executing somecommand with {threads} threads on the following files {input}."
# https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#messages

'''


# import glob
# import sys
# import os


'''
------------------------------------------------------------------------------

Helper functions

------------------------------------------------------------------------------
'''


def get_model(modification):
    '''
    Translate a modification name into the matching fragment of the
    `tombo detect_modifications` call.

    Known names are '5mC', '6mA' and 'de_novo'. For any other value a notice
    is printed and None is returned.
    '''
    # (modification name, Tombo sub-command plus its arguments)
    known_calls = (
        ('5mC', 'alternative_model --alternate-bases 5mC'),
        ('6mA', 'alternative_model --alternate-bases 6mA'),
        ('de_novo', 'de_novo'),
    )
    for name, tombo_args in known_calls:
        if modification == name:
            return tombo_args

    print('Modification not known to or implemented in Tombo.')
    return None


def get_index(fp):
    '''
    Return the path of the Tombo index file that belongs to `fp`.

    resquiggle creates an index file:
    https://nanoporetech.github.io/tombo/resquiggle.html#tombo-index-file

    The returned path is everything in `fp` before its last '/', followed by
    '/.reads.RawGenomeCorrected_000.tombo.index'.
    '''
    # rpartition yields an empty head when fp contains no '/'.
    parent_dir, _, _ = fp.rpartition('/')
    return '{}/.reads.RawGenomeCorrected_000.tombo.index'.format(parent_dir)


'''
------------------------------------------------------------------------------

Set variables, input validation

------------------------------------------------------------------------------
'''


# Use bash as the executable for the rules' shell commands.
shell.executable('/bin/bash')

# Value used for the `threads:` directive of tombo_preprocess and
# tombo_call_modifications.
threads = config['threads']

# Make sure to not have spaces in the wildcards -- e.g. use "denovo" 
# instead of "de novo".
# Each entry becomes a value of the {sample} wildcard (see rule all).
samples = config['samples']
print('\nThe following samples will be processed:\n')
for i in samples:
    print('-', i)  # apparently we cannot use f-strings in a Snakefile

# Base directory: every input and output path of the rules below is built
# as fp + '/...'.
fp = config['path']
print('\nResults are written to:', fp, '\n')


# Input validation: Can the requested modification be called for the given
# molecule? Only supported modifications end up in `mods`; the others are
# reported and skipped.
valid_models = {
    'RNA': ['5mC', 'de_novo'],
    'DNA': ['5mC', '6mA', 'de_novo']}

mods = []
molecule = config['molecule']
for i in config['modifications']:
    if i in valid_models[molecule]:
        mods.append(i)
    else:
        # Name the configured molecule in the notice so the user can see why
        # the modification was dropped.
        print('{} model not supported for {}, skipping.'.format(i, molecule))


print('\nThe following modifications will be called:\n')
for i in mods:
    print('-', i)


'''
------------------------------------------------------------------------------

Workflow

------------------------------------------------------------------------------
'''


# Top-level statement: printed while this Snakefile is evaluated, not when
# the first job starts.
print('\nStarting workflow ...')


rule all:
    '''
    Target rule: names the final files of the workflow.

    Ideally these are log files which take all results (leaves in the DAG)
    and summarize them, so that requesting them "pulls" every result through
    the DAG. Here this is one mocklog2 file (written by rule final) per
    combination of sample and modification.
    '''
    input:
        # One target for every (sample, mod) pair.
        expand(fp + '/results/{sample}/{mod}/mocklog2', 
            sample=samples, mod=mods)


rule tombo_preprocess:
    '''
    This rule (1) writes basecalled data to the fast5 files, (2) aligns
    them to the raw signal and (3) creates an index of this alignment.

    The mock output links this rule to the one calling modifications.

    Because the preprocessing steps work on the fast5 files, they don't create
    any files we could reference besides the index. However, dynamically
    referencing the index in the preprocessing rule is cumbersome:
    https://bitbucket.org/snakemake/snakemake/issues/735/use-lambda-functions-in-outputs

    Note:

    > Multiple independent resquiggle commands on the same set of reads should
    NOT be run simultaneously. This can cause hard to track errors and read
    file corruption. To protect against this, Tombo adds a lock [...] --
    https://nanoporetech.github.io/tombo/resquiggle.html#additional-command-line-options

    (The option named there for this lock is --ignore-read-locks; it is
    deliberately not passed here.)

    To keep Snakemake from running several resquiggle jobs at once, the rule
    claims one unit of the resource `resquiggle_limit`; the snakemake call
    has to cap it with `--resources resquiggle_limit=1` (see the top of this
    file). On resource limitation in Snakemake:
    https://groups.google.com/forum/#!msg/snakemake/ykyWDoYBhoI/J-k6oa9HHQAJ

    Both Tombo commands use the number of processes given by `threads`.
    '''
    input:
        signal = fp + '/signal/{sample}/',
        reads = fp + '/reads/{sample}.fq',
        reference = fp + '/transcripts/{sample}.fa',
    output:
        fp + '/results/{sample}/mocklog'
    threads:
        threads
    resources:
        resquiggle_limit = 1
    shell:
        '''
        echo -e "\nAnnotate signal w/ basecall data ..."
        tombo preprocess annotate_raw_with_fastqs \
            --fast5-basedir {input.signal} \
            --fastq-filenames {input.reads} \
            --processes {threads} \
            --overwrite

        echo -e "\nResquiggle ..."
        tombo resquiggle \
            {input.signal} \
            {input.reference} \
            --signal-matching-score 1.3 \
            --processes {threads} \
            --overwrite
        # on --signal-matching-score, more means less strict, default (RNA) 1.3
        # https://github.com/nanoporetech/tombo/issues/68
        # 2 works, 3 might be still ok

        # Create mock result.
        echo "index created" > {output}
        '''


rule tombo_call_modifications:
    '''
    Test for modified bases with `tombo detect_modifications`, once per
    sample and modification ({mod} wildcard).

    The `mock` input is the marker file written by rule tombo_preprocess; it
    makes sure the reads were annotated and resquiggled before this rule runs.

    Tombo only takes a basename for its statistics file and chooses the full
    file name itself, so the result is renamed to a fixed name (tombo.stats)
    which the downstream rules can depend on.
    '''
    input:
        signal = fp + '/signal/{sample}/',
        reads = fp + '/reads/{sample}.fq',
        mock = fp + '/results/{sample}/mocklog',
        # see docs rule tombo_preprocess
    output:
        fp + '/results/{sample}/{mod}/tombo.stats'
    params:
        # On the problem of programs taking prefixes as output arg (out)
        # and creating a couple of results files (out.foo, out.bar):
        # https://groups.google.com/forum/#!topic/snakemake/HbK43vrKrhw
        prefix = fp + '/results/{sample}/{mod}/foobar',
        # Sub-command and arguments that select the Tombo model for {mod}.
        model = lambda wildcards: get_model(wildcards.mod),
    threads:
        threads
    shell:
        '''
        echo -e "\nDetect modifications ..."
        tombo detect_modifications {params.model} \
            --fast5-basedirs {input.signal} \
            --processes {threads} \
            --statistics-file-basename {params.prefix}

        # Rename the statistics file to the declared output. Only files that
        # start with the basename are considered, so unrelated files in the
        # directory (or "foobar" elsewhere in the path) cannot interfere.
        # Assumes Tombo writes exactly one file per call -- mv fails otherwise.
        mv {params.prefix}* {output}
        '''


rule tombo_results:
    '''
    Export the statistics of one sample/modification pair as wiggle files
    with `tombo text_output browser_files`.

    Two file types are requested (dampened_fraction and valid_coverage); for
    each of them a plus and a minus wig file is declared as output.
    '''
    input:
        signal = fp + '/signal/{sample}/',
        # Written by rule tombo_call_modifications.
        stats = fp + '/results/{sample}/{mod}/tombo.stats',
    output:
        fp + '/results/{sample}/{mod}/tombo.dampened_fraction_modified_reads.minus.wig',
        fp + '/results/{sample}/{mod}/tombo.dampened_fraction_modified_reads.plus.wig',
        fp + '/results/{sample}/{mod}/tombo.valid_coverage.minus.wig',
        fp + '/results/{sample}/{mod}/tombo.valid_coverage.plus.wig',
    params:
        # Basename handed to --browser-file-basename; all declared outputs
        # start with it.
        prefix = fp + '/results/{sample}/{mod}/tombo'
    shell:
        '''
    tombo text_output browser_files \
        --fast5-basedirs {input.signal} \
        --statistics-filename {input.stats} \
        --file-types \
            dampened_fraction \
            valid_coverage \
        --browser-file-basename {params.prefix}
        '''


rule tombo_plot:
    '''
    Run `tombo plot most_significant` for one sample/modification pair and
    write the plots to a single PDF.
    '''
    input:
        signal = fp + '/signal/{sample}/',
        # Written by rule tombo_call_modifications.
        stats = fp + '/results/{sample}/{mod}/tombo.stats',
    output:
        fp + '/results/{sample}/{mod}/most_significant.pdf'
    shell:
        '''
    tombo plot most_significant \
            --fast5-basedirs {input.signal} \
            --statistics-filename {input.stats} \
            --plot-standard-model \
            --pdf-filename {output}
        '''


rule final:
    '''
    Last rule per sample/modification pair; its output (mocklog2) is what
    rule all requests.

    The two inputs are an output of rule tombo_plot and an output of rule
    tombo_results, so asking for mocklog2 triggers both of them. The mock log
    itself is just the output of `head` on the two input files.
    '''
    input:
        plot = fp + '/results/{sample}/{mod}/most_significant.pdf',
        fraction = fp + '/results/{sample}/{mod}/tombo.dampened_fraction_modified_reads.minus.wig',
    output:
        fp + '/results/{sample}/{mod}/mocklog2'
    shell:
        # NOTE(review): the first input is a PDF, so the head of it is
        # presumably not readable text -- confirm this is only a placeholder.
        '''
        head {input.plot} {input.fraction} > {output}
        '''


