#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import os
import re
import shlex
import subprocess
import sys
import tempfile
from datetime import datetime
from multiprocessing import Pool

import colorama
import coloredlogs

from transpo import analysis
from transpo import discovery
from transpo import util


def get_args():
    """Define the command-line interface and parse ``sys.argv``.

    Returns:
        argparse.Namespace: one attribute per option declared below.
        ``--metadata``, ``--bed1``, ``--bed2`` and ``--org`` are required;
        argparse exits the process with a usage message when they are
        missing.
    """
    p = argparse.ArgumentParser(
        description=("Catch all reads that are chimeric between two "
                     "beds. Use stringtie to compute all transcripts and then "
                     "use bedtools to overlap the two beds."))

    # --- required inputs ---------------------------------------------------
    p.add_argument('--metadata',
                   help=("List of bams and sample information for analysis; "
                         "see the README for more information"),
                   required=True)
    p.add_argument('--bed1',
                   action='store',
                   help="The first bed where transcripts have to start",
                   required=True)
    p.add_argument('--bed2',
                   action='store',
                   help="The second bed that transcripts have to contain",
                   required=True)
    p.add_argument('--org',
                   action='store',
                   help='What organism is the data from (hg19, mm9, rheMac8)',
                   required=True)

    # --- discovery / analysis tuning ---------------------------------------
    p.add_argument('--paired',
                   action='store_true',
                   help="If the data is paired end or not")
    # NOTE(review): the two coverage options have no ``type=``, so a value
    # given on the command line is a str while the default is an int.
    # Left untouched because fractional values may be meaningful downstream;
    # confirm how discovery consumes them before adding a type.
    p.add_argument('--transcript_cov',
                   action='store',
                   help=("The minimum coverage in reads to discover "
                         "the transcripts (default=5)"), metavar="COV",
                   default=5)
    p.add_argument('--junction_cov',
                   action='store',
                   help=("The minimum coverage in reads on junctions to "
                         "discover transcripts (default=5)"), metavar="COV",
                   default=5)
    p.add_argument('--control',
                   action='store',
                   help=("Name of the group you want to use for control "
                         "(to compute pval and foldchange)"), metavar="GROUP",
                   default="C")
    p.add_argument('--usepval',
                   action='store_true',
                   help="Do not adjust the pval",
                   default=False)
    p.add_argument('--outdir',
                   help=("The directory in which to store the results "
                         "(default: transpo)"),
                   default='transpo')
    p.add_argument('--gtfdir',
                   help=("Directory where the transcriptome GTFs are located "
                         "(default: transcriptome_gtf)"),
                   default='transcriptome_gtf')
    p.add_argument('--threads', help='Max number of threads (default: 4)',
                   default=4, type=int)
    # type=int so that a user-supplied distance has the same type as the
    # integer default instead of arriving as a string.
    p.add_argument('--tss_dist',
                   action='store',
                   help=("Distance [bp] around the TSS to find overlapping "
                         "elements of bed1 (default: 100)"),
                   default=100, type=int)
    p.add_argument('--drop_inter',
                   action='store_true',
                   help=("Remove regions overlapping both bed1 and bed2 "
                         "(default: false = keep all transpochimeric)"),
                   default=False)
    p.add_argument('--first_exon',
                   action='store_true',
                   help=("Keep only chimeric transcripts whose first exons "
                         "are on bed1"),
                   default=False)
    p.add_argument('--genome_fasta',
                   help=("If a fasta of the genome is given, the fasta "
                         "sequences of the chimeric transcript will be "
                         "computed"),
                   default='')

    # --- cluster submission ------------------------------------------------
    p.add_argument('--fidis',
                   action='store_true',
                   help="Fidis mode: run discovery and analysis on fidis using full node",
                   default=False)
    p.add_argument('--scitas',
                   action='store_true',
                   help="Slurm mode: run discovery and analysis using slurm batch script",
                   default=False)
    p.add_argument('--venv',
                   help="Path of the virtual env to use",
                   default="/home/duc/local/transpo/transpo_soft_v7.1/transpoenv")

    # --- stage selection ---------------------------------------------------
    p.add_argument('--analysis',
                   action='store_true',
                   help="Only run the analysis module of transpo",
                   default=False)
    p.add_argument('--counts_only',
                   action='store_true',
                   help="Only counts in groups, don't make pvalues and foldChange",
                   default=False)
    p.add_argument('--discovery',
                   action='store_true',
                   help="Only run the discovery module of transpo",
                   default=False)
    p.add_argument('--tss_analysis',
                   action='store_true',
                   help="Make an analysis by TSS of the chimeric gene",
                   default=False)

    # --- misc --------------------------------------------------------------
    p.add_argument('--force',
                   action='store_true',
                   help=("Overwrite the stringtie GTF of all the "
                         "transcripts, else reuse it"))
    p.add_argument('--debug',
                   action='store_true',
                   help="Debug mode, print more stuff and don't clean tmp",
                   default=False)
    # Hidden from --help; the generated batch script passes it explicitly.
    p.add_argument('--tmpdir',
                   help=argparse.SUPPRESS,
                   default='tmp')
    return p.parse_args()


def run_discovery(args, groups, metaline):
    """Validate one metadata row and run transcript discovery for its bam.

    ``main`` dispatches this function to worker processes through
    ``Pool.starmap``, so it must be safe to run concurrently and must report
    failures with ordinary exceptions.

    Args:
        args: Parsed command-line namespace. ``discovery``, ``fidis`` and
            ``gtfdir`` are read here; the whole namespace is forwarded to
            ``discovery.main``.
        groups: Not used by this function; kept so the call signature
            stays unchanged.
        metaline: One metadata row with exactly three tab-separated fields:
            bam path (or the keyword ``nobam``), sample name, group name.

    Raises:
        ValueError: if ``metaline`` does not contain exactly three
            tab-separated fields.
        FileNotFoundError: if discovery is requested and the bam file does
            not exist.
    """
    bam, sample, _group = metaline.split('\t')

    # 'nobam' is a magic word to skip bam checking.
    if bam != 'nobam' and args.discovery:
        if not os.path.isfile(bam):
            message = ("This bam '{bam}' was not found. Check your "
                       "metadata file and make sure the bam "
                       "exists").format(bam=bam)
            util.logger.error(message)
            # Raise instead of sys.exit(): SystemExit inside a Pool worker
            # kills the worker without sending a result back, which leaves
            # the parent blocked in starmap. A regular exception is relayed
            # to the parent and re-raised there.
            raise FileNotFoundError(message)
        util.check_bam_header(bam)

    if args.discovery and not args.fidis:
        util.cmd_exists('stringtie')
        # exist_ok avoids a check-then-create race between parallel workers.
        os.makedirs(args.gtfdir, exist_ok=True)

        if bam != 'nobam':
            util.logger.info('\n\n ~~~ Working on bam {}'.format(bam))
        discovery.main(bam, args, util.logger, args.gtfdir, sample)


def _forwarded_cli_args(argv):
    """Return ``argv`` as one shell-quoted string without cluster-only options.

    ``--fidis`` and ``--scitas`` are dropped wherever they appear (including
    as the very first argument) so the batch job does not resubmit itself.
    ``--threads N`` / ``--threads=N`` are dropped because the batch script
    sets its own thread count.
    """
    kept = []
    skip_value = False
    for arg in argv:
        if skip_value:  # value belonging to a preceding bare --threads
            skip_value = False
            continue
        if arg in ('--fidis', '--scitas') or arg.startswith('--threads='):
            continue
        if arg == '--threads':
            skip_value = True
            continue
        kept.append(shlex.quote(arg))
    return ' '.join(kept)


def _check_cluster_metadata(path):
    """Exit with an error unless every metadata row starts with 'nobam'."""
    with open(path) as metadata:
        for ln in metadata:
            first_field = ln.split('\t')[0]
            # Skip blank lines and the header row.
            if not ln.strip() or first_field == 'bams':
                continue
            if first_field != 'nobam':
                util.logger.error(("In fidis mode, only use nobam in the metadata "
                                   "or horrible things will happen!"))
                sys.exit(1)


def _submit_cluster_job(args):
    """Write a Slurm batch script that re-runs transpo and submit it."""
    date = datetime.today().strftime('%d%m_%H%M')
    newargs = _forwarded_cli_args(sys.argv[1:])
    activate = shlex.quote(os.path.join(args.venv, "bin", "activate"))
    script = "\n".join([
        "#!/bin/bash -l",
        "#SBATCH --nodes 1",
        "#SBATCH --ntasks-per-node 1",
        "#SBATCH --cpus-per-task 28",
        "#SBATCH --time 00:15:00",
        "#SBATCH --partition parallel",
        "#SBATCH --mem 170GB",
        f"#SBATCH --error {args.outdir}/slurm_transpo.err",
        f"#SBATCH --output {args.outdir}/slurm_transpo.out",
        "",
        "module load intel",
        "module load bedtools2",
        "",
        f"source {activate}",
        "",
        f"transpo {newargs} --threads 25 --tmpdir ${{TMPDIR}}",
        "deactivate",
        "",
    ])

    # A unique temporary file avoids collisions between submissions made
    # within the same minute and avoids a predictable path.
    with tempfile.NamedTemporaryFile('w', prefix='transpo_{}_'.format(date),
                                     suffix='.run', delete=False) as out:
        out.write(script)
        script_path = out.name
    if args.debug:
        print(script)

    try:
        status = subprocess.call(['sbatch', script_path])
    except OSError as err:
        util.logger.error("Could not run sbatch: {}".format(err))
        sys.exit(1)
    finally:
        # Remove the script whether or not the submission worked.
        os.remove(script_path)

    if status != 0:
        util.logger.error("sbatch failed with exit status {}".format(status))
        sys.exit(1)
    util.logger.info((colorama.Fore.GREEN + '\nJob submitted!\n\n' +
                      colorama.Fore.RESET))


def main():
    """Command-line entry point.

    Prints the banner, parses the arguments and then either submits a Slurm
    job (``--fidis`` / ``--scitas``) or runs locally: discovery for every
    metadata row in a process pool, followed by the analysis step.
    When neither ``--discovery`` nor ``--analysis`` is given, both run.
    """
    # Nice logo with colors
    util.logger.info(colorama.Fore.BLUE + 79*'=')
    util.logger.info(util.logo)
    util.logger.info('v{}'.format(util.version))
    util.logger.info(colorama.Fore.BLUE + 79*'=' + '\n\n' + colorama.Fore.RESET)

    args = get_args()

    # No stage selected explicitly means "run everything".
    if not args.discovery and not args.analysis:
        args.discovery = True
        args.analysis = True

    if args.debug:
        util.logger.setLevel('DEBUG')
        coloredlogs.install(fmt='%(msg)s', level='DEBUG', logger=util.logger)

    os.makedirs(args.outdir, exist_ok=True)

    if args.fidis or args.scitas:
        _check_cluster_metadata(args.metadata)
        _submit_cluster_job(args)
        return

    with open(args.metadata) as metadata:
        lines = metadata.read().split('\n')
    # The first line is the header; blank or whitespace-only rows are ignored.
    rows = [ln for ln in lines[1:] if ln.strip()]

    # Map sample name (2nd column) to group name (3rd column).
    groups = {}
    for row in rows:
        fields = row.split()
        if len(fields) < 3:
            util.logger.error(("Malformed metadata line (expected bam, sample "
                               "and group): '{}'").format(row))
            sys.exit(1)
        groups[fields[1]] = fields[2]

    funargs = [(args, groups, row) for row in rows]
    with Pool(args.threads) as p:  # run the multiprocessing
        p.starmap(run_discovery, funargs)

    # args.fidis is always False on this (local) path.
    if args.analysis:
        if args.control not in groups.values() and not args.counts_only:
            util.logger.error(("ERROR: your control {} is not in the metadata "
                               "group column").format(args.control))
            sys.exit(1)
        os.makedirs(os.path.join(args.outdir, 'figs'), exist_ok=True)

        analysis.main(args, util.logger, groups)

        util.logger.info((colorama.Fore.GREEN + '\nDone! Thanks for coming \n\n' +
                          colorama.Fore.RESET))


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
