#!/usr/bin/env python2
# Created by Fengjun Zhang

import os
import sys
import subprocess
import time

#
# ------------------------------------------------------------------------
# Document
#
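# Input files:
# target genome, query genome, and an optional config file (-c)
#
# Output files:
# log file (-l, default lastz_assign.log) and result file
# (-o, default lastz_result.axt)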
# ------------------------------------------------------------------------
#


#
# ------------------------------------------------------------------------
# Check options and arguments
# ------------------------------------------------------------------------
#


def opt_check():
    """Parse the command line into the module-level ``args`` namespace.

    Fills in the defaults for ``--log`` ("lastz_assign.log"), ``--out``
    ("lastz_result.axt") and ``--num_threads`` (20), then validates the
    thread count and checks that both genome files exist.

    Exits with status 2 (argparse usage error) for an invalid thread count
    and with status 1 when the target or query file is missing, so that a
    calling shell can detect the failure.
    """
    import argparse
    global args

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", help="debug mode",
                        action='store_true')
    parser.add_argument("-c", "--config", help="config_file")
    parser.add_argument("-l", "--log", help="log_file")
    parser.add_argument("-n", "--num_threads",
                        help="number of threads (default: 20)", type=int)
    parser.add_argument("-o", "--out", help="name of out file")
    parser.add_argument("target", help="target genome file")
    parser.add_argument("query", help="query genome file")
    args = parser.parse_args()

    if not args.log:
        args.log = "lastz_assign.log"
    if not args.out:
        args.out = "lastz_result.axt"

    if args.debug:
        print("[Log] debug mode set")

    if not args.num_threads:
        # Option omitted (or given as 0): fall back to the default.
        args.num_threads = 20
    elif not 1 <= args.num_threads <= 99:
        # Worker indices are formatted with two digits and read back from
        # two characters of the ".end" flag name, so at most 99 workers can
        # be told apart; a negative count would never launch anything.
        parser.error("--num_threads must be between 1 and 99")

    if not os.path.exists(args.target):
        print("[Err] target genome not found")
        sys.exit(1)
    if not os.path.exists(args.query):
        print("[Err] query genome not found")
        sys.exit(1)

    return

#
# ------------------------------------------------------------------------
# write_log()
# ------------------------------------------------------------------------
#


def write_log(txt_log):
    """Append a timestamped entry to the log file named by ``args.log``.

    The entry is two lines: the current time formatted as
    ``YYYY-MM-DD HH:MM:SS`` (taken from ``time.gmtime()``, i.e. UTC),
    followed by ``txt_log``.

    :param txt_log: message text to record.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    # The context manager closes the handle even if the write fails.
    with open(args.log, 'a') as log_file:
        log_file.write(timestamp + "\n" + txt_log + "\n")

#
# ------------------------------------------------------------------------
# start_lastz():
# @param: args_sub - complete shell command line used to launch one worker
# ------------------------------------------------------------------------
#


def start_lastz(args_sub):
    """Log and launch one worker command in the background.

    :param args_sub: complete shell command line; it is run through the
        shell with its standard output discarded.
    :return: the ``subprocess.Popen`` handle of the launched process. The
        call does not wait for the process to finish.
    """
    write_log('[Log] ' + args_sub)
    # The child receives its own copy of the descriptor, so the parent's
    # handle on the null device can be closed as soon as Popen returns.
    with open(os.devnull, 'w') as devnull:
        p = subprocess.Popen(args_sub, shell=True, stdout=devnull)
    return p

#
# ------------------------------------------------------------------------
# check_progress():
# ------------------------------------------------------------------------
#


def check_progress(poll_interval=1.0):
    """Block until a ``*.end`` flag file exists in the current directory.

    Only the current working directory is inspected: the returned value is
    a bare file name that callers pass straight to ``os.remove``, so a
    match found in a subdirectory could not be used by them.

    :param poll_interval: seconds to sleep between two scans, so waiting
        does not spin a CPU core.
    :return: the name of one file ending in ``.end``.
    """
    while True:
        for name in os.listdir("."):
            if name.endswith('.end') and not os.path.isdir(name):
                return name
        time.sleep(poll_interval)

#
# ------------------------------------------------------------------------
# lastz_singleAssign():
# ------------------------------------------------------------------------
#

def _append_finished_result(p_index):
    """Fold one worker's .axt output into the accumulation file and drop it."""
    axt_name = 'calc.p' + p_index + '.axt'
    # Appending in place gives the same content as rebuilding the whole
    # accumulation file with `cat`, without recopying everything collected
    # so far and without passing file names through a shell.
    with open(axt_name, 'rb') as src, open(args.out + '.tmp', 'ab') as dst:
        for chunk in iter(lambda: src.read(1 << 20), b''):
            dst.write(chunk)
    with open('calc.lastz.progress', 'a') as progress:
        progress.write(axt_name + ' finished\n')
    os.remove(axt_name)


def lastz_singleAssign():
    """Hand the buffered sequence in ``calc.splitTarget.tmp`` to a worker.

    On the very first call (``p_working == 0``) nothing has been buffered
    yet, so the counter is only bumped to 1. On later calls:

    * while fewer than ``args.num_threads`` workers have been started, the
      next two-digit index is allocated and ``p_working`` is incremented;
    * otherwise the call blocks in ``check_progress()`` until some worker
      has written its ``.end`` flag, collects that worker's result into
      ``args.out + '.tmp'`` and reuses its index.

    The buffered sequence is then renamed to ``calc.pNN.fa``, the worker
    command is started through ``start_lastz()``, its return value is
    appended to ``list_p``, and the function sleeps 5 seconds before
    returning.
    """
    global p_working, list_p

    if p_working == 0:
        p_working = p_working + 1
        return

    if p_working > args.num_threads:
        finished_flag = check_progress()
        # The worker index is the two characters in front of ".end".
        p_index = finished_flag[-6:-4]
        os.remove(finished_flag)
        os.remove('calc.p' + p_index + '.fa')
        _append_finished_result(p_index)
    else:
        p_index = format(p_working, '02')
        p_working = p_working + 1

    os.rename('calc.splitTarget.tmp', 'calc.p' + p_index + '.fa')
    exec_sh_single = exec_sh + ' -l calc.p' + p_index + '.log -t calc.p' + p_index + \
        '.fa -q ' + args.query + ' -o calc.p' + \
        p_index + '.axt -i calc.p' + p_index + '.end'
    p_launch = start_lastz(exec_sh_single)
    list_p.append(p_launch)
    write_log("[Log] Process " + p_index + " launched")
    time.sleep(5)

#
# ------------------------------------------------------------------------
# Main function
# ------------------------------------------------------------------------
#

# Base command line of the per-sequence worker script, which is looked up in
# the directory that contains this file.
script_dir = os.path.dirname(os.path.abspath(__file__))
exec_sh = 'bash ' + script_dir + '/runLASTZ.sh '

opt_check()

# Forward the optional flags to every worker invocation.
if args.debug:
    exec_sh = exec_sh + '-d '
if args.config:
    exec_sh = exec_sh + '-c ' + args.config

# p_working is 0 until the first header is seen; afterwards it is one more
# than the number of launched workers whose results are still uncollected.
# Once it exceeds num_threads, lastz_singleAssign() waits for a finished
# worker and reuses its index instead of allocating a new one.
p_working = 0

list_p = []
# Make sure the accumulation file exists before any result is appended.
open(args.out + '.tmp', 'a').close()

# Split the target into one file per '>' record, dispatching the previous
# record each time a new header starts.
fh_splitTarget = None
with open(args.target, 'r') as fh_target:
    for line in fh_target:
        if line.startswith(">"):
            if fh_splitTarget is not None:
                fh_splitTarget.close()
            lastz_singleAssign()
            fh_splitTarget = open('calc.splitTarget.tmp', 'w')
        # Text in front of the first header belongs to no record; skip it
        # instead of failing on a not-yet-opened split file.
        if fh_splitTarget is not None:
            fh_splitTarget.write(line)

if fh_splitTarget is None:
    print("[Err] no sequence header ('>') found in target genome")
    sys.exit(1)

# Dispatch the last buffered record.
fh_splitTarget.close()
lastz_singleAssign()
write_log("[Log] Final process launched")

# Collect the results of the workers that are still outstanding.
while p_working > 1:
    finished_flag = check_progress()
    # The worker index is the two characters in front of ".end".
    p_index = finished_flag[-6:-4]
    p_working = p_working - 1
    os.remove(finished_flag)
    os.remove('calc.p' + p_index + '.fa')
    axt_name = 'calc.p' + p_index + '.axt'
    # Append in place: same content as rebuilding the accumulation file
    # with `cat`, without recopying everything collected so far.
    with open(axt_name, 'rb') as src, open(args.out + '.tmp', 'ab') as dst:
        for chunk in iter(lambda: src.read(1 << 20), b''):
            dst.write(chunk)
    with open('calc.lastz.progress', 'a') as progress:
        progress.write(axt_name + ' finished\n')
    os.remove(axt_name)
    write_log('[Log] process ' + p_index + ' finished')

os.rename(args.out + '.tmp', args.out)

if not args.debug:
    # Remove leftover calc.p* files; an empty match is not an error.
    for leftover in os.listdir('.'):
        if leftover.startswith('calc.p') and os.path.isfile(leftover):
            os.remove(leftover)

write_log('[Log] All process finished')
