#!/usr/bin/env bash
# Created by Fengjun Zhang

set -eo pipefail

__base="$(basename "${BASH_SOURCE[0]}")"
__wd=$PWD

#
# ------------------------------------------------------------------------
# Document
#
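# Input files:
#   $input_target (-t) and $input_query (-q): the target and query genome FASTA files
#
# Output files:
#   $file_log, and a ${name_target}.fa / ${name_query}.fa symlink to each input that is not already a .fa file in the working directory
#   (lastz mode) ${name_target}.sizes and ${name_query}.sizes
#   (LAST mode) ${name_target}_preSplit.tar.gz and ${name_query}_preSplit.tar.gz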
# ------------------------------------------------------------------------
#

#
# ------------------------------------------------------------------------
# Usage document
# ------------------------------------------------------------------------
#
function usage_doc () {
  # Print the usage text of this script to stdout, then terminate the script.
  #
  # Globals:   __base (read) - script file name shown in the text.
  # Arguments: $1 - optional exit status. Defaults to 1, because every call
  #                 in opt_check reaches this function from an error path and
  #                 a bare `exit` would report the status of `cat`, i.e. 0.
  # Outputs:   the usage text on stdout, with ANSI escapes for bold text.
  # Returns:   never returns; exits the shell with the chosen status.
  local status=${1:-1}
  local esc=$'\e'
  local fBold="${esc}[1m"
  local unF="${esc}[0m"

  cat <<EOF

$__base document
${fBold}NAME$unF
    $fBold$__base$unF

${fBold}USAGE$unF
    bash $fBold$__base$unF [-d][-l LOG_FILE][-x limit_len] -t TARGET_FA -q QUERY_FA

${fBold}DESCRIPTION$unF
    $fBold$__base$unF is a bash script to initiate check for 2-way project

EOF
  exit "$status"
}


#
# ------------------------------------------------------------------------
# Check options and arguments
# ------------------------------------------------------------------------
#
function opt_check () {
  # Parse the command-line options and set up logging.
  #
  # Globals written:
  #   is_debug     - set to 1 by -d
  #   file_log     - log file from -l; defaults to "init2way.log"
  #   input_target - target genome path from -t (required)
  #   input_query  - query genome path from -q (required)
  #   splitSize    - value of -x (optional)
  # Arguments: the script's command-line arguments ("$@").
  # Side effects:
  #   stderr of the whole script is appended to $file_log; with -d, stdout is
  #   appended there as well and xtrace is switched on.
  # Errors: on an unknown option, a missing option argument or a missing
  #   -t/-q, prints an "[Err]" message and calls usage_doc, which exits.

  local OPTIND
  local opt   # keep the getopts loop variable out of the global namespace

  while getopts ":dl:t:q:x:" opt; do
    case "$opt" in
      d)
        is_debug=1 ;;
      l)
        file_log=$OPTARG ;;
      t)
        input_target=$OPTARG ;;
      q)
        input_query=$OPTARG ;;
      x)
        splitSize=$OPTARG ;;
      \?)
        # stderr still points at the caller's stderr here (not yet the log).
        echo "[Err] Invalid option: -$OPTARG" >&2
        usage_doc ;;
      :)
        echo "[Err] Option -$OPTARG requires an argument." >&2
        usage_doc ;;
    esac
  done

  # log files
  if [[ -z "$file_log" ]]; then
    file_log="init2way.log"
  fi
  # Quoted so that a log path containing whitespace is a single redirection
  # target instead of an "ambiguous redirect".
  exec 2>>"$file_log"
  if [[ -n "$is_debug" ]]; then
    exec >>"$file_log" 2>&1
    set -o xtrace
    echo "[Log] debug mode set"
  fi

  # obligatory arguments
  # From here on stderr is the log file, so these messages land in $file_log.
  if [[ -z "$input_target" ]]; then
    echo "[Err] target genome should be provided." >&2
    usage_doc
  fi
  if [[ -z "$input_query" ]]; then
    echo "[Err] query genome should be provided." >&2
    usage_doc
  fi
}

# Parse the command line. opt_check sets input_target and input_query and,
# when given, splitSize, file_log and is_debug.
opt_check "$@"

# Genome names: the input file name without its directory and without a
# trailing ".fa".
name_target=$(basename -- "${input_target%.fa}")
name_query=$(basename -- "${input_query%.fa}")

# _link_genome INPUT NAME
# Make NAME.fa available in the working directory. Nothing is done when INPUT
# is already a .fa file in the working directory, or when NAME.fa already
# exists; otherwise NAME.fa is created as a symlink to INPUT.
# Returns non-zero when the symlink cannot be created.
function _link_genome () {
  local input=$1 name=$2
  if [[ "$(dirname -- "$input")" == "." && "$input" == *.fa ]]; then
    return 0
  fi
  # -L also catches a dangling link, which -e reports as missing but which
  # `ln -s` could not replace.
  if [[ -e "${name}.fa" || -L "${name}.fa" ]]; then
    return 0
  fi
  ln -s -- "$input" "${name}.fa"
}

# _make_sizes NAME
# Create NAME.sizes from NAME.fa with `faSize -detailed` unless it exists.
# A failed run removes the partial output, so that a later run does not take
# an empty or truncated file for a valid one. Returns non-zero on failure.
function _make_sizes () {
  local name=$1
  if [[ -e "${name}.sizes" ]]; then
    return 0
  fi
  if ! faSize -detailed "${name}.fa" > "${name}.sizes"; then
    rm -f -- "${name}.sizes"
    return 1
  fi
}

# check if genomes are in working dir, if no create link
if ! _link_genome "$input_target" "$name_target"; then
  echo "[Err] cannot link target genome: $input_target" >&2
  exit 1
fi
if ! _link_genome "$input_query" "$name_query"; then
  echo "[Err] cannot link query genome: $input_query" >&2
  exit 1
fi

# sequence sizes of both genomes
if ! _make_sizes "$name_target"; then
  echo "[Err] faSize failed for ${name_target}.fa" >&2
  exit 1
fi
if ! _make_sizes "$name_query"; then
  echo "[Err] faSize failed for ${name_query}.fa" >&2
  exit 1
fi

# Optional pre-splitting step; runs only when -x supplied a length limit.

# _presplit_genome NAME LIMIT
# Split the over-long sequences of NAME.fa and archive the pre-split state.
#
# Reads NAME.sizes and writes the names of sequences longer than LIMIT to
# NAME_prev-long.list. If there are any, NAME.fa is renamed to NAME_prev.fa
# and `faSplit size -oneFile` is run on it with output root NAME and lift
# file NAME_construct.list. NAME.sizes is renamed to NAME_prev.sizes.
# The files this function created or renamed are then packed into
# NAME_preSplit.tar.gz and removed.
#
# Only files produced here are archived and deleted. A `*prev*` glob would
# also pick up unrelated files, and for a genome whose name contains "prev"
# the FASTA and the archive itself. The lift file is archived only when
# faSplit ran, so a genome with no over-long sequence no longer makes tar
# fail on a missing file.
#
# Returns non-zero as soon as a step fails. If faSplit fails, the original
# FASTA is moved back to NAME.fa.
function _presplit_genome () {
  local name=$1 limit=$2
  local long_list="${name}_prev-long.list"
  local prev_fa="${name}_prev.fa"
  local prev_sizes="${name}_prev.sizes"
  local lift="${name}_construct.list"
  local -a bundle

  # check ultra long seq in the fasta
  awk -v lim="$limit" '{if ($2>lim) {print $1}}' "${name}.sizes" \
    > "$long_list" || return 1
  bundle=("$long_list")

  # splitting the fasta
  if [[ -s "$long_list" ]]; then
    mv -- "${name}.fa" "$prev_fa" || return 1
    if ! faSplit size -oneFile -lift="$lift" "$prev_fa" "$limit" "$name"; then
      mv -f -- "$prev_fa" "${name}.fa"
      return 1
    fi
    bundle+=("$prev_fa" "$lift")
  fi

  mv -- "${name}.sizes" "$prev_sizes" || return 1
  bundle+=("$prev_sizes")

  tar -zcf "${name}_preSplit.tar.gz" "${bundle[@]}" || return 1
  rm -f -- "${bundle[@]}"
}

if [[ -n "$splitSize" ]]; then
  if ! _presplit_genome "$name_target" "$splitSize"; then
    echo "[Err] pre-splitting failed for target genome: $name_target" >&2
    exit 1
  fi
  if ! _presplit_genome "$name_query" "$splitSize"; then
    echo "[Err] pre-splitting failed for query genome: $name_query" >&2
    exit 1
  fi
fi
