#!/usr/bin/env bash
# This script relies on bash-only features ([[ ]], arrays, (( )) loops,
# `echo -e`, `set -o pipefail`), so it must not be run by a plain POSIX sh.
#
#   -e           abort on the first unhandled non-zero exit status
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail

# ANSI SGR colour escapes. They are stored as literal backslash sequences and
# only become real escape characters when printed with `echo -e`.
RED='\033[0;31m'
BRED='\033[1;31m' # bold

GREEN='\033[0;32m'
BGREEN='\033[1;32m' # bold (32 = green; 33 would be yellow)

BLUE='\033[0;34m'
BBLUE='\033[1;34m' # bold

BOLD='\033[0;1m'
NC='\033[0m' # No Color

## usage
# Print the command-line help text to stdout.
# Globals:   BBLUE, NC (read) - colour escapes, interpreted by `echo -e`
# Arguments: none
# Outputs:   the option summary, with a blank line before and after it
usage() {
    echo -e "
usage:   ${BBLUE}runParser${NC} [options]
options:
  -d    data directory
  -o    output directory
  -c    directory containing read depth information to annotate calls with

  -p    prefix - operate on one sample with this prefix e.g. 'sample1'
  -f    filter
  -m    merge
  -a    annotate
  -r    remove false positives and reannotate
  -s    only operate on somatic tumour variants

  -e    exclude bed file
  -g    genome features file (.gtf)
  -h    show this message
"
}

# ---- Default settings; the option parser may override any of them ----

# Stage switches and modifiers: 0 = off, 1 = on
somatic=0
replace=0
annotate=0
merge=0
filter=0

# Glob used to select sample files; '*' selects every sample
prefix='*'

# Directories
data_dir='data/'
out_dir="$(pwd)/filtered/"
cnv_dir=''

# Reference files
# NOTE(review): both defaults are absolute paths on one particular machine
# (the original comment tags the features path as "work"); elsewhere they
# have to be supplied through the corresponding options.
features='/Users/Nick_curie/Documents/Curie/Data/Genomes/Dmel_v6.12/Features/dmel-all-r6.12.gtf' # work
exclude_file='/Users/Nick_curie/Documents/Curie/Data/Genomes/Dmel_v6.12/Mappability/dmel6_unmappable_100.bed'

# Parse command-line options.
#   Flags (no argument):   -f -m -a -r -s -h
#   Options with argument: -p -o -d -c -e -g
# The positional parameters are deliberately NOT shifted afterwards, so that
# "$#" still reflects the original argument count.
while getopts 'fmarshp:o:d:c:e:g:' flag; do
  case "${flag}" in
    f)  filter=1 ;;
    m)  merge=1 ;;
    a)  annotate=1 ;;
    r)  replace=1 ;;
    s)  somatic=1 ;;
    # A trailing '*' turns the sample prefix into a glob
    p)  prefix="$OPTARG*" ;;
    d)  data_dir="$OPTARG" ;;
    o)  out_dir="$OPTARG" ;;
    c)  cnv_dir="$OPTARG" ;;
    e)  exclude_file="$OPTARG" ;;
    g)  features="$OPTARG" ;;
    h)  usage
        exit 0 ;;
    # getopts has already printed a diagnostic for an unknown option or a
    # missing argument; stop here instead of running with defaults the user
    # did not intend.
    *)  usage >&2
        exit 1 ;;
  esac
done

echo "Searching for files with following regex: $prefix"

# Invoked without any arguments: print the help text and stop.
if [[ $# -eq 0 ]]; then
  usage
  exit 0
fi

# Resolve the 'script' dir in svParser (it sits next to this file) to an
# absolute, symlink-free path. `&&` is required: with `;` a failed cd would
# silently yield the current directory instead.
dir=$(dirname "$0")
script_bin="$dir/script"
script_bin="$(cd "$script_bin" && pwd -P)" || {
  echo "Error: cannot find script directory '$dir/script'" >&2
  exit 1
}

echo -e "  o  Reading data from ${GREEN}'$data_dir'${NC}"
echo -e "  o  Writing data to ${BLUE}'$out_dir'${NC}"
echo -e "  o  Exclude file set to ${RED}'$exclude_file'${NC}"
echo -e "  o  Features file set to ${BLUE}'$features'${NC}"

mkdir -p "$out_dir/summary"

# Later stages cd into the output tree, so any path given relative to the
# starting directory has to be made absolute now or it stops resolving.
out_dir="$(cd "$out_dir" && pwd)"
if [[ -d $data_dir ]]; then
  data_dir="$(cd "$data_dir" && pwd)"
fi
if [[ -n $cnv_dir && -d $cnv_dir ]]; then
  cnv_dir="$(cd "$cnv_dir" && pwd)"
fi
if [[ -f $features ]]; then
  features="$(cd "$(dirname "$features")" && pwd)/$(basename "$features")"
fi
if [[ -f $exclude_file ]]; then
  exclude_file="$(cd "$(dirname "$exclude_file")" && pwd)/$(basename "$exclude_file")"
fi

# Extra arguments for somatic mode; empty unless -s is given
s=''
annoS=''
# Run svParser for each type of variant file

# Somatic mode (-s): add the `st=1` filter to svParse.pl and the `-s` switch
# to sv2gene.pl. This is set up independently of -f so that the annotation
# stage still receives `-s` when filtering is not part of the same run.
if [[ $somatic -eq 1 ]]; then
  s='-f st=1'
  annoS='-s'
fi

if [[ $filter -eq 1 ]]; then
  echo "**************************"
  echo "*** Filtering variants ***"
  echo "**************************"

  # Each entry is '<sub-directory of data_dir>:<svParse.pl -m value>'.
  # ${prefix} is left unquoted on purpose so that its glob is expanded.
  for caller_mode in lumpy:l delly:d novobreak:n; do
    caller=${caller_mode%%:*}
    mode=${caller_mode##*:}

    for vcf_file in "$data_dir/$caller"/${prefix}.vcf; do
      # An unmatched glob stays literal; skip anything that is not a file
      [[ -f $vcf_file ]] || continue
      echo "perl $script_bin/svParse.pl -v $vcf_file -m $mode -f chr=1 -f su=3 -f dp=10 -f sq=0.1 -f rdr=0.05 $s -e $exclude_file -o $out_dir -p"
      # shellcheck disable=SC2086 # $s must split into the two words '-f' 'st=1'
      perl "$script_bin/svParse.pl" -v "$vcf_file" -m "$mode" -f chr=1 -f su=3 -f dp=10 -f sq=0.1 -f rdr=0.05 $s -e "$exclude_file" -p -o "$out_dir"
    done
  done

  for freec_file in "$data_dir"/freec/${prefix}_filt_cnvs.txt; do
    [[ -f $freec_file ]] || continue
    echo "perl $script_bin/parseCF.pl -c $freec_file -o $out_dir/summary"
    perl "$script_bin/parseCF.pl" -c "$freec_file" -o "$out_dir/summary"
  done

  for cnv_file in "$data_dir"/cnv/${prefix}.txt; do
    [[ -f $cnv_file ]] || continue
    echo "perl $script_bin/parseCNV.pl -c $cnv_file -o $out_dir/summary"
    perl "$script_bin/parseCNV.pl" -c "$cnv_file" -o "$out_dir/summary"
  done

fi

# If CNV-Seq has been run, the cnv directory can be specified with the -c flag
# For each summary file, annotate somatic events with log2(FC) from .cnv file
if [[ -n "$cnv_dir" ]]; then
  echo "*******************************************************"
  echo "*** Annotating variants with read depth information ***"
  echo "*******************************************************"

  cd "$out_dir/summary"

  # Unique sample names = text before the first '.' of every filtered summary
  # file. Kept in its own array so that nothing is left behind in `samples`
  # for the merge stage to append to (which made it merge samples twice).
  depth_samples=()
  while IFS= read -r sample_name; do
    depth_samples+=("$sample_name")
  done < <(
    for summary_file in ${prefix}.filtered.summary.txt; do
      [[ -f $summary_file ]] || continue
      printf '%s\n' "${summary_file%%.*}"
    done | sort -u
  )

  for ((i = 0; i < ${#depth_samples[@]}; ++i)); do
    sample=${depth_samples[i]}

    # Expand the glob once into an array; `[ -f glob ]` errors out when more
    # than one file matches. An unmatched glob stays literal and fails -f.
    cnv_matches=("$cnv_dir/$sample".*.cnv)
    if [[ ! -f ${cnv_matches[0]:-} ]]; then
      echo -e " -> ${RED}! No corresponding CNV file for ${sample} in '$cnv_dir'${NC}"
    else
      echo "perl $script_bin/findCNV.pl -c $cnv_dir/${sample}.*.cnv -v $out_dir/summary/${sample}*.filtered.summary.txt"
      perl "$script_bin/findCNV.pl" -c "${cnv_matches[@]}" -v "$out_dir/summary/$sample"*.filtered.summary.txt
    fi
  done
fi

cd "$out_dir"

# if [[ $merge -eq 1 ]]
# then
#   echo "************************"
#   echo "*** Merging variants ***"
#   echo "************************"
#
#   mergeVCF=`which mergevcf || true`
#   if [[ -z "$mergeVCF" ]]
#   then
#     usage
#     echo -e "Error: mergevcf was not found. Please set in path\n`pip install mergevcf`"
#     exit 1
#   fi
#   # echo "perl "$script_bin"/merge_vcf.pl"
#   #perl "$script_bin"/merge_vcf.pl
# fi

cd "$out_dir/summary"

# Merge stage (-m): run svMerger.pl once per sample over that sample's
# summary files, writing into summary/merged.
if [[ $merge -eq 1 ]]; then
  mkdir -p "$out_dir/summary/merged/"

  # With -c the read-depth annotated summaries are merged, otherwise the
  # plain ones.
  if [[ -n $cnv_dir ]]; then
    summary_suffix='summary.cnv.txt'
  else
    summary_suffix='summary.txt'
  fi

  # Start from an empty list: appending to an array already filled by an
  # earlier stage made every sample get merged twice. Sample name = text
  # before the first '.'; ${prefix} is honoured in both modes.
  samples=()
  while IFS= read -r sample_name; do
    samples+=("$sample_name")
  done < <(
    for summary_file in ${prefix}.${summary_suffix}; do
      [[ -f $summary_file ]] || continue
      printf '%s\n' "${summary_file%%.*}"
    done | sort -u
  )

  for ((i = 0; i < ${#samples[@]}; ++i)); do
    sample=${samples[i]}
    echo "perl $script_bin/svMerger.pl -f ${sample}.*.${summary_suffix}"
    # The glob part is left unquoted so all of the sample's files are passed
    perl "$script_bin/svMerger.pl" -f "$sample".*."$summary_suffix" -o "$out_dir/summary/merged"
  done
fi

# The following stages work inside summary/merged. Create it first: it is
# otherwise only made by the merge stage, and without -m the cd below would
# abort the whole script under `set -e`.
mkdir -p "$out_dir/summary/merged"
cd "$out_dir/summary/merged"

# Cluster every merged file with svClusters.pl (-d 500), then delete the
# merged input.
if [[ $merge -eq 1 ]]; then
  for merged_file in ${prefix}_merged_SVs.txt; do
    # An unmatched glob stays literal; do not hand it to perl or rm
    [[ -f $merged_file ]] || continue
    echo "perl $script_bin/svClusters.pl -v $merged_file -d 500"
    perl "$script_bin/svClusters.pl" -v "$merged_file" -d 500
    rm -- "$merged_file"
  done
fi

# Optional lists in the merged directory; each is passed to sv2gene.pl only
# if the file exists.
blacklist="${out_dir}/summary/merged/all_samples_blacklist.txt"
whitelist="${out_dir}/summary/merged/all_samples_whitelist.txt"

# Annotate stage (-a): run sv2gene.pl on every clustered file, then delete
# the clustered input.
if [[ $annotate -eq 1 ]]; then

  echo "***************************"
  echo "*** Annotating variants ***"
  echo "***************************"

  cd "$out_dir/summary/merged"

  # Remove leftovers from a previous run. Each file is removed on its own;
  # previously a lone leftover survived because both had to exist.
  rm -f -- "all_genes.txt" "all_bps.txt"

  for clustered_file in ${prefix}clustered_SVs.txt; do
    # An unmatched glob stays literal; do not hand it to perl or rm
    [[ -f $clustered_file ]] || continue
    echo "Annotating $clustered_file"

    # Build the argument list once, adding each optional part independently
    anno_args=(-f "$features" -i "$clustered_file")
    if [[ -f $blacklist ]]; then
      anno_args+=(-b "$blacklist")
    fi
    if [[ -f $whitelist ]]; then
      anno_args+=(-w "$whitelist")
    fi
    if [[ -n $annoS ]]; then
      anno_args+=("$annoS")
    fi

    echo "perl $script_bin/sv2gene.pl ${anno_args[*]}"
    perl "$script_bin/sv2gene.pl" "${anno_args[@]}"
    rm -- "$clustered_file"
  done

fi

# Replace stage (-r): collect CNV calls from the annotated files with
# getCNVs.py, then re-run sv2gene.pl (-r ... -s) on the stitched files.
if [[ $replace -eq 1 ]]; then
  # All globs below are relative to the merged directory, so enter it
  # explicitly rather than relying on an earlier stage having done so.
  cd "$out_dir/summary/merged"

  echo "Adding any new CNV calls to '${data_dir}/cnv/'"
  for annofile in ${prefix}_annotated_SVs.txt; do
    [[ -f $annofile ]] || continue
    python "$script_bin/getCNVs.py" -f "$annofile" -d "${data_dir}/cnv/"
  done

  # Remove leftovers from a previous run, each file independently
  rm -f -- "all_genes_filtered.txt" "all_bps_filtered.txt"

  echo "Removing calls marked as false positives in 'all_samples_blacklist.txt'"
  # Disabled: separate cleaning pass that used to precede re-annotation
  # for annofile in ${prefix}_stitched.txt
  # do
  #   perl "$script_bin"/clean_files.pl -v $annofile -o $out_dir/summary/merged -b $blacklist -w $whitelist
  # done

  # for clean_file in *cleaned_SVs.txt
  for annofile in ${prefix}_stitched.txt; do
    # An unmatched glob stays literal; do not hand it to perl
    [[ -f $annofile ]] || continue
    # Delete file if empty
    # if [[ ! -s $annofile ]]
    # then
    #   rm $annofile
    # else
      # Annotate un-annotated (manually added) calls
      # Append any new hit genes to 'all_genes.txt'
      perl "$script_bin/sv2gene.pl" -r -f "$features" -i "$annofile" -s
      # rm $clean_file
    # fi
  done

  echo "Writing bp info for cleaned, reannotated SV calls to 'all_bps_filtered.txt'"

  # for reanno_file in *reannotated_SVs.txt
  # do
  #   # Grab some of the fields from these newly annotated files, and write them to 'all_bps_cleaned.txt'
  #   python "$script_bin"/getbps.py -f $reanno_file
  # done
  #
  # # This shouldn't be necessary. All calls in this file are taken from 'reannotated' files, which should have FP removed already...
  # echo "Removing false positives from bp file 'all_bps_cleaned.txt', writing new bp file to 'all_bps_filtered.txt'"
  # python "$script_bin"/update_bps.py
  # rm 'all_bps_cleaned.txt'

  # Merge all samples
  # echo "Merging all samples into single file..."
  # python "$script_bin"/merge_files.py
fi


#######################################
# Print the sample name of a file: its basename up to the first '.'.
# Arguments: $1 - path to a file
# Outputs:   the name on stdout, followed by a newline
# The name is printed verbatim: embedded whitespace is preserved and glob
# characters are not expanded.
#######################################
getBase() {
  local stem name
  stem=$(basename -- "$1")
  # Strip everything from the first '.' onwards (same as `cut -d . -f 1`)
  name=${stem%%.*}
  printf '%s\n' "$name"
}


# Reached only if nothing above aborted the script (set -e); report success.
exit 0
