#!/bin/bash
#SBATCH --mem=32G
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --time=4:00:00
#SBATCH --account=rrg-bourqueg-ad

# Load the software stack and activate the Truvari virtualenv.
module load StdEnv/2023
module load python/3.11
# The path is relative, so it resolves against the job's working directory.
# Abort if activation fails: the invocations below are written for a specific
# Truvari major version, so continuing with whatever `truvari` happens to be on
# PATH (or none at all) would be misleading.
source ../envs/env_truvari/bin/activate || {
  status=$?
  echo "ERROR: could not activate ../envs/env_truvari" >&2
  exit "${status}"
}

# Print the installed package versions to the job's stdout.
pip freeze

# Reference FASTA handed to `truvari refine --reference`; the path is relative
# to the job's working directory.
REFERENCE='../1_alignment/data/ref/hg38.analysisSet.fa'

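# External environment variables:
#  - VCF: call set to benchmark (passed to `truvari bench -c`)
#  - TECH (hifi, ont)
#  - TOOL (strkit, strkit-no-snv, longtr, trgt, straglr, strdust)
#  - PHASED (optional): if set and non-empty, "_phased" is appended to the
#    output directory name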

# Scratch locations for this run. Normally they live under ${SLURM_TMPDIR}.
# If that variable is unset or empty (e.g. the script is run outside a Slurm
# job), fall back to a fresh mktemp directory; otherwise the paths would
# collapse to "/bench/" and "/tmp" at the filesystem root.
scratch_root="${SLURM_TMPDIR:-}"
if [[ -z "${scratch_root}" ]]; then
  scratch_root="$(mktemp -d)" || exit
  echo "SLURM_TMPDIR is not set; using ${scratch_root} as scratch space" >&2
fi

# Output directory for truvari bench (trailing slash kept as before).
ls_bench_dir="${scratch_root}/bench/"

# set TMPDIR - try to address cursed Truvari 4.3.1 errors...
export TMPDIR="${scratch_root}/tmp"
mkdir -p "${TMPDIR}"

# Value for `truvari bench --pctseq`.
# straglr gets 0.0 (Truvari 4.x: needed for symbolic alts; TODO: Truvari 5
# resolves this); every other TOOL value gets 0.7.
case "${TOOL}" in
  straglr) pctseq='0.0' ;;
  *) pctseq='0.7' ;;
esac

# Truvari 4.x:
# Run `truvari bench` with ${VCF} as the -c input against the HG002
# tandem-repeat VCF given as -b, limited to the --includebed regions, writing
# the results to ${ls_bench_dir}. The job stops here if bench fails.
bench_args=(
  -b ./data/HG002_GRCh38_TandemRepeats_v1.0.1.no_homopolymers.vcf.gz
  -c "${VCF}"
  --includebed ./data/HG002_GRCh38_TandemRepeats_v1.0.bed.gz
  --sizemin 5
  --pick ac
  --pctseq "${pctseq}"
  -o "${ls_bench_dir}"
)
truvari bench "${bench_args[@]}" || exit

# TODO: use when we figure out what's up with Truvari 5.x
#truvari bench \
#  -b ./data/HG002_GRCh38_TandemRepeats_v1.0.1.no_homopolymers.vcf.gz \
#  -c "${VCF}" \
#  --includebed ./data/HG002_GRCh38_TandemRepeats_v1.0.bed.gz \
#  --sizemin 5 \
#  --pick ac \
#  -o "${ls_bench_dir}" || exit

# Load MAFFT only now, after bench has finished; it is needed by the refine step.
module load mafft  # required for refine

# Name used to pick the catalog BED file for refine. strkit-no-snv, strdust and
# straglr all map to the strkit catalog; any other TOOL value is used as-is.
case "${TOOL}" in
  strkit-no-snv | strdust | straglr) bed_tool="strkit" ;;
  *) bed_tool="${TOOL}" ;;
esac

# Truvari 4.x:
# Run `truvari refine` on the bench output directory, using the catalog BED
# selected by ${bed_tool} as the regions file. The job stops here if refine fails.
# The thread count follows the Slurm allocation (SLURM_CPUS_PER_TASK) so it
# stays in step with the `#SBATCH --cpus-per-task` header instead of
# oversubscribing the allocated CPUs. It falls back to 8 when the variable is
# not set.
truvari refine \
  --use-original-vcfs \
  --reference "${REFERENCE}" \
  --regions "../2_giab_calls/out/adotto_catalog_${bed_tool}.bed" \
  --threads "${SLURM_CPUS_PER_TASK:-8}" \
  "${ls_bench_dir}" || exit

# https://github.com/ACEnglish/truvari/wiki/refine
# - need all parameters (--coords O, --subset) for giabTR included here
# - TODO: should we keep --use-original-vcfs ?
# Truvari 5.x:
#truvari refine \
#  --use-original-vcfs \
#  --coords O \
#  --reference "${REFERENCE}" \
#  --regions "../2_giab_calls/out/adotto_catalog_${bed_tool}.bed" \
#  --subset \
#  --threads 8 \
#  "${ls_bench_dir}" || exit

# Publish the results: move the bench directory out of scratch to
# out/hg002_benchmark/<TECH>/<TOOL>[_phased], replacing any previous run.
# TECH and TOOL must be non-empty. With either one empty, bench_dir collapses
# to a parent directory and the rm -rf below would delete other runs' results.
: "${TECH:?TECH must be set (e.g. hifi, ont)}"
: "${TOOL:?TOOL must be set (e.g. strkit, longtr, trgt)}"

tech_dir="out/hg002_benchmark/${TECH}"
# "_phased" is appended only when PHASED is set and non-empty.
bench_dir="${tech_dir}/${TOOL}${PHASED:+_phased}"
mkdir -p "${tech_dir}"
rm -rf -- "${bench_dir:?}"  # remove bench_dir if it exists and overwrite it with the new contents
# NOTE(review): the owner is hard-coded, and a chown failure is not fatal (the
# move below still happens) - confirm this is intended for other submitters.
chown -R dlough2:rrg-bourqueg-ad "${ls_bench_dir}"
mv "${ls_bench_dir}" "${bench_dir}"
