#!/usr/bin/env bash
# real_read_dragen_call.sh: indel realign input BAMs and run the Illumina DRAGEN Genotyper

set -ex
set -o pipefail

# Fetch a file with `aws s3 cp` unless the destination already exists.
#   $1 - source passed to `aws s3 cp`
#   $2 - local destination path
# Returns 0 when the destination is already present, otherwise the exit
# status of `aws s3 cp`.
function download() {
    local remote_src="${1}"
    local local_dest="${2}"

    # An existing destination is treated as already downloaded.
    if [[ -e "${local_dest}" ]]; then
        return 0
    fi
    aws s3 cp --no-progress "${remote_src}" "${local_dest}"
}

# Fetch a URL with wget unless the destination already exists.
#   $1 - URL to fetch
#   $2 - local destination path
# Returns 0 when the destination is already present or the download
# succeeds; otherwise returns wget's exit status.
#
# The transfer goes to "<destination>.part" and is renamed only on success.
# `wget -O` creates its output file before the transfer completes, so writing
# straight to the destination would leave a truncated file behind after a
# failure, and the existence check would then skip the download on a rerun.
function wget_download() {
    local url="${1}"
    local dest="${2}"
    local partial="${dest}.part"
    local rc=0

    if [[ -e "${dest}" ]]; then
        return 0
    fi

    wget "${url}" -O "${partial}" || rc=$?
    if (( rc != 0 )); then
        # Do not leave a partial file around to be mistaken for a finished one.
        rm -f -- "${partial}"
        return "${rc}"
    fi
    mv -- "${partial}" "${dest}"
}

# Copy a local file with `cp` unless the destination already exists.
#   $1 - source path
#   $2 - destination path
# Returns 0 when the destination is already present, otherwise the exit
# status of `cp`.
function copy() {
    local source_path="${1}"
    local target_path="${2}"

    # Short-circuit: only invoke cp when the target is absent.
    [[ -e "${target_path}" ]] || cp "${source_path}" "${target_path}"
}

# Script arguments:
#   $1 - sample name (used for the read-group SM tag and interval file names)
#   $2 - path to the input BAM
#   $3 - "true" to indel-realign before genotyping; defaults to "true"
WORKDIR=${HOME}/run_genotyping
SAMPLE_NAME="${1}"
BAM_FILE="${2}"
# Apply the default only when no third argument is given; a plain
# `INDEL_REALIGN=${3}` would overwrite the default with an empty string.
INDEL_REALIGN="${3:-true}"
# `:?` reports a clear error when the BAM argument is missing. The failed
# command substitution then aborts the run under the script's `set -e`.
INPUT_BAM=$(basename -- "${BAM_FILE:?usage: real_read_dragen_call.sh SAMPLE_NAME BAM_FILE [INDEL_REALIGN]}")
REF_FASTA="GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.compact_decoys.fna.gz"
SEQ_DICT="GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.compact_decoys.dict"

# Create the working directory and point TMPDIR at a scratch directory inside it.
mkdir -p "${WORKDIR}"
export TMPDIR="${WORKDIR}/tmp"
mkdir -p "${TMPDIR}"

# Stage the input BAM, reference FASTA and sequence dictionary in the working directory.
cd "${WORKDIR}"
copy "${BAM_FILE}" "${WORKDIR}/${INPUT_BAM}"
wget_download https://storage.googleapis.com/cmarkell-vg-wdl-dev/grch38_inputs/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.compact_decoys.fna.gz "${WORKDIR}/${REF_FASTA}"
# wget_download takes exactly (url, destination) and adds `-O` itself. Passing
# an extra `-O` here would make "-O" the destination, and the dictionary would
# never be written to ${WORKDIR}/${SEQ_DICT}.
wget_download https://storage.googleapis.com/cmarkell-vg-wdl-dev/grch38_inputs/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.compact_decoys.dict "${WORKDIR}/${SEQ_DICT}"

## Optionally run indel realignment.
## NOTE: skip this step for DRAGEN- or BWA-MEM-aligned input, since indel
## realignment causes worse performance for both of those mappers.
if [[ "${INDEL_REALIGN}" == true ]]; then
    # Sort and reorder input BAM.
    # Each cleanup `rm` is a separate statement rather than `cmd && rm ...`:
    # under `set -e`, a failure on the left-hand side of `&&` does not abort
    # the script, so a failed sort/reorder would otherwise go unnoticed.
    cd "${WORKDIR}"
    docker run \
    -e INPUT_BAM="${INPUT_BAM}" \
    -v "${PWD}:${HOME}" -w "${HOME}" quay.io/ucsc_cgl/samtools:latest \
    samtools sort -@ 32 "${INPUT_BAM}" -O BAM > "positionsorted.${INPUT_BAM}"
    rm "${INPUT_BAM}"

    docker run \
    -e INPUT_BAM="${INPUT_BAM}" \
    -e SEQ_DICT="${SEQ_DICT}" \
    -v "${PWD}:${HOME}" -w "${HOME}" broadinstitute/picard:2.21.9 \
      java -Xmx20g -XX:ParallelGCThreads=16 -jar /usr/picard/picard.jar \
      ReorderSam \
      VALIDATION_STRINGENCY=SILENT \
      INPUT="positionsorted.${INPUT_BAM}" \
      OUTPUT="reordered.positionsorted.${INPUT_BAM}" \
      SEQUENCE_DICTIONARY="${SEQ_DICT}"
    rm "positionsorted.${INPUT_BAM}"

    # Indel realign input BAM.
    # Attach a read group carrying the sample name, then index the result.
    # These two samtools calls run on the host, not in a container.
    cd "${WORKDIR}"
    samtools addreplacerg -@ 32 -O BAM -r ID:1 -r LB:lib1 -r "SM:${SAMPLE_NAME}" -r PL:illumina -r PU:unit1 "reordered.positionsorted.${INPUT_BAM}" > "gatk_ready.reordered.positionsorted.${INPUT_BAM}"
    samtools index -@ 32 "gatk_ready.reordered.positionsorted.${INPUT_BAM}"
    docker run \
    -e SAMPLE_NAME="${SAMPLE_NAME}" \
    -e REF_FASTA="${REF_FASTA}" \
    -e INPUT_BAM="${INPUT_BAM}" \
    -v "${PWD}:${HOME}" -w "${HOME}" broadinstitute/gatk3:3.8-1 \
      java -jar /usr/GenomeAnalysisTK.jar -T RealignerTargetCreator \
      -R "${REF_FASTA}" \
      -I "gatk_ready.reordered.positionsorted.${INPUT_BAM}" -o "${SAMPLE_NAME}.intervals"

    # Convert the interval list ("contig:start-end" or "contig:pos") into
    # tab-separated BED with a start coordinate one less than the listed start.
    awk -F '[:-]' 'BEGIN { OFS = "\t" } { if( $3 == "") { print $1, $2-1, $2 } else { print $1, $2-1, $3}}' "${SAMPLE_NAME}.intervals" > "${SAMPLE_NAME}.intervals.bed"
    # NOTE(review): no executable name follows the image below, so this relies
    # on the image defining an ENTRYPOINT that launches ABRA2 — confirm.
    docker run \
    -e SAMPLE_NAME="${SAMPLE_NAME}" \
    -e INPUT_BAM="${INPUT_BAM}" \
    -e REF_FASTA="${REF_FASTA}" \
    -v "${PWD}:${HOME}" -w "${HOME}" quay.io/biocontainers/abra2:2.24--h7d875b9_0 \
      --targets "${SAMPLE_NAME}.intervals.bed" \
      --in "gatk_ready.reordered.positionsorted.${INPUT_BAM}" \
      --out "indel_realigned.${INPUT_BAM}" \
      --ref "${REF_FASTA}" \
      --threads 16
fi

# Run DRAGEN genotyper on input BAM
EXPERIMENT_NAME="${INPUT_BAM}_dragen_run"
EXPERIMENT_DIR="${WORKDIR}/${EXPERIMENT_NAME}"
# mkdir and cd are separate statements so that `set -e` aborts if mkdir fails;
# in `mkdir ... && cd ...` a failing mkdir would not stop the script.
mkdir -p "${EXPERIMENT_DIR}" "${EXPERIMENT_DIR}/dragen_index" "${EXPERIMENT_DIR}/tmp"
cd "${EXPERIMENT_DIR}"

# Genotype the realigned BAM only when the realignment step actually ran;
# otherwise use the staged input BAM, since no indel_realigned.* file exists.
if [[ "${INDEL_REALIGN}" == true ]]; then
    DRAGEN_INPUT_BAM="${WORKDIR}/indel_realigned.${INPUT_BAM}"
else
    DRAGEN_INPUT_BAM="${WORKDIR}/${INPUT_BAM}"
fi

# Build the DRAGEN reference hash table from the downloaded FASTA.
dragen --build-hash-table true \
  --output-directory "${EXPERIMENT_DIR}/dragen_index" \
  --ht-reference "${WORKDIR}/${REF_FASTA}"

# Variant calling only (map/align disabled). `-f` forces overwriting existing
# output and `-r` names the reference hash-table directory built above.
# stderr is captured next to the outputs (the cwd is the experiment directory).
dragen -f \
  -r "${EXPERIMENT_DIR}/dragen_index" \
  -b "${DRAGEN_INPUT_BAM}" \
  --verbose --bin_memory=50000000000 --enable-map-align false --enable-variant-caller true \
  --pair-by-name=true \
  --intermediate-results-dir "${EXPERIMENT_DIR}/tmp" \
  --output-directory "${EXPERIMENT_DIR}" \
  --output-file-prefix "${EXPERIMENT_NAME}" 2> "${EXPERIMENT_NAME}.stderr"


