#!/usr/bin/env bash
#!
#! Name of the job:
#SBATCH -J calliptera
#SBATCH --mail-type=END
#SBATCH -n 24
#SBATCH -N 1
#SBATCH -o "slurm-%j_callip.txt"
#SBATCH -p 1804

# FOR CONDA: ACTIVATE YOUR ENVIRONMENT FIRST
# single line command to run, replace as necessary
# export SBATCH_CMD="bash helloworld.sh"; mkdir -p s_out/; sbatch ~/clst/submit_sbatch_big.sh; export SBATCH_CMD=""

# -N nodes (number of compute nodes to allocate)
# -n tasks (number of tasks to run; determines how many cores are reserved)
# -c CPUs per task (number of processors each task gets; don't change)

# This script runs RepeatModeler and RepeatMasker on the astCal1.2_ensembl genome with the LTR module deactivated.

# Fail fast: abort on any error, on use of an unset variable, or on a failed
# pipeline stage. This sits below the #SBATCH header because sbatch only reads
# directives that appear before the first executable command.
set -euo pipefail

# Timestamp marking the start of the job in the Slurm log.
date

# GENOME_DIR and REPO_DIR are expected to come from the submitting environment.
# Without these checks an unset variable would silently turn the paths below
# into locations directly under the filesystem root.
: "${GENOME_DIR:?GENOME_DIR must be set}"
: "${REPO_DIR:?REPO_DIR must be set}"

# Working directory in which the repeat database and family library are built.
readonly RMLIB_DIR="$GENOME_DIR/repeatmodeler/calliptera_repbase_noLTR_jun21/"
# Directory handed to RepeatMasker via -dir for its raw output.
readonly OUT_DIR="$REPO_DIR/out/repmask/calliptera_repbase_noLTR_jun21/raw_output/"

# Quoted so that paths containing whitespace or glob characters stay intact;
# `--` stops option parsing should a path ever start with a dash.
mkdir -p -- "$RMLIB_DIR" "$OUT_DIR"

# Database name passed to BuildDatabase/RepeatModeler; the RepeatMasker library
# file name is derived from it as "<name>-families.fa".
readonly LIBRARY_NAME="calliptera_repbase"

# Genome assembly that is both modelled and masked.
GENOME_FASTA="$GENOME_DIR/astCal1.2_ensembl/astCal_v1.2.fa"

# Run BuildDatabase -> RepeatModeler -> RepeatMasker inside the container.
#
# The inner script is single-quoted and receives every host-side value as a
# positional argument, so paths are passed verbatim instead of being spliced
# into shell source text and re-parsed by the inner shell:
#   $1 working directory      $2 value for LIBDIR     $3 database name
#   $4 genome FASTA           $5 RepeatMasker output directory
# $HOME is still expanded on the host, before the container starts.
#
# `|| status=$?` records a failure without aborting here, so the closing
# timestamp is still printed and the real exit code is returned to Slurm.
status=0
singularity exec \
    "docker://dfam/tetools:1.3" \
    bash -c '
    # Stop at the first failing step rather than running later tools in the
    # wrong directory or on missing input.
    set -euo pipefail

    work_dir=$1
    lib_dir=$2
    db_name=$3
    genome_fa=$4
    out_dir=$5

    cd "$work_dir"
    pwd

    # NOTE(review): presumably points the tools at a custom Libraries
    # directory; confirm against the container documentation.
    export LIBDIR="$lib_dir"

    date
    printf "building database...\n"
    BuildDatabase -name "$db_name" "$genome_fa"

    date
    printf "running RepeatModeller...\n"
    RepeatModeler -pa 6 -database "$db_name"

    date
    printf "running RepeatMasker...\n"
    RepeatMasker \
      -e rmblast \
      -pa 6 \
      -no_is \
      -gff \
      -lib "$db_name-families.fa" \
      -dir "$out_dir" \
      "$genome_fa"

    date
' _ "$RMLIB_DIR" "$HOME/raw/repeatmasker/Libraries/" "$LIBRARY_NAME" "$GENOME_FASTA" "$OUT_DIR" \
    || status=$?

# Timestamp marking the end of the job, printed on success and failure alike.
date

# Propagate the pipeline result so Slurm does not report a failed run as
# COMPLETED.
exit "$status"
