#!/usr/bin/env bash
#!
#! Name of the job:
#SBATCH -J repmask
#SBATCH --mail-type=END
#SBATCH -n 20
#SBATCH -N 1
#SBATCH --mem=64GB
#SBATCH -o "s_out/slurm-%A_%a-repmask.txt"
#SBATCH -p 2004

# FOR CONDA: ACTIVATE YOUR ENVIRONMENT FIRST
# single line command to run, replace as necessary
# export SBATCH_CMD="bash helloworld.sh"; mkdir -p s_out/; sbatch ~/clst/submit_sbatch_big.sh; export SBATCH_CMD=""

# -N nodes (how many compute units)
# -n tasks (how hard each node is pushed; correlates with the number of cores)
# -c CPUs per task (how many processors each task gets; don't change)

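# command (create the log dir used by -o first; one array task per entry of SAMPLES_LIST):
#   mkdir -p s_out/; sbatch --array 1-8 /path/to/this/script

# Each array task annotates one species' pseudoreference FASTA with RepeatMasker,
# using that species' own repeat library.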


# Assembly names, one per SLURM array task: task N processes element N (1-based).
SAMPLES_LIST=(astCal1.2_ensembl mayZeb2.0_ensembl troMau2.0 aulStu5.0 otoArg1.0 copChr1.0 rhaChi1.0 rhaChi2.0)
# Short species codes in the same order as SAMPLES_LIST (used in the FASTA file name).
SAMPLES_ABRV_LIST=(astCal mayZeb troMau aulStu otoArg copChr rhaChi rhaChi2)

#######################################
# Print the list element selected by a 1-based task ID.
# Arguments: $1  - task ID (decimal integer; leading zeros allowed)
#            $2+ - list elements
# Outputs:   the selected element on stdout; nothing if the ID is invalid
# Returns:   0 on success, 1 if the ID is empty, non-numeric or outside
#            1..(number of elements)
#######################################
item_for_task() {
  local task_id=${1-}
  local -a items=("${@:2}")

  # 10# forces base 10 so an ID such as "08" is not parsed as invalid octal.
  if [[ ! $task_id =~ ^[0-9]+$ ]] \
    || (( 10#$task_id < 1 || 10#$task_id > ${#items[@]} )); then
    return 1
  fi
  printf '%s\n' "${items[10#$task_id - 1]}"
}

# Both values stay empty when the task ID is missing or out of range; say so on
# stderr so the cause shows up in the job log.
SAMPLE=$(item_for_task "${SLURM_ARRAY_TASK_ID-}" "${SAMPLES_LIST[@]}") || SAMPLE=""
SAMPLE_ABRV=$(item_for_task "${SLURM_ARRAY_TASK_ID-}" "${SAMPLES_ABRV_LIST[@]}") || SAMPLE_ABRV=""
if [[ -z $SAMPLE || -z $SAMPLE_ABRV ]]; then
  printf 'warning: SLURM_ARRAY_TASK_ID="%s" selects no sample (expected 1..%d); submit with sbatch --array\n' \
    "${SLURM_ARRAY_TASK_ID-}" "${#SAMPLES_LIST[@]}" >&2
fi

# Abort on the first failing command, unset variable or broken pipeline, so the
# job's exit status reflects a failed run instead of the final `date`.
set -euo pipefail

# The array task must have selected a sample; both values feed file paths below.
: "${SAMPLE:?no sample selected; submit with sbatch --array 1-8}"
: "${SAMPLE_ABRV:?no sample abbreviation selected; submit with sbatch --array 1-8}"

# REPO_DIR is not set anywhere in this script; it has to be exported by the
# submitting environment, otherwise the library path would start at "/cloud".
: "${REPO_DIR:?REPO_DIR must be set and exported before submitting}"

# parent dir where the pseudoreference fasta is, and where output will be written
GRAPHDIR="$HOME/code/malawi_transposon/cloud/graph_genome/malawi_haplochromines/"

# dir where repeatmodeler library is
RMLIB_DIR="$REPO_DIR/cloud/repeatmodeler/all_species_repbase_aug22/$SAMPLE"

# Concrete files/directories handed to RepeatMasker.
RM_LIBRARY="$RMLIB_DIR/$SAMPLE-families.fa"
RM_OUT_DIR="$GRAPHDIR/pseudoreference/repeatmasker_ownlibrary/"
RM_GENOME="$GRAPHDIR/pseudoreference/fasta/path_$SAMPLE_ABRV.fa"
# NOTE(review): LIBDIR presumably points RepeatMasker in the container at its
# Libraries directory - confirm against the TETools documentation.
RM_LIBDIR="$HOME/raw/repeatmasker/Libraries/"

# Make sure the output directory exists before RepeatMasker writes into it.
mkdir -p -- "$RM_OUT_DIR"

date
# The inner script is single-quoted and receives every path as a positional
# argument, so paths are never re-parsed by the inner shell.
#   $1 LIBDIR   $2 repeat library (-lib)   $3 output dir (-dir)   $4 genome FASTA
# RepeatMasker flags: -e rmblast (search engine), -pa 20 (parallel batches),
# -no_is (skip bacterial insertion element check), -gff (also write GFF output).
singularity exec \
    "docker://dfam/tetools:1.3" \
    bash -c '
    # Fail the container command if RepeatMasker fails, rather than returning
    # the status of the trailing date.
    set -euo pipefail
    pwd
    export LIBDIR=$1

    date
    printf "running RepeatMasker...\n"
    RepeatMasker \
      -e rmblast \
      -pa 20 \
      -no_is \
      -gff \
      -lib "$2" \
      -dir "$3" \
      "$4"

    date
' repmask "$RM_LIBDIR" "$RM_LIBRARY" "$RM_OUT_DIR" "$RM_GENOME"
date

