#!/usr/bin/env bash
#!
#! Name of the job:
#SBATCH -J busco
#SBATCH --mail-type=END
#SBATCH -n 36
#SBATCH -N 1
#SBATCH --mem=128G
#SBATCH -o "s_out/slurm-%A_%a-busco.txt"
#SBATCH -p 2004

# FOR CONDA: ACTIVATE YOUR ENVIRONMENT FIRST
# single line command to run, replace as necessary
# export SBATCH_CMD="bash helloworld.sh"; mkdir -p s_out/; sbatch ~/clst/submit_sbatch_big.sh; export SBATCH_CMD=""

# -N number of nodes to allocate
# -n number of tasks (here this determines how many cores the job gets on the node)
# -c number of CPUs per task (leave at the default; don't change)

# command (the s_out/ log directory named in the -o directive must already exist):
#   mkdir -p s_out/; sbatch --array 1-9 /path/to/this/script

# This script runs BUSCO on all the renamed cichlid genomes.

# Run BUSCO in genome mode on ONE renamed cichlid assembly.
#
# The assembly is chosen from FASTALIST by the 1-based Slurm array task ID
# (SLURM_ARRAY_TASK_ID), so the job is meant to be submitted as an array with
# one task per FASTALIST entry.
#
# Reads:   $FASTADIR/<sample>.renamed.fa
# Writes:  $FASTADIR/busco_results/<sample>/
# Exit:    BUSCO's exit status, or 1 if a precondition check fails.

date

FASTADIR="$HOME/code/malawi_transposon/genome/longread-decompress/renamed/"
BUSCOLIBDIR="$HOME/code/malawi_transposon/genome/busco_downloads/lineages/actinopterygii_odb10/"

FASTALIST=(astCal1.2_ensembl mayZeb2.0_ensembl troMau2.0 aulStu5.0 otoArg1.0 copChr1.0 rhaChi1.0 rhaChi2.0 astCal1.2_ONT)

# Refuse to run without a valid array task ID: an unset or out-of-range ID
# would otherwise yield an empty sample name and BUSCO would be pointed at
# "./.renamed.fa".
TASK_ID="${SLURM_ARRAY_TASK_ID:-}"
if [[ ! "$TASK_ID" =~ ^[0-9]+$ ]] \
  || (( 10#$TASK_ID < 1 || 10#$TASK_ID > ${#FASTALIST[@]} )); then
  printf 'error: SLURM_ARRAY_TASK_ID must be an integer in 1..%d (got "%s"); submit with: sbatch --array 1-%d <script>\n' \
    "${#FASTALIST[@]}" "$TASK_ID" "${#FASTALIST[@]}" >&2
  exit 1
fi

# Task IDs are 1-based, bash arrays are 0-based. The 10# prefix forces base 10
# so an ID with leading zeros is not parsed as octal.
SAMPLE_INDEX=$(( 10#$TASK_ID - 1 ))
SAMPLE="${FASTALIST[SAMPLE_INDEX]}"

# Stop if the data directory is unreachable; otherwise mkdir/busco would run
# in whatever directory the job happened to start in.
cd "$FASTADIR" || {
  printf 'error: cannot cd to %s\n' "$FASTADIR" >&2
  exit 1
}

INPUT_FASTA="./$SAMPLE.renamed.fa"
if [[ ! -f "$INPUT_FASTA" ]]; then
  printf 'error: input FASTA not found: %s%s\n' "$FASTADIR" "${INPUT_FASTA#./}" >&2
  exit 1
fi

mkdir -p "./busco_results/$SAMPLE" || exit 1

# Use the task count Slurm actually granted (-n) as the thread count; fall
# back to 36, the value requested in the #SBATCH header, when it is not set.
THREADS="${SLURM_NTASKS:-36}"

busco -i "$INPUT_FASTA" -o "./busco_results/$SAMPLE/" -l "$BUSCOLIBDIR" -m genome -c "$THREADS" -f
BUSCO_STATUS=$?

date

# Propagate BUSCO's status so a failed run is not reported as a successful
# job just because the final `date` succeeded.
exit "$BUSCO_STATUS"
