#!/bin/bash
#SBATCH --job-name=dorado
#SBATCH --partition="gpuq"
#SBATCH --mem=80G
#SBATCH --cpus-per-task=20
#SBATCH --gres=gpu:A30:4
#SBATCH --time="48:00:00"

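#SLURM script to basecall a demultiplexed pod5 folder using dorado 0.3.2 (with 5mCG/5hmCG modified-base calling and --reference alignment), then sort and index the resulting BAM with samtools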
#usage: sbatch -o logs/$LIBNAME.dorado.o -e logs/$LIBNAME.dorado.e basecall_dorado0.3.2_CHM13v2.sh $LIBNAME $POD5 $REF

# Make a pipeline fail if ANY stage fails. Without this, a dorado crash is
# hidden by a successful `samtools sort`, leaving a truncated BAM that then
# gets indexed as if the run had completed.
set -o pipefail

# Print an error message to stderr and abort the job with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

module load samtools || die "could not load module 'samtools'"
module load dorado/0.3.2 || die "could not load module 'dorado/0.3.2'"

# Positional arguments:
#   $1  LIBNAME  prefix used for the output BAM file name
#   $2  POD5     pod5 input handed to `dorado basecaller`
#   $3  REF      reference handed to `dorado basecaller --reference`
if (( $# < 3 )); then
  printf 'usage: sbatch [sbatch options] %s LIBNAME POD5 REF\n' "${0##*/}" >&2
  exit 2
fi

LIBNAME=$1
POD5=$2
REF=$3
#REF=~/genomes/Homo_sapiens/CHM13v2.0/chm13v2.0.fa

[[ -n "$LIBNAME" ]] || die "LIBNAME must not be empty"
[[ -e "$POD5" ]] || die "pod5 input not found: $POD5"
[[ -e "$REF" ]] || die "reference not found: $REF"

# DORADO_MODELS is not set in this script; presumably it is exported by the
# dorado module (TODO confirm). Abort early with a clear message if missing.
: "${DORADO_MODELS:?DORADO_MODELS is not set (expected after loading dorado)}"

#uses 5kHz model, hmCG + mCG
MODEL=$DORADO_MODELS/dna_r10.4.1_e8.2_400bps_sup@v4.2.0
#first 2 flow cells sequenced at 4 kHz
#MODEL=$DORADO_MODELS/dna_r10.4.1_e8.2_400bps_sup@v4.0.0

# Single definition of the output name, shared by the sort and index steps.
OUT_BAM="${LIBNAME}_sup_5mCG_5hmCG.CHM13v2.bam"

# Basecall with modified-base calling, stream into samtools for sorting.
# All expansions are quoted so paths containing spaces or glob characters
# are passed through as single arguments.
dorado basecaller "$MODEL" "$POD5" --reference "$REF" --modified-bases 5mCG_5hmCG \
  | samtools sort > "$OUT_BAM" \
  || die "basecalling/sorting failed; $OUT_BAM is incomplete and was not indexed"

samtools index "$OUT_BAM" || die "samtools index failed for $OUT_BAM"

