#!/usr/bin/env bash
#!
#! Name of the job:
#SBATCH -J coverage
#SBATCH --mail-type=END
#SBATCH -n 24
#SBATCH -N 1
#SBATCH --mem=64GB
#SBATCH -o "./s_out/slurm-%A_%a-minigraphcov.txt"
#SBATCH -p 2004

# -N number of nodes to allocate
# -n number of tasks (kept equal to the thread count given to minigraph, -t24)
# -c number of CPUs per task (not set in this script; don't change)

# Usage: sbatch --array 1-8 /path/to/this/script   (array index N selects the N-th entry of FASTALIST)

# ---- Input assemblies -------------------------------------------------------
# Directory holding one FASTA per sample, read below as <sample>.renamed.fa.
FASTADIR="${HOME}/code/malawi_transposon/cloud/genome/longread-decompress/renamed"

# Sample names, one per array task; the order matters because the SLURM array
# index is used as a 1-based position in this list.
FASTALIST=(
    astCal1.2_ensembl
    mayZeb2.0_ensembl
    troMau2.0
    aulStu5.0
    otoArg1.0
    copChr1.0
    rhaChi1.0
    rhaChi2.0
)

# ---- Graph ------------------------------------------------------------------
# Directory containing <GRAPHNAME>-graph.gfa; the per-sample output files are
# written here as well. The trailing slash is part of the value.
GRAPHDIR="${HOME}/code/malawi_transposon/cloud/graph_genome/malawi_haplochromines/"
GRAPHNAME="malawi_haplochromines"

# Select the sample for this array task: SLURM_ARRAY_TASK_ID is a 1-based
# position in FASTALIST (task 1 -> first entry).
# Abort on a missing, non-numeric or out-of-range ID instead of continuing with
# an empty SAMPLE, which would turn every path built from it into a bogus name
# such as ".renamed.fa" or ".coverage".
TASK_ID="${SLURM_ARRAY_TASK_ID:-}"
if [[ ! "$TASK_ID" =~ ^[0-9]+$ ]]; then
    echo "ERROR: SLURM_ARRAY_TASK_ID ('$TASK_ID') is not a non-negative integer;" \
         "submit with: sbatch --array 1-${#FASTALIST[@]} <script>" >&2
    exit 1
fi
# Force base 10 so an ID with leading zeros (e.g. "08") is not read as octal.
TASK_ID=$(( 10#$TASK_ID ))
if (( TASK_ID < 1 || TASK_ID > ${#FASTALIST[@]} )); then
    echo "ERROR: SLURM_ARRAY_TASK_ID ($TASK_ID) is outside 1-${#FASTALIST[@]}" >&2
    exit 1
fi
# Bash arrays are 0-based, the array task IDs are 1-based.
SAMPLE="${FASTALIST[TASK_ID - 1]}"

# Map one sample assembly onto the graph with minigraph, reduce the S lines of
# its output to a 5-column tab-separated table, then delete the raw output.
#
# Reads:  $GRAPHDIR/$GRAPHNAME-graph.gfa, $FASTADIR/$SAMPLE.renamed.fa
# Writes: $GRAPHDIR/$SAMPLE.coverage (the intermediate .coverage.gaf is removed
#         only after the table has been produced)
# Exits non-zero, keeping the intermediate file, if any step fails.

# Guard against an empty sample name: the paths below would otherwise collapse
# to names like ".renamed.fa" / ".coverage".
if [[ -z "${SAMPLE:-}" ]]; then
    echo "ERROR: SAMPLE is empty; was the job submitted with --array?" >&2
    exit 1
fi

GRAPH_GFA="$GRAPHDIR/${GRAPHNAME}-graph.gfa"
SAMPLE_FA="$FASTADIR/$SAMPLE.renamed.fa"
COVERAGE_GAF="$GRAPHDIR/$SAMPLE.coverage.gaf"
COVERAGE_TABLE="$GRAPHDIR/$SAMPLE.coverage"

echo "CALCULATING COVERAGE FOR " "$SAMPLE"
date

# -t24 is kept in step with the 24 requested in the #SBATCH header.
# Stop here on failure so a truncated output is neither summarised nor deleted.
if ! minigraph -t24 -K4g --cov -c \
    -k15 -w25 -j.01 -g10k -r2k \
    --max-gap-pre=10k -n5,3 -m50,30 --max-lc-skip=25 \
    --max-gc-skip=25 --min-cov-mapq=20 --min-cov-blen=1k \
    "$GRAPH_GFA" "$SAMPLE_FA" > "$COVERAGE_GAF"; then
    echo "ERROR: minigraph failed for $SAMPLE; keeping $COVERAGE_GAF for inspection" >&2
    exit 1
fi

# Single pass over the (potentially very large) output:
#   grep keeps the lines starting with "S";
#   cut drops everything except columns 2,4,5,7,8, so awk never has to hold
#     the long column 3;
#   awk emits: col2, then the 3rd ':'-separated piece of cols 4, 7, 5 and 8,
#     in that order (a column without any ':' is passed through unchanged).
# NOTE(review): cols 4/5/7/8 are presumably TAG:TYPE:VALUE tags whose VALUE is
# kept -- confirm which tags minigraph --cov emits at those positions.
grep '^S' "$COVERAGE_GAF" \
    | cut -f2,4,5,7,8 \
    | awk -F'\t' -v OFS='\t' '
        function tag_value(field,   parts, n) {
            n = split(field, parts, ":")
            return (n < 2) ? field : parts[3]
        }
        { print $1, tag_value($2), tag_value($4), tag_value($3), tag_value($5) }
      ' > "$COVERAGE_TABLE"

# An empty table means the extraction failed or found no S lines; do not throw
# away the only copy of the raw output in that case.
if [[ ! -s "$COVERAGE_TABLE" ]]; then
    echo "ERROR: no S lines extracted from $COVERAGE_GAF; keeping it for inspection" >&2
    exit 1
fi

rm -- "$COVERAGE_GAF"

date


