
#
# This is the Taskfile, where all the commands to be run during the benchmarking process are written.
#

#
# - The taxonomic rank (species, genus, family, etc.) is in the variable $taxlevel
# - The number of available cores is in $thread
# - The reference folder is in $reference
# - A custom parameter is in $parameter
#
# - Keep your command on a single line, separate commands with &&, keep set -o verbose
#
# - Inputs are read only, /bbx/tmp to write
# - There might be a huge number of files to deal with as reference. When possible, edit in place, do not duplicate, and clean temp files during the faa/fna processing to avoid filling the server disk.
# - In challenging ref building runs, all fna, unzipped, could be >500GB of raw files. So do not duplicate if not required.
#
# - Avoid commands that have a limited argument size (cp, ls, etc.) for manipulating the input fasta files. Use find . -exec ... {} \; instead.
#   - BUT put it in a separate bash file, as escaping here will cause problems. See prepare_inputs.sh and add_to_library.sh.
#
# Anything that is printed on stdout/stderr is reported in the final logs provided to the user.
#
# Refer to the detailed procedure on the wiki, and please report anything that could be improved/clarified.
#

##
#
# Part 1, code to build a reference, using the files listed in /bbx/input/training/mapping.tsv
#
# /bbx/input/training => read only
# /bbx/tmp/ => rw, but cleaned after the run, cannot be reused in analysis step
# /bbx/reference => rw, kept for the part 2, so write your database here.
#
#
##

# Task build_ref - builds the Kraken database in /bbx/reference/$reference/.
# Steps, in order (each one runs only if the previous one succeeded, because of &&)
#   1. create /bbx/tmp/input/ and the library/ and taxonomy/ folders of the database
#   2. run /prepare_inputs.sh (its content is not visible from this file)
#   3. copy the *.dmp taxonomy files from /bbx/input/training/ into taxonomy/
#   4. run /prepare_inputs.py, then /add_to_library.sh (content not visible from this file)
#   5. run kraken-build --build with $thread threads
# $parameter is read as two comma-separated fields, <kmer-len>,<minimizer-len> (for example 31,15).
# The variable expansions are double-quoted so that an unexpected space or glob character
# in a value cannot split or expand an argument.
# NOTE(review) - "alias python=python3" probably has no effect in a non-interactive shell and
# every call below already uses python3 explicitly; kept as is, confirm before removing.
build_ref: set -o verbose && cd /kraken && alias python=python3 && mkdir -p /bbx/tmp/input/ "/bbx/reference/$reference/library/" "/bbx/reference/$reference/taxonomy/" && /prepare_inputs.sh && cp /bbx/input/training/*.dmp "/bbx/reference/$reference/taxonomy/" && python3 /prepare_inputs.py && /add_to_library.sh && kraken-build --kmer-len "$(echo "$parameter" | cut -f1 -d",")" --minimizer-len "$(echo "$parameter" | cut -f2 -d",")" --threads "$thread" --build --db "/bbx/reference/$reference/"

#
# Part 2, code to analyse the fastq files, reads.1.fq.gz and its pair reads.2.fq.gz, found in /bbx/input/testing. Unzip in /bbx/tmp if needed.
#
# You need to write /bbx/output/profile.$taxlevel.tsv AND /bbx/output/bins.$taxlevel.tsv as final output. You CAN leave one of the two files empty (0 byte) to report that you cannot deal with it.
#
# /bbx/input/testing => read only
# /bbx/tmp => rw, but cleaned after the run
# /bbx/reference => read only this time
#

# Task analysis - classifies the paired reads with Kraken and writes the final outputs.
# Steps, in order (each one runs only if the previous one succeeded, because of &&)
#   1. decompress reads.1.fq.gz and reads.2.fq.gz from /bbx/input/testing/ (read only) into /bbx/tmp/
#   2. run /kraken/kraken in paired fastq mode on the database /bbx/reference/$reference/
#      with $thread threads, writing /bbx/tmp/tmp.out
#   3. run /kraken/kraken-report on that output, writing /bbx/tmp/tmp.out.report
#   4. run /prepare_results.py (content not visible from this file; it is expected to leave
#      /bbx/output/bins.tsv and /bbx/output/profile.tsv, since the next step moves them)
#   5. rename both files so that their names carry $taxlevel
# The variable expansions are double-quoted so that an unexpected space or glob character
# in a value cannot split or expand an argument.
# NOTE(review) - "alias python=python3" probably has no effect in a non-interactive shell and
# the call below already uses python3 explicitly; kept as is, confirm before removing.
analysis: set -o verbose && cd /kraken && alias python=python3 && gunzip -c /bbx/input/testing/reads.1.fq.gz > /bbx/tmp/reads.1.fq && gunzip -c /bbx/input/testing/reads.2.fq.gz > /bbx/tmp/reads.2.fq && /kraken/kraken --fastq-input --threads "$thread" --paired --db "/bbx/reference/$reference/" /bbx/tmp/reads.1.fq /bbx/tmp/reads.2.fq --output /bbx/tmp/tmp.out && /kraken/kraken-report --db="/bbx/reference/$reference/" /bbx/tmp/tmp.out > /bbx/tmp/tmp.out.report && python3 /prepare_results.py && mv /bbx/output/bins.tsv "/bbx/output/bins.$taxlevel.tsv" && mv /bbx/output/profile.tsv "/bbx/output/profile.$taxlevel.tsv"
