
#
# Kraken2 default k-mer size (35) and example of wrapping a tool in a biobox
# This is the Taskfile, where all the commands to run during the benchmarking process are written.
#

#
# - The taxonomic rank (species, genus, family, etc.) is available in the variable $taxlevel
# - The number of available cores is in $thread
#
# - Keep your command on a single line, separate commands with &&, keep set -o verbose
#
# - Inputs are read-only, so they need to be gunzipped as a copy
# - There might be a huge number of files to deal with as reference. When possible, edit in place, do not duplicate, and clean temp files in the faa/fna/gbff processing to avoid filling the server disk.
# - In challenging ref building runs, all fna+gbff, unzipped, could be >600GB of raw files. So especially if your run requires more than one file type, do not duplicate files unless required. Do not ask for input types you don't need.
# - Avoid commands that have a limited argument list size (cp, ls, etc.) for manipulating the input fasta files. Use find . -exec ... {} \; instead
#   - BUT, put it in a separate bash file, as escaping here will cause problems. See prepare_inputs.sh and add_to_library.sh.
#
# Anything that is printed on stdout/stderr is reported in the final logs provided to the user.
#
# Refer to the detailed procedure on the wiki, and please report anything that could be improved.
#

##
#
# Part 1, code to build a reference, list in /bbx/input/training/mapping.tsv
#
# /bbx/input/training => read only
# /bbx/tmp/ => rw, but cleaned after the run
# /bbx/reference => rw, kept for the part 2, write your database here.
#
#
##

# build_ref task. Creates /bbx/tmp/input/ plus the library/ and taxonomy/ folders of /bbx/reference/$reference/, runs /prepare_inputs.sh,
# copies the *.dmp files from /bbx/input/training/ into taxonomy/, runs /edit_fasta.sh and /add_to_library.sh, then builds the database with kraken2-build.
# $parameter is split on the comma. Field 1 is the k-mer length, field 2 (optional) is passed unquoted as extra kraken2-build arguments.
# cut -s is used for field 2 so that a $parameter without a comma yields no extra argument instead of repeating the whole value.
# NOTE(review) the three helper scripts are not shown here, presumably they unpack, edit and register the reference sequences - confirm.
build_ref: set -o verbose && cd /kraken2 && alias python=python3 && mkdir -p /bbx/tmp/input/ && mkdir -p /bbx/reference/$reference/library/ && mkdir -p /bbx/reference/$reference/taxonomy/ && /prepare_inputs.sh && cp /bbx/input/training/*.dmp /bbx/reference/$reference/taxonomy/ && sleep 1 && /edit_fasta.sh && sleep 1 && /add_to_library.sh && kraken2-build --kmer-len $(echo $parameter | cut -f1 -d",") $(echo $parameter | cut -s -f2 -d",") --threads $thread --build --db /bbx/reference/$reference/

#
# Part 2, code to analyse the fastq files, reads.1.fq.gz and its pair reads.2.fq.gz, found in /bbx/input/testing. Unzip if needed.
#
# You need to write /bbx/output/profile.$taxlevel.tsv AND /bbx/output/bins.$taxlevel.tsv as final output. You CAN leave one of the two files empty (0 bytes) to report that you cannot handle it.
#
# /bbx/input/testing => read only
# /bbx/tmp => rw, but cleaned after the run
# /bbx/reference => read only this time
#

# analysis task. Runs /kraken2/kraken2 in paired mode on the two read files in /bbx/input/testing against the database in /bbx/reference/$reference/ with $thread threads,
# writing the per-read output to /bbx/tmp/tmp.out and the report to /bbx/tmp/tmp.out.report. It then runs /prepare_results.py and
# renames /bbx/output/bins.tsv and /bbx/output/profile.tsv so that their names carry $taxlevel, as required for the final output.
# NOTE(review) /prepare_results.py is not shown here, presumably it turns the kraken2 output into bins.tsv and profile.tsv - confirm.
# NOTE(review) the python alias has no visible effect on this line, which calls python3 explicitly.
analysis: set -o verbose && cd /kraken2 && alias python=python3 && /kraken2/kraken2 --fastq-input --threads $thread --paired --db /bbx/reference/$reference/ /bbx/input/testing/reads.1.fq.gz /bbx/input/testing/reads.2.fq.gz --output /bbx/tmp/tmp.out --report /bbx/tmp/tmp.out.report && python3 /prepare_results.py && mv /bbx/output/bins.tsv /bbx/output/bins.$taxlevel.tsv && mv /bbx/output/profile.tsv /bbx/output/profile.$taxlevel.tsv
