#
# This is the Taskfile, where all the commands to be run during the benchmarking process are written.
#

#
# - The taxonomic rank (species, genus, family, etc.) is available in the variable $taxlevel
# - The number of available cores is in $thread
# - The reference folder is in $reference
# - A custom parameter is in $parameter
#
# - Keep your command on a single line, separate commands with &&, keep set -o verbose
#
# - Inputs are read-only; write to /bbx/tmp
# - There might be a huge number of files to deal with as reference. When possible, edit in place, do not duplicate files, and clean up temp files during the faa/fna processing to avoid filling the server disk.
# - In challenging reference-building runs, all the fna files, unzipped, could be >500GB of raw files. So do not duplicate them unless required.
#
# - Avoid commands that have a limited argument size (cp, ls, etc.) for manipulating the input fasta files. Use find . -exec ... {} \; or find . | xargs
#   - BUT, put it in a separate bash file, as escaping here will cause problems. See prepare_inputs.sh and add_to_library.sh.
#
#
# Anything that is printed on stdout/stderr is reported in the final logs provided to the user.
#
# Refer to the detailed procedure on the wiki, and please report anything that could be improved/clarified.
#


##
#
# Part 1, code to build a reference, list in /bbx/input/training/mapping.tsv
#
# /bbx/input/training => read only
# /bbx/tmp/ => rw, but cleaned after the run, cannot be reused in analysis step
# /bbx/reference => rw, kept for the part 2, write your database here.
#
#
##

# Reference-building task. Steps, in order (the chain stops at the first failing command)
# - create the scratch input folder and the library/ and taxonomy/ folders of the reference
# - copy the *.dmp taxonomy dumps from the training input into the reference taxonomy/ folder
# - from /metacache, run the helper scripts /prepare_inputs1.sh, /prepare_inputs.py, /prepare_inputs2.sh and /clean.sh
#   (not shown here; presumably they produce /bbx/tmp/bench.fna and /bbx/tmp/acc2taxid - to be confirmed)
# - build the metacache database "bench" from /bbx/tmp/bench.fna; the k-mer length is the first comma-separated field of $parameter
# - delete /bbx/tmp/bench.fna once the database is built
build_ref: set -o verbose && mkdir -p /bbx/tmp/input/ /bbx/reference/${reference}/library/ /bbx/reference/${reference}/taxonomy/ && cp /bbx/input/training/*.dmp /bbx/reference/${reference}/taxonomy/ && cd /metacache && /prepare_inputs1.sh && python3 /prepare_inputs.py && /prepare_inputs2.sh && /clean.sh && ./metacache build /bbx/reference/${reference}/library/bench /bbx/tmp/bench.fna -taxonomy /bbx/reference/${reference}/taxonomy/ -taxpostmap /bbx/tmp/acc2taxid -kmerlen $(echo ${parameter} | cut -d, -f1) && rm /bbx/tmp/bench.fna


#
#
# Part 2, code to analyse the fastq files, reads.1.fq.gz and its pair reads.2.fq.gz, found in /bbx/input/testing. Unzip in /bbx/tmp if needed.
#
# You need to write /bbx/output/profile.$taxlevel.tsv AND /bbx/output/bins.$taxlevel.tsv as final output. You CAN leave one of the two files empty (0 bytes) to report that you cannot deal with it.
#
# /bbx/input/testing => read only
# /bbx/tmp => rw, but cleaned after the run
# /bbx/reference => read only this time
#

# Read-classification task. Steps, in order (the chain stops at the first failing command)
# - create /bbx/tmp/results
# - decompress both read files to /bbx/tmp with gunzip -c, so the read-only inputs are left untouched
# - run metacache query on the paired reads against the "bench" database of $reference, using $thread threads,
#   writing per-read results to /bbx/tmp/bench.out and abundances per $taxlevel to /bbx/tmp/abundances.txt
# - run /prepare_profiles.sh and /prepare_profiles.py, seed /bbx/output/bins.tsv with /bins.header, then run /prepare_outputs.py
#   (helper scripts not shown here; presumably they write /bbx/output/profile.tsv and fill bins.tsv - to be confirmed)
# - rename both outputs to their final .$taxlevel.tsv names and delete the decompressed reads
# Only the testing input is read in this step; the training input is not needed here.
analysis: set -o verbose && mkdir -p /bbx/tmp/results && gunzip -c /bbx/input/testing/reads.1.fq.gz > /bbx/tmp/reads.1.fq && gunzip -c /bbx/input/testing/reads.2.fq.gz > /bbx/tmp/reads.2.fq && /metacache/metacache query /bbx/reference/$reference/library/bench /bbx/tmp/reads.1.fq /bbx/tmp/reads.2.fq -pairfiles -out /bbx/tmp/bench.out -lowest $taxlevel -mapped-only -taxids -separate-cols -threads $thread -abundances /bbx/tmp/abundances.txt -abundance-per $taxlevel && /prepare_profiles.sh $taxlevel && python3 /prepare_profiles.py && cp /bins.header /bbx/output/bins.tsv && python3 /prepare_outputs.py && mv /bbx/output/bins.tsv /bbx/output/bins.$taxlevel.tsv && mv /bbx/output/profile.tsv /bbx/output/profile.$taxlevel.tsv && rm /bbx/tmp/*.fq
