#
# This is the Taskfile, where all the commands to be run during the benchmarking process are written.
#

#
# - The taxonomic rank (species, genus, family, etc.) is in the variable $taxlevel
# - The number of available cores is in $thread
# - The reference folder is in $reference
# - A custom parameter is in $parameter
#
# - Keep your command on a single line, separate commands with &&, keep set -o verbose
#
# - Inputs are read-only; write to /bbx/tmp
# - There might be a huge number of files to deal with as reference. When possible, edit in place, do not duplicate files, and clean up temp files during the faa/fna processing to avoid filling the server disk.
# - In challenging reference-building runs, all the fna files, unzipped, could be >500GB of raw files. So do not duplicate them unless required.
#
# - Avoid commands that have a limited argument-list size (cp, ls, etc.) for manipulating the input fasta files. Use find . -exec ... {} \; or find . | xargs
#   - BUT put it in a separate bash file, as escaping here will cause problems. See prepare_inputs.sh and add_to_library.sh.
#
#
# Anything that is printed on stdout/stderr is reported in the final logs provided to the user.
#
# Refer to the detailed procedure on the wiki, and please report anything that could be improved/clarified.
#


##
#
# Part 1, code to build a reference, listed in /bbx/input/training/mapping.tsv
#
# /bbx/input/training => read only
# /bbx/tmp/ => rw, but cleaned after the run, cannot be reused in analysis step
# /bbx/reference => rw, kept for the part 2, write your database here.
#
#
##

# build_ref task, in order
# - create /bbx/reference/$reference/taxonomy/ (mkdir -p also creates the parent folders)
# - copy the *.dmp files from /bbx/input/training to that taxonomy folder and to /bbx/tmp, and copy mapping.tsv to /bbx/tmp
# - run /extract_inputs.sh (given the thread count) and /prepare_input_files.sh; presumably these produce /bbx/tmp/All_seqs.fna and /bbx/tmp/len_taxid_file.txt, TODO confirm
# - run ganon build from /ganon; the k-mer size is the first comma-separated field of $parameter
# - remove /bbx/tmp/All_seqs.fna once the build has succeeded
build_ref: set -o verbose && mkdir -p /bbx/reference/$reference/taxonomy/ && cp /bbx/input/training/*.dmp /bbx/reference/$reference/taxonomy/ && cp /bbx/input/training/*.dmp /bbx/tmp && cp /bbx/input/training/mapping.tsv /bbx/tmp/ && /extract_inputs.sh ${thread} && /prepare_input_files.sh && cd /ganon && /ganon/ganon build --verbose --threads ${thread} --ganon-path /ganon/build/ --taxsbp-path /taxsbp/ --kmer-size $(echo $parameter | cut -d , -f 1) --nodes-file /bbx/tmp/nodes.dmp --merged-file /bbx/tmp/merged.dmp --len-taxid-file /bbx/tmp/len_taxid_file.txt --db-prefix /bbx/reference/$reference --input-files /bbx/tmp/All_seqs.fna && rm /bbx/tmp/All_seqs.fna

#
#
# Part 2, code to analyse the fastq files, reads.1.fastq.gz and its pair reads.2.fastq.gz, found in /bbx/input/testing. Unzip in /bbx/tmp if needed.
# NOTE(review): the analysis task below reads reads.1.fq.gz and reads.2.fq.gz, not *.fastq.gz; confirm which file names are correct.
#
# You need to write /bbx/output/profile.$taxlevel.tsv AND /bbx/output/bins.$taxlevel.tsv as final output. You CAN leave one of the two files empty (0 bytes) to report that you cannot deal with it.
#
# /bbx/input/testing => read only
# /bbx/tmp => rw, but cleaned after the run
# /bbx/reference => read only this time
#

# analysis task, in order
# - decompress reads.1.fq.gz and reads.2.fq.gz from /bbx/input/testing into /bbx/tmp
# - run /filter_reads.py from /ganon; presumably it writes /bbx/tmp/reads.1.clean.fq, TODO confirm
# - run ganon classify with at most 4 errors; only reads.1.clean.fq is passed with --reads
#   results go to /bbx/tmp/out.txt, unclassified reads to /bbx/tmp/out_unc.txt
# - run /prepare_bins.sh; presumably it writes /bbx/tmp/bins.txt and /bbx/tmp/profile.txt, TODO confirm
# - move both files to /bbx/output under the names expected for $taxlevel
analysis: set -o verbose && cd /ganon && gunzip -c /bbx/input/testing/reads.1.fq.gz > /bbx/tmp/reads.1.fq && gunzip -c /bbx/input/testing/reads.2.fq.gz > /bbx/tmp/reads.2.fq && python3 /filter_reads.py && /ganon/ganon classify --verbose --threads ${thread} --ganon-path /ganon/build/ --db-prefix /bbx/reference/$reference --max-error 4 --output-file /bbx/tmp/out.txt --output-unclassified-file /bbx/tmp/out_unc.txt --reads /bbx/tmp/reads.1.clean.fq && /prepare_bins.sh && mv /bbx/tmp/bins.txt /bbx/output/bins.${taxlevel}.tsv && mv /bbx/tmp/profile.txt /bbx/output/profile.${taxlevel}.tsv
