// ---------------------------------------------------------------------------
// Pipeline parameters. Each value is a default that can be overridden on the
// command line with --<name> <value>.
// ---------------------------------------------------------------------------
params.out="output_assemblage_datalocal"  // default value for the output directory if --out is not specified
params.local_assemblies=""                // directory searched for *.fasta local assemblies
params.tab=''                             // input table given to the tableautolistsrr process
params.ADAPTATOR_PATH=""                  // directory containing the trimmomatic adapter files (toutPE.fa / toutSE.fa)
params.dbbusco=""                         // value passed to run_busco -l
params.fastq=""                           // directory searched for local fastq.gz files
params.test=""                            // "y" = test mode (only a subset of every fastq is used)

// Default configuration for Trinity's RAM and CPU usage
params.memory_trinity="5"
params.cpus_trinity="1"
cpus_trinity = params.cpus_trinity

// Trinity is given 90% of params.memory_trinity (rounded down), never less than 1.
// The parameter is converted to a number explicitly: its default is the String "5",
// and in Groovy String * Number means "repeat the string" ("5" * 0.9 == ""), which
// made Math.floor() fail whenever the default value was used.
memory_trinity = Math.floor(params.memory_trinity.toString().toDouble() * 0.9) as int
if( memory_trinity < 1 ) {
    memory_trinity = 1
}
println "memory_trinity: $memory_trinity"
println "cpus_trinity: $cpus_trinity"
//


// Directories taken from the parameters; the processes below interpolate them.
fastq_directory   = "${params.fastq}"
adaptor_directory = "${params.ADAPTATOR_PATH}"
dbbusco_path      = "${params.dbbusco}"

// Channel emitting the input table.
tableau = Channel.fromPath("${params.tab}")

// Local assemblies: every *.fasta found in params.local_assemblies.
// The species name is the part of the file name located before the first '.'.
println "local_assemblies: ${params.local_assemblies}"
list_ass = file("${params.local_assemblies}/*.fasta", glob: true)
list_sp_with_ass = list_ass.collect { fasta ->
    def nom = fasta.name
    nom.take(nom.indexOf('.'))
}

println "assemblies found for : ${list_sp_with_ass}"

//###################
//# tableautolistsrr #
//###################

process tableautolistsrr {
    // Runs tableau2listSRR.py on the input table together with the list of
    // species that already have a local assembly, and publishes the CSV lists
    // it writes (*_assemb.csv, *_quantif.csv, *_telecharge.csv).

    publishDir "${params.out}", mode: 'copy'
    label 'small_mem'
    echo false

    input:
    file x from tableau

    output:
    file("*_assemb.csv") optional true into list_srr_assemblage
    file("*_quantif.csv") optional true into list_srr_quantif
    file("*_telecharge.csv") into list_srr_telecharge

    script:
"""
echo ${list_sp_with_ass} > list_sp_with_assemblies.txt
./tableau2listSRR.py $x list_sp_with_assemblies.txt
rm list_sp_with_assemblies.txt
"""
}
//#####################





//######################
//# add_nom_assemblage : process associate the species name to the corresponding assembly #
//######################

process add_nom_assemblage {
    // Emits (species, files) tuples for the assembly lists. The species name is
    // the base name of the incoming CSV with '_assemb' removed; the CSV content
    // is copied into <basename>list.csv.
    tag { x.baseName.replaceAll('_assemb', '') }
    label 'small_mem'
    echo false
    publishDir "${params.out}/ajoutnom/${x.baseName.replaceAll('_assemb', '')}", mode: 'copy'

    input:
    file x from list_srr_assemblage.flatten()

    output:
    set val("${x.baseName.replaceAll('_assemb', '')}"), file('*') into listavecnomassemblage

    script:
"""
cat $x >  ${x.getBaseName()}'list.csv'
"""
}

//######################


//###################
//# add_nom_quantif : rename files with species name#
//###################

process add_nom_quantif {
    // Emits (species, files) tuples for the quantification lists. The species
    // name is the base name of the incoming CSV with '_quantif' removed; the CSV
    // content is copied into <basename>list.csv.
    tag { x.baseName.replaceAll('_quantif', '') }
    label 'small_mem'
    echo false
    publishDir "${params.out}/ajoutnom/${x.baseName.replaceAll('_quantif', '')}", mode: 'copy'

    input:
    file x from list_srr_quantif.flatten()

    output:
    set val("${x.baseName.replaceAll('_quantif', '')}"), file('*') into listavecnomquantif

    script:
"""
cat $x >  ${x.getBaseName()}'list.csv'
"""
}

//######################


//##############
//# download or copy the raw fastq files, depending on whether they are public files or locally generated #
//##############


process telecharge {
    // For every run prefix listed in the first column of the species CSV, fetch
    // the raw reads into the task directory as <prefix>_1.fastq.gz (and
    // <prefix>_2.fastq.gz for paired-end data):
    //   1. local paired-end files  <prefix>.R1.fastq.gz / <prefix>.R2.fastq.gz
    //   2. local paired-end files  <prefix>_1.fastq.gz  / <prefix>_2.fastq.gz
    //   3. local single-end file   <prefix>.fastq.gz
    //   4. local single-end file   <prefix>_1.fastq.gz
    //   5. otherwise download with fastq-dump
    // In test mode (params.test == "y") only the first 10000 lines of local files
    // are kept, and fastq-dump is limited with -N 10000 -X 20000.
    label 'small_mem'
    maxForks 11
    // A failing download is ignored so that the other species can continue.
    errorStrategy 'ignore'
    maxRetries 1
    // getBaseName() already drops the ".csv" extension, so only "_telecharge" has to be
    // removed (same expression as the one used for the output value below).
    tag { csv.getBaseName().replaceAll('_telecharge', '') }
    echo false
    containerOptions "--volume ${fastq_directory}:${fastq_directory}"
    publishDir "$params.out/fastqbrut/", mode: 'symlink'

input:
    file csv  from list_srr_telecharge.flatten()

output:
    set   val("${csv.getBaseName().replaceAll('_telecharge', '')}"),    file("*")  into fastq4trim,fastq4qual

script:
// params.test is quoted in every shell test: its default is the empty string, and an
// unquoted empty value renders as `[  = "y" ]`, which is a bash syntax error.
"""
for prefix in  \$(cut -f1 -d, $csv)
do
    if  [ -f ${fastq_directory}/"\$prefix".R1.fastq.gz ] && [ -f ${fastq_directory}/"\$prefix".R2.fastq.gz ]
    then
        # The files are available locally: copy them (PE)
        echo "\$prefix -> local data (PE) (local prefix_R1.fastq.gz prefix_R2.fastq.gz)"
        if [ "${params.test}" = "y"  ]
        then
            zcat ${fastq_directory}/"\$prefix".R1.fastq.gz | head -n 10000 | gzip > "\$prefix"_1.fastq.gz
            zcat ${fastq_directory}/"\$prefix".R2.fastq.gz | head -n 10000 | gzip > "\$prefix"_2.fastq.gz
        else
            cp  ${fastq_directory}/"\$prefix".R1.fastq.gz "\$prefix"_1.fastq.gz
            cp  ${fastq_directory}/"\$prefix".R2.fastq.gz "\$prefix"_2.fastq.gz
        fi
    elif  [ -f ${fastq_directory}/"\$prefix"_1.fastq.gz ] && [ -f ${fastq_directory}/"\$prefix"_2.fastq.gz ]
    then
        # The files are available locally: copy them (PE)
        echo "\$prefix -> local data (PE) (SRR: prefix.1.fastq.gz prefix.2.fastq.gz)"
        if [ "${params.test}" = "y"  ]
        then
            zcat ${fastq_directory}/"\$prefix"_1.fastq.gz | head -n 10000 | gzip > "\$prefix"_1.fastq.gz
            zcat ${fastq_directory}/"\$prefix"_2.fastq.gz | head -n 10000 | gzip > "\$prefix"_2.fastq.gz
        else
            cp  ${fastq_directory}/"\$prefix"_1.fastq.gz "\$prefix"_1.fastq.gz
            cp  ${fastq_directory}/"\$prefix"_2.fastq.gz "\$prefix"_2.fastq.gz
        fi
    elif [ -f ${fastq_directory}/"\$prefix".fastq.gz ]
    then
        # The file is available locally: copy it (SE)
        echo "\$prefix -> local data (local prefix.fastq.gz)"
        if [ "${params.test}" = "y"  ]
        then
            zcat ${fastq_directory}/"\$prefix".fastq.gz | head -n 10000 | gzip > "\$prefix"_1.fastq.gz
        else
            cp  ${fastq_directory}/\$prefix.fastq.gz "\$prefix"_1.fastq.gz
        fi
    elif [ -f ${fastq_directory}/"\$prefix"_1.fastq.gz ]
    then
        # The file is available locally: copy it (SE)
        echo "\$prefix -> local data (SE) (SRR: prefix.1.fastq.gz)"
        if [ "${params.test}" = "y"  ]
        then
            zcat ${fastq_directory}/"\$prefix"_1.fastq.gz | head -n 10000 | gzip > "\$prefix"_1.fastq.gz
        else
            cp  ${fastq_directory}/"\$prefix"_1.fastq.gz "\$prefix"_1.fastq.gz
        fi
    else
        # No local file: try to download the run with fastq-dump
        echo "\$prefix -> No local data try with fastqdump"
        if [ "${params.test}" = "y"  ]
        then
            fastq-dump --gzip --split-files --defline-seq  '@\$ac_\$si/\$ri' --defline-qual "+" -N 10000 -X 20000  \$prefix
        else
            fastq-dump --gzip --split-files --defline-seq  '@\$ac_\$si/\$ri' --defline-qual "+"  \$prefix
        fi
        # When a _3 file exists it replaces the _2 file (the original author observed that
        # _2 is bad in that case); read names ending in /3 are renamed to /2.
        if [ -f \$prefix"_3.fastq.gz" ]
        then
            # Read the same .gz file that was just tested (the name used to lack the .gz suffix).
            zcat \$prefix"_3.fastq.gz" | sed -e "s/\\(^@SRR[0-9]*.[0-9]*\\)\\/3/\\1\\/2/g" | gzip  > \$prefix"_2.fastq.gz"
            rm \$prefix"_3.fastq.gz"
        fi
    fi
done
"""
}
//##############




//#############
//# clean up raw files with trimmomatic to remove adapters  #
//#############

process net_trimo {
    // Adapter/quality trimming with trimmomatic.
    // For every <prefix>_1.fastq.gz staged in the task directory:
    //   - if <prefix>_2.fastq.gz exists, run trimmomatic PE (adapters: toutPE.fa)
    //   - otherwise run trimmomatic SE (adapters: toutSE.fa)
    // Afterwards every paired output is checked: both mates must contain the same,
    // non-zero number of lines, otherwise the task fails.
    label 'big_mem'
    //errorStrategy 'retry'
    //maxRetries 20
    tag{espece}
    echo false
    //publishDir "$params.out/fastqnet/${espece}", mode: 'copy'
    containerOptions "--volume ${adaptor_directory}:${adaptor_directory}"

input:
    set  val(espece) ,  file(f) from fastq4trim

output:
    set val(espece)  , file("*.fastq.gz") into fastqtrim4trinity,fastqtrim4kallisto,fastqtrim4quality // output the species name and fastq that are created after the cleaning by trimmomatic


script:
"""
for fastq1 in \$(ls *_1.fastq.gz)
do
    fastq2=\${fastq1/_1.fastq.gz/_2.fastq.gz}
    prefix=\${fastq1/_1.fastq.gz/}

    if [ -f \$fastq2 ] # the mate file exists -> paired-end, otherwise single-end
        then
            trimmomatic PE -threads 4  \$fastq1  \$fastq2 "\$prefix"_1L_trim.fastq.gz  "\$prefix"_1L_trim_unpaired.fastq.gz "\$prefix"_2R_trim.fastq.gz "\$prefix"_2R_trim_unpaired.fastq.gz ILLUMINACLIP:${adaptor_directory}/toutPE.fa:2:30:12 TRAILING:20 MINLEN:25 AVGQUAL:20
    else
            trimmomatic SE -threads 4  \$fastq1  "\$prefix"_s_trim.fastq.gz ILLUMINACLIP:${adaptor_directory}/toutSE.fa:2:30:12 TRAILING:20 MINLEN:25 AVGQUAL:20
    fi

done


# Sanity check of the paired outputs.
# Lines are counted on the DECOMPRESSED content (wc -l on the .gz file itself only counts
# newline bytes of the compressed stream), and both conditions must hold: same number of
# lines in the two mates AND at least one line.
for fastq1trim in *_1L_trim.fastq.gz
do
    # Single-end only species: the glob matches nothing and stays literal.
    [ -e "\$fastq1trim" ] || continue

    fastq2trim=\${fastq1trim/_1L_trim.fastq.gz/_2R_trim.fastq.gz}
    nb_lines1=\$(zcat "\$fastq1trim" | wc -l)
    nb_lines2=\$(zcat "\$fastq2trim" | wc -l)

    if [ "\$nb_lines1" -eq "\$nb_lines2" ] && [ "\$nb_lines1" -ne 0 ]
        then
        echo 'ok same number of line and no 0  '
    else
        echo "\$fastq1trim: \$nb_lines1 lines"
        echo "\$fastq2trim: \$nb_lines2 lines"
        echo 'different number of line or 0 line '
        exit 1
    fi
done

"""
}
//#############


// Switch for the quality-control processes (qualite, qualite_trim, multiqcbrute,
// multiqctrim): they are only defined when params.fastqc is "y" (the default).
params.fastqc="y"
println "Run fastqc : $params.fastqc"
 
if ( params.fastqc == "y" ) {

    //###########
    //# quality control (if this is required in the pipeline) #
    //###########

    process qualite {
        // Runs fastqc on every raw *.fastq.gz of a species; the fastqc standard
        // output is appended to <species>.rawfastqc.out.
        tag { espece }
        label 'small_mem'
        time '30m'
        maxRetries 3
        errorStrategy 'retry'
        echo false
        publishDir "${params.out}/qualbrut/${espece}", mode: 'copy'

        input:
        set val(espece), file(x) from fastq4qual

        output:
        set val(espece), file("*") optional true into fastqc

        script:
    """

    for fastq in \$(ls *.fastq.gz)
    do

    fastqc -q -o .  -f fastq  \$fastq  >> ${espece}.rawfastqc.out
    echo \$fastq

    done
    """
    }
    //##############


    //################
    //# quality control after read cleaning #
    //################

    process qualite_trim {
        // Runs fastqc on every trimmed *.fastq.gz of a species; the fastqc
        // standard output is appended to <species>.trimfastqc.out.
        tag { espece }
        label 'small_mem'
        time '30m'
        maxRetries 3
        errorStrategy 'retry'
        echo false
        publishDir "${params.out}/qualtrim/${espece}", mode: 'copy'

        input:
        set val(espece), file(x) from fastqtrim4quality

        output:
        set val(espece), file("*") optional true into fastqc_trim

        script:
    """
    for fastq in \$(ls *.fastq.gz)
    do

    fastqc  -f fastq  \$fastq >> ${espece}.trimfastqc.out
    echo \$fastq

    done
    """
    }
    //################

    //################
    //# MultiQC for the quality control before cleaning #
    //################

    process multiqcbrute {
        // Runs multiqc in the task directory holding the fastqc results of the
        // raw reads of one species.
        tag { espece }
        label 'small_mem'
        // time '30m'
        maxRetries 3
        errorStrategy 'retry'
        echo false
        publishDir "${params.out}/multiqc/${espece}/raw/", mode: 'copy'

        input:
        set val(espece), file(x) from fastqc

        output:
        file("*") optional true into multi_brute

        script:
    """
    multiqc  .
    """
    }
    //################

    //###############
    //# MultiQC for the quality control after cleaning #
    //###############

    process multiqctrim {
        // Runs multiqc in the task directory holding the fastqc results of the
        // trimmed reads of one species.
        tag { espece }
        label 'small_mem'
        //time '30m'
        maxRetries 3
        errorStrategy 'retry'
        echo false
        publishDir "${params.out}/multiqc/${espece}/trim/", mode: 'copy'

        input:
        set val(espece), file(x) from fastqc_trim

        output:
        file("*") optional true into multi_trim

        script:
    """
      multiqc  .

    """
    }
    //###############
}


//#############
//# Merge channels (internal purpose) #
//#############

// Put the assembly lists and the trimmed fastq files in one channel, then group
// them by species: one [species, flat list of files] item per species.
conca4trinity = listavecnomassemblage.concat(fastqtrim4trinity)
group_per_species_trinity = conca4trinity
    .groupTuple()
    .map { groupe -> [ groupe[0], groupe[1].flatten().toList() ] }
//##############




//################
//#Trinity assembly#
//################

process assemb_trinity {
    // Produces <species>.trinity.Trinity.fasta for one species:
    //   - if a local assembly exists for the species, it is copied;
    //   - otherwise, when <species>_assemblist.csv is present, a de novo Trinity
    //     assembly is run on the reads selected by prep_trinity_cmd.py;
    //   - otherwise nothing is produced (the output is optional).
    errorStrategy 'retry'
    maxRetries 0
    tag{"${espece}"}
    echo false
    publishDir "$params.out/assemblage/${espece}", mode: 'copy'
    containerOptions "--volume ${params.local_assemblies}:${params.local_assemblies}"

input:
    set val(espece) , file(f) from group_per_species_trinity

output:
    set val (espece) , file ("*.fasta") optional true into assemblage4busco,assemblage4transdecoder,assemblage4kallisto,assemblage4stats

script:

// Look the local assembly up in list_ass with the same naming rule that built
// list_sp_with_ass (species = file name up to the first '.'). The previous shell glob
// "${espece}.*.fasta" did not match a file named "<species>.fasta" although that species
// was listed as having an assembly, and it broke `cp` when several files matched.
def local_assembly = list_ass.find { fasta -> fasta.name.take(fasta.name.indexOf('.')) == espece }

if ( local_assembly )
    """
    echo "WARNING: a local assembly is avaible for $espece, it will be used insetad of a de novo trinity assembly"
    cp ${local_assembly} ${espece}.trinity.Trinity.fasta
    """
else 
    """
    echo "WARNING: no local assembly is avaible for $espece, a de novo trinity assembly will be made"
    echo memory for docker: ${params.memory_trinity}
    echo memory for trinity: ${memory_trinity}
    
    if [ -f ${espece}_assemblist.csv ]
    then
        fastq_trinity_l=`./prep_trinity_cmd.py`
        echo fastq_trinity_l: \$fastq_trinity_l

        # A Trinity failure is reported (its log is printed) but does not fail the task.
        Trinity --seqType fq --max_memory ${memory_trinity}G  \$fastq_trinity_l --CPU ${cpus_trinity} --output ${espece}.'trinity'  --full_cleanup > ${espece}.'trinity'.out || (echo error && cat ${espece}.'trinity'.out)
    else
        echo "WARNING: No assembly for $espece"
    fi
    """
}
//################


//################
//# Predict coding sequences from trinity assemblies with transdecoder #
//################

process transdecoder {
    // Runs TransDecoder.LongOrfs then TransDecoder.Predict on the assembly.
    // <species>.cds and <species>.pep are created empty first, so both outputs
    // always exist; they are filled only when TransDecoder produced a non-empty
    // <species>.trinity.Trinity.fasta.transdecoder.cds file.
    tag { espece }
    label 'small_mem'
    //errorStrategy 'retry'
    //maxRetries 5
    echo false
    publishDir "${params.out}/transdecoder/cds/", pattern: "${espece}.cds", mode: 'copy'
    publishDir "${params.out}/transdecoder/pep/", pattern: "${espece}.pep", mode: 'copy'

    input:
    set val(espece), file(assemblage) from assemblage4transdecoder

    output:
    set val(espece), file("${espece}.pep") into transdecoder_pep_out
    set val(espece), file("${espece}.cds") into transdecoder_cds4kallisto

    script:
"""
echo "TransDecoder ${espece}"
touch ${espece}.cds
touch ${espece}.pep
TransDecoder.LongOrfs -t $assemblage -m 80 >> ${espece}.transdecoder_longorfs.out || echo "error ${espece}"
TransDecoder.Predict -t  $assemblage  --single_best_only --retain_long_orfs_length 200  > ${espece}.transdecoder_Predict.out || echo "error ${espece}"

if [ -s ${espece}.trinity.Trinity.fasta.transdecoder.cds ]
then
    #renomer les cds
    ./remove_dup_sequences_in_fasta.py ${espece}.trinity.Trinity.fasta.transdecoder.cds ${espece}.cds
    wc -l *cds

    #renomer les pep
    seqtk subseq  ${espece}.trinity.Trinity.fasta.transdecoder.pep  list_cds.txt  > ${espece}.pep
fi
"""
}
//################



// NOTE: the condition below compares two different string literals, so it is always
// false: the busco process is never defined and the assemblage4busco channel is
// never consumed. Presumably a manual on/off switch — TODO confirm before changing.
if  ( "busco" == "busco_run") {
//#######
//#assess the quality of the assemblies with busco#
//#######

// Runs run_busco in transcriptome mode (-m tran) on the assembly of one species,
// with the lineage given by dbbusco_path (params.dbbusco); the standard output is
// written to <species>.out_busco.
process busco{
    label 'small_mem'
    //errorStrategy 'retry'
    //maxRetries 3
    tag{espece}
    echo false
    publishDir "$params.out/info_assemblages/$espece/busco", mode: 'copy'
    container true

input:
    set val(espece) , file (assemblage) from assemblage4busco

output:
    file "*" into busco

script:
"""

run_busco  -i $assemblage -o $espece  -l ${dbbusco_path}  -m tran > ${espece}.out_busco


"""

}
//#################################
}




//##############
//#Assess the quality of the assemblies using TrinityStats#
//##############


process TrinityStats {
    // Runs Trinity's TrinityStats.pl on the assembly of one species and stores
    // the report in out_TrinityStats_<species>.txt.
    tag { espece }
    label 'small_mem'
    maxRetries 3
    errorStrategy 'retry'
    echo false
    publishDir "${params.out}/info_assemblages/${espece}/TrinityStats", mode: 'copy'

    input:
    set val(espece), file(assemblage) from assemblage4stats

    output:
    file("*") into TrinityStats_out

    script:
"""

/usr/local/bin/trinityrnaseq/util/TrinityStats.pl  $assemblage  > out_TrinityStats_${espece}.txt

"""

}

//###########################


//########
//#merge channels (internal usage)#
//########

// Group, for each species, the predicted CDS and the quantification list.
conca_ass_list = transdecoder_cds4kallisto.concat(listavecnomquantif)

group_ass_list = conca_ass_list
    .groupTuple()
    .map { groupe -> [ groupe[0], groupe[1].flatten().toList() ] }

// Two copies of the grouped channel: one for kallisto, one for blastx.
group_ass_list.into { group_ass_list; group_ass_list_blast }

// kallisto input: one item per species holding the files grouped above plus the
// trimmed fastq files of that species.
concat_quantif = group_ass_list.concat(fastqtrim4kallisto)
all4quantif = concat_quantif
    .groupTuple()
    .map { groupe -> [ groupe[0], groupe[1].flatten().toList() ] }


//###############################################


//##########
//#quantify expression levels with kallisto#
//##########

process kallisto {
    // Quantification for one species. Nothing is done unless
    // <species>_quantiflist.csv is non-empty; prep_commande_quantif.py is then run
    // (sample.tsv is read right after it) and, when <species>.cds is non-empty,
    // Trinity's align_and_estimate_abundance.pl is run with the kallisto method.
    // NOTE(review): --output_dir is given the literal word "espece", not the
    // species variable — confirm whether this is intended.
    tag { espece }
    label 'big_mem'
    echo false
    publishDir "${params.out}/kallisto/${espece}", mode: 'copy'

    input:
    set val(espece), file(cds) from all4quantif

    output:
    set val(espece), file("*") optional true into out_kallisto

    script:
    """
    if [ -s ${espece}_quantiflist.csv ]
    then
        ./prep_commande_quantif.py ${espece} ${espece}_quantiflist.csv
        echo $espece : ` wc -l  sample.tsv `

        assembly=${espece}.cds

        if [ -s \$assembly ]
        then
        echo assembly: \$assembly
        /usr/local/bin/trinityrnaseq/util/align_and_estimate_abundance.pl --transcripts \$assembly  --seqType fq   --samples_file   sample.tsv  --est_method kallisto  --thread_count 4 --output_dir espece  --prep_reference
        else
        echo assembly: No assembly! ERROR
        fi
    fi
    """
}

//#################################################


// Optional final step: blastx of the predicted CDS, only defined when
// params.blastx is "y" (default "n").
params.blastx = "n"
println "Run Blastx at the end: ${params.blastx}"

if ( params.blastx == "y" ) {
    println "   Database: ${params.path_eggnog}"

    //#####################
    //# blast2eggnog_cDNA #
    //#####################

    process blast2eggnog_cDNA {
        // Runs blastx of <species>.cds against /db/conca_eggnog_rongeurs.fa and
        // writes a tabular report to <species>_blastx.out; nothing is run when
        // the query file is missing or empty.
        tag { espece }
        //errorStrategy 'retry'
        //maxRetries 5
        echo false
        publishDir "${params.out}/blastx_eggnog/${espece}", mode: 'copy'

        input:
        set val(espece), file(cds) from group_ass_list_blast

        output:
        set val(espece), file("*") optional true into blastxegg_cDNA

        script:
    """
    echo Sp: $espece
    echo Db: `ls /db/conca_eggnog_rongeurs.fa`
    
    query=${espece}.cds
     
    if [ -s \$query ]
    then
        echo query: \$query
        blastx -query \$query -db /db/conca_eggnog_rongeurs.fa  -out "${espece}"_blastx.out -outfmt "6 qseqid sseqid evalue bitscore length pident qstart qend sstart send qlen" -max_hsps 1 -max_target_seqs 1 -evalue 0.000001
        wc -l "${espece}"_blastx.out
    else
        echo query: No query! ERROR
    fi
    """
    }
    //#####################
}
