#!/bin/sh

################################################
## Post-alignment processing of SAM/BAM files ##
################################################

## **Software required**
# picard v3.1.0 (https://github.com/broadinstitute/picard)
# samtools v1.13 (https://github.com/samtools/samtools)
# picard and samtools were executed using the NIG Supercomputer system (https://sc.ddbj.nig.ac.jp)

picard="singularity exec /usr/local/biotools/p/picard:3.1.0--hdfd78af_0 picard" # path to picard (in NIG Supercomputer)
samtools="singularity exec /usr/local/biotools/s/samtools:1.13--h8c37831_0 samtools" # path to samtools (in NIG Supercomputer)

threads=12
RAM=32

list=("RY1114 H T6 T7" "RY1115 H T2 T6")
for item in "${list[@]}"; do
    arr=($item)
    name=${arr[0]}
    tissues=("${arr[@]:1}")  # Collect all tissues in an array

    temp_dir="$name/temp"
    
    for tissue in "${tissues[@]}"; do
        MAX_RECORDS_IN_RAM=$(expr $RAM \* 250000)
        # Clean sam files
        $picard CleanSam \
        -INPUT $temp_dir/${name}_${tissue}.sam \
        -OUTPUT $temp_dir/${name}_${tissue}.cleaned.bam \
        -VALIDATION_STRINGENCY LENIENT

        # Sort bam files
        $samtools sort -@ "$threads" \
        $temp_dir/${name}_${tissue}.cleaned.bam \
        -o $temp_dir/${name}_${tissue}.cleaned.sorted.bam

        # Assign all the reads in a file to a single new read-group
        $picard AddOrReplaceReadGroups \
        -I $temp_dir/${name}_${tissue}.cleaned.sorted.bam \
        -O $temp_dir/${name}_${tissue}.cleaned.sorted.readgroups.bam \
        -ID 1 -LB Lib1 -PL ILLUMINA -PU Run1 -SM $tissue \
        -MAX_RECORDS_IN_RAM $MAX_RECORDS_IN_RAM

        # Mark duplicates
        $picard MarkDuplicates \
        -INPUT $temp_dir/${name}_${tissue}.cleaned.sorted.readgroups.bam \
        -OUTPUT $temp_dir/${name}_${tissue}.cleaned.sorted.readgroups.marked.bam \
        -METRICS_FILE $name/results/QC/${name}_${tissue}.duplicate_metrics.txt \
        -REMOVE_DUPLICATES false -ASSUME_SORTED true \
        -VALIDATION_STRINGENCY LENIENT \
        -MAX_RECORDS_IN_RAM $MAX_RECORDS_IN_RAM
    done
done

