#!/bin/bash
# Strict mode:
#   -e           abort the whole script as soon as any command exits non-zero
#   -o pipefail  a pipeline counts as failed when any one of its stages fails,
#                not only the last one
set -eo pipefail

#this script only requires 1 node.

# Written by Stephen Tran (@ Grace Xiao's lab, UCLA)


# Require exactly 8 positional arguments; otherwise print the usage text and
# exit with status 1.
# "$#" is used for the count instead of BASH_ARGV: the bash manual states that
# BASH_ARGV is only maintained in extended debugging mode and may hold
# inconsistent values otherwise, whereas "$#" is always the argument count.
if [ "$#" -ne 8 ]
then
	echo -e "--help requires arguments : <input_unmapped_fastq1> <input_unmapped_fastq2> <phred_encoding> <hisat_index_normal_genome> <hisat_index_reverse_complement_genome> <ref.fa> <output_prefix> <original_sam_file>"\
	"\n\tinput_unmapped_fastq1 : input unmapped R1 fastq"\
	"\n\tinput_unmapped_fastq2 : input unmapped R2 fastq"\
	"\n\tphred_encoding : quality encoding of fastq files (33 or 64)"\
	"\n\thisat_index_normal_genome : hisat index of normal genome"\
	"\n\thisat_index_reverse_complement_genome : hisat index of reverse genome"\
	"\n\tref.fa : reference genome in fasta format"\
	"\n\toutput_prefix : output prefix of final output (will become output_prefix.sam)"\
	"\n\toriginal_sam_file : originally mapped sam file before hyperediting pipeline. Must be sam format"
	exit 1
fi

# Give the eight positional arguments descriptive names, in the same order as
# they are listed in the usage message.

# unmapped reads (one fastq per mate) and their quality encoding
input_unmapped_fastq1="${1}"
input_unmapped_fastq2="${2}"
phred_encoding="${3}"

# hisat indices and the reference fasta
hisat_index_normal_genome="${4}"
hisat_index_reverse_complement_genome="${5}"
ref_fa="${6}"

# prefix for every file this script writes, and the original alignment
output_prefix="${7}"
original_sam_file="${8}"

#get phred encoding
# Map the phred_encoding argument onto the quality flag that is later passed
# to hisat2 via $quality_scheme. Anything other than the literal values 33 or
# 64 is rejected with exit status 1.
# A quoted case statement is used rather than an unquoted numeric test so that
# an empty or non-numeric value goes straight to the error branch without
# test(1) first printing its own "integer expression expected" diagnostics.
case "$phred_encoding" in
	33)
		quality_scheme="--phred33"
		;;
	64)
		quality_scheme="--phred64"
		;;
	*)
		echo phred_encoding must be 33 or 64
		exit 1
		;;
esac

# Record the starting value of bash's SECONDS counter; the end of the script
# subtracts it to report the elapsed time. Read directly, no subshell needed.
start_time=$SECONDS

# Directory containing this script; the helper scripts are invoked through
# this path. "$0" is quoted so a path containing whitespace stays one word.
dir_name=$(dirname -- "$0")
#make index files
# For each mate, run change_As_to_Gs_in_fastq_reads.py on the unmapped fastq.
# The second argument (<output_prefix>.<mate>.As_to_Gs.fq) is presumably where
# the converted reads are written, since the alignment step reads that file.
# The script's stdout is sorted by sort_sam_files_lexicographically.sh and
# saved as <output_prefix>.<mate>.index.
# All expansions are quoted so paths with whitespace or glob characters are
# passed through as single arguments.
if ! python "${dir_name}/change_As_to_Gs_in_fastq_reads.py" "$input_unmapped_fastq1" "${output_prefix}.R1.As_to_Gs.fq" \
	| "${dir_name}/sort_sam_files_lexicographically.sh" > "${output_prefix}.R1.index"
then
	echo job failed converting As to Gs for R1	
	exit 1
fi
if ! python "${dir_name}/change_As_to_Gs_in_fastq_reads.py" "$input_unmapped_fastq2" "${output_prefix}.R2.As_to_Gs.fq" \
	| "${dir_name}/sort_sam_files_lexicographically.sh" > "${output_prefix}.R2.index"
then
	echo job failed converting As to Gs for R2
	exit 1
fi

#now run the pipeline for R1, R2, R1 reverse, and R2 reverse

# align_converted_reads <mate> <hisat_index>
#   The part that all four alignment runs have in common: feed the converted
#   fastq of one mate to hisat2 on stdin (-U -), sort the result with
#   sort_sam_files_lexicographically.sh, pass it through
#   change_back_Gs_to_As_in_mapped_sam_file.py together with that mate's index
#   file, then through remove_minus_strand_mappings.py.
# Globals read: output_prefix, dir_name, quality_scheme
# Arguments:    $1 - mate label, R1 or R2 (selects the .As_to_Gs.fq/.index pair)
#               $2 - hisat2 index to align against
# Outputs:      the processed records on stdout
# Returns:      status of the pipeline; because pipefail is enabled at the top
#               of the script, a failure in any stage makes it non-zero
align_converted_reads() {
	local mate=$1
	local hisat_index=$2

	# the fastq is redirected into hisat2 instead of being piped through cat
	hisat2 -q -x "$hisat_index" -U - -k 50 --no-unal --reorder --no-hd "$quality_scheme" < "${output_prefix}.${mate}.As_to_Gs.fq" \
		| "${dir_name}/sort_sam_files_lexicographically.sh" \
		| python "${dir_name}/change_back_Gs_to_As_in_mapped_sam_file.py" "${output_prefix}.${mate}.index" \
		| python "${dir_name}/remove_minus_strand_mappings.py"
}

# The two runs against the reverse complement index get one extra stage,
# return_reverse_sam_file_to_normal_coordinates.py, which is given the
# reference's .fai file.

#R1 reverse
if ! align_converted_reads R1 "$hisat_index_reverse_complement_genome" \
	| python "${dir_name}/return_reverse_sam_file_to_normal_coordinates.py" "${ref_fa}.fai" > "${output_prefix}.R1.reverse.sam"
then
	echo processing R1 fastq to reverse genome failed
	exit 1
fi

#R1 forward
if ! align_converted_reads R1 "$hisat_index_normal_genome" > "${output_prefix}.R1.forward.sam"
then
	echo processing R1 fastq to forward genome failed
	exit 1
fi

#R2 reverse
if ! align_converted_reads R2 "$hisat_index_reverse_complement_genome" \
	| python "${dir_name}/return_reverse_sam_file_to_normal_coordinates.py" "${ref_fa}.fai" > "${output_prefix}.R2.reverse.sam"
then
	echo processing R2 to reverse genome failed
	exit 1
fi

#R2 forward
if ! align_converted_reads R2 "$hisat_index_normal_genome" > "${output_prefix}.R2.forward.sam"
then
	echo processing R2 to forward genome failed
	exit 1
fi

#clean up the files before the next step
# Operands are quoted and preceded by "--" so that an output prefix containing
# whitespace, glob characters or a leading "-" cannot be split, expanded or
# mistaken for an rm option.
rm -- "${output_prefix}.R1.As_to_Gs.fq" #because you do not need this file anymore
rm -- "${output_prefix}.R2.As_to_Gs.fq" #because you do not need this file anymore
rm -- "${output_prefix}.R1.index" #you don't need the indices anymore
rm -- "${output_prefix}.R2.index" #you don't need the indices anymore

#now agglomerate the R1.forward, R1.reverse and the R2.forward,R2.reverse
# combine_R_and_RC_files.py is called once per mate with three paths: the
# forward sam, the reverse sam, and <output_prefix>.<mate>_RC.sam. The third
# path is presumably the output, since the next step reads it.
# Paths are quoted so they reach the script as single arguments.
if ! python "${dir_name}/combine_R_and_RC_files.py" "${output_prefix}.R1.forward.sam" "${output_prefix}.R1.reverse.sam" "${output_prefix}.R1_RC.sam"
then
	echo combining forward and reverse R1 sam files failed
	exit 1
fi
if ! python "${dir_name}/combine_R_and_RC_files.py" "${output_prefix}.R2.forward.sam" "${output_prefix}.R2.reverse.sam" "${output_prefix}.R2_RC.sam"
then
	echo combining forward and reverse R2 sam files failed
	exit 1
fi

#you can remove the old forward and reverse sam files now
# Quoted and guarded with "--" so the prefix is never word-split, glob-expanded
# or parsed as an option by rm.
rm -- "${output_prefix}.R1.forward.sam"
rm -- "${output_prefix}.R1.reverse.sam"
rm -- "${output_prefix}.R2.forward.sam"
rm -- "${output_prefix}.R2.reverse.sam"


#now get uniquely mapped reads and get final sam file
# Data flow: find_uniquely_mapped_reads.py receives both combined sam files;
# its stdout is piped into append_hyperediting_reads_to_normal_reads.sh, which
# is also given the original sam file; that output goes through
# keep_only_required_sam_fields.sh and is written to <output_prefix>.sam.
# Paths are quoted so they are passed on as single arguments.
if ! python "${dir_name}/find_uniquely_mapped_reads.py" "${output_prefix}.R1_RC.sam" "${output_prefix}.R2_RC.sam" \
	| "${dir_name}/append_hyperediting_reads_to_normal_reads.sh" "$original_sam_file" \
	| "${dir_name}/keep_only_required_sam_fields.sh" > "${output_prefix}.sam"
then
	echo getting uniquely mapped reads and creating sam file failed
	exit 1
fi

#remove old files
# The combined per-mate files have been consumed by the previous step.
# Quoted and guarded with "--" so the prefix is never word-split, glob-expanded
# or parsed as an option by rm.
rm -- "${output_prefix}.R1_RC.sam"
rm -- "${output_prefix}.R2_RC.sam"



# Report how long the run took: the difference between the current value of
# bash's SECONDS counter and the value saved in start_time earlier.
# SECONDS is read directly (no echo in a subshell), and the variables are named
# without "$" inside $(( )) so an empty value evaluates as 0 instead of
# producing an arithmetic syntax error.
end_time=$SECONDS
total_time=$((end_time - start_time))
echo "run_hyperediting_pipeline_EX.sh took ${total_time} seconds"
echo run_hyperediting_pipeline_EX.sh completed  #use this line to test your script completed without any errors
