#!/usr/bin/env bash
####Script_for_HISAT2_based_alignment_of_RNA-Seq_datasets
# Requires bash: later sections use the ${var/pattern/replacement} expansion.

# Folder that holds the reference files used by the pipeline scripts.
References_Directory=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/SCRIPTS/REFERENCES_USED_RENAMED_AS_PER_SCRIPTS

# Create the alignment output folder. -p creates missing parents and makes a
# re-run a no-op instead of an "already exists" error. Nothing further down
# can write its results without this folder, so stop here if it is unusable.
mkdir -p /data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/HISAT2/Alignment_OsJaponica_IRGSP || {
    echo "ERROR: cannot create /data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/HISAT2/Alignment_OsJaponica_IRGSP" >&2
    exit 1
}

# Trimmomatic output root (trimmed FASTQ files).
Input_fq_files=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/trimmomatic/trimmed_fq_files
# Alignment results: a sibling of the trimmomatic folder under RESULTS/.
# ${...%/trimmomatic/*} strips "/trimmomatic/trimmed_fq_files" from the path above.
output_directory=${Input_fq_files%/trimmomatic/*}/HISAT2/Alignment_OsJaponica_IRGSP

# Gene annotation (GFF3) stored in the References/ folder.
Transcripts_annotation=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/SCRIPTS/REFERENCES_USED_RENAMED_AS_PER_SCRIPTS/References/IRGSP_genes.gff3
# Value passed to `hisat2 -x`; sits in the same References/ folder as the annotation.
Reference_genome=${Transcripts_annotation%/*}/IRGSP_FORHISAT2/IRGSP

#####################################HISAT2_Build_Index#############################################
# Builds the HISAT2 genome index used by the alignment step.
#
# hisat2-build needs TWO positional arguments: the reference FASTA and the
# basename for the index files it writes. The basename used here is the FASTA
# path without ".fa", which is the same value the alignment step passes to
# `hisat2 -x`.
index_fasta=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/SCRIPTS/REFERENCES_USED_RENAMED_AS_PER_SCRIPTS/References/IRGSP_FORHISAT2/IRGSP.fa
index_base=${index_fasta%.fa}

# An index consists of eight files <base>.1.ht2 ... <base>.8.ht2 (".ht2l" for
# large indexes). Only rebuild when one of them is missing; delete the index
# files by hand to force a rebuild after the FASTA changes.
index_complete=yes
for index_part in 1 2 3 4 5 6 7 8; do
    if [ ! -e "$index_base.$index_part.ht2" ] && [ ! -e "$index_base.$index_part.ht2l" ]; then
        index_complete=no
        break
    fi
done

if [ "$index_complete" = yes ]; then
    echo "Genome Index already present at $index_base - skipping hisat2-build"
else
    echo "Building Index for the reference using HISAT build - creating .ht files"
    # Without a usable index every alignment below would fail, so abort here.
    if ! hisat2-build "$index_fasta" "$index_base"; then
        echo "ERROR: hisat2-build failed for $index_fasta" >&2
        exit 1
    fi
    echo "Genome Index built"
fi

# Gene models (GTF) from which the known splice sites are extracted.
gtf_file=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/SCRIPTS/REFERENCES_USED_RENAMED_AS_PER_SCRIPTS/References/IRGSP-1.0.42.gtf

#####################################Creating_Splicesite_file#############################################
# Writes the known-splice-site table that the alignment step passes to
# `hisat2 --known-splicesite-infile`.
echo "Identifying splicesites using hisat2_extract_splice_sites.py"
Splicesites_file=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/HISAT2/IRGSP_Splicesites_HISAT2.txt

# Write to a temporary file first: a plain redirect would leave an empty or
# truncated splice-site file behind when the extractor fails, and the aligner
# would then run without the known splice sites without anyone noticing.
splicesites_tmp=$Splicesites_file.tmp.$$
if python /data/shivaprasad/Softwares/hisat2-master/hisat2_extract_splice_sites.py "$gtf_file" > "$splicesites_tmp" \
    && [ -s "$splicesites_tmp" ]; then
    mv -f -- "$splicesites_tmp" "$Splicesites_file"
else
    rm -f -- "$splicesites_tmp"
    echo "ERROR: could not extract splice sites from $gtf_file" >&2
    exit 1
fi

#####################################HISAT2_Alignment#############################################
# Aligns every sample found in the paired-end FASTQ folder.
#
# For each *_R1_P_trimmed.fastq file, the R1/R2 pair and the unpaired R1 reads
# are aligned in two separate HISAT2 runs. Both SAM outputs are
# coordinate-sorted into BAM, merged into <sample>_sorted_HISAT2_ouput.bam and
# indexed. The SAM files are removed only after all of that succeeded.
#
# Uses variables set earlier in this script:
#   Reference_genome, Splicesites_file, Input_fq_files, output_directory
# Optional environment overrides:
#   HISAT2_THREADS   (default 30)  threads for hisat2 -p
#   SAMTOOLS_THREADS (default 20)  threads for samtools -@
echo "Aligning using HISAT2"

: "${Reference_genome:?Reference_genome (HISAT2 index basename) is not set}"
: "${Splicesites_file:?Splicesites_file is not set}"
: "${Input_fq_files:?Input_fq_files is not set}"
: "${output_directory:?output_directory is not set}"

HISAT2_THREADS=${HISAT2_THREADS:-30}
SAMTOOLS_THREADS=${SAMTOOLS_THREADS:-20}

# Runs the whole per-sample workflow for one paired-end R1 FASTQ file.
# Arguments: $1 - path to <sample>_R1_P_trimmed.fastq
# Returns:   0 on success; 1 as soon as any step fails (intermediate files are
#            left in place so the failure can be inspected).
hisat2_align_sample() {
    local R1=$1
    local sample R2 single_end_read_R1 single_end_read_R2 HISAT2_Output input_fq
    local PE_Samfile_Output PE_bamfile_Output PE_summaryfile
    local SE_Samfile_Output SE_bamfile_Output SE_summaryfile
    local Sorted_Final_bam_file

    # Sample prefix = file name without directory and without the R1 suffix.
    sample=${R1##*/}
    sample=${sample%_R1_P_trimmed.fastq}

    R2=$Input_fq_files/paired_end_fq/${sample}_R2_P_trimmed.fastq
    single_end_read_R1=$Input_fq_files/unpaired_fq/${sample}_R1_UP_trimmed.fastq
    single_end_read_R2=$Input_fq_files/unpaired_fq/${sample}_R2_UP_trimmed.fastq

    # NOTE(review): the concatenation below is disabled, so only the R1
    # unpaired file is aligned. If R2 has not already been appended to it by an
    # earlier run, pass both files as "-U file1,file2" instead. Confirm before
    # changing, because appending in place twice would duplicate reads.
    echo "catenating the unpaired reads into a single file $single_end_read_R1 and $single_end_read_R2"
    # cat $single_end_read_R2 >> $single_end_read_R1

    for input_fq in "$R2" "$single_end_read_R1"; do
        if [[ ! -f $input_fq ]]; then
            echo "ERROR: missing input file $input_fq" >&2
            return 1
        fi
    done

    HISAT2_Output=$output_directory/${sample}_HS2out
    mkdir -p "$HISAT2_Output" || return 1

    # NOTE(review): the original also defined *_novel_splicesites.txt paths but
    # never passed them to hisat2 (--novel-splicesite-outfile); they are
    # dropped here. Add the option if those reports are wanted.
    echo "Processing with HISAT2 $R1 - PAIRED_END_READS"
    PE_Samfile_Output=$HISAT2_Output/${sample}_PE_HISAT2_ouput.sam
    PE_bamfile_Output=$HISAT2_Output/${sample}_PE_HISAT2_ouput.bam
    PE_summaryfile=$HISAT2_Output/${sample}_PE_Summaryfile.txt
    hisat2 -x "$Reference_genome" -q -1 "$R1" -2 "$R2" -S "$PE_Samfile_Output" \
        --known-splicesite-infile "$Splicesites_file" --phred33 --secondary \
        -p "$HISAT2_THREADS" -k 25 --no-unal --dta-cufflinks \
        --summary-file "$PE_summaryfile" || return 1

    echo "Processing with HISAT2 $R1 - SINGLE_END_READS"
    SE_Samfile_Output=$HISAT2_Output/${sample}_SE_HISAT2_ouput.sam
    SE_bamfile_Output=$HISAT2_Output/${sample}_SE_HISAT2_ouput.bam
    SE_summaryfile=$HISAT2_Output/${sample}_SE_Summaryfile.txt
    hisat2 -x "$Reference_genome" -q -U "$single_end_read_R1" -S "$SE_Samfile_Output" \
        --known-splicesite-infile "$Splicesites_file" --phred33 --secondary \
        -p "$HISAT2_THREADS" -k 25 --no-unal --dta-cufflinks \
        --summary-file "$SE_summaryfile" || return 1

    echo "Converting samfile to bamfile and sorting them using SAMTOOLS"
    Sorted_Final_bam_file=$HISAT2_Output/${sample}_sorted_HISAT2_ouput.bam
    # samtools sort reads SAM directly and writes BAM, so no `view | sort`
    # pipeline (whose first-stage failure would go unnoticed) is needed.
    samtools sort -@ "$SAMTOOLS_THREADS" -m 1G -o "$PE_bamfile_Output" "$PE_Samfile_Output" || return 1
    samtools index "$PE_bamfile_Output" || return 1
    samtools sort -@ "$SAMTOOLS_THREADS" -m 1G -o "$SE_bamfile_Output" "$SE_Samfile_Output" || return 1
    samtools index "$SE_bamfile_Output" || return 1

    # Merging coordinate-sorted inputs yields coordinate-sorted output, so the
    # merge result is the final file and needs no second sort.
    # -f overwrites the result of a previous run.
    samtools merge -f -@ "$SAMTOOLS_THREADS" "$Sorted_Final_bam_file" \
        "$PE_bamfile_Output" "$SE_bamfile_Output" || return 1
    samtools index "$Sorted_Final_bam_file" || return 1

    # Reached only when every step above succeeded.
    rm -f -- "$PE_Samfile_Output" "$SE_Samfile_Output"
}

# Read the sorted file list line by line so paths containing spaces survive.
mapfile -t fqfiles < <(find "$Input_fq_files/paired_end_fq" -type f -name "*_R1_P_trimmed.fastq" | sort)
if (( ${#fqfiles[@]} == 0 )); then
    echo "WARNING: no *_R1_P_trimmed.fastq files found in $Input_fq_files/paired_end_fq" >&2
fi

failed_samples=0
for R1 in "${fqfiles[@]}"; do
    if hisat2_align_sample "$R1"; then
        echo "COMPLETE PROCESSING DONE FOR $R1"
    else
        # Keep going so one bad sample does not block the remaining ones.
        echo "ERROR: processing failed for $R1 - intermediate files were kept" >&2
        failed_samples=$((failed_samples + 1))
    fi
done

# Last command: makes the script's exit status non-zero if any sample failed.
[ "$failed_samples" -eq 0 ]



