# Script for removing rRNA reads from the RNA-seq FASTQ files using SortMeRNA.
#Output_directory=sftp://svp@172.16.223.31/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna
#SORTMERNA_DB="~/Softwares/sortmerna-master/rRNA_databases/silva-bac-16s-id90.fasta,~/Softwares/sortmerna-master/index/silva-bac-16s-db:~/Softwares/sortmerna-master/rRNA_databases/silva-bac-23s-id98.fasta,~/Softwares/sortmerna-master/index/silva-bac-23s-db:~/Softwares/sortmerna-master/rRNA_databases/silva-arc-16s-id95.fasta,~/Softwares/sortmerna-master/index/silva-arc-16s-db:~/Softwares/sortmerna-master/rRNA_databases/silva-arc-23s-id98.fasta,~/Softwares/sortmerna-master/index/silva-arc-23s-db:~/Softwares/sortmerna-master/rRNA_databases/silva-euk-18s-id95.fasta,~/Softwares/sortmerna-master/index/silva-euk-18s-db:~/Softwares/sortmerna-master/rRNA_databases/silva-euk-28s-id98.fasta,~/Softwares/sortmerna-master/index/silva-euk-28s:~/Softwares/sortmerna-master/rRNA_databases/rfam-5s-database-id98.fasta,~/Softwares/sortmerna-master/index/rfam-5s-db:~/Softwares/sortmerna-master/rRNA_databases/rfam-5.8s-database-id98.fasta,~/Softwares/sortmerna-master/index/rfam-5.8s-db"
#Input_directory=sftp://svp@172.16.223.31/data/shivaprasad/Vivek/NRPD1_RNA_Seq/DATA
#gunzip /data/shivaprasad/Vivek/NRPD1_RNA_Seq/DATA/*.gz
# Create the output directories used by the merge, sortmerna and unmerge steps.
# -p also creates missing parent directories and does not fail when a directory
# already exists, so the script can be re-run safely.
mkdir -p \
  /data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/merged_fq/ \
  /data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/unmerged_fq/ \
  /data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/rRNA_aligned_reads/ \
  /data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/unaligned_reads/ \
  || { echo "ERROR: could not create the sortmerna output directories" >&2; exit 1; }

#Raw_fq=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/DATA/
#merged_fq=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/merged_fq/
#unmerged_fq=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/unmerged_fq/
#rRNA_aligned_fq=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/rRNA_aligned_reads/
#unaligned_fq=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/unaligned_reads/

echo "Merging the paired end reads"
# For every <sample>_R1.fastq found under the raw data directory, pass it and
# its mate <sample>_R2.fastq to merge-paired-reads.sh, which writes
# <sample>_merged.fastq into the merged_fq output directory.
# The file list is read line by line and every expansion is quoted, so paths
# containing spaces or glob characters reach the tool intact.
raw_fq_dir=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/DATA
merged_fq_dir=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/merged_fq
find "$raw_fq_dir/" -type f -name "*_R1.fastq" | sort | while IFS= read -r R1; do
  # Sample name = file name without directory and without the _R1.fastq suffix.
  sample=$(basename "$R1" _R1.fastq)
  # Both reads are expected directly inside the raw data directory.
  Read1=$raw_fq_dir/${sample}_R1.fastq
  Read2=$raw_fq_dir/${sample}_R2.fastq
  Merged_file=$merged_fq_dir/${sample}_merged.fastq
  # Skip incomplete pairs (missing mate, or an R1 file that was found in a
  # subdirectory) instead of handing non-existent paths to the merge tool.
  if [ ! -f "$Read1" ] || [ ! -f "$Read2" ]; then
    echo "WARNING: skipping $R1: expected both $Read1 and $Read2" >&2
    continue
  fi
  # stdin is redirected so the tool can never consume the file list that the
  # surrounding loop is reading from the pipe.
  merge-paired-reads.sh "$Read1" "$Read2" "$Merged_file" </dev/null
done

echo "Running sortmerna"
# Run sortmerna on every merged FASTQ file. Reads reported as aligned to the
# rRNA references go to rRNA_aligned_reads/, all other reads to unaligned_reads/.
#
# SORTMERNA_DB is not assigned anywhere in this script (the only definition is
# a commented-out example), so it has to come from the environment. Abort with
# a clear message instead of calling sortmerna with an empty --ref argument.
# Use absolute paths or $HOME in its value: a ~ inside a quoted string is not
# expanded by the shell.
: "${SORTMERNA_DB:?must be set to the sortmerna --ref list (ref.fasta,index[:ref.fasta,index...])}"
sortmerna_out_dir=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna
find "$sortmerna_out_dir/merged_fq/" -type f -name "*.fastq" | sort | while IFS= read -r m; do
  merged_name=$(basename "$m")
  # NOTE(review): SortMeRNA 2.x documents --aligned/--other as base names to
  # which the file extension is appended, so the files actually written may end
  # in ".fastq.fastq" - confirm against the installed version.
  rRNAalignedfile=$sortmerna_out_dir/rRNA_aligned_reads/${merged_name/_merged.fastq/_rRNAaligned.fastq}
  Nonalignedfile=$sortmerna_out_dir/unaligned_reads/${merged_name/_merged.fastq/_NONrRNA.fastq}
  # stdin is redirected so sortmerna can never consume the file list that the
  # surrounding loop is reading from the pipe.
  sortmerna --ref "$SORTMERNA_DB" --reads "$m" --paired_in -a 16 --log --fastx \
    --aligned "$rRNAalignedfile" --other "$Nonalignedfile" </dev/null
done

echo "Unmerging the paired reads from sortmerna output"
# Split every non-rRNA FASTQ file written by sortmerna back into two files,
# <sample>_R1_sortmerna.fastq and <sample>_R2_sortmerna.fastq, in unmerged_fq/.
# The file list is read line by line and every expansion is quoted, so paths
# containing spaces or glob characters reach the tool intact.
unaligned_fq_dir=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/unaligned_reads
unmerged_fq_dir=/data/shivaprasad/Vivek/NRPD1_RNA_Seq/RESULTS/sortmerna/unmerged_fq
find "$unaligned_fq_dir/" -type f -name "*.fastq" | sort | while IFS= read -r NONrRNA; do
  nonrrna_name=$(basename "$NONrRNA")
  # Without the _NONrRNA.fastq marker both output names would be identical and
  # the second read file would overwrite the first, so skip such files.
  case $nonrrna_name in
    *_NONrRNA.fastq*) ;;
    *)
      echo "WARNING: skipping $NONrRNA: name does not contain _NONrRNA.fastq" >&2
      continue
      ;;
  esac
  unmergedRead1=$unmerged_fq_dir/${nonrrna_name/_NONrRNA.fastq/_R1_sortmerna.fastq}
  unmergedRead2=$unmerged_fq_dir/${nonrrna_name/_NONrRNA.fastq/_R2_sortmerna.fastq}
  # stdin is redirected so the tool can never consume the file list that the
  # surrounding loop is reading from the pipe.
  unmerge-paired-reads.sh "$NONrRNA" "$unmergedRead1" "$unmergedRead2" </dev/null
done
