# Re-run of the alignment using bowtie -k 1 and without the -m option

# Cutadapt execution script, followed by Bowtie alignment
# Cutadapt minimum read length: 30
#current_directory=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Scripts


# ---------------------------------------------------------------------------
# Step 1: adapter trimming with cutadapt.
#
# For every *.fastq.gz file located directly inside the Datasets directory,
# run cutadapt with the adapter given in ADAPTER_SEQUENCE (-a) and a minimum
# length of 30 (-m 30). The output is written to the Cutadapt directory under
# the same base name with the ".fastq.gz" suffix replaced by ".fastq".
# cutadapt's stderr is appended to $logfile.
#
# Variables set here and used later in the script: logfile.
# ---------------------------------------------------------------------------
project_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis
cutadapt_dir=$project_dir/Files/Cutadapt_Bowtie/Cutadapt
logfile=$project_dir/Files/Cutadapt_Bowtie/Cutadapt_Bowtie_LOGFILE.txt
ADAPTER_SEQUENCE="AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"

# -p creates the intermediate Files/ and Cutadapt_Bowtie/ directories as well
# and does not fail when the script is re-run and they already exist.
mkdir -p "$cutadapt_dir" || {
  echo "ERROR: cannot create $cutadapt_dir" >&2
  exit 1
}

# Newline-separated, sorted list of input files. -maxdepth is placed before
# the tests because GNU find warns when a global option follows them.
Original_Files=$(find "$project_dir/Datasets" -maxdepth 1 -type f -name "*.fastq.gz" | sort)
if [ -z "$Original_Files" ]; then
  echo "WARNING: no *.fastq.gz files found in $project_dir/Datasets" >&2
fi

# The list is read line by line on file descriptor 3 so that paths are not
# word-split and the commands inside the loop keep their normal stdin.
while IFS= read -r f <&3; do
  [ -n "$f" ] || continue

  # Strip the directory, then swap the .fastq.gz suffix for .fastq.
  trimmed_name=${f##*/}
  trimmed_name=${trimmed_name%.fastq.gz}.fastq
  OutputFile=$cutadapt_dir/$trimmed_name

  echo "Adapter trimming for $f"
  echo "File is being written in $OutputFile"

  # Stop on failure so that later steps never work on a missing or
  # partially written trimmed file.
  if ! cutadapt -a "$ADAPTER_SEQUENCE" -m 30 -o "$OutputFile" "$f" 2>> "$logfile"; then
    echo "ERROR: cutadapt failed for $f (see $logfile)" >&2
    exit 1
  fi
done 3<<< "$Original_Files"

# Script to run Bowtie in tandem with cutadapt
# Uses -v mode with 1 mismatch and saves aligned as well as unaligned reads
# Genome: japonica genome (IRGSP), stored as reference at /data/shivaprasad/Vivek/NRPD1_RNA_Seq/SCRIPTS/REFERENCES_USED_RENAMED_AS_PER_SCRIPTS/References
# This genome was built and indexed using bowtie - Ensemble_japonica
# ---------------------------------------------------------------------------
# Step 2: bowtie alignment of the trimmed reads, BAM conversion and
# duplicate removal.
#
# For every *.fastq file located directly inside the Cutadapt directory:
#   1. align with bowtie (-v 1 -k 1 -y --best, 30 threads) against the IRGSP
#      index, writing SAM plus the --al / --un / --max read files;
#   2. convert the SAM to a sorted BAM, delete the SAM, index the BAM;
#   3. run "samtools rmdup -s" into bamfiles/PCR_dup_removed and index it.
#
# Requires: $logfile (set earlier in this script). bowtie's stderr and the
# progress messages of the alignment step are appended to it.
# ---------------------------------------------------------------------------
index_dir=/data/shivaprasad/SwethaChenna/Vivek/PolIV/sRNA-Seq/Analysis/Reference/Ensemble_japonica/Bowtie-build
chipseq_out=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Cutadapt_Bowtie
bowtie_out=$chipseq_out/Bowtie_k1

# Print an error message on stderr and stop the script.
abort() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print a progress message on the terminal AND append it to the log file.
# (The previous "echo ... 2>> logfile" only redirected echo's empty stderr,
# so these messages never reached the log.)
log_note() {
  printf '%s\n' "$*" | tee -a "$logfile"
}

: "${logfile:?logfile must point at the pipeline log file}"

# Make a pipeline fail when any of its stages fails, so that a broken
# "samtools view" is not hidden by a successful "samtools sort".
set -o pipefail

cd "$index_dir" || abort "cannot cd to $index_dir"

# -p: create parents as needed and tolerate directories left by earlier runs.
mkdir -p \
  "$bowtie_out/samfiles" \
  "$bowtie_out/aligned" \
  "$bowtie_out/unaligned" \
  "$bowtie_out/multialigned" \
  "$bowtie_out/bamfiles/PCR_dup_removed" \
  || abort "cannot create output directories under $bowtie_out"

# Newline-separated, sorted list of trimmed read files.
files=$(find "$chipseq_out/Cutadapt" -maxdepth 1 -type f -name "*.fastq" | sort)
Reference_genome=$index_dir/IRGSP

if [ -z "$files" ]; then
  echo "WARNING: no *.fastq files found in $chipseq_out/Cutadapt" >&2
fi

log_note "Aligning files with -v 1 -k 1 -y --best"

# Read the list on file descriptor 3: no word-splitting of paths, and the
# tools inside the loop keep their normal stdin.
while IFS= read -r f <&3; do
  [ -n "$f" ] || continue
  log_note "Processing $f "

  # Sample name = file name without directory and without the .fastq suffix.
  sample=${f##*/}
  sample=${sample%.fastq}

  samfiles=$bowtie_out/samfiles/${sample}_k1.sam
  aligned=$bowtie_out/aligned/${sample}_k1_aligned.fa
  unaligned=$bowtie_out/unaligned/${sample}_k1_unaligned.fa
  multialigned=$bowtie_out/multialigned/${sample}_k1_multialigned.fa
  bamfiles=$bowtie_out/bamfiles/${sample}_k1.bam
  OutputnonredundantbamFile=$bowtie_out/bamfiles/PCR_dup_removed/${sample}_k1_rmdup.bam

  log_note "Aligning $f to IRGSP Ensemble_japonica genome and writing the output to $samfiles"
  # NOTE(review): per the bowtie manual, --max only receives reads that exceed
  # an -m limit; no -m is passed here, so the multialigned file is probably
  # never written - confirm whether that is intended.
  bowtie -q -v 1 -k 1 -y --best -p 30 "$Reference_genome" "$f" -S "$samfiles" \
    --al "$aligned" --un "$unaligned" --max "$multialigned" 2>> "$logfile" \
    || abort "bowtie failed for $f (see $logfile)"

  echo "Converting samfile $samfiles to bamfile $bamfiles and sorting them using SAMTOOLS"
  if ! samtools view -b "$samfiles" | samtools sort -@ 20 -o "$bamfiles"; then
    # Keep the SAM file: it is the only copy of the alignment at this point.
    abort "SAM to sorted BAM conversion failed for $samfiles (SAM file kept)"
  fi
  # The SAM is only removed once the sorted BAM was written successfully.
  rm -f -- "$samfiles"
  samtools index "$bamfiles" || abort "samtools index failed for $bamfiles"

  echo "removing PCR duplicates from $bamfiles"
  samtools rmdup -s "$bamfiles" "$OutputnonredundantbamFile" \
    || abort "samtools rmdup failed for $bamfiles"
  samtools index "$OutputnonredundantbamFile" \
    || abort "samtools index failed for $OutputnonredundantbamFile"
done 3<<< "$files"


