#!/usr/bin/perl
use warnings;
use strict;
use Bio::SeqIO::fastq;

# Positional command-line arguments, taken from @ARGV in this order:
#   $file   - BAM file with the reads mapped to the genome after sequential trimming
#   $file2  - FASTQ file with the original reads, before sequential trimming
#   $gff    - GFF file with the miR coordinates
#   $bowtie - bowtie-format output file with the reads mapped to the genome
my ($file, $file2, $gff, $bowtie)
    = (shift(@ARGV), shift(@ARGV), shift(@ARGV), shift(@ARGV));

# Lookup tables filled while the input files are read:
#   %output   - per read: [ difference of two coordinate fields, miR key ]
#   %multiple - per read: number of lines it occupies in the bowtie output
#   %mirs     - per miR key: difference of two coordinate fields of the GFF
my (%output, %multiple, %mirs);

# Select the reads that overlap the known miR regions. bedtools writes one line
# per read/feature overlap into a temporary file, which is parsed just below.
# A failing bedtools run aborts the script instead of silently producing an
# empty result.
# NOTE(review): $file and $gff are interpolated into a shell command line, so
# paths containing spaces or shell metacharacters will not work as-is.
system("bedtools intersect -abam $file -b $gff -wo > $file.tmp.sam") == 0
    or die "bedtools intersect failed (wait status $?)\n";

# Index the overlaps by field 3 of each line; every entry holds
# [ field 2 minus field 1, field 21 ] (0-based, whitespace-separated fields).
# NOTE(review): presumably fields 1-3 are the read's start, end and name, and
# field 21 identifies the overlapping miR -- confirm against the actual
# bedtools output for this GFF. A read overlapping several features keeps only
# the last overlap seen.
open(my $intersect_fh, '<', "$file.tmp.sam")
    or die "Cannot open $file.tmp.sam: $!\n";
while (my $line = <$intersect_fh>) {
    my @fields = split(/\s+/, $line);
    $output{ $fields[3] } = [ $fields[2] - $fields[1], $fields[21] ];
}
close $intersect_fh;

# Count how many lines of the bowtie output start with each read name, so that
# the number of mapping positions can be reported next to every read (to avoid
# overcounting multimappers).
# Three-argument open: the file name is never interpreted as an open mode, and
# an unreadable file stops the script instead of leaving %multiple empty.
open(my $bowtie_fh, '<', $bowtie)
    or die "Cannot open bowtie output '$bowtie': $!\n";
while (my $line = <$bowtie_fh>) {
    my @fields = split(/\s+/, $line);
    next unless @fields;    # blank line: there is no read name to count
    $multiple{ $fields[0] } += 1;
}
close $bowtie_fh;

# Load the miR annotation: for every line, store field 5 minus field 4 under
# the key found in field 8 (0-based, whitespace-separated fields).
# NOTE(review): in the standard nine-column GFF layout the 0-based start/end
# columns are 3 and 4, not 4 and 5 -- confirm the column layout of the input
# file before touching these indices. Attribute values containing spaces would
# also be split into several fields here.
open(my $gff_fh, '<', $gff)
    or die "Cannot open GFF file '$gff': $!\n";
while (my $line = <$gff_fh>) {
    my @fields = split(/\s+/, $line);
    # Header, comment and blank lines have no field 8 to use as a key.
    next if @fields < 9;
    $mirs{ $fields[8] } = $fields[5] - $fields[4];
}
close $gff_fh;

## Walk through the original (untrimmed) reads and report those that were
## selected by the intersection step and whose stored span exceeds the value
## recorded for their miR.
## Output columns (tab-separated): read name, original sequence, span,
## number of bowtie lines for the read, miR key.

my $seq = Bio::SeqIO::fastq->new('-format' => 'fastq', '-file' => $file2);

while (my $record = $seq->next_seq()) {
    # Read identifier with every '@' removed.
    (my $name = $record->id()) =~ s/\@//g;
    my $bases = $record->seq;

    # Only reads recorded by the intersection step are of interest.
    next unless exists $output{$name};

    my ($span, $mir_key) = @{ $output{$name} };

    # Keep the read only when its span is larger than the miR's stored value.
    next unless $span > $mirs{$mir_key};

    print $name, "\t", $bases, "\t", $span, "\t",
          $multiple{$name}, "\t", $mir_key, "\n";
}


# Remove the temporary intersection file.
unlink($file . '.tmp.sam');

