#!/usr/bin/perl
use strict;
use warnings;
use Cwd;
use Getopt::Long;
use Data::Dumper;
use File::Basename qw(basename dirname);
use FindBin qw($Bin $Script);
use PerlIO::gzip;

# File-scope state shared with the processing code further down.
my @line;               # scratch array for the fields of one input line
my %ID;                 # $ID{miRNA}{GeneID} => score kept for that pair
my $fIn;                # input file path,  set by -i / --in
my $fOut;               # output file path, set by -o / --out
my $max_score = 5;      # cutoff: highest score still reported, set by -cutoff
my $key1;               # loop variable available to the output pass (outer key)
my $key2;               # loop variable available to the output pass (inner key)

# Parse the command line. -h/--help, or any option Getopt::Long rejects,
# prints the usage text and terminates the script.
GetOptions(
            "help|h" => \&USAGE,
            "out|o:s" => \$fOut,
            "cutoff:s" => \$max_score,
            "in|i:s" => \$fIn,
            ) or USAGE();

# Both file names are mandatory. Test for "defined and non-empty" rather than
# plain truth so that a file literally named "0" is still accepted.
USAGE() unless (defined $fIn and length $fIn and defined $fOut and length $fOut);

# Print the usage/help text to STDOUT once and terminate the script.
#
# Called directly with no arguments, and also installed as the Getopt::Long
# handler for -h/--help; any arguments passed in are ignored.
# Never returns: ends the process via exit (default exit status).
sub USAGE {
        my $usage = <<"USAGE";
        
Copyright (C) - belong to Ma et al., 2021, Genome research. If you use or modify it, please cite the paper.

Description: Keep the minimum score (<= 5 (default) or defined cutoff), if target have multiple miRNA binding site penalty scores (TargetFinder Identification); 

Usage: perl Target_info_filter.pl -cutoff 5 -i INPUT_FILE -o OUTPUT_FILE 

  Options:
  
  -cutoff <VALUE>     allowed maxium penalty score (default: 5);
  
  --in 	  <file>
  -i	  <file>      input file

  --out   <file>
  -o	  <file>   	  output file 
            
  --help
  -h            	  Help

USAGE
        print $usage;
        exit;
}

# Main processing.
#
# Reads the tab-separated input file; the first three columns are used as
# miRNA, GeneID and score. For every miRNA/GeneID pair the lowest score seen
# is kept in %ID, and every pair whose kept score does not exceed $max_score
# is written to the output file, sorted by miRNA and then by GeneID.

# Three-argument open with lexical handles: the file names are taken
# literally, so characters such as '>' or '|' in a name are never
# interpreted as an open mode.
open my $in_fh,  '<', $fIn  or die "cannot open the file '$fIn': $!";
open my $out_fh, '>', $fOut or die "cannot open the file '$fOut': $!";

# print header
print {$out_fh} "miRNA\tGeneID\tScore\n";

while (my $record = <$in_fh>) {
	chomp $record;
	my ($mirna, $gene, $score) = split /\t/, $record;

	# A record without a third column carries no score; skip it instead of
	# storing an undefined value and printing an empty row later.
	next unless defined $score and length $score;

	# Use exists, not truth, as the "seen before" test: a stored minimum of 0
	# is a valid score and must not be replaced by a later, larger one.
	if (!exists $ID{$mirna}{$gene} or $ID{$mirna}{$gene} > $score) {
		$ID{$mirna}{$gene} = $score;
	}
}
close $in_fh;

# Both key levels are sorted so the output order is reproducible between runs.
for my $mirna (sort keys %ID) {
	for my $gene (sort keys %{ $ID{$mirna} }) {
		my $best = $ID{$mirna}{$gene};
		next unless $max_score >= $best;
		printf {$out_fh} "%s\t%s\t%s\n", $mirna, $gene, $best;
	}
}

# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
close $out_fh or die "cannot close the file '$fOut': $!";
