#!/usr/bin/perl

#./overlapTranscripts.pl -g /data/projects/nguyen/Schnitzler/2021_02_23/symbio/Hsym_genemodels.bed -t /data/projects/nguyen/Schnitzler/2021_02_23/symbio/Hsym_transcripts.bed -o /data/projects/nguyen/Schnitzler/2021_02_23/symbio/Hsym.overl.out

use strict;
use warnings;

use Bio::Perl;
use Data::Dumper;
use Getopt::Std;
use Bio::Range;



# Help text shown (via die) when a required option is missing.
# Only -g, -t and -o are accepted by this script, so only those are listed.
my $usage = <<"END_USAGE";

USAGE:
        $0 -g [gene models] -t [transcript] -o [output]

 -g: .bed gene models
 -t: .bed transcripts
 -o: Output file

END_USAGE


# Parse the command line. getopt('gto') treats -g, -t and -o as switches
# that each take a value and stores them in %args keyed by the letter.
my %args;
getopt('gto', \%args);

# Plain assignments: "my $x = ... if COND" has undefined behaviour in Perl
# (see perlsyn), and the truthiness check below already rejects missing
# or empty values.
my $outfile     = $args{'o'};
my $gene_models = $args{'g'};
my $transcript  = $args{'t'};

# All three options are mandatory; print the usage text and exit otherwise.
die $usage unless ($outfile && $gene_models && $transcript);

# Load the transcript intervals, grouped by scaffold:
#   $transcript_data->{scaffold} = [ [start, end], [start, end], ... ]
# Only the first three tab-separated columns of each line are used.
my $transcript_data = {};

# Three-argument open with a lexical handle: the file name comes from the
# command line and must never be interpreted as an open mode or a pipe.
open(my $transcript_fh, '<', $transcript)
    or die "Can't open file for reading: $transcript: $!\n";
while (my $line = <$transcript_fh>) {
    chomp $line;
    #HyS0001	88378	110798	align_id:79809|asmbl_1
    #HyS0001	105845	106179	align_id:79810|asmbl_2
    my ($scaffold, $start, $end) = split /\t/, $line;

    # Skip blank or truncated lines instead of storing undef coordinates.
    next unless defined $end;

    # push autovivifies the per-scaffold array on first use.
    push @{ $transcript_data->{$scaffold} }, [$start, $end];
}
close $transcript_fh;

# For every gene model, report each transcript on the same scaffold whose
# interval overlaps the gene, together with the overlapping sub-interval
# and its length. One tab-separated output line per gene/transcript pair.
#
# NOTE(review): coordinates are treated as inclusive on both ends
# (length = end - start + 1). Standard BED is 0-based half-open; confirm
# that the input files follow the convention assumed here.
open(my $out_fh, '>', $outfile)
    or die "Can't open file for writing: $outfile: $!\n";
print {$out_fh} "#scaffold\tgene\tgene_start\tgene_end\ttranscript_start\ttranscript_end\talign_start\talign_end\talign_length\n";

open(my $gene_fh, '<', $gene_models)
    or die "Can't open file for reading: $gene_models: $!\n";
while (my $line = <$gene_fh>) {
    chomp $line;
    #HyS0001	43811	44158	HyS0001.1
    #HyS0001	56439	56948	HyS0001.2
    my ($scaffold, $g_start, $g_end, $gene) = split /\t/, $line;

    # Skip blank or truncated lines that carry no usable coordinates.
    next unless defined $g_end;
    $gene = '' unless defined $gene;

    # Scaffolds without any transcript cannot produce overlaps.
    my $transcripts = $transcript_data->{$scaffold} or next;

    for my $interval (@$transcripts) {
        my ($t_start, $t_end) = @$interval;

        # Cheap rejection of disjoint intervals before building objects.
        next if $t_start > $g_end || $t_end < $g_start;

        my $gene_range = Bio::Range->new(
            -start => $g_start, -end => $g_end, -strand => +1);
        my $transcript_range = Bio::Range->new(
            -start => $t_start, -end => $t_end, -strand => +1);

        # In list context intersection() yields (start, end, strand);
        # call it once and bail out if there is no common region.
        my ($start, $stop) = $gene_range->intersection($transcript_range);
        next unless defined $start;

        my $intersect_length = ($stop - $start) + 1;
        print {$out_fh} join("\t",
            $scaffold, $gene, $g_start, $g_end,
            $t_start, $t_end, $start, $stop, $intersect_length), "\n";
    }
}
close $gene_fh;

# Buffered write errors (e.g. disk full) only surface at close time.
close $out_fh
    or die "Can't close output file: $outfile: $!\n";
#
