#!/usr/bin/env perl
use strict;
use warnings;
use Math::Complex;
sub log2;


# ---------------------------------------------------------------------------
# Splice-junction summary.
#
# Usage: <script> <file.sjj> <threshold value>
#
# Reads a tab-separated junction count file and, for every junction whose
# spliced-read support passes the threshold, writes one line to each of:
#   <prefix>.out      per-junction table (theta/psi metrics, log2 read total)
#   <prefix>.bed      junction interval, scored by theta_int
#   <prefix>_5ss.bed  window around the donor-side coordinate, scored by theta_5
#   <prefix>_3ss.bed  window around the acceptor-side coordinate, scored by theta_3
#   <prefix>_BP.bed   75-base window ending at the second coordinate (+ strand)
#                     or starting at the first coordinate (- strand), scored
#                     by theta_3
# <prefix> is the input path with its final extension removed.
#
# Input columns used (0-based):
#   0     junction id of the form chr_start_end_strand
#   1-5   read counts (described where they are unpacked below)
#   6     gene number, zero-padded to 11 characters and prefixed with "ENSG"
# Columns 7 and above are not used by this script.
# ---------------------------------------------------------------------------

# Validate the command line before touching the arguments.
die "Usage: $0 <file.sjj> <threshold value>\n" if @ARGV < 2;
my ($infile, $baseline) = @ARGV;

# The threshold takes part in numeric comparisons for every record; reject
# anything that is not a plain decimal number up front.
die "Threshold must be a number, got '$baseline'\n"
    unless $baseline =~ /\A\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*\z/;

# Output prefix: strip only the last ".ext" of the file name. Directory
# components and names containing '-' or extra dots are left intact.
(my $prefix = $infile) =~ s{(?<=[^/\\])\.[^./\\]+\z}{};

open(my $in_fh, '<', $infile) or die "can't open $infile $!\n";

my $junction_bed = _open_output("$prefix.bed");
my $five_ss_bed  = _open_output("${prefix}_5ss.bed");
my $three_ss_bed = _open_output("${prefix}_3ss.bed");
my $bp_bed       = _open_output("${prefix}_BP.bed");
my $table_out    = _open_output("$prefix.out");

print {$table_out} "seq_id\tGene\tchr\tExEnd\tsecondExstart\tintron_Length\tstrand\ttheta_5\ttheta_3\ttheta_int\tpsi_5\tpsi_3\tpsi\tall_reads\n";

RECORD:
while (my $record = <$in_fh>) {
    chomp $record;
    next RECORD unless $record =~ /\S/;    # ignore blank lines

    my @fields = split /\t/, $record;
    if (@fields < 7) {
        warn "$infile line $.: expected at least 7 tab-separated fields, skipping\n";
        next RECORD;
    }

    # $DA_SJ  : reads supporting the splice junction from Donor (D) to Acceptor (A)
    # $DAp_SJ : reads supporting a junction from D to any other acceptor A' (not A)
    # $DpA_SJ : reads supporting a junction from any other donor D' (not D) to A
    # $Dpre   : reads that cover D and the intron
    # $Apre   : reads that cover A and the intron
    my ($junction_id, $DA_SJ, $DAp_SJ, $DpA_SJ, $Dpre, $Apre, $gene_number)
        = @fields[0 .. 6];

    my ($chr, $start_1based, $secondExstart, $strand_code) = split /_/, $junction_id;
    unless (defined $secondExstart
        && $start_1based  =~ /\A\d+\z/
        && $secondExstart =~ /\A\d+\z/)
    {
        warn "$infile line $.: cannot parse junction id '$junction_id', skipping\n";
        next RECORD;
    }

    # Read-support filter. Both totals must be positive (they are used as
    # denominators below) and at least one must exceed the threshold. For a
    # threshold >= 0 this selects exactly the same records as
    #   (donor > t and acceptor > 0) or (donor > 0 and acceptor > t).
    my $donor_spliced    = $DA_SJ + $DAp_SJ;
    my $acceptor_spliced = $DA_SJ + $DpA_SJ;
    next RECORD unless $donor_spliced > 0 && $acceptor_spliced > 0;
    next RECORD unless $donor_spliced > $baseline || $acceptor_spliced > $baseline;

    # The first coordinate is shifted down by one before being written as the
    # BED start.
    my $firstExstart = $start_1based - 1;
    my $span         = abs($firstExstart - $secondExstart);
    if ($span == 0) {
        # log10(0) is undefined and would abort the whole run.
        warn "$infile line $.: zero-length interval in '$junction_id', skipping\n";
        next RECORD;
    }
    my $intronLength = log10($span);

    # Strand is written as '+' or '-'; the id may carry it as -1 or as '-'.
    $strand_code = '' unless defined $strand_code;
    my $is_minus = $strand_code eq '-'
        || ($strand_code =~ /\A\s*[-+]?(?:\d|\.\d)/ && $strand_code == -1);
    my $strand = $is_minus ? '-' : '+';

    # Zero-pad the gene number to 11 characters (longer values are kept as is).
    my $gene = sprintf('ENSG%011s', $gene_number);
    my $ID   = $junction_id . $gene;

    # Splice-site usage metrics.
    my $all_spliced = $DA_SJ + $DAp_SJ + $DpA_SJ;
    my $psi_5 = $DA_SJ / $donor_spliced;       # D->A junctions vs all junctions using D
    my $psi_3 = $DA_SJ / $acceptor_spliced;    # D->A junctions vs all junctions using A
    my $psi   = $DA_SJ / $all_spliced;         # spliced-in index for the D-A junction

    # pre-mRNA metrics
    my $all_reads = $all_spliced + $Dpre + $Apre;
    my $theta_5   = $donor_spliced    / ($donor_spliced    + $Dpre);    # fraction of D spliced
    my $theta_3   = $acceptor_spliced / ($acceptor_spliced + $Apre);    # fraction of A spliced
    my $theta_int = $all_spliced / $all_reads;                          # fraction of intron spliced
    my $reads     = log2($all_reads);

    print {$table_out} "$ID\t$gene\t$chr\t$firstExstart\t$secondExstart\t$intronLength\t$strand\t$theta_5\t$theta_3\t$theta_int\t$psi_5\t$psi_3\t$psi\t$reads\n";
    print {$junction_bed} "$chr\t$firstExstart\t$secondExstart\t$ID\t$theta_int\t$strand\n";

    # Strand-specific windows, each as a (start, end) pair.
    my (@five_ss, @three_ss, @branch);
    if ($strand eq '+') {
        @five_ss  = ($firstExstart - 3,   $firstExstart + 6);
        @three_ss = ($secondExstart - 20, $secondExstart + 3);
        @branch   = ($secondExstart - 75, $secondExstart);
    }
    else {
        @five_ss  = ($secondExstart - 6, $secondExstart + 3);
        @three_ss = ($firstExstart - 3,  $firstExstart + 20);
        @branch   = ($firstExstart,      $firstExstart + 75);
    }

    print {$five_ss_bed}  "$chr\t$five_ss[0]\t$five_ss[1]\t$ID\t$theta_5\t$strand\n";
    print {$three_ss_bed} "$chr\t$three_ss[0]\t$three_ss[1]\t$ID\t$theta_3\t$strand\n";
    print {$bp_bed}       "$chr\t$branch[0]\t$branch[1]\t$ID\t$theta_3\t$strand\n";
}

# Buffered write errors only surface at close, so check every output handle.
close($junction_bed) or die "error closing $prefix.bed: $!\n";
close($table_out)    or die "error closing $prefix.out: $!\n";
close($in_fh);
close($five_ss_bed)  or die "error closing ${prefix}_5ss.bed: $!\n";
close($three_ss_bed) or die "error closing ${prefix}_3ss.bed: $!\n";
close($bp_bed)       or die "error closing ${prefix}_BP.bed: $!\n";

# _open_output($path): open $path for writing (truncating it) and return the
# lexical filehandle; dies with the OS error if the file cannot be created.
sub _open_output {
    my ($path) = @_;
    open(my $fh, '>', $path) or die "can't open $path for writing: $!\n";
    return $fh;
}

# log2($n): base-2 logarithm of $n.
#
# Computed as log($n) divided by log(2), using whichever log() is in scope
# at this point in the file (the file loads Math::Complex near the top).
# Takes a single numeric argument and returns the quotient; no validation
# of the argument is performed here.
sub log2 {
	my ($value) = @_;
	my $log_of_value = log($value);
	my $log_of_two   = log(2);
	return $log_of_value / $log_of_two;
}

