#!/usr/bin/perl

use strict;
use warnings;

use lib '../';
use GeneCoord;
use CPDReadValues;

# Print $prompt on STDERR and return the next line of STDIN with its trailing
# newline removed. Dies if STDIN is already at end of input, because readline
# then returns undef and there is no filename to work with.
sub read_filename_from_stdin
{
	my ($prompt) = @_;

	print STDERR $prompt;
	my $filename = <STDIN>;
	if ( !defined $filename )
	{
		die "Unexpected end of input on STDIN while waiting for a filename\n";
	}
	chomp($filename);

	return $filename;
}

# Ask for the two read files to analyze. Prompts and progress messages go to
# STDERR; the report itself is printed to STDOUT.
my $plusfile = read_filename_from_stdin("Enter filename of plus strand reads\n");
my $minusfile = read_filename_from_stdin("Enter filename of minus strand reads\n");

print STDERR "Loading Gene coordinates\n";
my $genes = GeneCoord->new();
print STDERR "Loading Probe Values\n";
my $reads = CPDReadValues->new($plusfile, $minusfile);

# Bin centers are expressed as offsets from each gene's transcription start
# site and run from $upstream_offset to $downstream_offset inclusive.
my ($upstream_offset, $downstream_offset) = (-500, 640);

# Bin centers are spaced $binsize (15 bp) apart. Reads are gathered from
# center - $binwindow to center + $binwindow, so with $binwindow equal to
# $binsize every window covers 2 * $binsize + 1 positions and neighbouring
# windows overlap.
my $binsize = 15;
my $binwindow = $binsize;

# Annotation tables supplied by GeneCoord.
my %chromosomes = $genes->get_chromosomes();	# chromosome => array ref of gene accessions
my %trxstart = $genes->get_tss();		# accession => transcription start position
my %trxend = $genes->get_tts();			# NOTE(review): presumably accession => transcription end; confirm in GeneCoord
my %strand = $genes->get_strand();		# accession => "+" or "-"

# Running total and count of every normalized bin value that gets printed;
# used for the overall average reported at the end of the run.
my ($sumvals, $countvals) = (0, 0);

# First header line: bin size and the two input files.
print "Bin size = $binsize\tData from file: $plusfile\t$minusfile\n";

# Second header line: "YORF", then one column title per bin center for the
# TS values, then the same offsets again for the NTS values.
{
	my @header_offsets = ();
	my $offset = $upstream_offset;
	while ( $offset <= $downstream_offset )
	{
		push @header_offsets, $offset;
		$offset += $binsize;
	}

	my @ts_titles = map { "$_ (TS)" } @header_offsets;
	my @nts_titles = map { "$_ (NTS)" } @header_offsets;
	print join( "\t", "YORF", @ts_titles, @nts_titles ), "\n";
}

# Average the read values found at positions $center - $halfwidth through
# $center + $halfwidth in the hash referenced by $reads_for (position => value).
# Only positions that have an entry contribute to the average; positions
# without an entry are skipped rather than counted as zero. Returns 0 when no
# position in the window has an entry.
# NOTE(review): the original code called the covered positions "dipys", so the
# keys are presumably dipyrimidine sites; confirm against CPDReadValues.
sub mean_reads_in_window
{
	my ($reads_for, $center, $halfwidth) = @_;

	my $read_sum = 0;
	my $covered_positions = 0;
	for ( my $position = $center - $halfwidth; $position <= $center + $halfwidth; $position++ )
	{
		next unless exists $reads_for->{$position};
		$read_sum += $reads_for->{$position};
		$covered_positions++;
	}

	return $covered_positions > 0 ? $read_sum / $covered_positions : 0;
}

# For every gene, print one row: the accession, the normalized value of each
# bin for the TS columns, then the normalized value of each bin for the NTS
# columns (same column order as the header). Every printed value is also added
# to $sumvals / $countvals for the overall average.
foreach my $chr (sort keys %chromosomes)
{
	print STDERR "Starting $chr\n";
	my %plusreads = $reads->get_plus_reads_for_chromosome($chr);
	my $num_plusreads = scalar keys %plusreads;
	my %minusreads = $reads->get_minus_reads_for_chromosome($chr);
	my $num_minusreads = scalar keys %minusreads;
	print STDERR "$chr reads: $num_plusreads plus reads and $num_minusreads minus reads\n";

	foreach my $acc ( @{$chromosomes{$chr}} )
	{
		my $tss = $trxstart{$acc};

		# Strand is fixed per gene, so validate it once instead of once per bin.
		my $gene_strand = defined $strand{$acc} ? $strand{$acc} : "";
		if ( $gene_strand ne "+" && $gene_strand ne "-" )
		{
			die "No strand information for gene: $acc\n";
		}

		# Offsets are relative to the direction of transcription: they are
		# added to the TSS on the plus strand and subtracted on the minus strand.
		my $direction = $gene_strand eq "+" ? 1 : -1;

		# For a plus-strand gene the minus-strand reads fill the TS columns and
		# the plus-strand reads fill the NTS columns; for a minus-strand gene
		# it is the other way round.
		# NOTE(review): TS/NTS presumably mean transcribed / non-transcribed
		# strand; confirm.
		my ($ts_reads, $nts_reads) = $gene_strand eq "+"
			? ( \%minusreads, \%plusreads )
			: ( \%plusreads, \%minusreads );

		my @normcpd_ts = ();
		my @normcpd_nts = ();
		for ( my $i = $upstream_offset; $i <= $downstream_offset; $i += $binsize )
		{
			my $pos = $tss + $direction * $i;
			push @normcpd_ts, mean_reads_in_window( $ts_reads, $pos, $binwindow );
			push @normcpd_nts, mean_reads_in_window( $nts_reads, $pos, $binwindow );
		}

		print join( "\t", $acc, @normcpd_ts, @normcpd_nts ), "\n";

		# Bins without any reads are reported as 0 and are included in the
		# overall average.
		foreach my $val ( @normcpd_ts, @normcpd_nts )
		{
			$sumvals += $val;
			$countvals++;
		}
	}
}
# Report the mean of all normalized bin values accumulated in $sumvals /
# $countvals. When nothing was accumulated (e.g. no genes were processed) the
# division would die with "Illegal division by zero", so report that case
# explicitly instead.
my $avgval = $countvals > 0
	? $sumvals * 1.0 / $countvals
	: "n/a (no bin values were collected)";
print STDERR "Average value in tss region: $avgval\n";
