#!/usr/bin/perl

use strict;
use warnings;

# Prompt on STDERR (so STDOUT carries only the report) for the path of the
# tab-delimited data matrix to summarise.
print STDERR "Please enter filename for data matrix\n";
my $filename = <STDIN>;
die "No filename was supplied on STDIN\n" unless defined $filename;
chomp $filename;

# Three-arg open treats the name literally, so input such as "cmd |" or
# ">file" can no longer be interpreted as a pipe or a write mode.  Two-arg
# open used to ignore surrounding whitespace; trim a copy of the name so that
# such input still opens, while $filename itself is reported unchanged.
( my $matrix_path = $filename ) =~ s/^\s+|\s+$//g;
open( my $matrix_fh, '<', $matrix_path )
    or die "Couldn't open file: $filename: $!\n";

# Bin positions run from $upstream_offset to $downstream_offset inclusive.
my $upstream_offset = -500;
my $downstream_offset = 640;
my $totalbins = $downstream_offset - $upstream_offset + 1;

# $cpd_sum[$k] is the column-wise total, over all accepted rows, of value
# column $k (0-based, not counting the leading accession column).
my @cpd_sum;
my $genetotal = 0;    # number of rows accepted and summed
#my @dipy_sum;
while ( my $line = <$matrix_fh> )
{
    chomp $line;

    # Only rows that start with an accession matching this pattern are
    # summed (it looks like yeast systematic ORF names, e.g. YAL001C --
    # TODO confirm); everything else is reported and skipped.
    unless ( $line =~ /^(Y[A-P][LR][0-9]{3}[CW]\-?[A-H]?)/ )
    {
        print STDERR "No match for line: $line\n";
        next;
    }
    my $acc = $1;

    # Expected layout: one accession column followed by two runs of
    # $totalbins values each.
    my @fields = split /\t/, $line;
    if ( scalar @fields != ( 2 * $totalbins + 1 ) )
    {
        die "Wrong number of bins for gene: $acc\n";
    }

    # Field 0 is the accession, so value column $col lands in slot $col - 1.
    for my $col ( 1 .. $#fields )
    {
        $cpd_sum[ $col - 1 ] += $fields[$col];
    }
    $genetotal++;
}
close $matrix_fh;

# Half-way point of the accumulated sums: indices below it are reported in
# the "TS" table, indices from it onward in the "NTS" table.
my $midway = (scalar @cpd_sum) / 2;

# Validate before anything is written to STDOUT, so that a bad input never
# leaves a truncated report behind.
if ( $genetotal == 0 )
{
    # With no accepted rows @cpd_sum is empty; say so explicitly instead of
    # reporting a misleading bin-count mismatch.
    die "No gene rows were read from file: $filename\n";
}
if ( $midway != $totalbins )
{
    die "Mismatch in number of bins in input matrix and sum_all file\n";
}

print "From file: $filename\t for transcription frequency subsets\n";

# print results:
# "TS" table (presumably the transcribed strand -- confirm): a header row of
# bin positions, the summed values, the gene count repeated per bin, and the
# per-gene average.  Rows are tab-separated.
{
    my @ts_idx = ( 0 .. $midway - 1 );

    print join( "\t", "TS", $upstream_offset .. $downstream_offset );
    print "\n", join( "\t", "Dyad Coverage", @cpd_sum[@ts_idx] );
    print "\n", join( "\t", "Number of Genes", ($genetotal) x scalar @ts_idx );

    # $genetotal is known to be positive here (checked above), so the
    # division needs no guard; Perl's "/" is floating-point division.
    my @ts_avg = map { $cpd_sum[$_] / $genetotal } @ts_idx;
    print "\n", join( "\t", "Avg Dyad Coverage", @ts_avg );
    print "\n\n";
}
# "NTS" table (presumably the non-transcribed strand -- confirm).  Its values
# are the second half of @cpd_sum, i.e. indices $midway through the last one.
# Same four tab-separated rows as the "TS" table.
{
    my @nts_idx = ( $midway .. $#cpd_sum );

    # Per-gene average for each bin; left blank when no genes were counted.
    my @nts_avg =
        map { $genetotal > 0 ? 1.0 * $cpd_sum[$_] / $genetotal : "" } @nts_idx;

    print join( "\t", "NTS", $upstream_offset .. $downstream_offset );
    print "\n", join( "\t", "Dyad Coverage", @cpd_sum[@nts_idx] );
    print "\n", join( "\t", "Number of Genes", ($genetotal) x scalar @nts_idx );
    print "\n", join( "\t", "Avg Dyad Coverage", @nts_avg );
    print "\n\n";
}
# "Strand Avg" table: for every bin, combine the value from the first half of
# @cpd_sum with its counterpart $midway slots later in the second half.
{
    my @bin_idx = ( 0 .. $midway - 1 );

    # Sum of both halves for each bin.
    my @both_halves = map { $cpd_sum[$_] + $cpd_sum[ $midway + $_ ] } @bin_idx;

    # Each gene contributes one value per half, hence twice the gene count as
    # the divisor; the average is left blank when no genes were counted.
    my $divisor = 2 * $genetotal;
    my @both_avg = map { $divisor > 0 ? 1.0 * $_ / $divisor : "" } @both_halves;

    print join( "\t", "Strand Avg", $upstream_offset .. $downstream_offset );
    print "\n", join( "\t", "Dyad Coverage", @both_halves );
    print "\n", join( "\t", "Avg Nuc Coverage", @both_avg );

    print "\n";
}
