#!/usr/bin/perl

use strict;
use warnings;

use lib '../';

#use ArbitraryCoord;
use CPDNucReads;

# Binning parameters.
# $binwidth is the number of consecutive positions merged into one bin.
# $numbins is referenced only by the flanking-bin code that is currently
# disabled inside the =pod section further down.
my $numbins = 8;
my $binwidth = 463;

# Print a prompt on STDERR and read one line (a file name) from STDIN.
# Dies with a clear message if STDIN is already at end-of-input, instead of
# letting an undefined file name propagate into the read loader.
# Returns the line with its trailing newline removed.
sub prompt_for_filename
{
	my ($prompt) = @_;
	print STDERR $prompt;
	my $answer = <STDIN>;
	die "No filename supplied on standard input (reached end of input)\n"
		unless defined $answer;
	chomp($answer);
	return $answer;
}

# Ask for the two read files to analyze: plus strand first, then minus strand.
my $plusfile = prompt_for_filename("Enter filename of plus strand reads\n");
my $minusfile = prompt_for_filename("Enter filename of minus strand reads\n");

# Hand both files to CPDNucReads; the resulting object is queried per
# chromosome later in the script.
print STDERR "Loading Probe Values\n";
my $reads = CPDNucReads->new($plusfile, $minusfile);

# Print the report header (STDOUT), followed by a blank separator line.
print "HML\nSequencing data from file: $plusfile\t$minusfile\n";
print "\n";

# Accumulators filled while binning, both keyed by bin midpoint coordinate:
#   %cpdval   - sum of the read values found in the bin
#   %cpdcount - number of read entries (one per position and strand) in the bin
my %cpdval;
my %cpdcount;

# Load the HML region table. Each relevant line is tab-separated:
#   <chromosome name containing chr[XIV]+> <start> <end>
# Lines whose first field does not look like a chromosome name are ignored.
# Result: $hml{<chromosome>}{"start"} and $hml{<chromosome>}{"end"}.
my $hml_info_file = "../HMLInfo.txt";
open( my $chroms_fh, '<', $hml_info_file )
	or die "Couldn't open chromosome file $hml_info_file: $!\n";
my %hml;
while ( my $line = <$chroms_fh> )
{
	chomp $line;
	my @temp = split /\t/, $line;

	# Blank lines yield no fields at all; skip them along with any line
	# that does not start with a chromosome name.
	next unless defined $temp[0] && $temp[0] =~ /chr[XIV]+/;

	# A chromosome line without both coordinates cannot define a region.
	if ( @temp < 3 )
	{
		warn "Skipping line $. of $hml_info_file: expected chromosome, start and end\n";
		next;
	}

	$hml{$temp[0]}{"start"} = $temp[1];
	$hml{$temp[0]}{"end"} = $temp[2];
}
close($chroms_fh);

# Bin the reads of every HML region. Each region is walked in windows of
# $binwidth positions; for every window the values found on either strand
# are summed into %cpdval and the number of entries found is tallied in
# %cpdcount, both keyed by the window's midpoint coordinate.
for my $chr ( sort keys %hml ) {
    print STDERR "Starting $chr\n";

    # Fetch the plus strand first, then the minus strand, and report sizes.
    my %plusreads      = $reads->get_plus_reads_for_chromosome($chr);
    my $num_plusreads  = scalar keys %plusreads;
    my %minusreads     = $reads->get_minus_reads_for_chromosome($chr);
    my $num_minusreads = scalar keys %minusreads;
    print STDERR "$chr reads: $num_plusreads plus reads and $num_minusreads minus reads\n";

    # Region boundaries as loaded from the HML info file.
    my ( $hmlstart, $hmlend ) = @{ $hml{$chr} }{ "start", "end" };

=pod
	# calculate left edge HML
	for (my $i = 0; $i < $numbins; $i++ )
	{
		# calculate left edge HML
		my $start = $hmlstart - (($i + 1) * $binwidth) + 1;
		my $end = $hmlstart - ($i * $binwidth);
		my $middle = int( (($start + $end )/2.0) + 0.5 );
                for ( my $pos = $start; $pos <= $end; $pos++ )
                {       
		
                        if ( exists $plusreads{$pos} )
                        {       
                                $cpdval{$middle} += $plusreads{$pos};
                                $cpdcount{$middle}++;
                        }       
                        if ( exists $minusreads{$pos} )
                        {       
                                $cpdval{$middle} += $minusreads{$pos};
                                $cpdcount{$middle}++;
                        }       

                }  
	}

        for (my $i = 0; $i < $numbins; $i++ )
        {
                # calculate right edge HML
                my $start = $hmlend + ($i * $binwidth); 
                my $end = $hmlend + (($i + 1) * $binwidth) - 1;
                my $middle = int( (($start + $end )/2.0) + 0.5 );
                for ( my $pos = $start; $pos <= $end; $pos++ )
                {

                        if ( exists $plusreads{$pos} )
                        {
                                $cpdval{$middle} += $plusreads{$pos};
                                $cpdcount{$middle}++;
                        }
                        if ( exists $minusreads{$pos} )
                        {
                                $cpdval{$middle} += $minusreads{$pos};
                                $cpdcount{$middle}++;
                        }

                }
        }
=cut

    # Windows inside the region itself. A new window is only started while
    # its first position lies more than 10 positions before the region end;
    # the final window is still a full $binwidth wide.
    for ( my $bin_start = $hmlstart; $bin_start < ( $hmlend - 10 ); $bin_start += $binwidth ) {
        my $bin_end = $bin_start + $binwidth - 1;

        # Midpoint of the window, rounded half up; used as the bin's key.
        my $bin_mid = int( ( ( $bin_start + $bin_end ) / 2.0 ) + 0.5 );

        for ( my $pos = $bin_start; $pos <= $bin_end; $pos++ ) {

            # Same treatment for both strands, plus strand first.
            for my $strand_reads ( \%plusreads, \%minusreads ) {
                next unless exists $strand_reads->{$pos};
                $cpdval{$bin_mid} += $strand_reads->{$pos};
                $cpdcount{$bin_mid}++;
            }
        }
    }
}
# Emit the report as four tab-separated rows, one column per populated bin
# in ascending order of bin midpoint:
#   row 1 - midpoint divided by 1000
#   row 2 - mean value per entry (sum / entry count)
#   row 3 - summed values
#   row 4 - entry counts
# Every row after the first carries its own leading newline in its label.
my @bin_mids = sort { $a <=> $b } keys %cpdval;

my $header = join( "\t", "ChrIII location (kb)",
                   map { $_ / 1000.0 } @bin_mids );
my $avg    = join( "\t", "\nAverage:",
                   map { 1.0 * $cpdval{$_} / $cpdcount{$_} } @bin_mids );
my $cpd    = join( "\t", "\nCPD count:",  @cpdval{@bin_mids} );
my $dipy   = join( "\t", "\nDIPY count:", @cpdcount{@bin_mids} );

print $header, $avg, $cpd, $dipy, "\n";
print "\n";


