#!/usr/bin/perl

use strict;
use warnings;

# Ask for the name of the input file on STDIN.
print "Enter name of cluster file to center\n";
my $clusterfile = <STDIN>;
die "No cluster file name given\n" unless defined $clusterfile;
chomp $clusterfile;

# Drop surrounding whitespace (a stray trailing space or carriage return).
# The three-argument open used below takes the name literally, whereas the
# two-argument form ignores leading and trailing whitespace, so trim here to
# keep such names working.
$clusterfile =~ s/^\s+|\s+$//g;

# The output name is the input name with "_centered" inserted before the
# extension: "name.txt" becomes "name_centered.txt".  The pattern is anchored
# to the end of the name so that a ".txt" occurring earlier in the path is
# never the one that gets rewritten.
my $clusterout = $clusterfile;
$clusterout =~ s/\.txt\z/_centered.txt/ or die "file not .txt file\n";

# Three-argument open: the typed name is always treated as a plain path and
# can never be interpreted as an open mode or a command to run.
open ( CLUSTER, '<', $clusterfile ) or die "Couldn't open file $clusterfile: $!\n";

=pod
print "What is the avg. value for centering?\n";
my $avg = <STDIN>;
chomp $avg;
my $logavg = log($avg)/log(2.0);
=cut

open ( OUT, '>', $clusterout ) or die "Couldn't open file $clusterout: $!\n";

# Header handling: when the first line contains "YORF" it is copied to the
# output as the header.  Otherwise the first line is treated as an extra
# leading line, echoed to STDOUT, and the line after it is copied to the
# output as the header instead.
my $extraheader = <CLUSTER>;
die "File $clusterfile is empty\n" unless defined $extraheader;
if ( $extraheader =~ /YORF/ )
{
	print OUT $extraheader;
}
else
{
	print "1st line: $extraheader\n";
	my $header = <CLUSTER>;
	die "File $clusterfile has no header line after the first line\n" unless defined $header;
	print OUT $header;
}

# Parsed input, filled in by the loop below:
#   @data   - every log2-transformed value, pooled over all rows and columns
#   @matrix - one tab-joined string per input row: the first field unchanged,
#             followed by the log2 value of each remaining field ("" where
#             the field has no usable value)
my @data = ();
my @matrix = ();
while ( my $line = <CLUSTER> )
{
	chomp $line;
	# chomp only removes "\n"; also drop the carriage return left behind by
	# files with CRLF line endings so it does not stick to the last field.
	$line =~ s/\r\z//;
	
	# to keep empty last tab, see https://stackoverflow.com/questions/3711649/perl-split-with-empty-text-before-after-delimiters
	my @fields = split /\t/, $line, -1;

	# split returns an empty list for an empty line, so the first field may
	# be missing entirely.
	my $row = defined $fields[0] ? $fields[0] : "";
	for my $i ( 1 .. $#fields )
	{
		my $val = "";
		if ( $fields[$i] ne "" )
		{
			if ( $fields[$i] > 0 )
			{
				$val = log($fields[$i])/log(2.0);
				push @data, $val;
			}
			elsif ( $fields[$i] < 0 )
			{
				# log() of a negative number is a fatal error in Perl;
				# leave the cell blank, as is done for zero, and report it.
				warn "Skipping negative value '$fields[$i]' on input line $.\n";
			}
		}
		$row .= "\t$val";
	}
	push @matrix, $row;
}

# calculate median, based off of code from: https://stackoverflow.com/questions/5119034/using-perl-to-find-median-mode-standard-deviation
#
# The median is taken over all values in @data at once.  For an odd count it
# is the middle element of the sorted list; for an even count it is the mean
# of the two middle elements.

my $median;
my @sorted = sort {$a <=> $b} @data;
my $midindex = int ( (scalar @sorted) / 2 );
if ( !@sorted )
{
	# Nothing to take a median of.  Use 0 so the reported value stays what
	# it was before, without reading elements that do not exist.
	warn "no numeric values found; using 0 as median\n";
	$median = 0;
}
elsif ( (scalar @sorted) % 2 == 1 )
{
	$median = 1.0 * $sorted[$midindex];
}
else
{
	$median = ( $sorted[$midindex] + $sorted[$midindex - 1])/ 2.0;
}

print STDERR "median is $median\n";

# Write one output line per stored row: the first field unchanged, then each
# remaining field with the median subtracted.  Empty fields stay empty.
foreach my $line (@matrix)
{
	# A row may be undefined or empty (from an empty input line); split then
	# yields no fields at all, so fall back to an empty first field.
	my ( $label, @values ) = split /\t/, ( defined $line ? $line : "" ), -1;
	$label = "" unless defined $label;

	my @centered = map { $_ ne "" ? $_ - $median : "" } @values;
	print OUT join( "\t", $label, @centered ), "\n";
}

# Output is buffered, so a failed write (for example a full disk) may only be
# reported when the handle is closed.
close ( OUT ) or die "Couldn't finish writing file $clusterout: $!\n";
