#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;


# Usage text; printed for -h and when the command line cannot be parsed.
my $USAGE = "perl fixed_bin_sites.pl [-w window_size] inputfile.txt > test\n";

# Width of each bin in bases. Defaults to 1000 so the script also works
# when -w is not supplied (an undefined width would make the later
# division by the window size fatal).
my $WINDOW_SIZE = 1000;

# Command-line options:
#   -w N   bin width, a positive integer
#   -h     print the usage text and exit
my %option;
getopts( 'w:h', \%option ) or die $USAGE;

if ( $option{h} ) {
    # STDOUT is normally redirected into the result file, so the help
    # text goes to STDERR.
    print STDERR $USAGE;
    exit 0;
}

if ( defined $option{w} ) {
    # Zero, negative or non-numeric widths cannot be used for binning.
    $option{w} =~ /^[1-9][0-9]*$/
        or die "window size (-w) must be a positive integer, got '$option{w}'\n$USAGE";
    $WINDOW_SIZE = $option{w};
}


# Chromosome / scaffold names whose records are dropped. Matching is an
# unanchored, case-insensitive substring match against column 1.
my @stoplist = qw/chrM chrU chrXHet chrYHet chr2LHet chr2RHet chr3LHet chr3RHet chrUextra/;
my $stop_pattern = join '|', map { quotemeta } @stoplist;
my $stop_re      = qr/$stop_pattern/i;

my $input_filename = $ARGV[0];
defined $input_filename
    or die "no input file given\nusage: $USAGE";
open(my $INFILE, "<", $input_filename)
    or die "unable to open ct file $input_filename: $!\n";

# $arr2d collects one array ref (the tab-split columns) per kept record.
# $first_line remembers the header line, i.e. the line starting with
# "Insertion".
my $arr2d = [];
my $first_line;
while ( my $line = <$INFILE> ) {
    chomp $line;
#    if ($line=~/^Identifier/) {
    if ( $line =~ /^Insertion/ ) {
        $first_line = $line;
        next;
    }

    # Blank lines carry no record.
    next if $line !~ /\S/;

    my @arr = split( /\t/, $line );

    # Columns 0..2 (identifier, chromosome, coordinate) are needed later;
    # report and drop anything shorter instead of binning undefined values.
    if ( @arr < 3 ) {
        warn "skipping malformed line $. of $input_filename\n";
        next;
    }

    # skip unnecessary chr in stoplist
    next if $arr[1] =~ $stop_re;

    push @{$arr2d}, \@arr;
}

close $INFILE;

# Order the kept records by chromosome name (column 1, string compare),
# breaking ties by coordinate (column 2, numeric compare).
#my @sortarr = sort { ($a->[2] cmp $b->[2]) || ($a->[3] <=> $b->[3]) } @{$arr2d};
my @sortarr = sort {
    $a->[1] cmp $b->[1]
        or
    $a->[2] <=> $b->[2]
} @$arr2d;


# Chromosome lengths in bases; only chromosomes listed here get bins.
my $lenchr = {
    "chr2L" => 23011544,
    "chr2R" => 21146708,
    "chr3L" => 24543557,
    "chr3R" => 27905053,
    "chr4"  => 1351857,
    "chrX"  => 22422827,
};

# Per chromosome, keyed by the 1-based start coordinate of each bin:
#   $allchr->{$chr}{$start}     summed read count (initialised to 0)
#   $allchr_str->{$chr}{$start} comma-joined identifiers (initialised to "")
my $allchr     = {};
my $allchr_str = {};

# The bin layout divides by the window size, so it must be a positive number.
die "window size must be a positive number (set it with -w)\n"
    unless defined($WINDOW_SIZE) && $WINDOW_SIZE > 0;

# initialize the chr hash list
foreach my $chr ( keys %$lenchr ) {
    my $len          = $lenchr->{$chr};
    my $chr_hash     = {};
    my $chr_str_hash = {};
    $allchr->{$chr}     = $chr_hash;
    $allchr_str->{$chr} = $chr_str_hash;

    # Number of bins needed to cover 1..$len. The partial bin at the end
    # is only added when the length is not an exact multiple of the window;
    # otherwise an empty bin lying past the chromosome end would be created.
    my $bin_count = int( $len / $WINDOW_SIZE );
    $bin_count++ if $len % $WINDOW_SIZE;

    foreach my $num ( 1 .. $bin_count ) {
        my $low = ( $num - 1 ) * $WINDOW_SIZE + 1;
        $chr_hash->{$low}     = 0;
        $chr_str_hash->{$low} = "";
    }
}


#my $mod = ($end%$WINDOW_SIZE) + 1;




# Assign every record to its bin: add the read count (column 7) to the
# bin total and prepend the identifier (column 0) to the bin's
# comma-terminated identifier list.
foreach my $line (@sortarr) {
    my $identifier = $line->[0];
    my $chr        = $line->[1];
#------------ additional code for deletion line
#    my $site = $line->[2];
#    my $site_3prime = $line->[6];
#    if ($site_3prime < $site ) {
#	$site = $line->[6];
#	$site_3prime = $line->[2];
#    }
#
#    my $site_mid =  ($site + $site_3prime)/2;
#    $site_mid = sprintf( "%.0f", $site_mid);
#---------------
    my $coord = $line->[2];
#    my $coord  = $site_mid; #used for deletion bins
    my $reads = $line->[7];
#    my $reads = $line->[8]; #used for deletion bins

    # Records on a chromosome without a bin table cannot be binned.
    # Dereferencing the missing table would autovivify a throwaway hash
    # and silently lose the data, so report and skip instead.
    if ( !defined $chr || !defined $coord || !exists $allchr->{$chr} ) {
        my $shown_id  = defined $identifier ? $identifier : '';
        my $shown_chr = defined $chr        ? $chr        : '';
        warn "skipping record '$shown_id': no bins for chromosome '$shown_chr'\n";
        next;
    }
    my $chr_hash     = $allchr->{$chr};
    my $chr_str_hash = $allchr_str->{$chr};

    # A missing or empty read count contributes nothing to the total.
    $reads = 0 unless defined $reads && length $reads;

    # Bins are 1-based and inclusive (1..W, W+1..2W, ...). Subtracting 1
    # before dividing keeps a coordinate that is an exact multiple of the
    # window (e.g. 1000 with W=1000) in the bin ending there, rather than
    # pushing it into the following bin.
    my $bin_start = int( ( $coord - 1 ) / $WINDOW_SIZE ) * $WINDOW_SIZE + 1;

    $chr_hash->{$bin_start} += $reads;

    my $previous = $chr_str_hash->{$bin_start};
    $previous = '' unless defined $previous;
    $chr_str_hash->{$bin_start} = $identifier . ',' . $previous;
}


# Print one tab-separated line per bin:
#   chromosome, bin start, "start-end", summed reads, identifier list.
# Chromosomes are visited in sorted name order; plain hash-key order is
# not stable between runs, which made the output order unpredictable.
foreach my $chr ( sort keys %$allchr ) {
    my $chrhash      = $allchr->{$chr};
    my $chr_str_hash = $allchr_str->{$chr};

    # Bins in ascending order of their start coordinate.
    foreach my $bin ( sort { $a <=> $b } keys %$chrhash ) {
        my $val  = $chrhash->{$bin};
        my $low  = $bin;
        my $high = $low + $WINDOW_SIZE - 1;
        my $key  = "$low" . '-' . "$high";

        my $str = $chr_str_hash->{$bin};
        $str = '' unless defined $str;
        # The identifier list is stored with a trailing comma; drop it.
        $str =~ s/,$//;

        print "$chr\t$low\t$key\t$val\t$str\n";
    }
}

