#!/usr/bin/perl
use strict;
use warnings;
use diagnostics;
use Statistics::Descriptive;
$|=1;

########## GLOBAL AVERAGE for pairs ##########
#Purpose: To find the local average of intervals along the shore. 
#Fiorella C. Grandi. 
#last revision: 08/07/2014

# Bounds of the bin currently being processed. average() keeps rows whose
# fourth tab-separated field lies in [$interval_start, $interval_end) and
# advances both bounds by one step after every call.
my $interval_start = 0;
my $interval_end   = 100;

# Scratch arrays shared with average(): @array holds the tab-split fields of
# the row being examined, @array2 collects the values kept for the current bin.
my @array;
my @array2;

#SPECIFY THE FILE TO BIN HERE
# Three-argument open with an explicit read mode, so the file name can never
# be interpreted as a mode or pipe specification. The bareword handle INPUT is
# kept because average() reads from it by that name.
open INPUT, '<', "Ziller2013_sperm_allpairs_low_low_surroundingCpGs.txt" or die "Couldn't open file1: $!\n";

#subroutines
# average: print the mean and standard deviation for the current bin.
#
# Scans every row of the INPUT file and keeps a row when all of these hold:
#   * field 2 (the ID, surrounding whitespace ignored) equals $test,
#   * field 6 ($distance) lies in [5000, 6000),
#   * field 4 lies in [$interval_start, $interval_end).
# Field 5 of every kept row is accumulated. If at least one row was kept, one
# tab-separated line "start:end, mean, standard deviation, count" is printed;
# otherwise a "Not valid for ..." line is printed.
#
# Arguments: none are read. The caller passes a bin index, but the interval
# comes from the file-level $interval_start / $interval_end.
# Side effects: rewinds and reads the file-level INPUT handle, empties the
# file-level @array and @array2, and advances $interval_start / $interval_end
# by one step (100) so that consecutive calls walk through consecutive bins.
sub average {
    my $test = "downstream_pair1";
    #my $test = "upstream_pair2";

    my $total = 0;

    # Rewind before every scan. Reading <INPUT> leaves the handle at end of
    # file, so without this only the first call would ever see any rows and
    # every later bin would be reported as "Not valid".
    seek(INPUT, 0, 0) or die "Couldn't rewind input file: $!\n";

    # Read line by line instead of slurping the whole file into a list.
    while (my $line = <INPUT>) {
        $line =~ s/[\r\n]+\z//;    # drop the line ending, including a CR from CRLF files
        @array = split("\t", $line);

        # Rows with fewer than six fields cannot be tested; skipping them
        # avoids "uninitialized value" warnings on blank or truncated lines.
        next if @array < 6;

        # Compare the whitespace-stripped ID. Testing the ID before the
        # numeric fields also keeps header-like rows away from the numeric
        # comparisons below.
        my $ID = $array[1];
        $ID =~ s/^\s+//;    # strip white space from the beginning
        $ID =~ s/\s+$//;    # strip white space from the end
        next unless $ID eq $test;

        my $distance = $array[5];
        next unless $distance >= 5000 && $distance < 6000;    #change the distance that you want the CGIs that you bin to be.

        next unless $array[3] >= $interval_start && $array[3] < $interval_end;

        # Collect the same field that is being averaged, so the standard
        # deviation describes the data behind the printed mean (collecting
        # field 4 here would only measure the spread of the binning
        # coordinate inside the bin).
        push(@array2, $array[4]);
        $total += $array[4];
    }

    my $den = scalar(@array2);
    if ($den > 0) {
        my $average = $total / $den;
        my $stat    = Statistics::Descriptive::Full->new();
        $stat->add_data(@array2);
        my $sd = $stat->standard_deviation();
        print "$interval_start:$interval_end\t$average\t$sd\t$den\n";
    }
    else {
        print "Not valid for $interval_start: $interval_end\n";
    }

    # Reset the shared scratch arrays and move on to the next bin.
    undef(@array2);
    undef(@array);
    $interval_start = $interval_start + 100;    #change the step size here
    $interval_end   = $interval_end + 100;
}

# Call average() once for each bin index from 0 through 30 (31 calls in all).
# The index is handed over as the argument; average() itself moves the
# file-level interval bounds forward after each call.
for my $bin_index (0 .. 30) {
    average($bin_index);
}

