#!/usr/bin/perl
use strict;
use warnings;
use diagnostics;
use POSIX;
$|=1;
#########  CGI Pair Surrounding #################
#Purpose: For each CGI pair, report the methylation of the CpGs lying between
#         the two CGIs, within half the pair distance of either island.

#Fiorella C. Grandi. 
#last revision: 08/07/2014 

#Declare main variables

my @array; #holds the tab-separated fields of the CGI pair line being processed
my %hash; #holds the methylation data: location => percent methylation

#INPUT methylation data
# Each data line is split on tabs into: location, percent methylation,
# coverage. Only the first two columns are kept; coverage is not used
# anywhere in this script.
my $meth_file = "Meth_sperm_mchr19_Kobayashi.txt";
open(my $meth_fh, '<', $meth_file) or die "Couldn't open file1: $!\n";

while (my $record = <$meth_fh>) {
  # Blank lines carry no data; skipping them avoids storing a junk key.
  next if $record =~ /^\s*$/;

  my ($location, $percent_meth) = split(/\t/, $record);

  # A line without a second column cannot be stored meaningfully.
  unless (defined $percent_meth) {
    warn "Skipping line $. of $meth_file: expected tab-separated location and methylation\n";
    next;
  }

  # Strip leading/trailing white space (including any \r or \n left over
  # from the line ending) from the fields that are actually stored.
  # The previous code stripped undeclared variables instead, which does
  # not compile under "use strict".
  for my $field ($location, $percent_meth) {
    $field =~ s/^\s+//;
    $field =~ s/\s+$//;
  }

  #associate the location and the percent methylation in a hash variable.
  $hash{$location} = $percent_meth;
}
close $meth_fh;

#INPUT pair file
# Each line is split on tabs; the columns are used as follows:
#   [0] start of the first CGI
#   [1] added to [0] to obtain the end of the first CGI
#   [2] start of the second CGI
#   [3] distance between the two CGIs of the pair
my $pairs_file  = "test_pairs.txt";
my $output_file = "test_pairs_surrounding.txt";

open(my $pairs_fh, '<', $pairs_file) or die "Couldn't open file: $!\n";
# Append mode: results of repeated runs accumulate in the same file.
open(my $out_fh, '>>', $output_file)
  or die "Couldn't open $output_file for appending: $!\n";

# Sort the CpG locations once, numerically, so that every pair can walk
# them in ascending order, stop as soon as it passes its window, and
# produce output in a reproducible order.
my @cpg_positions = sort { $a <=> $b } keys %hash;

############## Main Program  #####################
# Read the pair file line by line instead of loading it all into memory.
while (my $line = <$pairs_fh>) {
  $line =~ s/\r?\n$//;          # remove the line ending, including a Windows \r
  next if $line =~ /^\s*$/;     # ignore blank lines

  @array = split(/\t/, $line);
  if (@array < 4) {
    warn "Skipping line $. of $pairs_file: expected at least 4 tab-separated fields\n";
    next;
  }

  #get distance and other positional information
  my $distance      = $array[3];
  my $half_distance = ceil($distance / 2);
  my $island_end    = $array[0] + $array[1];
  my $island_downstream_half_distance = $island_end + $half_distance;
  my $second_island_start = $array[2];
  my $second_island_upstream_half = $second_island_start - $half_distance;

  #find upstream CpGs for second CGI in pair
  foreach my $key (@cpg_positions) {
    next if $key < $second_island_upstream_half;
    last if $key > $second_island_start;   # sorted: nothing further can match
    my $distance3 = $second_island_start - $key;
    #prints location of CGI2 in pair, label, location of CpG, distance to CGI,
    #methylation of CpG, distance of CGI pair
    print {$out_fh} "$array[2]\tupstream_pair2\t$key\t$distance3\t$hash{$key}\t$distance\n";
  }

  #find downstream CpGs for first CGI in pair
  foreach my $key (@cpg_positions) {
    next if $key < $island_end;
    last if $key > $island_downstream_half_distance;   # sorted: nothing further can match
    #calculate distance from the end of the CGI to this CpG
    my $distance2 = abs($island_end - $key);
    #prints location of CGI1 in pair, label, location of CpG, distance to CGI,
    #methylation of CpG, distance of CGI pair
    print {$out_fh} "$array[0]\tdownstream_pair1\t$key\t$distance2\t$hash{$key}\t$distance\n";
  }
}

close $pairs_fh;
# Buffered write errors only surface at close, so check it.
close $out_fh or die "Couldn't finish writing $output_file: $!\n";

undef(@array);

