# created May 15th, 2014 - ACD
# this program parses a narrowPeak file generated by MACS2 with the subpeaks option set, and writes out a new narrowPeak file that splits multi-summit peaks up
# Specifically each summit now has different start and end sites.
# As a first pass, I'm just going to split the distance between summits in half

# An example of the input
# chrI	331	947	/srv/gsfs0/projects/kundaje/users/csfoo/atac/worm/peaks_withdups_split//N2embA_ce10mito_mapq10.shifted.bed_VS_NoControl_peak_1a	229	.	2.85953	22.94812	21.17934	75
# chrI	331	947	/srv/gsfs0/projects/kundaje/users/csfoo/atac/worm/peaks_withdups_split//N2embA_ce10mito_mapq10.shifted.bed_VS_NoControl_peak_1b	827	.	5.16327	82.75455	80.49708	314

use strict;
use warnings;
use POSIX;

# Returns the usage/help text shown when the script is called without a
# *.narrowPeak argument. The text ends in a newline so that die() does not
# append a file/line suffix.
sub usage {
    return "USEAGE: this program parses a narrowPeak file generated by MACS2 with the subpeaks option set, and writes out a new narrowPeak file that splits multi-summit peaks up.
Specifically each summit now has different start and end sites.
As a first pass, I'm just going to split the distance between summits in half.
Arguments required: 1. .narrowPeak file to be parsed \n";
}

# Decides whether two parsed lines (array refs of tab-separated fields)
# are sub-summits of the same peak: same chromosome (field 0) and same end
# coordinate (field 2). Both are compared as strings, exactly as read from
# the file. Lines that lack either field never match.
sub same_peak {
    my ($first, $second) = @_;

    for my $idx (0, 2) {
        return 0 unless defined $first->[$idx] && defined $second->[$idx];
    }
    return ($first->[0] eq $second->[0] && $first->[2] eq $second->[2]) ? 1 : 0;
}

# Splits one peak into non-overlapping sub-peaks, one per summit.
#
# Arguments: the lines (array refs of fields) that belong to one peak, in
#            file order. The last field of each line is the summit offset
#            relative to the shared peak start (field 1).
# Returns:   a list of new array refs (the inputs are not modified).
#            - A single line is returned unchanged.
#            - Otherwise the boundary between two neighbouring summits is
#              placed half-way between them (rounded down); each sub-peak
#              starts where the previous one ends, and its summit offset is
#              re-expressed relative to its new start.
#            The first sub-peak keeps the original start and the last one
#            keeps the original end.
sub split_multi_summit_peak {
    my @sub_peaks = map { [ @{$_} ] } @_;    # work on copies
    return @sub_peaks if @sub_peaks < 2;

    my $peak_start = $sub_peaks[0][1];
    # Remember the original offsets: they are all relative to $peak_start
    # and are needed after the per-line offsets have been rewritten.
    my @offsets = map { $_->[-1] } @sub_peaks;

    my $new_start = $peak_start;
    for my $i (0 .. $#sub_peaks) {
        my $sub_peak = $sub_peaks[$i];

        if ($i < $#sub_peaks) {
            my $half_dist = floor(($offsets[$i + 1] - $offsets[$i]) / 2);
            # original start + distance to this summit + half the gap
            $sub_peak->[2] = $peak_start + $offsets[$i] + $half_dist;
        }

        $sub_peak->[1] = $new_start;
        if ($i > 0) {
            # make the summit offset relative to the new start
            $sub_peak->[-1] = $offsets[$i] - ($new_start - $peak_start);
        }

        # the next sub-peak begins where this one ends
        $new_start = $sub_peak->[2];
    }

    return @sub_peaks;
}

# Reads the narrowPeak file named by the first argument and prints the
# rewritten records to STDOUT.
# Dies with the usage text if the argument is missing or does not end in
# "narrowPeak", and with the system error if the file cannot be opened.
# Lines that contain no fields (blank lines) are dropped; they still end
# the current peak group.
sub main {
    my ($input) = @_;

    # make sure the right arguments are being passed
    die usage() unless defined $input && $input =~ m/narrowPeak$/;

    open(my $in, '<', $input) or die "cannot open $input: $!\n";

    # Consecutive lines that share chromosome and end coordinate are
    # buffered here and emitted together once the peak is complete.
    my @group;
    while (my $line = <$in>) {
        chomp $line;
        my @fields = split /\t/, $line;

        if (@group && !same_peak($group[-1], \@fields)) {
            print join("\t", @{$_}) . "\n" for split_multi_summit_peak(@group);
            @group = ();
        }
        push @group, \@fields if @fields;
    }

    # Flush the last peak (nothing is printed for an empty input file).
    if (@group) {
        print join("\t", @{$_}) . "\n" for split_multi_summit_peak(@group);
    }

    close $in;
    return;
}

# Run only when executed as a script; when the file is loaded with
# require/do (e.g. from a test) only the subs above are defined.
unless (caller) {
    main(@ARGV);
    exit;
}

1;

