#! /usr/bin/perl

use warnings;
use strict;
use Statistics::Descriptive;
use List::Util qw(sum);

###############################################
# USAGE STATEMENT
unless (scalar @ARGV >= 3) {
	die "\nIncorrect command line arguments.\n".
		"Usage : merge_homer_intersect_files.pl <Homer file REF> <IntersectBed file 1>... <IntersectBed file  n>.\n";
}
###############################################

###############################################
# 0. Parse command line arguments
# file that contains annotation of reference peaks
my $Homerfile = shift @ARGV;
my @intersectBeds = @ARGV;

###############################################


###############################################
# 1. Parse annotated merged reference peaks
my %annot_hash = ();

open (HOMER,$Homerfile) or die "Couldn't open $Homerfile: $!\n";

my $headerline = <HOMER>;

my @headerdata = get_line_data ($headerline);
my $newheaderline = "Peak_Name\t$headerdata[1]\t$headerdata[2]\t$headerdata[3]\t$headerdata[10]\t$headerdata[11]\t$headerdata[15]\t$headerdata[9]";

while (my $line = <HOMER>) {
	my @linedata = get_line_data ($line);
	
	unless ($linedata[1] =~ /^chrM/) {	
		$annot_hash{$linedata[0]} = "$linedata[0]\t$linedata[1]\t$linedata[2]\t$linedata[3]\t$linedata[10]\t$linedata[11]\t$linedata[15]\t$linedata[9]";
	}
	
}

close HOMER;
###############################################



###############################################
# 2. Parse intersected Bed files

my @names = ();
my %overlap_hash = ();
my %breadth_hash = ();

my $file_idx = 0;

foreach my $file (@intersectBeds) {
	
	$file =~ m/_intersected_by_(.+)\.bed/;
	
	push (@names, $1);

	open (FILE,$file) or die "Couldn't open $file: $!\n";

	ITEM:while (my $line = <FILE>) {
			
		my @linedata = get_line_data ($line);
		
		next ITEM if ($line =~ /^chrM/);
		
		# autovivify anonymous array of arrays if this is the first loop
		# the array that is the $file_idx-th element of the array or arrays
		push(@{ ${ $overlap_hash{$linedata[3]} }[$file_idx] } , $linedata[8]);
		
		
		my $breadth = $linedata[6] - $linedata[5];
		
		push(@{ ${ $breadth_hash{$linedata[3]} }[$file_idx] }, $breadth);
		
		
	}

	close FILE;
	++$file_idx;

}

###############################################


###############################################
# 3. Output result

for (my $i = 0; $i < $file_idx; ++$i) {
	$newheaderline .= "\tMaxBreadth_$names[$i]\tOverlap_bp_$names[$i]\tCov_Fraction_$names[$i]";
}

print $newheaderline."\n";

foreach my $key (sort keys %annot_hash) {
	
	my $annotation = $annot_hash{$key};
	
	my $remainingfields = "";
	
	for (my $i = 0; $i < $file_idx; ++$i) {
	
		if (defined @{ ${ $breadth_hash{$key} }[$i] } ) {
			my @breadths = @{ ${ $breadth_hash{$key} }[$i] };
			my $maxB = get_max (@breadths);
			# http://www.perlmonks.org/?node_id=75660; get index of a given array value
			#my ( $index ) = grep { $breadths[$_] eq $maxB } 0..$#breadths; 

			my @overlaps = @{ ${ $overlap_hash{$key} }[$i] };
			my $overlap = sum (@overlaps) ;
		
			my @refdata = get_line_data ($annotation);		
			my $coverage = $overlap / ($refdata[3] - $refdata[2] );

		
			$remainingfields .= "\t".$maxB."\t".$overlap."\t".$coverage ; # leading tab to account for joining
		
		} else {
			$remainingfields .= "\tNA\tNA\tNA" ; # leading tab to account for joining

		}
	}
	
	my $outline = $annotation.$remainingfields;

	print $outline."\n";
	
}





###############################################


exit;

#####################################
######      SUBROUTINES        ######
#####################################

###########################################################
# a subroutine that separates fields from a data line and
# returns them in an array

sub get_line_data {

    my $line = $_[0];
    
    chomp $line;
    
    my @linedata = split(/\t/, $line);
       
    return @linedata;
}

###########################################################
# a subroutine that returns statistical data in an array format
# on the scores contained in a gff file given as argument to 
# the subroutine

sub get_max {
	#gets filename to analyze from the main program
	my @lengths = @_;

	# create objet for stats (new full statistics object)
	my $stat_FULL = Statistics::Descriptive::Full->new();
	$stat_FULL->clear();

	# add data to the object
	$stat_FULL->add_data(@lengths);
	$stat_FULL->sort_data();
	my $max = $stat_FULL->max();

	return $max;
}

