#! /usr/bin/perl

use warnings;
use strict;
use IO::CaptureOutput qw(capture_exec);

# Abort with a usage message when fewer than the two mandatory arguments
# (cell type and ChIP BED file) are given; the input BED file is optional.
if (@ARGV < 2) {
	die "\nNot enough command line arguments.\nUsage : get_traveling_ratio_v3.pl <Cell type> <bed ChIP> (<bed Input>)\n\n";
}

#Traveling ratio calculation
#Pol II levels peak in the 5’ region of many genes. To quantify this effect, we have
#developed a measure called Traveling Ratio (TR) that compares the ratio
#between Pol II density in the promoter and in the gene region.
#We first defined the promoter region from -300 to +300 relative to the TSS and the
#gene body as the remaining length of the gene. We next calculated the average
#density/nt from rank normalized ChIP-seq density files (described in Bilodeau et
#al. 2009) for each region and computed the TR as the ratio between the two.

# get input files
# Positional arguments, consumed from @ARGV in order:
#   1. cell type label (only used to build the output file name)
#   2. BED file with the ChIP reads
#   3. optional BED file with the input (control) reads
my ($cell_type, $bedChIP, $bedInput) = splice(@ARGV, 0, 3);

# 1 when an input BED file was supplied, 0 otherwise.
my $inputstatus = $bedInput ? 1 : 0;


### 1. get files with bed regions to compute on
# Both region files are looked up relative to the current working directory.
my $gene_region = "2016-1-22_mm9_gene_body_regions.bed";
my $prom        = "2016-1-22_mm9_promoter_regions.bed";

# Small test region files, kept for debugging:
#my $prom = "prom.test.bed";
#my $gene_region = "body.test.bed";

### 2. get sequencing depth to normalize further calculations

# Return the number of lines of a BED file as reported by `wc -l`.
# The list form of capture_exec runs wc without a shell, so file names with
# spaces or shell metacharacters are passed through untouched.
# Dies when wc prints no count (e.g. the file does not exist).
my $count_bed_lines = sub {
	my ($bed_file) = @_;

	# In scalar context capture_exec returns what the command wrote to STDOUT.
	my $wc_output = capture_exec('wc', '-l', $bed_file);

	# wc prints "<count> <file name>", possibly with leading blanks.
	my ($line_count) = defined $wc_output ? $wc_output =~ m/(\d+)/ : ();
	die "Could not count the lines of $bed_file\n" unless defined $line_count;

	return $line_count;
};

# NOTE: despite their names these hold raw line counts, not millions of reads.
my $mapped_million = $count_bed_lines->($bedChIP);

# Without an input sample the ChIP depth is used for both, giving a ratio of 1.
my $mapped_million2 = $mapped_million;

if ($inputstatus) {
	# Assign to the outer variable: a second "my" here would create a new
	# variable local to this block and the input depth would be discarded.
	$mapped_million2 = $count_bed_lines->($bedInput);
}

die "Cannot normalize sequencing depth: no reads were counted for the input sample\n"
	unless $mapped_million2 > 0;

# Factor that scales input counts to the ChIP sequencing depth.
my $diffdepth = $mapped_million / $mapped_million2; #1 if no input

### 3. get region coverage
# Run coverageBed for one reads/regions pair, keep its output in the given
# temporary file and return the output lines (newlines included).
# Dies when coverageBed reports a failure: the shell redirect creates the
# temporary file even if the command fails, so the file merely existing
# proves nothing.
# NOTE(review): bedtools 2.24.0 swapped the roles of -a and -b in "coverage";
# this call appears to rely on the older convention (one output row per -b
# region) - confirm against the installed bedtools version.
my $collect_coverage = sub {
	my ($reads_bed, $regions_bed, $temp_file) = @_;

	my $status = system("coverageBed -counts -a $reads_bed -b $regions_bed > $temp_file");
	if ($status != 0) {
		die "coverageBed failed while writing $temp_file (system returned $status)\n";
	}

	open(my $coverage_fh, '<', $temp_file) or die "$temp_file was not found\n";
	my @coverage_lines = <$coverage_fh>;
	close $coverage_fh;

	return @coverage_lines;
};

# Promoter coverage: ChIP first, then input when one was supplied.
my @coverage_prom_IP = $collect_coverage->($bedChIP, $prom, 'temp1.bed');

my @coverage_prom_Input = ();
if ($inputstatus) {
	@coverage_prom_Input = $collect_coverage->($bedInput, $prom, 'temp2.bed');
}

# Gene body coverage, in the same order.
my @coverage_gb_IP = $collect_coverage->($bedChIP, $gene_region, 'temp3.bed');

my @coverage_gb_Input = ();
if ($inputstatus) {
	@coverage_gb_Input = $collect_coverage->($bedInput, $gene_region, 'temp4.bed');
}


############
# output file

# Build the date prefix of the output file name as year-month-day, without
# zero padding. localtime returns the day of the month as 1..31 already, so
# only the month (0..11) and the year (years since 1900) need an offset.
my ($mday, $mon, $year) = (localtime(time))[3, 4, 5];
my $realday = $mday;
my $realmth = $mon + 1;
$year += 1900;
my $datestrg = join('-', $year, $realmth, $realday);


my $out1 = $datestrg."_".$cell_type."_mm9_TravelingRatios.txt";

# One output row is written per promoter coverage row.
my $gene_num = scalar @coverage_prom_IP;

#print $gene_num."\n";

# Rows of the coverage tables are paired by index, so every other table that
# is used must have at least as many rows as the promoter ChIP table.
# Without this check a short table yields undefined rows and garbage numbers.
my %row_count_of = ('gene body ChIP' => scalar @coverage_gb_IP);
if ($inputstatus) {
	$row_count_of{'promoter input'}  = scalar @coverage_prom_Input;
	$row_count_of{'gene body input'} = scalar @coverage_gb_Input;
}
for my $table (sort keys %row_count_of) {
	next if $row_count_of{$table} >= $gene_num;
	die "The $table coverage has $row_count_of{$table} rows but $gene_num promoter rows were read; cannot pair regions by index.\n";
}

open (my $out_fh, '>', $out1) or die "Could not open $out1: $!\n";
my $header = "Chrom\tStart\tEnd\tGeneName\tCount1\tCount2\tCount3\tCount4\tProm_density\tGeneBody_density\tTraveling_ratio\n";

print {$out_fh} $header;

# Stand-in row used when no input sample was given: a count of 1 in every field.
my @no_input_row = (1) x 8;

for my $i (0 .. $gene_num - 1) {
	# Coverage rows look like: chr1	201326136	201330136	TNNT2	21
	# i.e. chrom, start, end, gene name, read count (field index 4).
	my @data1 = get_line_data($coverage_prom_IP[$i]);
	my @data2 = $inputstatus ? get_line_data($coverage_prom_Input[$i]) : @no_input_row;
	my @data3 = get_line_data($coverage_gb_IP[$i]);
	my @data4 = $inputstatus ? get_line_data($coverage_gb_Input[$i]) : @no_input_row;

	# NOTE(review): "+1" treats start and end as inclusive; for half-open BED
	# intervals the length would be end - start. Confirm the region files.
	my $promlength = $data1[2] - $data1[1] + 1;
	my $gblength = $data3[2] - $data3[1] + 1;

	# Per-nucleotide ChIP signal relative to the depth-scaled input count;
	# the 1e-6 keeps the division defined when the input count is 0.
	my $prom_av_density = (1/$promlength)*($data1[4]/($diffdepth*($data2[4]+1e-6)));
	my $gb_av_density = (1/$gblength)*($data3[4]/($diffdepth*($data4[4]+1e-6)));

	# The ratio is reported as 0 unless both densities are positive.
	my $traveling_ratio = 0;
	if ($gb_av_density > 0 && $prom_av_density > 0) {
		$traveling_ratio = $prom_av_density / $gb_av_density;
	}

	# Count1/Count3 are the raw ChIP counts, Count2/Count4 the depth-scaled
	# input counts (promoter and gene body respectively).
	my $result_line = join("\t",
		@data1[0 .. 4],
		$diffdepth * $data2[4],
		$data3[4],
		$diffdepth * $data4[4],
		$prom_av_density,
		$gb_av_density,
		$traveling_ratio,
	) . "\n";
	print {$out_fh} $result_line;
}

# Buffered write errors only surface when the handle is closed.
close $out_fh or die "Could not close $out1: $!\n";

#unlink ("temp1.bed","temp2.bed","temp3.bed","temp4.bed");

exit;


###########################################################
# SUBROUTINES
###########################################################

###########################################################
# a subroutine that separates fields from a data line and
# returns them in an array

sub get_line_data {
    # Takes one line of tab-delimited text as its only argument, strips the
    # trailing newline and returns the fields as a list.
    my ($record) = @_;

    chomp $record;

    # split with the default limit drops trailing empty fields.
    my @fields = split /\t/, $record;

    return @fields;
}
