#!/usr/bin/perl
use strict;
use warnings;
use threads;
use threads::shared;

# requires bedtools and threads library from CPAN.

my $start_time = time();

# Command-line arguments, in order:
#   <peak file 1> <peak file 2> <number of simulations> <number of processors>
my $file1 = shift; # peak 1 file
my $file2 = shift; # peak 2 file
my $simulations = shift; # number of simulations required
my $processors = shift; # number of processors available

# Fail early with a usage message instead of an undef / division-by-zero error
# further down. $processors is the last argument, so if it is defined the
# earlier ones are too.
if (!defined $processors
	|| $simulations !~ /\A[1-9]\d*\z/
	|| $processors !~ /\A[1-9]\d*\z/) {
	die "Usage: $0 <peaks1> <peaks2> <simulations> <processors>\n"
	  . "  <simulations> and <processors> must be positive integers\n";
}

for my $peak_file ($file1, $file2) {
	die "Cannot read peak file '$peak_file'\n" unless -r $peak_file;
}

# Ceiling division: every thread runs the same number of simulations and the
# total is never less than requested, but an exact multiple is not padded
# with one extra round per thread.
my $sim_per_thread = int(($simulations + $processors - 1) / $processors);

# Total number of simulations that will actually be launched.
my $sims_run = $sim_per_thread * $processors;

my $genome = "/data/as/annotation/Homo_sapiens/UCSC/hg19/Annotation/Genes/ChromInfo.txt";
my $unmappable = "/data/as/annotation/Homo_sapiens/UCSC/hg19/Annotation/Genes/unmappable.bed";

my @threads;

# Observed statistics for the real (unshuffled) peak files. The list form of
# the pipe open bypasses the shell, so file names containing spaces or shell
# metacharacters are passed to bedtools verbatim.
open(my $jaccard_fh, '-|', 'bedtools', 'jaccard', '-a', $file1, '-b', $file2)
	or die "Cannot run bedtools jaccard: $!\n";
my $jaccard = do { local $/; <$jaccard_fh> };
$jaccard = '' unless defined $jaccard;
close($jaccard_fh)
	or die "bedtools jaccard failed for $file1 vs $file2 (wait status $?)\n";

# The statistics are taken from the end of the whitespace-separated output:
# last field = overlap count, second to last = jaccard index, fourth from
# last = overlapping bases (per the bedtools jaccard column order:
# intersection, union, jaccard, n_intersections).
my @jacs = split(/\s+/, $jaccard);
if (@jacs < 4 || $jacs[-1] !~ /\A\d+\z/) {
	die "Unexpected output from bedtools jaccard:\n$jaccard\n";
}
my $jacs_count = $jacs[-1];
my $value = $jacs[-2];
my $bases = $jacs[-4];

# The worker threads append to these files, so leftovers from an earlier,
# aborted run would silently be tallied as simulations of this run.
unlink("test.jaccardIndex", "test.jaccardCount", "test.jaccardBases");

# Launch one worker per processor; each runs $sim_per_thread simulations.
for my $worker (1 .. $processors) {
	my $t = threads->create(\&sub1, $file1, $file2, $unmappable, $value);
	push(@threads, $t);
}

# Wait for every worker to finish before the result files are read.
foreach my $thread (@threads) {
	$thread->join;
}

print "\n### Monte Carlo ###\n";

# Each statistic is tallied from the file the worker threads appended to and
# compared against the value observed on the real (unshuffled) peak files.

# Jaccard Index
report_statistic("jaccard index", "test.jaccardIndex", $value, $sims_run);

# Overlap Count
report_statistic("overlap counts", "test.jaccardCount", $jacs_count, $sims_run);

# Overlap of Bases
report_statistic("overlap bases", "test.jaccardBases", $bases, $sims_run);

# Remove the temporary per-statistic files (no need to spawn a shell for rm).
unlink("test.jaccardIndex", "test.jaccardCount", "test.jaccardBases");

my $end_time = time();
my $time_taken = $end_time - $start_time;
#print "Job took $time_taken seconds\n";

# report_statistic($label, $path, $observed, $expected)
#
# Reads one simulated value per line from $path, counts how many are
# >= $observed ("more extreme") and how many are < $observed ("less
# extreme"), and prints the counts together with the empirical p-value
# (more + 1) / (simulations + 1).
#
#   $label    - heading printed between asterisks
#   $path     - file with one simulated value per line
#   $observed - statistic measured on the real data
#   $expected - number of simulations that were launched; only used to warn
#               when the file holds a different number of usable values
#
# Lines that are not plain numbers (e.g. the blank line left behind by a
# failed simulation) are skipped rather than compared numerically, and the
# p-value denominator is the number of values actually tallied, so failed
# simulations do not make the result look more significant than it is.
# Dies if $path cannot be opened.
sub report_statistic {
	my ($label, $path, $observed, $expected) = @_;

	my $more_ext = 0;
	my $less_ext = 0;
	my $skipped = 0;

	open(my $fh, '<', $path) or die "Cannot open $path: $!\n";

	while (my $line = <$fh>) {
		chomp($line);
		# Accept integers, decimals and exponent notation only.
		if ($line !~ /\A\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*\z/) {
			$skipped++;
			next;
		}
		if ($line >= $observed) {
			$more_ext++;
		}
		else {
			$less_ext++;
		}
	}

	close($fh);

	my $sims_run = $more_ext + $less_ext;
	if ($sims_run != $expected) {
		warn "$path: expected $expected simulated values, tallied $sims_run"
		   . " ($skipped unusable line(s) skipped)\n";
	}

	my $pvalue = (($more_ext + 1) / ($sims_run + 1));

	print "\n*$label*\n";
	print "More extreme value found $more_ext times\nLess extreme value found $less_ext times\nNo. of simulations: $sims_run\npvalue is $pvalue\n";

	return;
}

# sub1($file1, $file2, $unmappable, $value)
#
# Worker-thread body: runs $sim_per_thread simulations (file-level variable).
# Each simulation shuffles the intervals of $file1 with bedtools shuffle
# (excluding the regions in $unmappable, using the file-level $genome),
# sorts the result, computes bedtools jaccard against $file2 and appends
# one line to each of test.jaccardIndex, test.jaccardCount and
# test.jaccardBases in the current directory.
#
#   $file1      - peak file whose intervals are shuffled
#   $file2      - peak file the shuffled intervals are compared with
#   $unmappable - BED file of regions excluded from shuffling
#   $value      - observed jaccard index; accepted for interface
#                 compatibility but not used here
#
# A simulation whose output cannot be parsed is reported with a warning and
# writes nothing, instead of appending blank lines. Dies (ending this
# thread) if a result file cannot be appended to. Returns nothing.
sub sub1 {
	my ($file1, $file2, $unmappable, $value) = @_;

	# Single-quote every path for the shell so names containing spaces or
	# metacharacters are passed through literally.
	my ($q_unmappable, $q_file1, $q_genome, $q_file2) = map {
		(my $quoted = $_) =~ s/'/'\\''/g;
		"'$quoted'";
	} ($unmappable, $file1, $genome, $file2);

	# The pipeline is identical for every simulation, so build it once.
	my $command = "bedtools shuffle -excl $q_unmappable -f 0.01 -maxTries 10000 -i $q_file1 -g $q_genome"
		. " | sort -k1,1V -k2,2n"
		. " | bedtools jaccard -a stdin -b $q_file2";

	for my $i (1 .. $sim_per_thread) {
		my $new_jaccard = qx($command);
		my @new_jacs = split(/\s+/, defined $new_jaccard ? $new_jaccard : '');

		# The last field (overlap count) must be an integer; anything else
		# means the pipeline failed or only printed a header.
		if (@new_jacs < 4 || $new_jacs[-1] !~ /\A\d+\z/) {
			warn "Simulation $i produced no usable bedtools jaccard output; skipped\n";
			next;
		}

		# Fields are read from the end of the output: jaccard index,
		# overlap count, overlapping bases.
		my @results = (
			[ "test.jaccardIndex", $new_jacs[-2] ],
			[ "test.jaccardCount", $new_jacs[-1] ],
			[ "test.jaccardBases", $new_jacs[-4] ],
		);

		# Open in append mode, write one short line and close again, so each
		# value goes out as a single append, much like the `echo >> file`
		# this replaces, but without spawning three shells per simulation.
		for my $result (@results) {
			my ($path, $stat) = @{$result};
			open(my $out, '>>', $path) or die "Cannot append to $path: $!\n";
			print {$out} "$stat\n";
			close($out) or die "Cannot write to $path: $!\n";
		}
	}

	return;
}
