#!/usr/bin/perl
#
# Builds per-base coverage tracks from paired-coordinate files.
#
# For every chromosome listed in @chra the script reads "cpsOut.<chr>"
# (tab-separated; the first two fields of each line are taken as the two
# ends of a pair), optionally drops duplicate pairs and pairs outside the
# requested size window, and increments a counter for every position the
# remaining pairs span.  The counters for positions 1..<chromosome length>
# are written to "<chr>.znt4" as consecutive 32-bit little-endian unsigned
# integers (pack template "V").  A per-size summary of kept / length-skipped
# / duplicate-filtered pairs is printed to STDOUT; progress goes to STDERR.
#
# Arguments:
#   lowsize   smallest pair size (inclusive) to keep
#   highsize  largest pair size (inclusive) to keep; 0 disables the size
#             window entirely
#   all       optional; if ANY third argument is supplied, duplicate
#             filtering is turned off
use strict;
use warnings;

# Chromosome names and their lengths; the two lists are index-aligned.
my @chra = ("2micron","chrI","chrII","chrIII","chrIV","chrV","chrVI","chrVII","chrVIII","chrIX","chrX","chrXI","chrXII","chrXIII","chrXIV","chrXV","chrXVI");
my @totals = (6318, 230208, 813178, 316617, 1531919, 576869, 270148, 1090947, 562643, 439885, 745742, 666454, 1078175, 924429, 784333, 1091289, 948062);

# Guard against the two tables drifting apart when one of them is edited.
die "Internal error: chromosome name and length tables differ in size\n"
	unless @chra == @totals;

my ($lowsize, $highsize, $all) = @ARGV;

unless (defined $highsize) { die "Requires 2 line arguments: [Lowersize_limit] [highersize_limit] [OPTIONAL switch 'all' prevents unique filtering]\nEX: perl cpsOut_to_znt4.pl 140 160\nWould limit treated pair sizes to those within 140-160 nt\n0 and 2000 would treat everything but those pairs separated by over 2000 bp\nSetting Highsize Limit to 0 will treat ALL pairs regardless of length\n";}

# A highsize of 0 means "no size window at all".
my $use_size_window = ($highsize != 0);
# Duplicate filtering is on unless a third argument was given.
my $filter_duplicates = !defined $all;

# $ucount{kept|skip|filt}{<pair size>} = number of pairs, over all chromosomes.
my %ucount;
# Largest pair size seen anywhere; bounds the report at the end.
my $max = 0;

# Fixed: the original tested the misspelled (and therefore always undefined)
# variable $hgihsize, so this message was never printed.
if ($use_size_window) {print STDERR "Lowersizelimit set to $lowsize\nUppersizelimit set to $highsize\n";}

for my $x (0 .. $#chra) {
	my ($okay, $skip, $filt, $malformed) = (0, 0, 0, 0);
	my $chr = $chra[$x];
	my $infile = "cpsOut.$chr";
	my $outfile = "$chr.znt4";
	print STDERR "Working on $infile\n";

	# $bigarr[$pos] = number of kept pairs spanning position $pos.
	my @bigarr;
	# $unique{$start}{$end} exists once that (ordered) pair has been seen.
	my %unique;

	open(my $in, '<', $infile) or die "Could not open $infile: $!\n";
	while (my $line = <$in>) {
		chomp $line;
		$line =~ s/\r\z//;	# tolerate CRLF line endings
		my ($start, $end) = split(/\t/, $line);

		# A blank or single-field line has no pair to record.  The original
		# silently counted such lines as a size-1 pair at position 0.
		unless (defined $start && defined $end) {
			$malformed++;
			next;
		}

		# Normalise orientation so that start <= end.
		if ($end < $start) {($start, $end) = ($end, $start);}
		my $size = $end - $start + 1;
		if ($size > $max) {$max = $size;}

		# Duplicate filtering happens before the size window, so a pair that
		# repeats is reported as "filtered" even if it would also be skipped.
		if ($filter_duplicates && exists $unique{$start}{$end}) {
			$ucount{'filt'}{$size}++;
			$filt++;
			next;
		}
		$unique{$start}{$end} = 1;

		if ($use_size_window && ($size > $highsize || $size < $lowsize)) {
			$skip++;
			$ucount{'skip'}{$size}++;
			next;
		}

		$okay++;
		$ucount{'kept'}{$size}++;
		for my $pos ($start .. $end) {
			$bigarr[$pos]++;
		}
	}
	close($in);

	print STDERR "Ignored $malformed lines without two tab-separated fields\n" if $malformed;
	print STDERR "Found $okay treated pairs\nSkipped $skip records based on length cutoffs\nFiltered $filt pairs as duplicates\nWriting to $outfile\n";

	open(my $out, '>', $outfile) or die "Could not write $outfile: $!\n";
	binmode $out;
	# Only positions 1..length are emitted; anything a pair covers outside
	# that range is dropped.  Uncovered positions are written as 0.
	for my $pos (1 .. $totals[$x]) {
		print {$out} pack("V", $bigarr[$pos] || 0);
	}
	# Buffered write errors (e.g. disk full) only surface at close.
	close($out) or die "Could not finish writing $outfile: $!\n";
}

print STDERR "Printing length report\n";
print "#Pair Size\tRetained\tLength Filtered\tDuplicate Filtered\n";
for my $size (1 .. $max) {
	# Sizes that never occurred in a category are reported as 0.
	print join("\t",
		$size,
		$ucount{'kept'}{$size} || 0,
		$ucount{'skip'}{$size} || 0,
		$ucount{'filt'}{$size} || 0,
	), "\n";
}
print STDERR "DONE\n";

	