#####################################################################
# Perl script to genotype deletions in homologue specific BAM files #
#####################################################################

#!/usr/bin/perl -w
use strict;
use warnings;
use Data::Dumper;
use List::Util qw(sum max);

# Genotype deletions by counting reads in homologue-specific BAM files.
#
# Usage:  script <deletion_list>
#
# Input : comma-separated table with one header line; the 2nd, 3rd and 4th
#         fields of every following line are chromosome, start and end.
# Output: "<deletion_list>_strandSeq", a tab-separated table with one row per
#         region: chr, start, end, size (end - start) and the raw number of
#         reads with mapping quality >= 10 in each of the six BAM files.

my $infile = shift; #list of deletion coordinates
defined $infile or die "Usage: $0 <deletion_list>\n";

my $outfile = $infile . '_strandSeq';

# BAM files in output-column order: C1 C2 F1 F2 M1 M2.
# NOTE(review): C/F/M presumably stand for child/father/mother - confirm.
my @bam_files = qw(
	NA12878_hap1.bam NA12878_hap2.bam
	NA12891_hap1.bam NA12891_hap2.bam
	NA12892_hap1.bam NA12892_hap2.bam
);

# Return the number of alignment lines that `samtools view -q10` prints for
# $region in $bam.  samtools is started without a shell (list-form pipe open),
# so text taken from the input file can never be interpreted as shell syntax.
# A failing samtools run is reported on STDERR instead of silently looking
# like "no reads"; the lines counted up to that point are still returned.
sub count_reads {
	my ($bam, $region) = @_;

	open my $pipe, '-|', 'samtools', 'view', '-q10', $bam, $region
		or die "Cannot run samtools: $!\n";

	my $count = 0;
	while (defined(my $alignment = <$pipe>)) {
		$count++;
	}

	# close() on a pipe returns false when the child exited non-zero.
	close $pipe
		or warn "samtools view failed for $bam $region (wait status $?)\n";

	return $count;
}

open my $in,  '<', $infile  or die "Cannot open '$infile' for reading: $!\n";
open my $out, '>', $outfile or die "Cannot open '$outfile' for writing: $!\n";

# join() is parenthesised so that the newline is appended after the joined
# header rather than being joined in as an extra, empty trailing column.
print {$out} join("\t", qw/chr start end size reads_C1 reads_C2 reads_F1 reads_F2 reads_M1 reads_M2/), "\n";

# The first line of the input is a header and is discarded.
my $header = <$in>;

LINE:
while (my $line = <$in>) {
	$line =~ s/\r?\n\z//;    # strip LF as well as CRLF line endings

	my @fields = (split /,/, $line)[1, 2, 3];
	if (@fields != 3 or grep { !defined $_ or $_ eq '' } @fields) {
		warn "Skipping line $.: expected chromosome, start and end in fields 2-4\n";
		next LINE;
	}

	# Drop surrounding double quotes.  The shell used to remove them before
	# samtools saw the region; without a shell that has to happen here.
	s/\A"(.*)"\z/$1/ for @fields;

	my ($chr, $start, $end) = @fields;
	if ($start !~ /\A\d+\z/ or $end !~ /\A\d+\z/) {
		warn "Skipping line $.: start/end are not integers ($start, $end)\n";
		next LINE;
	}

	warn "Inspecting region $chr:$start-$end\n";

	# The BAM files are queried without a "chr" prefix, and chromosome 23 is
	# addressed as X.  String comparison is used because names such as "X"
	# or "Y" are not numbers.
	$chr =~ s/\Achr//;
	$chr = 'X' if $chr eq '23';

	my $region = "$chr:$start-$end";
	my @reads  = map { count_reads($_, $region) } @bam_files;

	# Raw read counts are reported; nothing is divided by the region size,
	# so zero-length regions (start == end) are handled without error.
	my $size = $end - $start;

	print {$out} join("\t", $chr, $start, $end, $size, @reads), "\n";
}

close $in;
# Buffered write errors (e.g. a full disk) only surface when closing.
close $out or die "Error while writing '$outfile': $!\n";
