#!/usr/bin/perl

#Input is a BAM from BWA-SW, sorted by read id and streamed to STDIN as SAM
#First arg is the minimum fragment size allowed
#Second arg is the maximum fragment size allowed

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

# Converts properly paired SAM records (name-sorted, read from STDIN) into
# one tab-separated line per fragment on STDOUT:
#
#   chr  start  end  read_name  flag_of_second_record  strand
#
# Usage: <script> <min_frag_size> <max_frag_size> < reads.sam
#
# Only records whose FLAG is 99, 83, 147 or 163 are considered. Two accepted
# records with the same read name that follow each other form a pair; the
# fragment is kept when abs(TLEN of the first record) lies within
# [min_frag_size, max_frag_size].
#
# NOTE(review): start is the SAM POS value unchanged and end is
# start + TLEN - 1, exactly as the script always emitted them. BED is
# conventionally 0-based/half-open -- confirm what downstream tools expect
# before changing this.

# Builds the output line for one read pair.
#
# Params:
#   $first, $second - array refs holding the tab-split SAM fields of the
#                     first and second accepted record of the pair
#   $min_frag_size, $max_frag_size - inclusive bounds on abs(TLEN)
# Returns: the output line (newline-terminated), or nothing when the pair is
#          rejected.
sub _bed_line_for_pair {
    my ($first, $second, $min_frag_size, $max_frag_size) = @_;

    my $insert_length      = $first->[8];
    my $mate_insert_length = $second->[8];

    # A TLEN of 0 carries no fragment information. Reject it explicitly
    # instead of relying on min_frag_size being greater than 0.
    return if $insert_length == 0;

    # When the first record has a negative TLEN the fragment is described by
    # the mate, so the mate must have a positive TLEN. Both being negative
    # indicates colliding mates; previously such pairs slipped through when
    # min_frag_size was 0 and produced a line with end < start.
    return if $insert_length < 0 && $mate_insert_length <= 0;

    my $fragment_size = abs $insert_length;
    return if $fragment_size < $min_frag_size;
    return if $fragment_size > $max_frag_size;

    # A negative TLEN on the first record means it is on the reverse strand;
    # the fragment then starts at the mate's position.
    my ($strand, $start, $end);
    if ($insert_length > 0) {
        $strand = '+';
        $start  = $first->[3];
        $end    = $start + ($insert_length - 1);
    }
    else {
        $strand = '-';
        $start  = $second->[3];
        $end    = $start + ($mate_insert_length - 1);
    }

    return join("\t", $first->[2], $start, $end, $first->[0], $second->[1], $strand) . "\n";
}

# Reads SAM from STDIN and prints one line per accepted fragment.
sub main {
    die "Usage: $0 <min_frag_size> <max_frag_size> < name_sorted.sam\n"
        if @ARGV < 2;

    my ($min_frag_size, $max_frag_size) = @ARGV;
    for my $size ($min_frag_size, $max_frag_size) {
        die "Fragment size '$size' is not a number\n"
            unless looks_like_number($size);
    }

    # acceptable read flags: 99 83 147 163
    my %is_accepted_flag = map { $_ => 1 } (99, 83, 147, 163);

    # Fields of the first accepted record seen for the current read name.
    my @pending;

    LINE:
    while (my $line = <STDIN>) {
        chomp $line;

        # SAM header lines start with '@' and are not alignments.
        next LINE if $line =~ /\A\@/;

        my @fields = split /\t/, $line;

        # Need at least QNAME .. TLEN (9 columns) to do anything useful.
        next LINE if @fields < 9;

        my $read_flag = $fields[1];
        next LINE unless $read_flag =~ /\A\d+\z/ && $is_accepted_flag{ $read_flag + 0 };

        if (@pending && $fields[0] eq $pending[0]) {
            # Second record of the same read name: input is name-sorted, so
            # this is the mate of the pending record.
            my $bed_line = _bed_line_for_pair(\@pending, \@fields, $min_frag_size, $max_frag_size);
            print $bed_line if defined $bed_line;
        }
        else {
            # New read name: remember it and wait for its mate.
            @pending = @fields;
        }
    }

    return;
}

# Run only when executed as a script, so the helper can be loaded by tests.
main() unless caller;

1;

