#!/usr/bin/perl

# see yeast_Clark.project/eland_sam.sampleFormat.txt and Solexa/sam_bam.formatInfo.SAM1.pdf
# program to extract chrN-specific paired reads
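# command line syntax (chr paired_end extract) depends on the hard-coded $regSelect:
#   $regSelect == 0 (current setting): cpe.pl chrN leftLimit rightLimit   (input is read from ./samLNS)
#   $regSelect == 1:                   cpe.pl infile chrN strain_designation   (position limits are prompted for on STDIN)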

# Number of command-line arguments; also echoed in the input-open error message.
$argc = scalar(@ARGV);

# Run mode, fixed at edit time:
#   0 - batch:       input is ./samLNS; arguments are chrN leftLimit rightLimit
#   1 - interactive: arguments are infile chrN strain_designation; the position
#                    limits are read from STDIN instead of the command line
$regSelect = 0;

if ($regSelect == 0) {
    # Without all three arguments the chromosome name and/or window limits
    # would be undefined and the run would silently select nothing.
    if ($argc < 3) {
        die "usage: $0 chrN leftLimit rightLimit\n";
    }

    # $finp = "./test.sam";
    # $finp = $ARGV[0] . "_s1.eland.txt";
    $finp = "./samLNS";
    ($chrN, $lpLim, $rpLim) = @ARGV[0 .. 2];
    chomp $rpLim;
}
elsif ($regSelect == 1) {
    # The strain designation is stored but not used anywhere else in this
    # script, so only the input file and chromosome name are mandatory.
    if ($argc < 2) {
        die "usage: $0 infile chrN strain_designation\n";
    }

    ($finp, $chrN, $strn) = @ARGV[0 .. 2];
    chomp $strn;
}

# Template-length filter: when $limitRange is 1, only pairs whose abs(TLEN)
# lies within [$minLim, $maxLim] are accepted; when 0 the bounds are ignored.
($limitRange, $minLim, $maxLim) = (0, 145, 155);

# Print a prompt on STDOUT and return the line read from STDIN with its
# trailing newline removed (undef if STDIN is exhausted).
sub ask_stdin {
    my ($question) = @_;
    print $question;
    my $reply = <STDIN>;
    chomp $reply;
    return $reply;
}

# Interactive mode only: the window limits and the optional length filter
# come from the operator rather than from the command line.
if ($regSelect == 1) {
    $lpLim = ask_stdin("\nleft chr position limit? ");
    $rpLim = ask_stdin("\nright chr position limit? ");

    $yn = lc(ask_stdin("\nlimit range of read lengths accepted (y/n)? "));
    if ($yn eq 'y') {
        $limitRange = 1;
        $minLim = ask_stdin("\nminimum length? ");
        $maxLim = ask_stdin("\nmaximum length? ");
    }
}

# Chromosome name that each record's RNAME is compared against.
$chrM = $chrN;

# Open the SAM input. Abort on failure: carrying on with an unopened handle
# would read nothing and leave an empty result that looks like a valid run.
# The three-argument form keeps special characters in $finp from being
# interpreted as an open mode.
open(FINP, '<', $finp)
    or die "Unable to open $finp in file; argc $argc: $!\n";

# Batch mode writes to cpsOut.<chrN>; interactive mode writes to cps.out.txt.
if ($regSelect == 0) {
    $fpp = "cpsOut." . $chrN;
    open(FPP, '>', $fpp)
        or die "Unable to open FPP out file $fpp: $!\n";
}
elsif ($regSelect == 1) {
    open(FPP, '>', "cps.out.txt")
        or die "Unable to open FPP out file cps.out.txt: $!\n";
}


# SAM columns consumed by the loop below (index into the tab-split line):
#   QNAME -- field 0  -> $qn    / $prev_qn
#   RNAME -- field 2  -> $rname
#   POS   -- field 3  -> $pos   / $prev_pos
#   RNEXT -- field 6  -> $rnext / $prev_rnext - note should be "="
#   TLEN  -- field 8  -> $tlen  / $prev_tlen
# PNEXT (field 7), SEQ (field 9) and QUAL (field 10) are not used.


# Each non-header record is compared with the non-header record immediately
# before it. Two consecutive records form a pair when they share a QNAME, the
# current record's RNAME equals $chrM, both have RNEXT "=", and (if the length
# filter is on) abs(TLEN) of the current record is within [$minLim, $maxLim].
# For a pair, the POS/TLEN of whichever record has the larger TLEN is stored in
# %dh under the read name and the name is appended to @qna; records with equal
# TLEN store nothing. The first five stored entries are echoed to STDOUT.

my $echoed    = 0;    # how many stored entries have been echoed so far
my $have_prev = 0;    # false until the first non-header record has been seen
my ($prev_qn, $prev_pos, $prev_rnext, $prev_tlen);

while (my $line = <FINP>) {
    chomp $line;

    # Lines beginning with "@" are headers, not alignment records.
    next if substr($line, 0, 1) eq '@';

    my ($qn, $rname, $pos, $rnext, $tlen) = (split /\t/, $line)[0, 2, 3, 6, 8];

    if (   $have_prev
        && $rname eq $chrM
        && $qn eq $prev_qn
        && $rnext eq "="
        && $prev_rnext eq "="
        && ($limitRange == 0 || (abs($tlen) >= $minLim && abs($tlen) <= $maxLim)))
    {
        # Keep the record with the larger TLEN; a tie keeps neither.
        my @kept;
        if ($tlen > $prev_tlen) {
            @kept = ($chrN, $pos, $tlen);
        }
        elsif ($tlen < $prev_tlen) {
            @kept = ($chrN, $prev_pos, $prev_tlen);
        }

        if (@kept) {
            # $qn and $prev_qn are the same string here, so either may be the key.
            $dh{$qn} = [ @kept ];
            push @qna, $qn;

            if ($echoed < 5) {
                print join("\t", @kept), "\n";
                $echoed++;
            }
        }
    }

    # The current record becomes the "previous" one for the next iteration.
    ($prev_qn, $prev_pos, $prev_rnext, $prev_tlen) = ($qn, $pos, $rnext, $tlen);
    $have_prev = 1;
}

# For every stored pair whose position lies strictly between $lpLim and
# $rpLim, write "<left>\t<right>" to FPP, where both values are offsets
# relative to $lpLim: left = position - $lpLim, right = position + TLEN - $lpLim.
for my $name (@qna) {
    my ($start, $span) = @{ $dh{$name} }[1, 2];
    next unless $start > $lpLim && $start < $rpLim;

    my $lpos = $start - $lpLim;
    my $rpos = $start + $span - $lpLim;
    print FPP "$lpos\t$rpos\n";
}

close(FINP);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so a failed close means the output file cannot be trusted.
close(FPP) or die "Unable to close FPP out file: $!\n";
exit;
