#!/usr/bin/perl
use strict;
use warnings;
use List::Util qw/min max/;
# Require exactly two command-line arguments (shown in the usage line as
# <W.soap> and <C.soap>); otherwise print the usage to STDERR and exit 1.
unless (@ARGV == 2) {
	warn "Usage: $0 <W.soap> <C.soap>\n";
	exit 1;
}
# Abort before any processing if either input path does not exist.
for my $input (@ARGV[0, 1]) {
	die "Input not Exists!\n" unless -e $input;
}
# %fa maps a sequence name (first whitespace-separated column of the size
# file) to the value in the second column, which the pairing loop below
# uses as the chromosome length when mirroring C-strand coordinates.
my %fa;
{
	my $size_file = "./mm9.fa.size";   #### user-defined reference genome size file

	# Fail loudly: without this table every C-strand coordinate would be
	# computed from an undefined length.
	open my $size_fh, '<', $size_file
		or die "Cannot open genome size file '$size_file': $!\n";
	while (my $line = <$size_fh>)
	{
		chomp $line;
		my @sp = split ' ', $line;
		next if @sp < 2;   # ignore blank or incomplete lines
		$fa{$sp[0]} = $sp[1];
	}
	close $size_fh;
}


my %count;       # NOTE(review): not referenced anywhere in the code below
my $flt_count;   # pairs rejected by the orientation filter; tallied here but never printed

# Read every input file two lines at a time (mate /1 followed by mate /2)
# and print one tab-separated fragment record per accepted pair to STDOUT:
#   chromosome, start, end, length, 0, strand, NNNN, NNNN
#
# Columns read from each tab-separated SOAP line (0-based index):
#   0  read name (with a "#<tag>/1" or "#<tag>/2" suffix)
#   1  read sequence (only its length is used)
#   3  hit count (pairs whose two counts sum to more than 2 are skipped)
#   6  alignment strand, '+' or '-'
#   7  chromosome name
#   8  alignment position (treated as 1-based below)
#  -1  "W" or "C" tag in the last column
for my $file (@ARGV)
{
	# Three-argument open on a lexical handle; an unreadable file is fatal
	# instead of silently yielding no records.
	open my $in, '<', $file or die "Cannot open '$file': $!\n";
	while (defined(my $L1 = <$in>))
	{
		chomp $L1;

		# A first mate without a second one means the file is truncated or
		# not in the expected two-lines-per-pair layout.
		my $L2 = <$in>;
		unless (defined $L2)
		{
			warn "This SOAP is not paired-end format (odd number of lines in $file), please check it!\n";
			exit 1;
		}
		chomp $L2;

		my @F1 = split /\t/, $L1;
		my @F2 = split /\t/, $L2;

		# Drop the "#<tag>/1" and "#<tag>/2" suffixes so both mates carry the same name.
		$F1[0] =~ s/#\S+\/1$//;
		$F2[0] =~ s/#\S+\/2$//;

		if ($F1[0] ne $F2[0])
		{
			warn "This SOAP is not paired-end format ($F1[0] vs $F2[0]), please check it!\n";
			exit 1;   # non-zero status so callers can detect the failure
		}

		# Skip chromosome names containing chrL, chrE or chrH (non human/mouse DNA).
		# NOTE(review): presumably spike-in or contaminant references - confirm.
		next if $F1[7] =~ /chr[LEH]/;
		next if $F1[3] + $F2[3] > 2;   # skip non-unique mapping

		# Orientation filter: drop pairs whose '+' mate lies downstream of its '-' mate.
		if ($F1[6] eq '+' && $F2[6] eq '-' && $F1[8] > $F2[8])
		{
			$flt_count++;
			next;
		}
		if ($F1[6] eq '-' && $F2[6] eq '+' && $F1[8] < $F2[8])
		{
			$flt_count++;
			next;
		}

		my ($s1, $e1, $s2, $e2, $strand);
		if ($F1[-1] eq "C" && $F2[-1] eq "C")
		{
			my $chr_len = $fa{$F1[7]};
			unless (defined $chr_len)
			{
				# Without a length the mirrored coordinates would be meaningless.
				warn "No reference length for '$F1[7]' in the genome size file, pair skipped\n";
				next;
			}
			# Mirror each mate's interval through the chromosome length.
			# NOTE(review): presumably C reads were aligned to the
			# reverse-complemented reference - confirm.
			$e1 = $chr_len - $F1[8] + 1;
			$s1 = $e1 - length($F1[1]);
			$e2 = $chr_len - $F2[8] + 1;
			$s2 = $e2 - length($F2[1]);
			$strand = '-';
		}
		elsif ($F1[-1] eq "W" && $F2[-1] eq "W")
		{
			# Position minus one as start, start plus read length as end.
			$s1 = $F1[8] - 1;
			$e1 = $s1 + length($F1[1]);
			$s2 = $F2[8] - 1;
			$e2 = $s2 + length($F2[1]);
			$strand = '+';
		}
		else
		{
			# Mates disagree on the W/C tag (or carry another value): report and move on.
			warn "The Bsaligement file maybe something wrong, please check\n";
			next;
		}

		# The fragment spans from the leftmost start to the rightmost end of the two mates.
		my $start = min($s1, $s2);
		my $end   = max($e1, $e2);
		my $len   = $end - $start;
		print "$F1[7]\t$start\t$end\t$len\t0\t$strand\tNNNN\tNNNN\n";
	}
	close $in;
}
