use strict;
use warnings;


# Exactly one command-line argument (the input file) is required;
# anything else prints the usage line to STDERR and exits with status 1.
unless (scalar(@ARGV) == 1)
{
	warn "Usage: <*.bsp> \n";
	exit 1;
}

# Per-position tallies. Keys are reference-string index minus 2 (used below as
# the index into the read); each entry holds {C} and {T} (read calls seen at
# that position) and {tot} (reference cytosines examined at that position).
my %h1_CH;	# first record of a pair, reference C not followed by G
my %h2_CH;	# second record of a pair, reference C not followed by G
my %h1_CG;	# first record of a pair, reference C followed by G
my %h2_CG;	# second record of a pair, reference C followed by G

# {tot}: record pairs read; {CH_n}/{CG_n}: pairs whose second record
# collected at least n flagged sites (n = 1..3), see _tally_read.
my %stat;
# Record pairs read.
# NOTE(review): incremented in lock-step with $stat{tot}, so the two are
# always equal -- confirm whether $stat{tot} was meant to count only
# properly paired records.
my $tot;

# Accepted strand-tag combinations (column 7 of record 1, a tab, column 7 of
# record 2) mapped to whether sequences must be reverse-complemented first.
my %needs_revcom = (
	"++\t+-" => 0,	# Watson strand
	"-+\t--" => 1,	# Crick strand
);

# Tally the read's base call at every reference 'C' reported by findC($ref, 2).
#
# Arguments:
#   $read_seq  - read sequence
#   $ref_seq   - reference sequence; reference index i is compared with read
#                index i-2
#   $ch_counts - hash ref updated for reference C not followed by G
#   $cg_counts - hash ref updated for reference C followed by G
#
# Returns a two-element list counting sites whose reference index is at or
# beyond (read length - 20):
#   (CH sites where the read shows 'C', CG sites where the read shows 'T').
sub _tally_read
{
	my ($read_seq, $ref_seq, $ch_counts, $cg_counts) = @_;

	my @read_bases = split //, $read_seq;
	my $read_len   = length $read_seq;
	my $late_ch_c  = 0;
	my $late_cg_t  = 0;

	for my $ref_idx (findC($ref_seq, 2))
	{
		my $read_idx = $ref_idx - 2;
		# A reference longer than the read still counts towards {tot};
		# the missing call is treated as "neither C nor T".
		my $call  = $read_idx < @read_bases ? $read_bases[$read_idx] : '';
		my $is_cg = substr($ref_seq, $ref_idx + 1, 1) eq 'G';
		my $late  = $ref_idx >= $read_len - 20;

		my $counts = $is_cg ? $cg_counts : $ch_counts;
		$counts->{$read_idx}{C}++ if $call eq 'C';
		$counts->{$read_idx}{T}++ if $call eq 'T';
		$counts->{$read_idx}{tot}++;

		if ($is_cg)
		{
			$late_cg_t++ if $call eq 'T' && $late;
		}
		else
		{
			$late_ch_c++ if $call eq 'C' && $late;
		}
	}
	return ($late_ch_c, $late_cg_t);
}

# Records are consumed two lines at a time. Example pair (tab-separated;
# columns used below: 1 = read name, 2 = read sequence, 7 = strand tag,
# 8 = numeric value that must be > 0, 9 = reference sequence):
#M01624:132:000000000-C66CF:1:2107:17969:1048	AATACTTAATATCAAGTAACGATACAAAACCCTTTTATTTAAAATAACAA	GGGGFEGGEGGFC,<,@GGGGGGGGGGFGGGFCGFGGGGGGGGGGCCCCC	UM	chr2	67010471	-+	158	aaGATGCTTAGTATCAATTGACGGTGCAGGGCCCTTTTATTTGAAGTAGCAGaa	1	0:1:0:0:0
#M01624:132:000000000-C66CF:1:2107:17969:1048	NAACGCCATAAAAAAAACTACTAACTCTATAAATTCAAACAATAAAAAAT	#8ACCFFEFFGGGGGCGGFGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGG	UM	chr2	67010363	--	158	aaCAACGCCATAAAGAAAACTGCTGACTCTATAAGTTCAAGCAATAGGAGATtt	0	1:0:0:0:0
while (defined(my $ln1 = <>))
{
	chomp $ln1;
	my @F1 = split /\t/, $ln1;

	$stat{tot}++;
	$tot++;

	my $ln2 = <>;
	unless (defined $ln2)
	{
		# Odd number of lines: stop here. Calling <> again after it has
		# reported end of input would start reading STDIN.
		warn "$. not paired (input ended before the mate record); $ln1\n";
		last;
	}
	chomp $ln2;
	my @F2 = split /\t/, $ln2;

	my $strand_key = join "\t", map { defined $_ ? $_ : '' } $F1[6], $F2[6];
	my $is_pair = exists $needs_revcom{$strand_key}
		&& @F1 >= 9 && @F2 >= 9
		&& $F1[0] eq $F2[0]
		&& $F1[7] > 0;

	if ($is_pair)
	{
		my ($read1, $ref1) = (uc $F1[1], uc $F1[8]);
		my ($read2, $ref2) = (uc $F2[1], uc $F2[8]);

		if ($needs_revcom{$strand_key})
		{
			($read1, $ref1, $read2, $ref2) =
				map { revcom($_) } ($read1, $ref1, $read2, $ref2);
		}

		# Only the second record's flagged-site counts feed %stat.
		_tally_read($read1, $ref1, \%h1_CH, \%h1_CG);
		my ($flag_CH, $flag_CG) = _tally_read($read2, $ref2, \%h2_CH, \%h2_CG);

		for my $min_sites (1 .. 3)
		{
			$stat{"CH_$min_sites"}++ if $flag_CH >= $min_sites;
			$stat{"CG_$min_sites"}++ if $flag_CG >= $min_sites;
		}
	}
	else
	{
		warn "$. not paired; $ln1\n";
		warn "$. not paired; $ln2\n";
	}
}

# Print one tab-separated row per tallied position, in the order
# CH/R1, CH/R2, CG/R1, CG/R2:
#   context, read label, position, C count, total count, percent C.
# A position where no C was ever seen has no {C} entry, so the count is
# defaulted to 0 before dividing (avoids an "uninitialized value" warning).
for my $table (
	['CH', 'R1', \%h1_CH],
	['CH', 'R2', \%h2_CH],
	['CG', 'R1', \%h1_CG],
	['CG', 'R2', \%h2_CG],
)
{
	my ($context, $read_label, $counts) = @$table;
	for my $pos (sort {$a<=>$b} keys %$counts)
	{
		my $c_calls = $counts->{$pos}{C}   || 0;
		my $total   = $counts->{$pos}{tot} || 0;
		my $pct_c   = $total ? $c_calls/$total*100 : 0;
		print join("\t", $context, $read_label, $pos, $c_calls, $total, $pct_c), "\n";
	}
}

# Format count/total as a percentage with two decimals. A missing count is
# treated as 0 and a missing or zero total yields "0.00" instead of dying
# with "Illegal division by zero" (e.g. on empty input).
my $percent_of = sub
{
	my ($count, $total) = @_;
	return sprintf "%.2f", $total ? ($count || 0)/$total*100 : 0;
};

# Percentage of record pairs with at least 1, 2 or 3 flagged CH sites.
my $r_CH1= $percent_of->($stat{CH_1}, $stat{tot});
my $r_CH2= $percent_of->($stat{CH_2}, $stat{tot});
my $r_CH3= $percent_of->($stat{CH_3}, $stat{tot});

# Percentage of record pairs with at least 1, 2 or 3 flagged CG sites.
my $r_CG1= $percent_of->($stat{CG_1}, $stat{tot});
my $r_CG2= $percent_of->($stat{CG_2}, $stat{tot});
my $r_CG3= $percent_of->($stat{CG_3}, $stat{tot});

my $r=$percent_of->($stat{tot}, $tot);

# CH-to-CG ratio per threshold; 'NA' when the CG percentage is zero, where
# the plain division would abort the script.
# NOTE(review): none of these summary values is printed by the code visible
# in this file -- confirm whether an output statement is missing.
my $fold1=$r_CG1 > 0 ? $r_CH1/$r_CG1 : 'NA';
my $fold2=$r_CG2 > 0 ? $r_CH2/$r_CG2 : 'NA';
my $fold3=$r_CG3 > 0 ? $r_CH3/$r_CG3 : 'NA';

# Return the reverse complement of a nucleotide string.
# A/C/G/T are complemented in either case (case is kept); every other
# character is left as it is, only its position is reversed.
sub revcom
{
	my ($seq) = @_;
	my $flipped = scalar reverse $seq;
	$flipped =~ tr/ACGTacgt/TGCAtgca/;
	return $flipped;
}

# List the 0-based positions of every upper-case 'C' in $seq, scanning from
# $offset and stopping at the first hit that falls within the last two
# characters of the string (positions >= length - 2 are never reported).
# Returns the positions in ascending order; an empty list when none qualify.
sub findC
{
	my ($seq, $offset) = @_;
	my $limit = length($seq) - 2;
	my @hits;

	for (my $at = index($seq, 'C', $offset);
	     $at != -1 && $at < $limit;
	     $at = index($seq, 'C', $at + 1))
	{
		push @hits, $at;
	}
	return @hits;
}
