#!/usr/bin/perl

use strict;
use warnings;

# Read a FASTA-style genome from STDIN into %genome, keyed by chromosome name.
#
# A line containing a chromosome identifier ("chr" followed by one or more of
# the letters X, I, V, M -- e.g. chrIV or chrM) starts a new record and is
# echoed to STDOUT. Every other line is appended to the sequence of the
# chromosome named by the most recent such line.
my %genome;
my $chr = "";
my $warned_orphan = 0;
while ( my $line = <STDIN> )
{
	# Strip the line terminator, including the CR of CRLF input: a "\r"
	# stored in the sequence would shift every later coordinate by one.
	$line =~ s/\r?\n?\z//;

	if ( $line =~ /(chr[XIVM]+)/ )
	{
		my $name = $1;
		print $line . "\n";
		$chr = $name;
		next;
	}

	# Sequence seen before any chromosome header has no name to be filed
	# under; skip it rather than collecting it under the empty key, which
	# would later be written out as a track with an empty chrom= field.
	if ( $chr eq "" )
	{
		if ( $line =~ /\S/ && !$warned_orphan )
		{
			warn "Skipping sequence data found before the first chromosome header\n";
			$warned_orphan = 1;
		}
		next;
	}

	$genome{$chr} .= $line;
}

# Output files: one wiggle track per strand.
my $plus_file  = "initial_plus_CPDU_Cdipy.wig";
my $minus_file = "initial_minus_CPDU_Cdipy.wig";

# Three-argument open with lexical handles; abort immediately if an output
# file cannot be created instead of printing to an unopened handle.
open ( my $plus_fh, '>', $plus_file )
	or die "Cannot open $plus_file for writing: $!\n";
open ( my $minus_fh, '>', $minus_file )
	or die "Cannot open $minus_file for writing: $!\n";

# Find cytosines that are part of a dipyrimidine.
#
# For every chromosome in %genome (in sorted name order) a variableStep
# header is written to both files, then each interior position is examined
# together with its two neighbours:
#   * a "C" with a C or T next to it is written to the plus-strand file;
#   * a "G" with a G or A next to it (the complement of the case above) is
#     written to the minus-strand file.
# Each reported position is written 1-based with the value 0.0.
# The comparison is case-sensitive, so lower-case bases are not reported.
# $cdipycount accumulates the number of positions written to either file.
my $cdipycount = 0;
foreach my $chrom ( sort keys %genome )
{
	print "Processing $chrom\n";
	print {$plus_fh} "variableStep chrom=$chrom span=1\n";
	print {$minus_fh} "variableStep chrom=$chrom span=1\n";

	my $seq = $genome{$chrom};

	# Index of the last base, evaluated once instead of on every iteration.
	my $last_index = length($seq) - 1;

	# Only positions with a neighbour on both sides are examined, so the
	# first and the last base of each chromosome are never reported. A
	# dinucleotide-based check for those two positions (C[CT] / G[GA] at
	# the start, [CT]C / [GA]G at the end) is not performed.
	for ( my $i = 1; $i < $last_index; $i++ )
	{
		my $nucseq = substr( $seq, $i, 1 );

		# Only C and G can produce output; skip everything else early.
		next if $nucseq ne "C" && $nucseq ne "G";

		my $start  = $i + 1;    # make 1-based for wig
		my $trinuc = substr( $seq, $i - 1, 3 );

		# The centre base is already known, so a match of two adjacent
		# pyrimidines (or purines) anywhere in the window means at least
		# one neighbour pairs up with the centre base.
		if ( $nucseq eq "C" && $trinuc =~ /[CT][CT]/ )
		{
			print {$plus_fh} "$start\t0.0\n";
			$cdipycount++;
		}
		elsif ( $nucseq eq "G" && $trinuc =~ /[GA][GA]/ )
		{
			print {$minus_fh} "$start\t0.0\n";
			$cdipycount++;
		}
	}
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close ( $plus_fh )  or die "Error closing $plus_file: $!\n";
close ( $minus_fh ) or die "Error closing $minus_file: $!\n";
print STDERR "Cdipy: $cdipycount\n";
