#!/usr/bin/perl

use warnings;
use strict;

# Sequence contexts tallied around a reference C.  add_count() relies on
# this exact index layout:
#    0 -  3   dinucleotides: preceding base + C
#    4 -  7   dinucleotides: C + following base
#    8 - 23   trinucleotides with C in the centre
my @motifs_C = qw(
    AC  CC  GC  TC
    CA  CC  CG  CT
    ACA ACC ACG ACT
    CCA CCC CCG CCT
    GCA GCC GCG GCT
    TCA TCC TCG TCT
);

# Same layout as @motifs_C, for a reference T.
my @motifs_T = qw(
    AT  CT  GT  TT
    TA  TC  TG  TT
    ATA ATC ATG ATT
    CTA CTC CTG CTT
    GTA GTC GTG GTT
    TTA TTC TTG TTT
);

# One counter per motif for every substitution class (ref -> alt); each
# array runs parallel to the motif list of its reference base.
my @counts_CA = (0) x scalar(@motifs_C);
my @counts_CG = (0) x scalar(@motifs_C);
my @counts_CT = (0) x scalar(@motifs_C);
my @counts_TA = (0) x scalar(@motifs_T);
my @counts_TC = (0) x scalar(@motifs_T);
my @counts_TG = (0) x scalar(@motifs_T);

# Background occurrence of each motif in the scanned reference sequence,
# filled in by scanChromForMotifs().
my @null_C = (0) x scalar(@motifs_C);
my @null_T = (0) x scalar(@motifs_T);

# ---------------------------------------------------------------------------
# Main program.
#
# Reads a tab-separated SNV list (columns: chr, pos, ref, alt; pos is
# 1-based; lines starting with "#" are skipped), looks up the sequence
# context of every SNV in reference/chr<chr>.fa and tallies it per
# substitution class.  Purine references (A/G) are folded onto the opposite
# strand so that every SNV is counted as a C>x or T>x change.  For every
# chromosome encountered the background motif counts are accumulated too.
# Finally one table per substitution class is printed to STDOUT with the
# columns: motif, SNV count, background count.
# ---------------------------------------------------------------------------

# Default input; can be overridden by the first command-line argument.
my $file = "03/SNPs/i03-04_6.LM-SNV.txt";
if ($ARGV[0]) { $file = $ARGV[0]; }

# Three-argument open with a lexical handle, so that no character of the
# file name can be interpreted as an open mode.
my $snv_fh;
unless (open($snv_fh,'<',$file)) {
    print STDERR "Can't open file '",$file,"'.\n";
    exit 1;    # non-zero status so that callers can detect the failure
}

# Lookup tables: reference base -> motif list, "ref.alt" -> counter array.
my %motifs_for = ("C" => \@motifs_C, "T" => \@motifs_T);
my %counts_for = (
    "CA" => \@counts_CA, "CG" => \@counts_CG, "CT" => \@counts_CT,
    "TA" => \@counts_TA, "TC" => \@counts_TC, "TG" => \@counts_TG,
);

my $prev_chr = "";
my ($header,$seq) = ("","");
my %scanned;    # chromosomes whose background is already in @null_C/@null_T
while (my $line = <$snv_fh>) {
    if (substr($line,0,1) eq "#") { next; }

    # Strip the line terminator.  Without this the last column ($alt) keeps
    # its newline and never matches any of the substitution classes.
    $line =~ s/[\r\n]+\z//;

    #my ($chr,$pos,$id,$ref,$alt) = split(/\t/,$line);
    my ($chr,$pos,$ref,$alt) = split(/\t/,$line);
    unless (defined($alt) && $pos =~ /\A\s*[0-9]+\s*\z/) {
        # Blank or truncated line, or a header without a leading "#".
        print STDERR "Skipping malformed line ",$.,".\n";
        next;
    }
    $ref = uc($ref);
    $alt = uc($alt);

    if ($chr ne $prev_chr) {
        $prev_chr = $chr;
        #($header,$seq) = parseChromosome("/home/aa65/G1K_reference/chr".$chr.".fa");
        ($header,$seq) = parseChromosome("reference/chr".$chr.".fa");
        print STDERR $header,"\n";
        # Scan every chromosome only once, even when the input is not
        # sorted by chromosome; otherwise the background is double-counted.
        unless ($scanned{$chr}++) {
            scanChromForMotifs($seq);
        }
    }

    # Reference base at the SNV position.  Compared case-insensitively
    # because the reference may contain lower-case bases (the motif below
    # and scanChromForMotifs() upper-case them as well).
    my $c = "";
    if ($pos >= 1 && $pos <= length($seq)) {
        $c = uc(substr($seq,$pos - 1,1));
    }
    if ($c ne $ref) {
        print STDERR "Error in ref.\n";
        next;
    }

    # The first and the last base have no complete trinucleotide context
    # (and scanChromForMotifs() does not count them either).  A negative
    # substr offset would silently read from the end of the sequence.
    if ($pos < 2 || $pos >= length($seq)) { next; }

    my $motif = uc(substr($seq,$pos - 2,3));
    if ($ref eq "A" || $ref eq "G") {
        # Report purine references on the opposite strand.
        $motif = revcom($motif);
        $ref   = revcom($ref);
        $alt   = revcom($alt);
    }

    # Anything that is not a single-base C>x / T>x change is ignored.
    my $m_arr = $motifs_for{$ref};
    if (!$m_arr) { next; }
    my $c_arr = $counts_for{$ref.$alt};
    if (!$c_arr) { next; }
    add_count($motif,$m_arr,$c_arr);
}
close($snv_fh);

# One table per substitution class: motif, SNV count, background count.
foreach my $class (["C>A",\@motifs_C,\@counts_CA,\@null_C],
                   ["C>G",\@motifs_C,\@counts_CG,\@null_C],
                   ["C>T",\@motifs_C,\@counts_CT,\@null_C],
                   ["T>A",\@motifs_T,\@counts_TA,\@null_T],
                   ["T>C",\@motifs_T,\@counts_TC,\@null_T],
                   ["T>G",\@motifs_T,\@counts_TG,\@null_T]) {
    my ($label,@columns) = @$class;
    print $label,"\n";
    printarr(@columns);
}

exit;

# add_count($motif, $m_arr, $c_arr)
#
# Increments the counters in @$c_arr that belong to the trinucleotide
# $motif.  @$m_arr is the motif list and @$c_arr the parallel counter
# array; both are passed by reference and @$c_arr is modified in place.
#
# The motif list is searched in three index sections, and in each section
# at most one counter (the first match) is incremented:
#    0 -  3   first two characters of $motif
#    4 -  7   last two characters of $motif
#    8 - 23   the complete $motif
# Motifs without a match (e.g. containing "N") leave that section untouched.
sub add_count
{
    my ($motif,$m_arr,$c_arr) = @_;

    # [ first index, last index, string to look for ]
    my @sections = (
        [0, 3,  substr($motif,0,2)],
        [4, 7,  substr($motif,1,2)],
        [8, 23, $motif],
    );

    foreach my $section (@sections) {
        my ($from,$to,$key) = @$section;
        foreach my $idx ($from .. $to) {
            next if $key ne $m_arr->[$idx];
            $c_arr->[$idx]++;
            last;
        }
    }
}


# scanChromForMotifs($seq)
#
# Adds the background motif counts of the sequence $seq to the file-level
# arrays @null_C and @null_T.  Every position that has a neighbour on both
# sides is visited; the trinucleotide centred on it is upper-cased and,
# when its centre is a purine (A/G), replaced by its reverse complement so
# that the centre is always reported as C or T.  Trinucleotides whose centre
# is neither C nor T after that step are ignored.
#
# Progress (every 1,000,000 positions) and the number of counted positions
# are written to STDERR.
sub scanChromForMotifs
{
    my $seq = shift;
    my $stop = length($seq) - 1;    # index of the last base
    my $scanned = 0;

    foreach my $centre (1 .. $stop - 1) {
        print STDERR $centre,"\n" unless $centre % 1000000;

        my $tri = substr($seq,$centre - 1,3);
        $tri =~ tr/a-z/A-Z/;
        my $base = substr($tri,1,1);
        if ($base eq "A" || $base eq "G") {
            $tri  = revcom($tri);
            $base = substr($tri,1,1);
        }

        if ($base eq "C") {
            $scanned++;
            add_count($tri,\@motifs_C,\@null_C);
        }
        elsif ($base eq "T") {
            $scanned++;
            add_count($tri,\@motifs_T,\@null_T);
        }
    }
    print STDERR "Scaned ",$scanned,"\n";
}

# parseChromosome($file)
#
# Reads the first record of the FASTA file $file.
#
# Returns the list ($head, $seq): $head is the first header line without
# the leading ">", $seq is the concatenation of all sequence lines up to
# the second header (or the end of the file).  Line terminators, including
# the carriage returns of CRLF files, are removed so that string offsets
# into $seq correspond to sequence positions.
#
# When the file cannot be opened a message is written to STDERR and
# ("","") is returned; the caller decides how to continue.
sub parseChromosome
{
    my $file = shift;
    my ($head,$seq) = ("","");
    my $first = 1;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode and no global handle is clobbered.
    my $fasta_fh;
    unless (open($fasta_fh,'<',$file)) {
        print STDERR "Can't open file '",$file,"'.\n";
        return ($head,$seq);
    }

    while (my $line = <$fasta_fh>) {
        # chomp() alone would leave "\r" behind for CRLF files.
        $line =~ s/[\r\n]+\z//;
        if (substr($line,0,1) eq ">") {
            last unless $first;    # second record reached: stop reading
            $head = substr($line,1);
            $first = 0;
        } else {
            $seq .= $line;
        }
    }
    close($fasta_fh);
    return ($head,$seq);
}

# printarr($aref, ...)
#
# Prints the given arrays side by side as a tab-separated table on the
# currently selected output handle: row i holds element i of every array,
# in argument order.  Undefined arguments are skipped, and the number of
# rows is the length of the shortest array.
sub printarr
{
    my @columns = grep { defined($_) } @_;

    # Shortest column decides how many rows are printed.
    my $rows = 0;
    my $seen = 0;
    foreach my $col (@columns) {
        my $len = scalar(@$col);
        if (!$seen || $len < $rows) { $rows = $len; }
        $seen = 1;
    }

    foreach my $row (0 .. $rows - 1) {
        print join("\t", map { $_->[$row] } @columns), "\n";
    }
}

# revcom($seq)
#
# Returns the reverse complement of the nucleotide string $seq.  A/C/G/T
# are complemented with their case preserved; every other character is
# kept as it is (only its position is reversed).
sub revcom
{
    my $seq = shift;
    my $rc = reverse($seq);    # scalar assignment: reverses the characters
    $rc =~ tr/ACGTacgt/TGCAtgca/;
    return $rc;
}
