#!/usr/bin/perl -w
# A Perl script that counts how often each nucleotide (A/T/G/C) occurs at every position of a window around the given genomic coordinates
use strict;
use Getopt::Long;
use warnings; 
use Bio::SeqIO;
# Command-line options; the values are filled in by GetOptions below.
my $file1;           # --file1: tab-separated coordinate file (chromosome, position)
my $file2;           # --file2: FASTA file with the genome sequence
my $output = "";     # -o:      path of the output table
my $num1   = 0;      # --size1: number of bases counted to the left of each coordinate
my $num2   = 0;      # --size2: number of bases counted to the right of each coordinate

# Called without any argument: print the help text and exit with a non-zero status.
if (!@ARGV)
{
    print "$0:  A perl function to calculate the number of A/T sequences of each nucleotide for given coordinates\n";
    print "Author:  Shaofang Li, UC Riverside\n";
    print "Contact:  Shaofang Li <sfli001\@gmail.com>\n";
    print "\nUsage: $0 [options]\n";
    print "\tfile1: input file contains the coordinate of the sequences\n";
    print "\tfile2: the fasta file of the genome sequence\n";
    print "\tsize1: the length needs to be count on the left of the coordinate\n";
    print "\tsize2: the length needs to be count on the right of the coordinate\n";
    print "\to: the output file\n";

    print "\nExample:\n";
    print "$0 --file1 DNA_coor --file2 arabidopsis_whole_genome.fasta --size1 1000 --size2 200 -o output\n\n";
    print ("==========================| $0  end  |==================================\n\n");
    exit 1;
}

# GetOptions returns false on unknown options or badly typed values; stop
# instead of continuing with half-parsed settings.
GetOptions(
    "file1=s" => \$file1,
    "file2=s" => \$file2,
    "size1=i" => \$num1,
    "size2=i" => \$num2,
    "o=s"     => \$output,
) or die "Error in command line arguments; run $0 without arguments for usage\n";

# The three paths are required: without them the later open/SeqIO calls
# would be handed undef or an empty file name.
die "Missing required option --file1 (coordinate file)\n" unless defined $file1;
die "Missing required option --file2 (genome fasta file)\n" unless defined $file2;
die "Missing required option -o (output file)\n" unless length $output;
# Load the genome: for every FASTA record, store each base under its 1-based
# position so that later lookups can use the coordinates directly.
# NOTE: this keeps one hash entry per base, which is memory-hungry for large
# genomes.
# NOTE(review): coordinates in the input file are assumed to be 1-based
# (the loop below runs 1..length) -- confirm against the coordinate source.
my $genome = Bio::SeqIO->new(-file => $file2, -format => "fasta");

my %hash;    # $hash{record id}{1-based position} = single base
while (my $seq = $genome->next_seq)
{
    my $Chrn   = $seq->id();
    my $chrseq = $seq->seq();
    for my $count (1 .. length($chrseq))
    {
        # substr() offsets are 0-based, so the base at position $count sits
        # at offset $count - 1. (Using $count directly shifted every base by
        # one and stored an empty string for the last position.)
        $hash{$Chrn}{$count} = substr $chrseq, $count - 1, 1;
    }
}

# Per-position tallies. The window spans the left flank, the coordinate itself
# and the right flank; every window offset (1..$num) starts with a zero count
# for each of the four bases.
my %result;
my $num = $num1 + $num2 + 1;
foreach my $offset (1 .. $num)
{
    $result{$offset}{$_} = 0 for qw(A T G C);
}


# Tally the base found at every window offset around each coordinate.
# Each line of the coordinate file is tab-separated: the first column is the
# sequence id (looked up in %hash), the second column is the coordinate.
# Window offset $count (1..$num) maps to position coordinate - $num1 + $count - 1.
my %is_counted_base = map { $_ => 1 } qw(A T G C);

open(my $coord_fh, '<', $file1)
    or die "Cannot open coordinate file '$file1': $!\n";
while (my $line = <$coord_fh>)
{
    chomp $line;
    my @fields = split("\t", $line);

    # Skip blank or malformed lines instead of silently treating a missing or
    # non-numeric coordinate as 0.
    next unless defined $fields[1];
    my ($coord) = $fields[1] =~ /^\s*(-?\d+)\s*$/;
    next unless defined $coord;

    # Single-level lookup: does not autovivify an entry for unknown ids.
    my $base_at = $hash{ $fields[0] };
    next unless defined $base_at;

    for my $count (1 .. $num)
    {
        my $pos  = $coord - 1 - $num1 + $count;
        my $base = $base_at->{$pos};

        # Positions outside the sequence have no entry; bases other than
        # upper-case A/T/G/C are not counted.
        next unless defined $base && $is_counted_base{$base};
        $result{$count}{$base} += 1;
    }
}
close($coord_fh);

# Write one line per window offset (in numeric order): the offset, then for
# each base in string-sorted order the base letter and its count. All fields
# are tab-separated and every line keeps a trailing tab before the newline.
open(my $out_fh, '>', $output)
    or die "Cannot open output file '$output' for writing: $!\n";
foreach my $k1 (sort { $a <=> $b } keys %result)
{
    print {$out_fh} $k1, "\t";
    foreach my $k2 (sort keys %{ $result{$k1} })
    {
        print {$out_fh} $k2, "\t", $result{$k1}{$k2}, "\t";
    }
    print {$out_fh} "\n";
}
# Buffered write errors (e.g. disk full) only surface at close, so check it.
close($out_fh)
    or die "Cannot close output file '$output': $!\n";

exit;
