#!/usr/bin/perl

use strict;
use warnings;

# Count how often each single character (nucleotide) occurs in the sequence
# lines of a FASTA file. Results are left in two file-level variables that the
# report section below reads:
#   %mononucs - character => number of occurrences
#   $totalnuc - sum of all counts
# Records named "pUC19" and records whose name contains "_" are not counted.
# Characters are tallied exactly as they appear, so e.g. "a" and "A" are
# separate keys.
print STDERR "Opening FASTA file\n";
my $fa_file = "saccer3_genome.fa";
# Three-argument open with a lexical handle: the file name can never be
# mistaken for an open mode, and the handle is not a package global.
open( my $fasta_fh, '<', $fa_file )
	or die "Couldn't open fasta file $fa_file: $!\n";
print STDERR "Finished opening FASTA file\n";
my $totalnuc = 0;
my %mononucs;
my $chr = "";
my $flag = 0;	# 1 while reading a record that must be skipped, 0 otherwise
while ( my $line = <$fasta_fh> )
{
	# Strip the line terminator together with any trailing whitespace.
	# A plain chomp would leave the "\r" of CRLF files in place, and it
	# would then be tallied as if it were a nucleotide.
	$line =~ s/\s+\z//;

	if ( $line =~ /^>([a-zA-Z0-9_]+)/ )
	{
		# Header line: remember the record name and decide whether the
		# sequence lines that follow are counted.
		$chr = $1;
		if ( $chr eq "pUC19" )
		{
			print STDERR "$chr sequence was skipped\n";
			$flag = 1;
		}
		elsif ( $chr =~ /_/ )
		{
			print STDERR "Skipping chromosome $chr\n";
			$flag = 1;
		}
		else
		{
			print STDERR "Starting chromosome $chr\n";
			$flag = 0;
		}
		next;	# header of fasta
	}

	next if $flag == 1;

	# Tally this line right away. The totals are the same as buffering the
	# whole record and counting it at the next header / end of file, but no
	# chromosome-sized string is kept in memory and the counting code
	# exists only once.
	$mononucs{$_}++ for split //, $line;
	$totalnuc += length $line;
}
# Everything has been read at this point, so a failing close is reported but
# does not abort the run.
close($fasta_fh) or warn "Couldn't close fasta file $fa_file: $!\n";
# Write the report: two header lines, a column header, then one tab-separated
# row per counted character (sorted by character) holding the character, its
# count from %mononucs, and that count divided by $totalnuc.
my $outfile = "saccer3_nuc_background.txt";
# Three-argument open with a lexical handle, so the file name is never parsed
# for mode characters and the handle is not a package global.
open( my $out_fh, '>', $outfile )
	or die "Couldn't open OUT file: $outfile: $!\n";

# NOTE(review): this header says "bed file: saccer3.fa", but the input opened
# earlier in this script is the FASTA file named in $fa_file. The text is left
# unchanged in case downstream consumers depend on it - confirm and correct.
print {$out_fh} "Data from bed file: saccer3.fa\n";
print {$out_fh} "Total nucleotide count: $totalnuc\n";

print {$out_fh} "nucleotide\tNucleotide count\tFraction of total nucleotides\n";
foreach my $nuc ( sort keys %mononucs )
{
	# Every key of %mononucs was counted at least once, so $totalnuc is
	# non-zero whenever this loop body runs.
	my $fraction = $mononucs{$nuc} / $totalnuc;
	print {$out_fh} "$nuc\t$mononucs{$nuc}\t$fraction\n";
}

# Output is buffered, so write errors (e.g. a full disk) may only surface when
# the handle is closed; check it instead of silently leaving a short file.
close($out_fh) or die "Couldn't close OUT file: $outfile: $!\n";
