#!/usr/bin/perl

use strict;
use warnings;

# Load gene coordinates from a tab-separated BED file into lookup tables:
#   %genstart  : chromosome -> gene id -> start (converted to 1-based)
#   %genend    : gene id -> end column, stored unchanged
#   %genstrand : gene id -> strand column (6th field)
#   %genlist   : gene ids seen so far, used to reject duplicates
my $filename = "gencode_v19_hg19_genes.bed";

# Three-argument open on a lexical handle: the file name is never parsed for a
# mode prefix, and the failure message names the file and the OS error.
open ( my $gene_fh, '<', $filename ) || die "couldn't open file $filename: $!\n";

my %genstart = ();
my %genend = ();
my %genstrand = ();
my %genlist = ();
my $prevend = 0;	# numeric start value, so the first comparison does not warn
my $prevline = "";
my $prevchrom = "";	# chromosome of the previous record
while ( my $gene = <$gene_fh> )
{
	chomp $gene;

	# A blank line carries no record; parsing it would register an empty
	# gene id and a second blank line would abort as a "duplicate".
	next if $gene =~ /^\s*$/;

	my @fields = split /\t/, $gene;
	my ( $genechrom, $bedstart, $end, $id, $genestrand ) = @fields[0 .. 3, 5];

	if ( exists $genlist{$id} )
	{
		die "Duplicate gene ids!\n";	
	}

	my $start = $bedstart + 1;	# make 1-based, like nucs
	$genstart{$genechrom}{$id} = $start;
	$genend{$id} = $end;
	$genstrand{$id} = $genestrand;
	$genlist{$id} = 1;

	# Coordinates restart on every chromosome, so the previous end is only
	# meaningful while we stay on the same chromosome.
	if ( $genechrom ne $prevchrom )
	{
		$prevend = 0;
		$prevchrom = $genechrom;
	}

	# Only the immediately preceding record is compared, so this relies on
	# the file being ordered by start within a chromosome. Both bounds are
	# treated as inclusive by the position lookup later in this script, so
	# an end equal to the next start is already a shared base.
	if ( $prevend >= $start )
	{
		print STDERR "Overlapping gene $prevline\t$gene\n";
	}
	$prevline = $gene;
	$prevend = $end;
}

close $gene_fh;

# Annotate every tab-separated line read from STDIN (column 1: chromosome,
# column 2: position) with the ids of all genes whose [start, end] range
# contains the position. Three columns are appended to the line: the gene
# ids joined by ";" (or "NA" when none match), a literal ".", and the strand
# (empty when no gene matches, "AMBIG" when matching genes disagree).
my $chrom = "";		# chromosome whose genes are currently held in @chromgenes

my @chromgenes = ();	# gene ids of $chrom, ordered by ascending start

while ( my $line = <STDIN> )
{
	chomp $line;

	my @fields = split /\t/, $line;
	my ( $chr, $dyad ) = @fields[0, 1];

	# Rebuild the ordered gene list whenever the chromosome changes.
	if ( $chr ne $chrom )
	{
		print STDERR "Starting chromosome $chr\n";
		my $startof = $genstart{$chr} ||= {};
		@chromgenes = sort { $startof->{$a} <=> $startof->{$b} } keys %$startof;
		$chrom = $chr;
	}

	my $geneid = "NA";
	my $strand = "";

	foreach my $candidate ( @chromgenes )
	{
		# Ids are ordered by start, so once a gene begins after the
		# position no later gene can contain it.
		last unless $dyad >= $genstart{$chr}{$candidate};

		# Gene begins at or before the position but ends before it.
		next unless $dyad <= $genend{$candidate};

		# First containing gene: take its id and strand as they are.
		if ( $geneid eq "NA" )
		{
			$geneid = $candidate;
			$strand = $genstrand{$candidate};
			next;
		}

		# Further containing genes are appended; any strand mismatch
		# makes the strand ambiguous for this position.
		$geneid .= ";$candidate";
		$strand = "AMBIG" if $strand ne $genstrand{$candidate};
	}

	print "$line\t$geneid\t.\t$strand\n";
}
