# FF_Index --> Flat-File index package.  Written by Philip Johnson.

package FF_Index;
use strict;

#-----------------------------------------------------------------------------
# PRE : filename of flat file
# POST: FF_Index object constructed
sub new($$) {
    my ($class, $filename) = @_;
    my $this = {};
    $this->{m_Filename} = $filename;

    open INFILE, $filename or die "'$filename': $!";
    $this->{m_FileHandle} = *INFILE;

    return bless $this, $class;
}

#-----------------------------------------------------------------------------
# PRE : none
# POST: FF_Index object destroyed
sub DESTROY {
    my $this = shift;
    close $this->{m_FileHandle};
}

#-----------------------------------------------------------------------------
# PRE : reference to function that scans for the next record in the file
# POST: key/position index created & saved to disk
sub CreateIndex($$) {
    my ($this, $findNextCB) = @_;
    
    seek($this->{m_FileHandle}, 0, 0);

    # a unicode setting can cause *massive* slow downs in sorting
    my $lang = $ENV{'LANG'};
    $ENV{'LANG'} = 'C';
    open INDEX, "|sort -k1,1 > $this->{m_Filename}.idx" or die $!;
    my %record;
    my $c=0;
    while (&$findNextCB($this->{m_FileHandle}, \%record)) {
	print STDERR "$c\r" if (++$c % 10000 == 0);
	#push @index, [$record{key}, $record{pos}];
	if (!defined $record{key}  ||  !defined $record{pos}) {
	    die "Error in FINDNEXT callback function\n";
	}
	print INDEX join("\t", $record{key}, $record{pos}), "\n";
    }
    close INDEX or die $!;
    $ENV{'LANG'} = $lang;
}

#-----------------------------------------------------------------------------
# PRE : two scalars
# POST: the larger of the two
sub max($$) {
    return ($_[0] > $_[1]) ? $_[0] : $_[1];
}
#-----------------------------------------------------------------------------
# PRE : open filehandle
# POST: seeked to beginning of current line
sub SeekLineBegin($) {
    my ($fh) = @_;

    my ($buff, $pos);
    do {
	my $prevPos = tell($fh);
	my $newPos = max(0, $prevPos - 50);
	
	seek($fh, $newPos, 0);
	read($fh, $buff, $prevPos - $newPos);
	$pos = rindex($buff, "\n");
	seek($fh, $newPos, 0);
    } until ($pos >= 0  ||  tell($fh) == 0);
    seek(INDEX, tell($fh)+$pos+1, 0);
}

#-----------------------------------------------------------------------------
# PRE : key
# POST: filehandle pointing to this record (undef if doesn't exist)
sub GetRecord($$) {
    my ($this, $target) = @_;

    die "GetRecord called with undefined target!" if !defined $target;

    my $fwd = 0;
    my $rev = (-s $this->{m_Filename}.".idx") - 1;

    open INDEX, $this->{m_Filename}.".idx" or die $!;
    while ($fwd <= $rev) {
	my $mid = int(($fwd + $rev) / 2);
	seek(INDEX, $mid,0);

	SeekLineBegin(\*INDEX);
	
	$_ = <INDEX>;
	#print STDERR "$mid\t$_";
	chomp;
	my ($key, $pos) = split /\t/;

	if ($target lt $key) {
	    if ($rev == $mid) {
		#$fwd -= 2;
		#print STDERR "yep--$target\n"
	    }
	    $rev = $mid-1;#tell(INDEX) - length($_);
	} elsif ($target gt $key) {
	    if ($fwd == $mid) {
		#print STDERR "hmmm--$target\n";
	    }
	    $fwd = $mid+1;#tell(INDEX) - length($_);
	} else {
	    seek($this->{m_FileHandle}, $pos, 0) or die $!;
	    close INDEX;
	    return $this->{m_FileHandle};
	}
    }
    close INDEX;
    return undef;
}

#perl wants modules to return "TRUE"
1;
