#!/usr/bin/perl

use DBI;
use vars qw/ %opt /;

# process the command-line args
sub init {
        # Parses @ARGV into the global %opt hash with Getopt::Std.
        # -h is a flag; -t, -f, -g, -q and -s each take a value.
        # Calls usage() (which exits) when parsing fails, when -h is given,
        # or when any of the mandatory options -t, -f, -g, -q is missing.
        use Getopt::Std;
        my $opt_string = 'ht:f:g:q:s:';
        getopts($opt_string, \%opt) or usage();
        usage() if $opt{h};

        # Test for "defined and non-empty" rather than plain truthiness so
        # that a legitimate value of "0" (e.g. a file named 0) is accepted.
        for my $required (qw(t f g q)) {
                usage() unless defined $opt{$required} && length $opt{$required};
        }
}

# print the usage message if requested (-h) or if required args are missing
sub usage {
        # Writes the help text to STDERR and terminates the program.
        # Takes no arguments and never returns. The exit status is 0
        # whether the help was requested or the arguments were invalid.
        # The synopsis marks -t, -f, -g and -q as mandatory because
        # init() rejects a command line that lacks any of them.
        print STDERR << "EOF";

adds bwt to table

usage: $0 [-h] -t tablename -f filename -g genome -q seqtype [-s server]

 -h     : this message
 -t     : table name
 -f     : filename
 -g     : genome name
 -q     : sequence type (acceptable values: ABI, Illumina, 454)
 -s     : server (default: localhost)

example: $0 -t master -f something.bwt -g mygenome -q Illumina

EOF
        exit;
}

# getlastid($table, $dbh)
#
# Returns the largest value of the "id" column in $table, using the
# database handle $dbh. Returns undef when the query yields a NULL
# (e.g. the table has no rows).
#
# Dies with the driver's error text if the statement cannot be prepared
# or executed.
#
# NOTE: $table is interpolated into the SQL as an identifier (identifiers
# cannot be bound as placeholders), so it must come from a trusted source.
sub getlastid {
	my ($table,$dbh) = @_;
	my $query = "SELECT max(id) from $table";

	my $sth = $dbh->prepare($query)
		or die "getlastid: cannot prepare '$query': " . $dbh->errstr . "\n";
	$sth->execute()
		or die "getlastid: cannot execute '$query': " . $sth->errstr . "\n";

	my ($max) = $sth->fetchrow_array();

	# Only one row is read, so release the handle explicitly; otherwise it
	# stays active until the program ends.
	$sth->finish();

	return $max;
}

# Main script: convert the tab-separated alignment file given with -f into
# "<file>.extern" (one row per input line, numbered from the table's current
# max id + 1) and bulk-load that file into table -t with LOAD DATA INFILE.
init();

# Connection settings; only the host can be overridden (with -s).
my $database = 'l1hsgeno';
my $server = 'localhost';
$server = $opt{s} if ($opt{s});
my $user = 'l1';
my $passwd = 'l1';

my $table = $opt{t};
my $genome = $opt{g};
my $seqtype = $opt{q};
my $infile = $opt{f};
my $tabfile = $infile . ".extern";

# Validate the sequence type before touching the database or filesystem.
if ($seqtype ne "ABI" && $seqtype ne "Illumina" && $seqtype ne "454") {
	die("valid sequence types: ABI, Illumina, 454")
}

my $cstring = "dbi:mysql:database=$database;host=$server;port=8081;mysql_socket=/gpfs/fs0/u/ewingad/mysql/mysql.sock";
my $dbh = DBI->connect($cstring, $user, $passwd)
	or die "cannot connect to database $database on $server: $DBI::errstr\n";

print STDERR "writing $tabfile, progress (. = 1000 rows added): ";

# Row ids continue from the highest id already in the table
# (undef for an empty table, which the first ++ turns into 1).
my $r = getlastid($table,$dbh);

# Open the input first so a bad -f does not leave an empty output file behind.
open(my $in, '<', $infile) or die "cannot read $infile: $!\n";
open(my $out, '>', $tabfile) or die "cannot write $tabfile: $!\n";

while (my $line = <$in>) {
	chomp $line;
	next unless length $line;	# blank lines carry no record
	$r++;

	# Keep trailing empty columns (limit -1) and pad short lines so every
	# field is defined. Column 7 is read but not used.
	my @fields = split(/\t/, $line, -1);
	push(@fields, '') while (@fields < 8);
	my ($seqid, $strand, $chr, $loc, $seq, $qs, undef, $mmstring) = @fields;

	# The last two ':'-separated parts of the read id are written out as
	# tnloc and tnstrand.
	my @rcols = split(/:/, $seqid);
	my $tnstrand = @rcols ? $rcols[-1] : '';
	my $tnloc = @rcols > 1 ? $rcols[-2] : '';

	# Removes the first occurrence of "chr" from the chromosome name.
	$chr =~ s/chr//;

	# Number of comma-separated entries in the mismatch column.
	my @mm = split(/,/, $mmstring);
	my $nmm = scalar @mm;

	# Average per-character quality of the read.
	# NOTE(review): the offset of 31 is unusual (33 and 64 are the common
	# Phred offsets) -- confirm against the data producer.
	my $qlen = length $qs;
	my $qsum = 0;
	for my $qb (split(//, $qs)) {
		$qsum += (ord($qb) - 31);
	}
	# An empty quality string would otherwise divide by zero.
	my $avgqual = $qlen ? $qsum / $qlen : 0;

	print {$out} "$r\t$genome\t$seqid\t$chr\t$loc\t$strand\t$tnloc\t$tnstrand\t$seq\t$avgqual\t$mmstring\t$nmm\t$seqtype\n"
		or die "error writing $tabfile: $!\n";

	print STDERR "." if ($r % 1000 == 0);
}
close($in);
# Buffered write errors only surface at close, so check it.
close($out) or die "error closing $tabfile: $!\n";

# TODO: derive this directory from $HOME instead of hard-coding it.
# The load only works when the script runs from this directory, because
# $tabfile is written relative to the current working directory.
print STDERR "\nLoading $tabfile into $table...";
my $loadpath = "/gpfs/fs0/u/ewingad/1000genomes/database/$tabfile";
my $query = "load data infile " . $dbh->quote($loadpath) . " INTO TABLE $table";
my $sth = $dbh->prepare($query)
	or die "\ncannot prepare load statement: " . $dbh->errstr . "\n";
# Die before the unlink below so a failed load does not destroy the data.
$sth->execute()
	or die "\nload of $tabfile failed (file kept): " . $sth->errstr . "\n";

print STDERR "done.\n";
unlink($tabfile) or warn "could not remove $tabfile: $!\n";
$dbh->disconnect();
