#!/usr/bin/env perl

#
# ========================================================================
# This script is looking for new input data in an endless loop and
# executes a calculation process for every request.
#
# After a successful run the directory if the id is renamed, so it is
# not taken again for calculation and the script for visualization can
# take the output values.
# ========================================================================
#

use strict;

use FileHandle;
use File::Path;
use File::Touch;
use Proc::ProcessTable;
use Sys::Hostname;

my $PROJDIR = qq(/data/projects/n-way);
my $DBDIR   = qq($PROJDIR/db);
my $DATADIR = qq($PROJDIR/data);

my $LOGFILE  = qq($DATADIR/.log);
my $STATFILE = qq($DATADIR/.stat);
my $PIDFILE  = qq($DATADIR/.pid);
my $CALCDIR  = qq($PROJDIR/calculate);
my $CALCPGM  = qq($CALCDIR/nway_calc);

my $GENOMESDIR = qq(/data/databases/genomes);

# Age of request in seconds, before request folders will be deleted
my $REQAGE = 30 * 86400;

# How many calculators may run in parallel
my $MAXCALCPGMS = 2;

my $INPUTPRAEFIX = qq(input);
my $CALCPRAEFIX  = qq(calc);

#
# ------------------------------------------------------------------------
# Kills all calculator processes of current user.
# ------------------------------------------------------------------------
#
sub termcalc {
	print "Terminate calculator processes... ";
	my $procs = Proc::ProcessTable->new();
	foreach my $p (@{$procs->table()}) {
		if ($p->cmndline =~ m/$CALCPGM /) {
			print $p->pid . " ";
			kill(15, $p->pid);
			sleep 1;
		}
	}
	print "\n";
}

#
# ------------------------------------------------------------------------
# Act on pressing a key like CTRL-C or terminating the process.
# ------------------------------------------------------------------------
#
sub terminate {
	termcalc;
	print "Nanopipe queue is terminated!\n";
	close(LOGFILE);
	unlink $PIDFILE if (-f $PIDFILE);
	exit(0);
}

#
# ------------------------------------------------------------------------
# Reads the content of a file
# ------------------------------------------------------------------------
#
sub readFile {
	my ($filename) = @_;
	open(F, "<", $filename);
	my $content = join("", <F>);
	close(F);
	return $content;
}

#
# ------------------------------------------------------------------------
# Remove all requests older than REQAGE seconds
# ------------------------------------------------------------------------
#
sub cleanRequests {
	for my $praefix ('c', 'r', 'x', 'e') {
		for my $dir (<$DATADIR/$praefix*>) {
			next if (!-d $dir);
			my $mtime = (stat($dir))[9];
			if (time - $mtime > $REQAGE) {
				rmtree($dir);
				my $id = $dir;
				$id =~ s/$DATADIR\/.//;
				printf LOGFILE "%s deleted\n", $id;
			}
		}
	}
}

#
# ------------------------------------------------------------------------
# Returns the date as a string
# ------------------------------------------------------------------------
#
sub getDate {
	my ($time) = @_;
	my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);
	return sprintf("%4d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

#
# ------------------------------------------------------------------------
# Wait till number of processes is low enough
# ------------------------------------------------------------------------
#
sub wait2run {
	while (1) {
		my $count = 0;
		my $t     = new Proc::ProcessTable();
		foreach my $p (@{$t->table}) {
			$count++ if ($p->cmndline =~ m/$CALCPGM /);
		}
		return if ($count < $MAXCALCPGMS);
		sleep(5);
	}
}

#
# ------------------------------------------------------------------------
# The main part
# ------------------------------------------------------------------------
#
sub main {
	$SIG{INT}  = \&terminate;
	$SIG{TERM} = \&terminate;

	if (-f $PIDFILE) {
		print STDERR "Queue is already running! Check and remove pid file!\n";
		exit(0);
	}

	open(PIDFILE, ">$PIDFILE");
	print PIDFILE hostname . " " . getDate(time);
	close(PIDFILE);

	open(LOGFILE, ">>$LOGFILE") or die qq(Cannot open logfile $LOGFILE!);
	LOGFILE->autoflush(1);

	# Reactivate previous running requests
	my @dirs = <$DATADIR/r*>;
	for my $dir (@dirs) {
		if ($dir =~ m|/r(\d+)$|) {
			my $id    = $1;
			my @files = <$dir/calc.*>;
			unlink @files;
			print LOGFILE "Reactivate request $id\n";
			rename(qq($DATADIR/r$id), qq($DATADIR/o$id));
		}
	}

	while (1) {
		cleanRequests();

		# Loop over every open directory
		my @dirs = <$DATADIR/o*>;
		for my $dir (@dirs) {
			$dir =~ m|/o(\d+)$|;
			my $id = $1;

			wait2run();

			print "=== Start $id ===\n";
			rename(qq($DATADIR/o$id), qq($DATADIR/r$id));

			my $commands;

			eval {
				my $start = time;

				printf LOGFILE "=== %s / %s ===\n", $id, getDate(time);

				my $target  = readFile(qq($DATADIR/r$id/input.target));
				my $species = readFile(qq($DATADIR/r$id/input.species));
				my $params  = readFile(qq($DATADIR/r$id/input.params));

				$commands = qq(
cd $DATADIR/r$id
$CALCPGM -t $target -s $species $params -pd $DBDIR -pg $GENOMESDIR >calc.log 2>calc.error
if [ \$? -eq 0 ]; then mv $DATADIR/r$id $DATADIR/x$id; else mv $DATADIR/r$id $DATADIR/e$id; fi
);
				print LOGFILE
				  "Execute: $CALCPGM -t $target -s $species $params -pd $DBDIR -pg $GENOMESDIR\n";

				system("($commands)&") == 0 or die;
			};
			if ($@) {
				print LOGFILE $@;
				print LOGFILE "Cannot execute:\n$commands\n";
				rename($dir, qq($DATADIR/e$id));
			}

			sleep(5);
		}

		sleep(10);
	}
}

main();
