#!/usr/bin/env perl

#
# ========================================================================
# This script is looking for new input data in an endless loop and
# executes a calculation process for every request.
#
# After a successful run the directory if the id is renamed, so it is
# not taken again for calculation and the script for visualization can
# take the output values.
# ========================================================================
#

use strict;

use FileHandle;
use File::Path qw(rmtree);
use Sys::Hostname;
use Proc::ProcessTable;

my $PROJDIR = qq(/data/projects/two-way);
my $DATADIR = qq($PROJDIR/data);
my $LOGFILE = qq($PROJDIR/data/.log);
my $PIDFILE = qq($PROJDIR/data/.pid);
my $CALCDIR = qq($PROJDIR/calculate);
my $CALCPGM = qq($CALCDIR/twoway_calc.sh);

# Age of request in seconds, before request folders will be deleted (30 days)
my $REQAGE = 30 * 86400;

# Age of long request in seconds, before request folders will be deleted (about 3 months)
my $REQAGE_LONG = 3 * $REQAGE;

# The ips for which long requests are generated
my %REQ_LONG_IPS = (
	"10.36.160.175"  => 1,
	"10.36.161.37"   => 1,
	"128.176.186.37" => 1    # Fengjun
);

# The executed command
my $cmd;

#
# ------------------------------------------------------------------------
# Reads a file and returns the content
# ------------------------------------------------------------------------
#
sub readFile {
	my ($file) = @_;

	my $content;
	if (-r $file) {
		open(F, "<", $file);
		$content = join("", <F>);
		close(F);
	}

	return $content;
}

#
# ------------------------------------------------------------------------
# Get the sub process ids
# ------------------------------------------------------------------------
#
sub getSubpids {
	my ($ppid, $procs, $name) = @_;

	my @pids;
	foreach my $p (@{$procs->table}) {
		if ($p->ppid == $ppid) {
			next if ($name && !($p->cmndline =~ m/$name/));
			push(@pids, $p->pid);
			push(@pids, getSubpids($p->pid, $procs));
		}
	}

	return @pids;
}

#
# ------------------------------------------------------------------------
# Kills processes by pid
# ------------------------------------------------------------------------
#
sub termPids {
	for my $signal (15, 9) {
		for my $pid (@_) {
			print "Terminate process $pid...\n";
			kill($signal, $pid) if (kill(0, $pid));
			sleep 1;
		}
	}
}

#
# ------------------------------------------------------------------------
# Kills all calculator processes
# ------------------------------------------------------------------------
#
sub termCalc {
	print "Terminate processes $CALCPGM...\n";

	my @pids = getSubpids($$, Proc::ProcessTable->new(), "$CALCPGM");
	for my $pid (@pids) {
		termPids($pid);
		sleep 2;
		termPids(getSubpids($pid, Proc::ProcessTable->new()));
		sleep 2;
	}
}

#
# ------------------------------------------------------------------------
# Act on pressing a key like CTRL-C or terminating the process.
# ------------------------------------------------------------------------
#
sub term {
	termCalc();

	close(LOGFILE);
	unlink $PIDFILE if (-f $PIDFILE);

	print "twoway queue is terminated!\n";

	exit(0);
}

#
# ------------------------------------------------------------------------
# Send a mail to the user if request is finished
# ------------------------------------------------------------------------
#
sub sendMail {
	my ($id) = @_;

	my $dir = -d qq($DATADIR/x$id) ? qq($DATADIR/x$id) : qq($DATADIR/e$id);

	my $emailfile = qq($dir/input.email);
	return if (!-r $emailfile);

	my $email = join("", readFile($emailfile));
	$email =~ s/\s+//g;
	return if (!$email);

	my $title = join("", readFile(qq($dir/input.title)));
	$title = $title ? "$title ($id)" : $id;

	my ($mailmsg, $mailtitle);
	if ($dir =~ m/\/x$id/) {
		$mailtitle = qq(TwoWay job '$title' has finished);
		$mailmsg =
		  qq(Click here http://retrogenomics.uni-muenster.de/tools/twoway/generate/index.pl?id=$id to see results);
	}
	else {
		$mailtitle = qq(TwoWay job '$title' had problems);
		$mailmsg   = qq(Problems with request id $id);
	}

	if (system(qq(echo "$mailmsg" | mail -s "$mailtitle" $email -fwww\@retrogenomics.uni-muenster.de)) != 0) {
		print LOGFILE qq(Cannot send mail for id $id to $email);
	}
}

#
# ------------------------------------------------------------------------
# Remove all requests older than REQAGE seconds
# ------------------------------------------------------------------------
#
sub clean {
	for my $praefix ('c', 'o', 'r', 'x', 'e', 's') {
		for my $dir (<$DATADIR/$praefix*>) {
			next if (!-d $dir);
			my $mtime = (stat($dir))[9];
			if (time - $mtime > ($REQ_LONG_IPS{readFile("$dir/input.ip")} ? $REQAGE_LONG : $REQAGE)) {
				rmtree($dir);
				my $id = $dir;
				$id =~ s/$DATADIR\/.//;
				printf LOGFILE "%s deleted\n", $id;
			}
		}
	}
}

#
# ------------------------------------------------------------------------
# Returns a date string
# ------------------------------------------------------------------------
#
sub getDate {
	my ($time) = @_;
	my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);
	return sprintf("%4d-%02d-%02d %02d:%02d:%02d", $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

#
# ------------------------------------------------------------------------
# The main part
# ------------------------------------------------------------------------
#
sub main {
	$SIG{INT}  = \&term;
	$SIG{TERM} = \&term;

	if (-f $PIDFILE) {
		print STDERR "Queue is already running!  Check and remove pid file!\n";
		exit(0);
	}

	open(PIDFILE, ">$PIDFILE");
	print PIDFILE hostname . " " . getDate(time);
	close(PIDFILE);

	open(LOGFILE, ">>$LOGFILE") or die qq(Cannot open logfile!);
	LOGFILE->autoflush(1);

	# Reactivate previous running requests
	my @dirs = <$DATADIR/r*>;
	for my $dir (@dirs) {
		if ($dir =~ m|/r(\d+)$|) {
			my $id    = $1;
			my @files = <$dir/calc.*>;
			unlink grep {-f $_} @files;
			map {
				if (-d $_) {rmtree $_}
			} @files;
			print LOGFILE qq(Reactivate: $id\n");
			rename(qq($DATADIR/r$id), qq($DATADIR/o$id));
		}
	}

	while (1) {
		clean();

		# Over every open directory
		for my $dir (<$DATADIR/o*>) {
			$dir =~ m|/o(\w+)$|;
			my $id = $1;

			# Change to running state
			rename($dir, qq($DATADIR/r$id));
			$dir =~ s/o$id/r$id/;

			eval {
				printf LOGFILE "%s: %s\n", $id, getDate(time);

				print "==== $id ===\n";

				$cmd = qq(cd $dir; $CALCPGM);
				print LOGFILE qq(Execute: $cmd\n);
				system("$cmd >calc.exec.logs 2>&1") == 0 or die;

				rename($dir, qq($DATADIR/x$id));
			};
			if ($@) {
				print LOGFILE $@;
				print LOGFILE "$cmd\n" if ($cmd);
				rename($dir, qq($DATADIR/e$id));
			}

			sendMail($id);
		}

		sleep 10;
	}
}

main();
