use lib "$ENV{FIREDIR}/SCRIPTS";

use Sets;
use Table;
use strict;

# ---- command-line options ---------------------------------------------------
# Values that are always fetched from @ARGV.
my $quantfile = Sets::get_parameter(\@ARGV, "-quantfile");
my $expfile   = Sets::get_parameter(\@ARGV, "-expfile");
my $outtag    = Sets::get_parameter(\@ARGV, "-outtag");

# Optional settings: each one keeps the default on the last line of its
# expression unless the corresponding flag is present on the command line.

# Number of cross-validation folds.
my $CVx = (Sets::exist_parameter(\@ARGV, "-crossval") == 1)
	? Sets::get_parameter(\@ARGV, "-crossval")
	: 3;

# Directory that is prefixed to every generated file name.
my $crossvaldir = (Sets::exist_parameter(\@ARGV, "-crossvaldir") == 1)
	? Sets::get_parameter(\@ARGV, "-crossvaldir")
	: "";

# 0 selects the two-pass mode (plain files plus ".nonquantized" files);
# any other value selects the single-pass mode.
my $quantized = (Sets::exist_parameter(\@ARGV, "-quantized") == 1)
	? Sets::get_parameter(\@ARGV, "-quantized")
	: 1;

# NOTE(review): $divbins, $mbins and $ebins are parsed here but are not
# referenced anywhere else in the visible part of this script.
my $divbins = (Sets::exist_parameter(\@ARGV, "-divbins") == 1)
	? Sets::get_parameter(\@ARGV, "-divbins")
	: 50.0;

my $mbins = (Sets::exist_parameter(\@ARGV, "-mbins") == 1)
	? Sets::get_parameter(\@ARGV, "-mbins")
	: 2;

my $ebins = (Sets::exist_parameter(\@ARGV, "-ebins") == 1)
	? Sets::get_parameter(\@ARGV, "-ebins")
	: undef;

# Seed for Perl's random number generator, which drives the shuffling.
my $seed = (Sets::exist_parameter(\@ARGV, "-seed") == 1)
	? Sets::get_parameter(\@ARGV, "-seed")
	: 2324;

srand($seed);


# Build the per-fold output file names.
#
# For every fold index 0 .. $CVx-1 the names are
#     <crossvaldir>/<outtag>.CV<ix>.tra<suffix>   (training set)
#     <crossvaldir>/<outtag>.CV<ix>.tes<suffix>   (test set)
# Returns two array references: (\@trainfiles, \@testfiles).
#
# NOTE(review): when -crossvaldir is not given, $crossvaldir is "" and every
# path therefore starts with "/" (filesystem root) -- confirm this is intended.
my $cv_file_lists = sub
	{
	my ($suffix) = @_;

	my @trainfiles;
	my @testfiles;
	for (my $ix=0; $ix<$CVx; $ix++)
		{
		my $cvtag = $outtag . ".CV" . $ix;
		$trainfiles[$ix] = $crossvaldir . "/" . $cvtag . ".tra" . $suffix;
		$testfiles[$ix]  = $crossvaldir . "/" . $cvtag . ".tes" . $suffix;
		}
	return (\@trainfiles, \@testfiles);
	};

if ($quantized==0)
	{
	# Pass 1: the quantized file supplies both the clusters and the values.
	generate_files($quantfile, $quantfile, $cv_file_lists->(""));

	# Pass 2: same clusters, but the values come from the expression file.
	# NOTE(review): the random number generator is seeded only once, so this
	# pass continues the random stream of pass 1 and its shuffles are drawn
	# independently of the ones used above -- confirm that the plain and the
	# ".nonquantized" files are not expected to contain the same gene split.
	generate_files($quantfile, $expfile, $cv_file_lists->(".nonquantized"));
	}
else
	{
	generate_files($quantfile, $expfile, $cv_file_lists->(""));
	}


#####################################################################################################
#####################################################################################################
#####################################################################################################



# generate_files($cidfile, $expfile, \@trainfiles, \@testfiles)
#
# Writes one training file and one test file per cross-validation fold.
#
#   $cidfile          tab-separated file, first line is a header; column 1 is
#                     the gene name, column 2 its (numeric) cluster ID.
#   $expfile          tab-separated file, first line is a header; column 1 is
#                     the gene name, column 2 the value written to the output.
#   \@trainfiles      output paths of the training files, one per fold.
#   \@testfiles       output paths of the test files, one per fold.
#
# The genes of every cluster are shuffled with rand() and cut into as many
# nearly equal slices as there are folds; slice k is the test set of fold k
# and the remaining genes of the cluster are its training set.  Every output
# file starts with the line "ID\tValue" and lists its genes by ascending value.
# Only genes present in both input files are written.
#
# Genes with equal cluster ID (and, on output, equal value) are ordered by
# name, so the result depends only on the input files and on the state of the
# random number generator, never on Perl's randomised hash order.
#
# Dies with a message if no fold is requested, if fewer test paths than
# training paths are supplied, or if a file cannot be opened or closed.
sub generate_files
{
	my $cidfile         = shift;
	my $expfile         = shift;
	my $expfileTrainPnt = shift;
	my $expfileTestPnt  = shift;

	my @trainPaths = @$expfileTrainPnt;
	my @testPaths  = @$expfileTestPnt;
	my $setNR      = scalar(@trainPaths);

	# Zero folds used to end in "Illegal division by zero"; say what is wrong.
	die "generate_files: at least one train/test file pair is required"
		if ($setNR < 1);
	die "generate_files: $setNR training files but only " . scalar(@testPaths) . " test files"
		if (scalar(@testPaths) < $setNR);

	my $NameToCluster = _read_id_value_table($cidfile);
	my $NameToValue   = _read_id_value_table($expfile);

	# $NameToTraining{fold}{gene} is 0 for a test gene and 1 for a training gene
	my %NameToTraining;

	# Walk the genes cluster by cluster.  The name is the tie-breaker so that
	# the order inside a cluster is the same in every run.
	my @genesByCluster = sort {
			$NameToCluster->{$a} <=> $NameToCluster->{$b}
				or
			$a cmp $b
		} keys %$NameToCluster;

	my @members;        # genes of the cluster currently being collected
	my $cluster = 0;
	foreach my $gID (@genesByCluster)
	{
		my $cID = $NameToCluster->{$gID};
		if ($cID != $cluster)
		{
			# a new cluster starts: distribute the one collected so far
			_assign_cluster_folds(\@members, $setNR, \%NameToTraining);
			@members = ();
			$cluster = $cID;
		}
		push(@members, $gID);
	}
	# the last cluster
	_assign_cluster_folds(\@members, $setNR, \%NameToTraining);

	# The output order is the same for every fold, so sort only once.
	my @genesByValue = sort {
			$NameToValue->{$a} <=> $NameToValue->{$b}
				or
			$a cmp $b
		} keys %$NameToValue;

	for (my $ix=0; $ix<$setNR; $ix++)
	{
		open(my $trainOut, '>', $trainPaths[$ix])
			or die "generate_files: cannot write '$trainPaths[$ix]': $!";
		open(my $testOut, '>', $testPaths[$ix])
			or die "generate_files: cannot write '$testPaths[$ix]': $!";

		print {$trainOut} "ID\tValue\n";
		print {$testOut}  "ID\tValue\n";

		foreach my $gID (@genesByValue)
		{
			my $isTraining = $NameToTraining{$ix}->{$gID};
			# genes without a cluster assignment are not written at all
			next if (!defined($isTraining));

			my $out = ($isTraining == 0) ? $testOut : $trainOut;
			print {$out} "$gID\t" . $NameToValue->{$gID} . "\n";
		}

		# buffered write errors only show up when the handle is closed
		close($trainOut) or die "generate_files: error closing '$trainPaths[$ix]': $!";
		close($testOut)  or die "generate_files: error closing '$testPaths[$ix]': $!";
	}
}

# Reads a tab-separated file, skips its first (header) line and returns a hash
# reference mapping column 1 to column 2.  Lines without a name in column 1
# (e.g. blank lines) are ignored.  Dies if the file cannot be opened.
sub _read_id_value_table
{
	my ($path) = @_;

	my %valueOf;
	open(my $in, '<', $path) or die "generate_files: cannot read '$path': $!";
	my $header = <$in>;     # ignore the first line
	while (my $line = <$in>)
	{
		chomp $line;
		my ($name, $value) = split(/\t/, $line);
		next if (!defined($name) || $name eq "");
		$valueOf{$name} = $value;
	}
	close($in);

	return \%valueOf;
}

# Shuffles the genes of one cluster in place and records, for every fold,
# whether each gene is a test gene (0) or a training gene (1).
#
#   $members          array reference with the gene names of the cluster
#   $setNR            number of folds (must be > 0)
#   $NameToTraining   hash reference filled as {fold}{gene} = 0|1
sub _assign_cluster_folds
{
	my ($members, $setNR, $NameToTraining) = @_;

	my $size = scalar(@$members);
	return if ($size == 0);

	# fold $ix tests the shuffled positions splitPoints[$ix] .. splitPoints[$ix+1]-1
	my @splitPoints;
	for (my $ix=0; $ix<($setNR+1); $ix++)
	{
		$splitPoints[$ix] = int($size * $ix/$setNR);
	}

	# Fisher-Yates shuffle: swap position $j with a random position in $j .. $size-1
	for (my $j=0; $j<$size; ++$j)
	{
		my $ran = $j + int(rand($size - $j));
		@{$members}[$j, $ran] = @{$members}[$ran, $j];
	}

	# split in parts
	for (my $j=0; $j<$size; ++$j)
	{
		for (my $ix=0; $ix<$setNR; $ix++)
		{
			my $inTestSlice = (($j >= $splitPoints[$ix]) && ($j < $splitPoints[$ix+1]));
			$NameToTraining->{$ix}->{$members->[$j]} = $inTestSlice ? 0 : 1;
		}
	}
	return;
}




