#!/usr/bin/env perl

# Author: NTM

#
# Copyright (C) Nicolas Thierry-Mieg, 2009.
#
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


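# takes up to 2 args: second arg is the dist to use for decoding 
# (eg 4212, 4211, 5511), 4212 is used if this arg is not supplied
# (note: this version always runs the single ipoolDecoding binary 
# configured below, so dist only appears in the result file names); 
# and first arg can be either: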
# - a sigDir holding subdirs, each containing
# a batch of sig files for one bait;
# - a baitDir containing a batch of sig files (for one bait) ;
# - a sig file (must end in .sig).
# In case 1, for each subdir $bait of sigDir, create a new subdir 
# of $decDir called $bait.Decoded if it doesn't exist, and fill it 
# with  the decoding results of each sig in $bait (renamed to add 
# ".dist$dist" before ".Decoded", at the end of the file name).
# In case 2, do the same as case 1 except it happens for a single bait.
# In case 3, just decode the sig and leave the result in InterPool.Results/,
# renamed to add ".dist$dist" before ".Decoded" (at end of file name).
# 
# In cases 1 and 2, you must not have an InterPool.Results subdir (just
# rename it temporarily, or clean it up).

use strict ;
use warnings ;


# ipoolDecoding binary to use.
# It used to be a per-distance binary ($ipoolDecoding$dist):
#my $ipoolDecoding = "/home/nthierry/Codages/CSimulator/bin.C/ipoolDecode.WASP/ipoolDecoding.WASP.dist" ;
# NO: for the Genome Res package, just use the single binary with 
# 6412 distance, which is configured in the interpool-090303_GR/src/ tree.
my $ipoolDecoding = "../interpool-090303_GR/src/ipoolDecoding" ;


# design files are in $designDir.
# The correct file to use for each batch is given in the table files
# ${B2D}2 (for WASP2) and ${B2D}6 (for WASP6).
my $designDir = "../Designs/" ;
my $B2D = "$designDir/batch2design.WASP" ;

# parse ${B2D}2 and ${B2D}6, save in @design2 and @design6:
# $design2[$i] contains the design file (including $designDir)
# to use for batch $i of WASP2, idem @design6 for WASP6.
# Table format: one line that is ignored, then a header line that must
# be exactly "Batch,DesignFile", then one "batch,designFile" line per batch.
my @design2 = () ;
my @design6 = () ;

foreach my $type (2,6)
{
    my $b2dFile = "$B2D$type" ;
    # table to fill for this WASP type
    my $designsR = ($type == 2) ? \@design2 : \@design6 ;

    # 3-arg open with a lexical handle: the file name can never be taken
    # for an open mode, and the handle doesn't outlive this iteration
    open(my $b2dFH, '<', $b2dFile) ||
	die "dying: cannot open $b2dFile: $!\n" ;
    # skip first 2 lines, check second
    <$b2dFH> ;
    my $line = <$b2dFH> ;
    # a truncated file has no second line: say so instead of matching undef
    (defined $line) ||
	die "while reading $b2dFile, second line is missing, dying\n" ;
    chomp $line ;
    ($line =~ /^Batch,DesignFile$/) ||
	die "while reading $b2dFile, second line is wrong, dying\n" ;
    while (defined($line = <$b2dFH>))
    {
	chomp $line ;
	($line =~ /^(\d+),(.+)$/) ||
	    die "while reading $b2dFile, cannot parse line;\n$line\n" ;
	my ($batch, $design) = ($1,$2) ;
	# each batch must appear only once per table
	(defined $designsR->[$batch]) && 
	    die "while reading $b2dFile, found design $design for batch $batch but it's already defined: $designsR->[$batch]\n" ;
	$designsR->[$batch] = "$designDir/$design" ;
    }

    close($b2dFH) ;
}


# command line: sigDir or baitdir or sig, optionally followed by dist
my $nbArgs = scalar(@ARGV) ;
unless (($nbArgs == 1) || ($nbArgs == 2))
{
    die "needs one or two args:\nFirst is either \n- a dir holding subdirs (one for each bait), each containing a batch of sig files,\n- a dir (corresponding to one bait) containing a batch of sig files,\n- a single sig file;\nSecond arg is the distance to use (optional, 4212 is default)\n" ;
}

# dist: second arg if present, 4212 otherwise
my $dist = ($nbArgs == 2) ? $ARGV[1] : 4212 ;

# $ipoolDecoding .= "$dist" ;
# again, for GR package use a single distance (6412) as configured in src/

# make sure the decoding binary is there before doing anything else
unless (-f $ipoolDecoding)
{
    # under cygwin the binaries end in .exe: try that name before giving up
    $ipoolDecoding .= ".exe" ;
    unless (-f $ipoolDecoding)
    {
	die "ipoolDecoding binary to use ($ipoolDecoding) doesn't exist, maybe you didn't compile interpool? look in ../interpool-090303_GR/\n" ;
    }
}

# decodingParams($type, $batch):
# find the parameters needed to decode a sig of batch $batch of WASP$type.
# $type is 2 or 6 (WASP2_384 is the same as WASP2 once the sig exists).
# Returns ($n, $nbPools, $designFile): the number of variables, the number
# of pools and the design file to use; or an empty list if no design file
# is recorded for this batch in @design2 / @design6.
sub decodingParams
{
    my ($type, $batch) = @_ ;
    my ($n, $nbPools, $designFile) ;
    if ($type == 6)
    {
	# all WASP6 batches have $n==1014 except batch 13, which has only 507
	$n = ($batch == 13) ? 507 : 1014 ;
	# all WASP6 batches have 169 pools
	$nbPools = 169 ;
	$designFile = $design6[$batch] ;
    }
    else
    {
	# WASP2: n==338 except for batches 1-4 and 40, which have 169
	$n = (($batch <= 4) || ($batch == 40)) ? 169 : 338 ;
	# nbPools: 169, except the split batches
	$nbPools = 169 ;
	$designFile = $design2[$batch] ;
	if ((defined $designFile) &&
	    ($designFile =~ /\.Split(?:First|Last)(\d+)$/))
	{
	    # split design: just add number of split pools
	    $nbPools += $1 ;
	}
    }
    # unknown batch: let the caller decide whether to die or skip
    (defined $designFile) || return ;
    return ($n, $nbPools, $designFile) ;
}

# runDecoding($n, $nbPools, $designFile, $sigPath):
# print the ipoolDecoding command to stderr, then run it on sig $sigPath.
# The command is run in list form, ie without a shell, so paths holding
# spaces or shell metacharacters are passed through unchanged.
# Returns true if the command exited with status 0. A failure only results
# in a warning here: callers must still test that the result file exists.
sub runDecoding
{
    my ($n, $nbPools, $designFile, $sigPath) = @_ ;
    my @cmd = ($ipoolDecoding, $n, $nbPools, $designFile, $sigPath) ;
    warn "@cmd\n" ;
    my $status = system(@cmd) ;
    ($status == 0) ||
	warn "$ipoolDecoding didn't exit cleanly on $sigPath (status $status)\n" ;
    return ($status == 0) ;
}


# first arg: a single sig file (case 3), or a dir: sigDir (case 1) or
# baitdir (case 2)
if ($ARGV[0] =~ /\.sig$/)
{
    my $sig = $ARGV[0] ;
    (-f $sig) ||
	die "first arg ($sig) seems to be a sig (ends in .sig), but it isn't a file. Dying.\n" ;
    # accepts both .WASP[26].batchN.sig and .WASP[26]_384.batchN.sig
    ($sig =~ /\.WASP([26])(?:_384)?\.batch(\d+)\.sig$/) ||
	die "arg seems to be a sig, but cannot find WASP type and batch in name\n" ;
    # $type: 2 or 6 (for WASP2 or WASP6, WASP2_384 is same as WASP2 after sig creation)
    my ($type,$batch) = ($1,$2) ;
    # number of vars n and of pools nbPools, and design file to use
    my ($n, $nbPools, $designFile) = decodingParams($type, $batch) ;
    (defined $designFile) ||
	die "no design file is known for batch $batch of WASP$type, cannot decode $sig\n" ;

    # OK, decode!
    runDecoding($n, $nbPools, $designFile, $sig) ;
    # rename decoding result file to add dist$dist before .Decoded
    my $sigNoPath = $sig ;
    $sigNoPath =~ s~^.*/([^/]+)$~$1~ ;
    my $rawResult = "InterPool.Results/$sigNoPath.Decoded" ;
    my $distResult = "InterPool.Results/$sigNoPath.dist$dist.Decoded" ;
    (-e $distResult) &&
	die "$distResult already exists, leaving result as $rawResult\n" ;
    # if eg sigfile was bad, ipoolDecoding will have died. Don't try to
    # rename the result in this case.
    (-e $rawResult) ||
	die "ipoolDecoding probably died, there's no $rawResult\n" ;
    # both names are in the same dir, so the rename builtin is enough
    rename($rawResult, $distResult) ||
	die "cannot rename $rawResult to $distResult: $!\n" ;
    # add names
    warn "adding names to decoding files in InterPool.Results/ that don't have them yet.\n" ;
    (system("addNamesToDecodings.pl", "InterPool.Results") == 0) ||
	warn "addNamesToDecodings.pl didn't exit cleanly on InterPool.Results\n" ;
}
else
{
    my $sigDir = $ARGV[0] ;

    opendir(my $sigDirDH, $sigDir) || 
	die "cannot opendir $sigDir: $!\n" ;
    # ignore . and .., and anything else starting with .
    my @allBaits = grep(/^[^.]/, readdir($sigDirDH)) ;
    closedir($sigDirDH) ;

    # decoding results will go into $decDir
    my $decDir = "Decodings/" ;
    if (! -d $decDir)
    {
	mkdir($decDir) ||
	    die "cannot make dir $decDir: $!\n" ;
	warn "created dir $decDir\n" ;
    }

    # if sigDir holds files ending in .sig, it is actually a baitdir (hopefully)
    my @tmpFiles = grep(/\.sig$/, @allBaits) ;
    if (@tmpFiles == @allBaits)
    {
	# same number of elements so it's the same: we have a baitdir.
	# Split it into parent dir and bait name; the parent part is
	# optional, so that a baitdir sitting in the current dir also works.
	my ($parentDir, $baitName) = ($sigDir =~ m~^(.*/)?([^/]+)/?$~) ;
	(defined $baitName) ||
	    die "arg $sigDir seems to be a baitdir, but cannot decompose it\n" ;
	$sigDir = (defined $parentDir) ? $parentDir : "./" ;
	@allBaits = ($baitName) ;
    }
    elsif (@tmpFiles != 0)
    {
	# strange
	die "Dying: arg seems to contain some sig files and also other stuff.\nIt must either have just sigfiles (ending in .sig), or hold no sigfiles but just a bunch of baitdirs\n" ;
    }

    foreach my $bait (@allBaits)
    {
	# InterPool.Results is used as a per-bait scratch dir, it must
	# not exist when we start on a bait
	(-e "InterPool.Results") &&
	    die "dying: InterPool.Results exists, rename it temporarily\n" ;

	# find type: WASP2 or WASP6? must be in $bait dir name.
	# $type: 2 or 6 (for WASP2 or WASP6, WASP2_384 is same as WASP2 for decoding)
	my $type ;
	if ($bait =~ /\.WASP([26])$/)
	{
	    $type = $1 ;
	}
	elsif ($bait =~ /\.WASP2_384$/)
	{
	    $type = 2 ;
	}
	else
	{
	    warn "$sigDir/$bait doesn't end with .WASP[26] or WASP2_384, not a sig dir? skipping" ;
	    next ;
	}

	# only make the results dir once we know $bait is a sig dir
	my $baitDecDir = "$decDir/$bait.Decoded" ;
	if (! -e $baitDecDir)
	{
	    mkdir($baitDecDir) ||
		die "cannot make dir $baitDecDir: $!\n" ;
	    warn "created dir $baitDecDir\n" ;
	}

	opendir(my $sigsDH, "$sigDir/$bait") ||
	    die "cannot open dir $sigDir/$bait: $!\n" ;
	while (defined(my $sigFile = readdir($sigsDH)))
	{
	    # skip . and .., and anything starting with .
	    ($sigFile =~ /^\./) && next ;
	    # if sigFile was already decoded with this dist, skip
	    if (-e "$baitDecDir/$sigFile.dist$dist.Decoded")
	    {
		warn "$baitDecDir/$sigFile.dist$dist.Decoded already exists, skipping.\n" ;
		next ;
	    }

	    if ($sigFile !~ /\.batch(\d+)\.sig$/)
	    {
		warn "sigFile $sigFile doesn't end in batch\\d+.sig, skipping.\n" ;
		next ;
	    }
	    my $batch = $1 ;

	    # number of vars n and of pools nbPools, and design file to use
	    my ($n, $nbPools, $designFile) = decodingParams($type, $batch) ;
	    if (! defined $designFile)
	    {
		warn "no design file is known for batch $batch of WASP$type, skipping $sigFile.\n" ;
		next ;
	    }

	    # OK, decode!
	    runDecoding($n, $nbPools, $designFile, "$sigDir/$bait/$sigFile") ;
	    # don't have to test -e $sig.dist$dist because InterPool.Results is new
	    # still have to test if ipoolDecoding died, though. If it did, just skip.
	    my $rawResult = "InterPool.Results/$sigFile.Decoded" ;
	    if (-e $rawResult)
	    {
		rename($rawResult, "InterPool.Results/$sigFile.dist$dist.Decoded") ||
		    warn "cannot rename $rawResult to add .dist$dist: $!\n" ;
	    }
	    else
	    {
		warn "ipoolDecoding probably died, there's no $rawResult\n" ;
	    }
	}
	closedir($sigsDH) ;

	# if InterPool.Results doesn't exist, there was nothing to do, try next bait
	if (! -e "InterPool.Results/")
	{
	    warn "there was nothing to do for $bait, trying next bait\n\n" ;
	    next ;
	}

	# Otherwise: add names and move to correct dir
	(system("addNamesToDecodings.pl", "InterPool.Results") == 0) ||
	    warn "addNamesToDecodings.pl didn't exit cleanly on InterPool.Results\n" ;
	warn "\nFinished decoding $bait and adding names, moving to $baitDecDir/\n\n" ;
	# expand the file list ourselves and call mv without a shell
	my @results = glob("InterPool.Results/*") ;
	if (@results)
	{
	    (system("mv", @results, "$baitDecDir/") == 0) ||
		warn "could not move every file from InterPool.Results/ to $baitDecDir/\n" ;
	}
	rmdir("InterPool.Results/") ||
	    die "cannot rmdir InterPool.Results, impossible!\n" ;
    }
}
