#!/usr/bin/env perl

# created: 7/12/04
# author: NTM

#
# Copyright (C) Nicolas Thierry-Mieg, 2009.
#
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


# reads a bunch of micro-pool codefiles, and outputs in $outdir the 
# cherry-picking files for the robot.
# I am starting off with the script used for the human 940-preys pilot project,
# and adapting it for the C. elegans full ORFeome project with Xiaofeng.

# The script was initially written to deal with the first 60 
# batches of 169 ORFs, ie source plates 11001 through 11109 
# except 11083 (which is not full). This made it possible to build
# destination plates 1A-9A, 1B-9B, etc... up to 1L-9L.
# The idea was to adapt it later to deal with the remaining batches.
# Now, the script deals with everything cleanly: to add more source plates, 
# you must just add a few bits of code (search for "first 60").


# we have all source plate numbers from 11001 to 11114 inclusive;
# each source plate uses 94 wells (all wells except G12 and H12), except:
# 11083, which ends with A07 (columns 1-6, and A07, are full);
# 11114, which ends with H08 (columns 1-8 are full).
#
# we will consider each source plate one at a time, except for 11083 which
# will be dealt with in the end along with 11114.
# We consider a continuous flow of source wells: when you reach well 94 of
# a plate, the next source is the first well of the next plate.
# Roughly, the first 9 source plates will go into destination plates 1A to 9A;
# the next 9 go into 1B to 9B; etc..., until we have source plates
# 101 to 109 (given that we skip 83) going into dest plates 1L to 9L.
# All A dest plates get built according to STD.n2028.q13.k13.part1,
# B plates according to STD.n2028.q13.k13.part2, etc until L plates
# which get built according to STD.n2028.q13.k13.part12.
# FOLLOWING IS OBSOLETE (we solved this issue by skipping one empty
# well in every set of 9 source plates), KEPT HERE FOR THE RECORD.
# I said roughly because the last well of every ninth plate must be transferred
# by hand into a temporary deep-well plate: SOURCE_EXTRA (or maybe in a
# sealed tube?). SOURCE_EXTRA will be used in the end, along with 11083
# as well as 11110-11114 (total number of preys for this final batch: 501).
# This way, every time we are done with a batch of 9 source plates, the 
# 9 corresponding dest plates are all done, and can be copied to glycerol 
# and frozen. The first 8 of these will have just 2 empty wells (G12 and H12),
# but the ninth will also have F12 empty.


use strict ;
use warnings ;

#debug: set to zero for normal usage, >0 for extra output
my $debug = 1 ;


# dir where command files are written
my $outdir = "CommandFiles_tmp/" ;
(-d $outdir) || die "you must mkdir $outdir\n" ;

# single file where important info is saved (eg mapping between  preys and
# variable numbers in each batch, pooling designs used, etc...)
my $logFile = "$outdir/NTM_mappingGlobal" ;


# number of vars (ie preys) in a batch.
# We are building micropools, so this is always 169 (except the last batch).
my $n = 169 ;

# q and k values. number of pools is $q*$k
my $q = 13 ;
my $k = 13 ;
my $nbPools = $q * $k ;


##########################################################
# source volume
##########################################################

# sourceVolume: volume, in microlitres, that should be transferred 
# from each source well into each destination well.
# Using 30 ul.
my $sourceVolume = 30 ;


##########################################################
# design files to use
##########################################################

# files are $baseDesign."1" through $baseDesign."12"
my $baseDesign = "../Designs/MicroPools/STD.n2028.q13.k13.blocsPerPart1/STD.n2028.q13.k13.part" ;


##########################################################
# info on source plates and wells
##########################################################

# list of source plate names to use, in that order
# for first 60 batches:
my @sourcePlates = (11001..11082, 11084..11109) ;
# remaining batches
@sourcePlates = (@sourcePlates,11110..11113,31001..31022,11114,11083) ;
# corresponding number of wells to use
# for first 60 batches:
my @usedSourceWells = (94)x108 ;
# remaining batches: 31022 has 72, 11114 has 64, 11083 has 49
@usedSourceWells = (@usedSourceWells,94,94,94,94,(94)x21,72,64,49) ;

# sanity check: both vectors should have same number of elements
(@sourcePlates != @usedSourceWells) && 
    die "in source, mismatch between sizes of Plates and usedWells\n" ;

# index (in @sourcePlates) of last plate used in the previous batch
my $prevSourcePlateIndex = 0 ;

# last well used (in $sourcePlates[$prevSourcePlateIndex]) in previous batch
# init to 0, will get incremented before first usage
my $prevSourceWell = 0 ;


# key == source plate id, value == comma-separated short names of 
# dest plates where this source goes, eg "01A,02A,03A,04A" for source 11002
# This is used for generating the command file names
my %dstPlatesForSrc = () ;


# we have some empty wells in the source plates. In order to make every 
# set of 9 plates (in the first 60 batches) finish both a source and a dest 
# plate, we skip one of these empty wells in every set of 9 consecutive source 
# plates.
# When a set of 9 has an empty well in a plate's 12th column, we skip that well;
# otherwise, skipping a well can lead to a lot of wasted time, so to avoid
# this we deal with the corresponding column just before column 12.
# we will skip: 3_C12, 13_D12, 21_B11, 30_B11, 43_B8, 51_G8, 
# 61_H11, 68_B12,76_A11, 88_A11, 95_A12, 108_A10.
my %wellsToSkip = ("11003"=>"91", 
		   "11013"=>"92",
		   "11021"=>"82",
		   "11030"=>"82",
		   "11043"=>"58",
		   "11051"=>"63",
		   "11061"=>"88",
		   "11068"=>"90",
		   "11076"=>"81",
		   "11088"=>"81",
		   "11095"=>"89",
		   "11108"=>"73") ;



##########################################################
# info on destination plates and wells
##########################################################

# destName: some code to represent the destination plates 
# (I get to choose this)
my $destName = "" ; # could be "WAMP_", Worm AD Micro-Pools

# list of dest plate names to use, in that order
my @destPlates = () ;

# for first 60 batches:
foreach my $destSuf ("A".."L")
{ 
    foreach my $destPlNum (1..9) 
    {
	# add leading zero if $destPlNum<10
	($destPlNum < 10) && ($destPlNum = "0".$destPlNum) ;
	push(@destPlates, "$destName"."$destPlNum"."$destSuf") ;
    }
}
# remaining batches:
# 2 batches per letter for A to C...
foreach my $destSuf ("A".."C")
{
    foreach my $destPlNum (10..13)
    {
	# add leading zero if $destPlNum<10
	($destPlNum < 10) && ($destPlNum = "0".$destPlNum) ;
	push(@destPlates, "$destName"."$destPlNum"."$destSuf") ;
    }
}
# and a single batch for D to L
foreach my $destSuf ("D".."L")
{
    foreach my $destPlNum (10..11)
    {
	# add leading zero if $destPlNum<10
	($destPlNum < 10) && ($destPlNum = "0".$destPlNum) ;
	push(@destPlates, "$destName"."$destPlNum"."$destSuf") ;
    }
}


# corresponding number of wells to use
#  for first 60 batches: 94 if ($destPlate != 9), 93 otherwise.
my @usedDestWells = (94,94,94,94,94,94,94,94,93)x12 ;
# for remaining batches: 
# 2 batches in A to C: 94,94,94,56
@usedDestWells = (@usedDestWells, (94,94,94,56)x3) ;
# single batch in D to L, ie 94 in plates 10, and leftover (==75) in plates 11
@usedDestWells = (@usedDestWells, (94,75)x9) ;


# sanity check: both vectors should have same number of elements
(@destPlates != @usedDestWells) && 
    die "in dest, mismatch between sizes of Plates and usedWells\n" ;

# index (in @destPlates) of last plate used in the previous batch
my $prevDestPlateIndex = 0 ;
# last well used (in $destPlates[$prevDestPlateIndex]) in previous batch
# init to 0, will get incremented before first usage
my $prevDestWell = 0 ;


#####################################################
# $output{$sp:$sw:$j} is a string ($sp is the source plate, $sw is the
# source well, 0 <= j < $k): it is the robot's command line for moving 
# source prey $sp,$sw to its jth destination well
my %output ;

# $outputsDone{$sp:$sw} is the number of output lines already done for source prey $sp:$sw
my %outputsDone ;
foreach my $spi (0..$#sourcePlates)
{
    my $sp = $sourcePlates[$spi] ;
    foreach my $sw (1..$usedSourceWells[$spi])
    {
	# don't define $outputsDone for skipped wells, so we'll
	# get a warning if we still try to access or modify it
	if ((defined $wellsToSkip{"$sp"}) && ($wellsToSkip{"$sp"} == $sw))
	{
	    next ;
	}
	else
	{
	    $outputsDone{"$sp:$sw"} = 0 ;
	}
    }
}

##########################################################
# begin main loop: build the output strings
##########################################################

open (LOG, ">$logFile") || die "cannot open $logFile for writing\n" ;


# for first 60 batches:
# foreach my $batch (1..60) { my $part = 1 + int(($batch - 1) / 5) ; 
# for remaining batches:
# 2 batches for A to C
# foreach my $batch (1..6) { my $part = 1 + int(($batch - 1) / 2) ; 
# 1 batch for D to L
# foreach my $batch (4..12) { my $part = $batch ; 
# Unified version, for all batches:
foreach my $batch (1..75)
{
    my $part ;
    if ($batch <= 60)
    {
	$part = 1 + int(($batch - 1) / 5) ;
    }
    elsif ($batch <= 66)
    {
	$part = 1 + int(($batch - 61) / 2) ;
    }
    else
    {
	$part = $batch - 63 ;
    }

    my $design = $baseDesign."$part" ;
    
    print LOG "##########################################################\n" ;
    print LOG "batch $batch: working with design $design\n" ;

    open(DESIGN, "$design") || die "cannot open $design for reading\n" ;


    ############################################
    # fill up sourceMapping table for current batch

    print LOG "\nMapping of preys to variables for batch $batch:\n" ;

    # sourceMapping: vector of n strings, sourceMapping[i] is the full id 
    # (ie plate,well) of prey number i (0 <= i < n) in current batch.
    my @sourceMapping = () ; 

    my $currentSrcPlateIndex = $prevSourcePlateIndex ;
    my $currentSrcPlate = $sourcePlates[$currentSrcPlateIndex] ;
    my $currentSrcWell = $prevSourceWell ;

    foreach my $i (0..$n-1)
    {
	$currentSrcWell++ ;

	# skip well if required
	if ( (defined $wellsToSkip{"$currentSrcPlate"}) && 
	     ($wellsToSkip{"$currentSrcPlate"} == $currentSrcWell))
	{
	    $currentSrcWell++ ;
	}

	if ($currentSrcWell >= $usedSourceWells[$currentSrcPlateIndex] + 1)
	{
	    # >= instead of == because may get incremented several times once
	    # we have finished the last sourcePlate (really?? not convinced, but nevermind)
	    if ($currentSrcPlateIndex == $#sourcePlates)
	    {
		if ($debug > 0)
		{
		    warn "want to switch to next src plate but none left.\n" ;
		    warn "batch==$batch, stopping before i==$i\n" ;
		}
		# even without debug, log something
		print LOG "no more preys, this batch has only $i variables\n" ;
		last ;
	    }
	    
	    $currentSrcWell = 1 ;
	    $currentSrcPlateIndex++ ;
	    $currentSrcPlate = $sourcePlates[$currentSrcPlateIndex] ;

	    # again we might have to skip the well, if a skipped well is
	    # the first well of a plate. This isn't the case currently, but
	    # let's test it anyways
	    if ( (defined $wellsToSkip{"$currentSrcPlate"}) && 
		 ($wellsToSkip{"$currentSrcPlate"} == $currentSrcWell))
	    {
		$currentSrcWell++ ;
	    }
	}

	$sourceMapping[$i] = "$currentSrcPlate,$currentSrcWell" ;
	print LOG "variable $i is: $currentSrcPlate,$currentSrcWell\n" ;
    }

    # set $prevSource* for next batch
    $prevSourcePlateIndex = $currentSrcPlateIndex ;
    $prevSourceWell = $currentSrcWell ;

    ############################################
    # idem for destinations

    print LOG "\nMapping of pools for batch $batch:\n" ;

    # destMapping: vector of nbPools strings, destMapping[i] is the full id 
    # (format is plate,well) of destination pool i (0 <= i < nbPools) in 
    # current batch.

    my @destMapping = () ;

    # fill up destMapping table for current batch
    my $currentDstPlateIndex = $prevDestPlateIndex ;
    my $currentDstPlate = $destPlates[$currentDstPlateIndex] ;
    my $currentDstWell = $prevDestWell ;

    foreach my $i (0..$nbPools - 1)
    {
	($debug > 2) && 
	    (warn "in Dest: i is $i, plate is $currentDstPlate, well is $currentDstWell.\n") ;
	
	$currentDstWell++ ;
	if ($currentDstWell == $usedDestWells[$currentDstPlateIndex] + 1)
	{
	    ($currentDstPlateIndex == $#destPlates) && 
		(die "want to switch to next dst plate but none left.\n") ;
	    $currentDstWell = 1 ;
	    $currentDstPlateIndex++ ;
	    $currentDstPlate = $destPlates[$currentDstPlateIndex] ;
	}

	$destMapping[$i] = "$currentDstPlate,$currentDstWell" ;

	# we just log the first and last wells of each dest plate for this batch
	if ($i==0)
	{
	    print LOG "first pool ($i) for batch is located at $currentDstPlate,$currentDstWell\n" ;
	}
	elsif ($i == $nbPools-1)
	{
	    print LOG "last pool ($i) for batch is located at $currentDstPlate,$currentDstWell\n" ;
	    # also, final newline for this batch
	    print LOG "\n" ;
	}
	elsif ($currentDstWell == $usedDestWells[$currentDstPlateIndex])
	{
	    print LOG "pool $i completes a plate, at $currentDstPlate,$currentDstWell\n" ;
	}
	elsif ($currentDstWell == 1)
	{
	    print LOG "pool $i starts a new plate, at $currentDstPlate,$currentDstWell\n" ;
	}
    }

    # set $prevDest* for next batch
    $prevDestPlateIndex = $currentDstPlateIndex ;
    $prevDestWell = $currentDstWell ;


    ############################################
    # parse design file and fill up %output

    # destCnt: counter to keep track of current pool in batch (ie destination)
    my $destCnt = 0 ;
    while(<DESIGN>)
    {
	chomp ;
	
	if ($debug > 1)
	{
	    warn "in batch $batch, examining line:\n$_\n" ;
	}
	
	my @preysInPool = split(/:/, $_) ;
	
	my ($dp,$dw) = split(/,/, $destMapping[$destCnt]) ;
	# remove $destName to get short name, for dstPlatesForSrc
	my $dpShort = $dp ;
	$dpShort =~ s/^$destName// || die "cannot remove destName from $dpShort\n" ;

	foreach my $prey (@preysInPool)
	{
	    if ($prey >= $n)
	    {
		die "prey $prey is too large\n" ;
	    }

	    if (! defined ($sourceMapping[$prey]))
	    {
		($debug > 1) &&
		    warn "while parsing design, skipping unused prey $prey\n" ;
		next ;
	    }

	    my ($sp,$sw) = split(/,/, $sourceMapping[$prey]) ;

	    # enrich $dstPlatesForSrc{$sp} if necessary
	    if ( (! defined $dstPlatesForSrc{$sp}) || (! grep(/$dpShort/, $dstPlatesForSrc{"$sp"})) )
	    {
		$dstPlatesForSrc{$sp} .= "$dpShort," ;
	    }

	    my $j = $outputsDone{"$sp:$sw"}++ ;
	    
	    $output{"$sp:$sw:$j"} = "A;;$sp;Mike 96well DeepWell Qiagen;$sw;;$sourceVolume" ;
	    if ($j == 0)
	    {
		# first aspiration from this well: mix before aspiration
		$output{"$sp:$sw:$j"} .= ";Xiaofeng Mix"
	    }

	    # add CRLF instead of \n for newlines, the robot software may need DOS text files
	    $output{"$sp:$sw:$j"} .= "\015\012" ;
	    $output{"$sp:$sw:$j"} .= "D;;$dp;Mike 96well DeepWell Qiagen;$dw;;$sourceVolume" ;
	    $output{"$sp:$sw:$j"} .= "\015\012W;\015\012" ;

	    # old version generating csv file was:
	    # $output{"$sp:$sw:$j"} = "RA001,1," . $sourceMapping[$prey] . ",ORF,$sourceVolume,$dp,$dw\n" ;
	}

	$destCnt++ ;
    }
    
    close(DESIGN) ;
}

close(LOG) ;



##########################################################
# printing the output strings (in a favorable order)
##########################################################

# now output the lines in a favorable order for the cherry-picking robot: have each set of
# 8 successive lines correspond to sources in a single column of wells from the same plate

# ADDED NTM 20/04/05: also, do all the occurrences of each source column before moving to 
# the next. This is so we can do the up-and-down program only once on each well.

# ADDED NTM 22/08/06: also, if current plate has a skipped well, do the
# corresponding column after column 12, so we don't propagate misalignments
# which slow down the robot


# sub to sort the short dst names (ie, \d\d\w)
sub destSort
{
    ($a =~ /^(\d\d)(\w)$/) || die "in destSort, cannot parse first arg: $a\n" ;
    my ($aN,$aL) = ($1,$2) ;
    ($b =~ /^(\d\d)(\w)$/) || die "in destSort, cannot parse second arg: $b\n" ;
    my ($bN,$bL) = ($1,$2) ;

    if (($aL cmp $bL) != 0)
    { 
	return ($aL cmp $bL) ; 
    }
    else
    {
	return ($aN <=> $bN) ;
    }
}

foreach my $srcPlateIndex (0..$#sourcePlates)
{
    my $srcPlate = $sourcePlates[$srcPlateIndex] ;

    # outFile: name of file to produce (we want one for each source plate)
    my @tmpDests = sort (destSort split(/,/, $dstPlatesForSrc{"$srcPlate"})) ;
    my $destsShort = "$tmpDests[0]-$tmpDests[$#tmpDests]" ;
    my $outDir2 = $outdir."/NTM_src$srcPlate"."_dsts$destsShort/" ;
    (-e $outDir2) && die "outDir2 $outDir2 already exists!\n" ;
    mkdir $outDir2 ;
    open (OUTFILE, ">$outDir2/Worklist.gwl") ;
    
    # number of used columns in this source plate
    my $numCols = 1 + int(($usedSourceWells[$srcPlateIndex] - 1) / 8) ;

    # columns to use, in correct order
    my @allCols ;
    if (defined  $wellsToSkip{"$srcPlate"})
    {
	my $colToDelay = 1 + int(($wellsToSkip{"$srcPlate"} - 1) / 8) ;
	@allCols = (1..$colToDelay-1, $colToDelay+1..$numCols, $colToDelay) ;
	# sanity
	(@allCols != $numCols) && die "trying to delay a column but problem with allCols\n" ;
    }
    else
    {
	@allCols = (1..$numCols) ;
    }

    foreach my $srcColumn (@allCols)
    {
	# number of (used, non-empty) preys in this column, not taking 
	# into account the skipped wells
	my $numPreysInCol ;
	if ($srcColumn < $numCols)
	{
	    $numPreysInCol = 8 ;
	}
	else
	{
	    $numPreysInCol = $usedSourceWells[$srcPlateIndex] - 8 * ($numCols - 1) ;
	}

	my $firstPreyInCol = 8 * ($srcColumn - 1) + 1 ;
	my $lastPreyInCol = 8 * ($srcColumn - 1) + $numPreysInCol ;

	# list of wells to use in this column
	my @allWells ;
	if ( (defined  $wellsToSkip{"$srcPlate"}) && 
	     ($wellsToSkip{"$srcPlate"} >= $firstPreyInCol) &&
	     ($wellsToSkip{"$srcPlate"} <= $lastPreyInCol) )
	{
	    @allWells = ($firstPreyInCol..$wellsToSkip{"$srcPlate"}-1,$wellsToSkip{"$srcPlate"}+1..$lastPreyInCol) ;
	}
	else
	{
	    @allWells = ($firstPreyInCol..$lastPreyInCol) ;
	}

	foreach my $layer (0..$k-1)
	{
	    foreach my $srcWell (@allWells)
	    {
		my $line = $output{"$srcPlate:$srcWell:$layer"} ;
		($line) || die "current line is empty! SP==$srcPlate, SW==$srcWell, layer==$layer\n" ;
		print OUTFILE $line ;
		
		# NTM 25/07/06: no empty commands anymore (or maybe just 2 per source plate?)
		# In any case, be careful: some unused wells may not be empty! these shouldn't
		# go into the control wells 95 and 96 of the dest plates!
		#
		# add some "empty" commands when a column has some empty wells,
		# so the robot remains "optimized" (ie it always takes 8 source wells
		# which make up a complete column in the source plates)
		#if (($numPreysInCol < 8) && ($srcWell == $lastPreyInCol))
		#{
		    # add (8-$numPreysInCol) lines for the remaining wells of this column 
		    # (but make sure they are EMPTY!), taking 1 microlitre and going to an 
		    # empty dest well (using wells 95 and 96 of current destination plate)
		    
		    # change volume to 1 microlitre and dest well to 95
		    # $line =~ s/,ORF,$sourceVolume,(\w+),\d+\n$/,ORF,1,$1,95\n/ 
			# || die "cannot change volume and dest well to 95\n" ;
		    
		    # change source well to 95
		    # $line =~ s/,9[34],ORF,1,/,95,ORF,1,/ || die "for empty command: cannot change 93/94 to 95\n" ;
		    # print OUTFILE $line ;
		    
		    # now change source and dest wells to 96
		    # $line =~ s/,95,ORF,1,/,96,ORF,1,/ || die "for empty command: cannot change 95 to 96 in src\n" ;
		    # $line =~ s/,95\n$/,96\n/ || die "for empty command: cannot change 95 to 96 in dst\n" ;
		    # print OUTFILE $line ;
		#}
	    }
	}
    }

    close(OUTFILE) ;
}


