#!/usr/bin/env perl

# Created: 03/11/2006
# Author: NTM

#
# Copyright (C) Nicolas Thierry-Mieg, 2009.
#
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


# use NTM_mappingGlobal and our knowledge of who went where
# in the superposition process, to generate separate files
# for each set of WASPs, with the mappings of variables
# within each batch; and also with the mappings of
# WASP plate+well coordinates to batch+poolnums.
# We have 2 scripts, one for WASP2 and one for WASP6;
# some parts are identical but it was simpler to make
# 2 separate files.
# I initially wrote the WASP6 script, and then copied
# and adapted it for WASP2.

# files are created in current dir. This script (WASP2) names them
# var2orf.WASP2 and coords2pools.WASP2x4; the WASP6 script is expected
# to use the corresponding WASP6 names.
# content is:
# - possible comment lines starting with '#'
# - one line of column headers
# - comma-separated mappings:
#   - var2orf has <batch>,<var>,<platenum>@<well>
#     where var starts at 0, platenum is the ORFeome plate
#     number, and well is [A-H]\d\d;
#     var will usually end at 1013 for WASP6 and at 337 for WASP2,
#     but some batches may be smaller.
#   - coords2pools has <plate>,<col>,<row>,<batch>,<poolnum>
#     where poolnum starts at 0 and all others start at 1.

use strict ;
use warnings ;


###############################################################################

# WAMP batches are numbered from 1 to 75, see NTM_mappingGlobal.
# These numbers correspond to WAMP plates 01A-09A (batches 1-5),
# 01B-09B (batches 6-10), etc... for the first 60 batches; and
# then plates 10A-13A (batches 61 and 62), 10B-13B (batches 63-64),
# 10C-13C (batches 65-66); and finally 10D-11D (batch 67), 10E-11E
# (batch 68), up to 10L-11L (batch 75).
# Read the mappings for each wamp batch from NTM_mappingGlobal, and
# save them in @wampVarMapping: variable $V of batch $B corresponds
# to ORF $wampVarMapping[169*($B-1) + $V],
# where $B varies from 1 to 75 and $V from 0 to 168.
my @wampVarMapping ;
# Also save mapping between wamp plates+wells and wamp batch+poolnum
# $wampPoolMapping{"$wmpP:$W"} == "$wmpB:$poolnum", where:
# $wmpP ranges from "01A" to "11L", including up to 13 for A-C;
# $W ranges from 1 to 96;
# $wmpB ranges from 1 to 75
# $poolnum ranges from 0 to 168
my %wampPoolMapping ;

# Parse NTM_mappingGlobal line by line. Recognized lines are:
# - "Mapping of preys to variables for batch N:" sets the current batch;
# - "variable V is: PPPPP,W" fills @wampVarMapping for the current batch;
# - "first pool (0) ..." / "pool N starts a new plate, ..." open a run of
#   consecutive pools on one plate;
# - "pool N completes a plate, ..." / "last pool (168) ..." close that run
#   and fill %wampPoolMapping for every well of the run;
# - separator, "batch N: working with design", "Mapping of pools for
#   batch N:" and empty lines are skipped.
# Any other line is a fatal error.
my $mappingGlobal = "NTM_mappingGlobal" ;
open(my $mapFh, "<", $mappingGlobal) ||
    die "cannot open $mappingGlobal for reading: $!\n" ;
{
    # row letters, indexed by (row number - 1)
    my @rowLetters = ("A".."H") ;
    # batch announced by the most recent "Mapping of preys..." header
    my $batch ;
    # plate, well and poolnum where the currently open run of pools started
    my ($prevWmpP,$prevW,$prevPoolnum) ;
    while(my $line = <$mapFh>)
    {
        chomp $line ;
        if ($line =~ /^Mapping of preys to variables for batch (\d+):$/)
        {
            $batch = $1 ;
            next ;
        }
        elsif ($line =~ /^variable (\d+) is: (\d\d\d\d\d),(\d+)$/)
        {
            my ($var,$plate,$well) = ($1,$2,$3) ;
            # without a batch header we cannot compute the array index
            (defined $batch) ||
                die "while reading $mappingGlobal, no batch header before line:\n$line\n" ;
            # $well must be a number in 1..96, otherwise we would build
            # a well name that does not exist on a 96-well plate
            (($well >= 1) && ($well <= 96)) ||
                die "while reading $mappingGlobal, well $well is not in 1..96:\n$line\n" ;

            # build $wellTxt of the form C04: wells are numbered down
            # the columns, 8 rows per column
            my $col = sprintf("%02d", 1 + int(($well-1) / 8)) ;
            my $row = $rowLetters[($well-1) % 8] ;
            my $wellTxt = "$row$col" ;

            # save in wampVarMapping
            my $idx = 169*($batch-1) + $var ;
            (defined $wampVarMapping[$idx]) &&
                die "trying to save wampVarMapping[169*($batch-1) + $var] but it's already defined\n" ;
            $wampVarMapping[$idx] = join("@", $plate, $wellTxt) ;
            next ;
        }
        elsif (($line =~ /^first pool \((0)\) for batch is located at (\d\d[A-L]),(\d+)$/) ||
               ($line =~ /^pool (\d+) starts a new plate, at (\d\d[A-L]),(\d+)$/))
        {
            # remember where this run of pools starts
            ($prevPoolnum,$prevWmpP,$prevW) = ($1,$2,$3) ;
        }
        elsif (($line =~ /^pool (\d+) completes a plate, at (\d\d[A-L]),(\d+)$/) ||
               ($line =~ /^last pool \((168)\) for batch is located at (\d\d[A-L]),(\d+)$/))
        {
            my ($poolnum,$wmpP,$W) = ($1,$2,$3) ;
            (defined $batch) ||
                die "while reading $mappingGlobal, no batch header before line:\n$line\n" ;
            # a run can only be closed if it was opened before
            (defined $prevWmpP) ||
                die "while reading $mappingGlobal, no starting pool before line:\n$line\n" ;
            ($wmpP ne $prevWmpP) &&
                die "when saving wampPoolMapping, wmpP ($wmpP) ne prev ($prevWmpP)\n" ;
            # pools and wells must advance in lockstep within the run
            (($poolnum - $prevPoolnum) != ($W - $prevW)) &&
                die "when saving wampPoolMapping, ($poolnum - $prevPoolnum) != ($W - $prevW)\n" ;
            # save all intermediate values in wampPoolMapping
            my $PNtmp = $prevPoolnum ;
            foreach my $Wtmp ($prevW..$W)
            {
                $wampPoolMapping{"$wmpP:$Wtmp"} = "$batch:$PNtmp" ;
                $PNtmp++ ;
            }
        }
        elsif (($line =~ /^\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#/) ||
               ($line =~ /^batch \d+: working with design/) ||
               ($line =~ /^Mapping of pools for batch \d+:/) ||
               ($line =~ /^$/))
        {
            # skip all these lines
            next ;
        }
        else
        {
            die "while reading $mappingGlobal, cannot parse line:\n$line\n" ;
        }
    }
}
close($mapFh) ;

# Sanity: @wampVarMapping must be completely filled, i.e. its last
# index must be 12674 (== 75*169 - 1) and every element must look
# like "<5-digit plate>@<well>".
if ($#wampVarMapping != 12674)
{
    die "last index in wampVarMapping is wrong\n" ;
}
for (my $idx = 0 ; $idx <= $#wampVarMapping ; $idx++)
{
    unless ($wampVarMapping[$idx] =~ /^\d\d\d\d\d@[A-H]\d\d$/)
    {
        die "bad wampVarMapping value at index $idx: $wampVarMapping[$idx]\n" ;
    }
}

# Likewise, %wampPoolMapping must hold exactly 12675 entries.
if (scalar(keys(%wampPoolMapping)) != 12675)
{
    die "wampPoolMapping has bad number of entries\n" ;
}


###############################################################################

# The WAMP batches were assembled to obtain the WASP2
# batches. However, some pairs of WAMP plates were
# superposed, and others were not. This leads to
# various types of WASP2 batches:
# - some are just WAMPs (no superposition);
# - some are regular WASP2s (superposition of 2 WAMPs);
# - others are mixed batches, where starting from 2 WAMPs
#   we kept some pools of size 13 identical to a part of the 
#   original WAMPs and other pools of size 26 are identical
#   to a part of the superposition of the 2 WAMPs... There
#   are 3 different types among this category.
#
# Some of this info is summarized in @waspBs, as follows:
# $waspBs[<wasp2 batch num>] = "b1:b2:", where b1 and b2 
# are the WAMP batch numbers for that wasp2 batch (there can be
# a single number if the WASP2 is actually a single WAMP).
# The wasp batch numbers are somewhat arbitrary (whereas the
# wamp batch numbers are given in NTM_mappingGlobal).
# I am choosing now what they represent. This choice is
# implemented in the following construction of @waspBs.
# Basically, I'm just reading a table from left to right,
# and top to bottom. (This table should be in my draft papers
# somewhere; it has 12 columns named A through L and 13 rows
# labeled 01 through 13, representing the WAMP plates;
# overlaid, I have the 75 WAMP batches (each is on a single 
# column but overlaps several rows); and on top of that I
# have the info on which plates got superposed when building
# WASP2x4.)
# $waspBs[<wasp2 batch num>] holds the colon-terminated WAMP batch
# number(s) making up that WASP2 batch; index 0 is left unused.
my @waspBs ;

# batches 1-4 are just WAMPs
@waspBs[1..4] = ("1:", "6:", "11:", "16:") ;

# batches 5-8 have 94x2 pools of size 13, + 75 pools of size 26
@waspBs[5..8] = ("21:26:", "31:36:", "41:46:", "51:56:") ;

# batches 9 and 10 have 19x2 pools of size 13, + 150 pools of size 26
@waspBs[9,10] = ("2:7:", "12:17:") ;

# We then have a lot of regular superpositions of 2 WAMPs
@waspBs[11..14] = ("22:27:", "32:37:", "42:47:", "52:57:") ;

# batches 15-32 come in three groups of six: within a group the first
# WAMP number grows by 10 from one batch to the next, the groups start
# at WAMP 3, 4 and 5 respectively, and the partner WAMP is always the
# first one + 5
foreach my $slot (15..32)
{
    my $k = $slot - 15 ;
    my $first = 10 * ($k % 6) + 3 + int($k / 6) ;
    my $second = $first + 5 ;
    $waspBs[$slot] = "$first:$second:" ;
}

@waspBs[33..36] = ("61:63:", "65:67:", "68:69:", "70:71:") ;

# Then again some strange guys, with 94 pools of
# size 26 + 75x2 pools of size 13
@waspBs[37,38] = ("72:73:", "74:75:") ;

# and finally one regular WASP2...
$waspBs[39] = "62:64:" ;

# ...and one WAMP
$waspBs[40] = "66:" ;



# sanity: we should have a batch for each index in 1..40,
# every WAMP batch listed there should be a number in 1..75,
# and each WAMP batch number should be present exactly once
{
    # $wmpSeen[$w] is set to 1 once WAMP batch $w has been found in @waspBs
    my @wmpSeen = () ;
    foreach my $i (1..40)
    {
        (defined $waspBs[$i]) ||
            die "when building waspBs, no value at index $i\n" ;
        foreach my $w (split(/:/, $waspBs[$i]))
        {
            # an empty, non-numeric or out-of-range field would not be
            # caught by the two checks below, so reject it explicitly
            (($w =~ /^\d+$/) && ($w >= 1) && ($w <= 75)) ||
                die "when building waspBs, bad WAMP batch '$w' at index $i\n" ;
            (defined $wmpSeen[$w]) &&
                die "when building waspBs, WAMP batch $w seen twice\n" ;
            $wmpSeen[$w] = 1 ;
        }
    }
    # check that each wamp was seen
    foreach my $i (1..75)
    {
        ((defined $wmpSeen[$i]) && ($wmpSeen[$i] == 1)) ||
            die "in waspBs, WAMP batch $i never seen\n" ;
    }
}


###############################################################################

# OK, print WASP2 var to ORF mappings.
# For each WASP2 batch, the variables of its WAMP batch(es) are written
# one after the other (in the order listed in @waspBs) and renumbered
# consecutively from 0.
# The output file must not exist yet; open and close failures are fatal,
# so that a write error cannot go unnoticed.
my $waspVtoOfile = "var2orf.WASP2" ;
(-f $waspVtoOfile) && die "$waspVtoOfile already exists, remove or rename it\n" ;
open(my $v2oFh, ">", $waspVtoOfile) ||
    die "cannot open $waspVtoOfile for writing: $!\n" ;
print {$v2oFh} "# Mapping of variables to ORFs, for each WASP2 batch\n" ;
print {$v2oFh} "Batch,Variable,ORF\n" ;

foreach my $B (1..40)
{
    my @wamps = split(/:/, $waspBs[$B]) ;
    # running variable number within WASP2 batch $B
    my $var = 0 ;
    foreach my $wmpB (@wamps)
    {
        foreach my $wmpV (0..168)
        {
            print {$v2oFh} "$B,$var,", $wampVarMapping[169*($wmpB-1) + $wmpV], "\n" ;
            $var++ ;
        }
    }

    # sanity: number of vars per WASP batch: 169 for the batches made
    # of a single WAMP (1-4 and 40), 338 for all others
    if (($B <= 4) || ($B == 40))
    {
        ($var != 169) &&
            die "when writing var2orf, for batch $B, $var != 169\n" ;
    }
    else
    {
        ($var != 338) &&
            die "when writing var2ORF, for batch $B, $var != 338\n" ;
    }
}
# buffered write errors only show up when closing
close($v2oFh) || die "error while closing $waspVtoOfile: $!\n" ;


###############################################################################

# now generate a mapping between WASP2x4 plates+wells and 
# WASP2 batch+poolnumber
# format will be: "<plate>,<col>,<row>,<batch>,<poolnum>" where:
# <plate> is in 1..20;
# <col> is in 1..24;
# <row> is in 1..16;
# <batch> is in 1..40;
# <poolnum> is in 0..168 for regular batches, but goes up to
#    262 for batches 5-8, 187 for batches 9 and 10, and 243 for
#    batches 37 and 38.

# @plate2pool is filled first (so it can be filled out of order) and
# its content is printed to a file later, in the correct order.
# "$batch,$poolnum" is stored in $plate2pool[($P-1)*384 + ($C-1)*16 + ($R-1)]
my @plate2pool ;

# We work "plate by plate" rather than batch by batch; for any pair of
# plates that have been superposed it is enough to consider the first
# one. Build the ordered list of wamp plates to consider.
my @wmpPlates = map { "01" . $_ } ("A".."L") ;
push(@wmpPlates, map { "02" . $_ } ("A".."D", "E", "G", "I", "K")) ;
foreach my $num (3..10)
{
    # plate numbers are always written with 2 digits
    my $numTxt = sprintf("%02d", $num) ;
    push(@wmpPlates, map { $numTxt . $_ } ("A", "C", "E", "G", "I", "K")) ;
}
push(@wmpPlates, map { "11" . $_ } ("A", "C", "E", "G", "I".."L")) ;
push(@wmpPlates, "12A", "12C", "13A", "13C") ;

# The order of wamp plates in @wmpPlates happens to
# have a nice pattern in terms of whether the pools
# went to .A, .B, .C or .D in the WASP2x4 plates: it
# simply starts at .A and cycles in that order.
# We use the index $i to set $rowDec and $colDec (these
# are used later to calculate the wasp2 coordinates from
# the wamp coordinates).
# We also have a nice pattern for wasp plates, it is 1 + int($i/4)

# Lookup table from WAMP batch number to WASP2 batch number, built once
# from @waspBs so that we don't have to scan @waspBs again for every well.
# If a WAMP batch were listed several times, the first (lowest) WASP2
# batch is the one we keep.
my %waspBofWamp ;
foreach my $B (1..$#waspBs)
{
    foreach my $w (split(/:/, $waspBs[$B]))
    {
        (exists $waspBofWamp{$w}) || ($waspBofWamp{$w} = $B) ;
    }
}

# For each wamp plate in @wmpPlates, and each of its wells holding a
# pool, store "<wasp2 batch>,<wasp2 poolnum>" in @plate2pool at the
# position of the corresponding WASP2x4 plate, column and row.
foreach my $i (0..$#wmpPlates)
{
    my $wmpP = $wmpPlates[$i] ;

    # calculate wasp2x4 plate
    my $P = 1 + int($i/4) ;

    # set $rowDec = -1 for .A and .B (odd rows), 0 otherwise
    # similarly, set $colDec = -1 for .A and .C, 0 otherwise
    my ($rowDec,$colDec) = (0,0) ;
    ((($i % 4) == 0) || (($i % 4) == 1)) && ($rowDec= -1) ;
    (($i % 2) == 0) && ($colDec= -1) ;

    foreach my $wmpW (1..96)
    {
        # wells without a pool have no entry in wampPoolMapping
        (defined $wampPoolMapping{"$wmpP:$wmpW"}) || next ;

        my ($wmpB,$wmpPoolnum) = split(/:/,$wampPoolMapping{"$wmpP:$wmpW"}) ;

        # find the wasp2 batch $B
        my $B = $waspBofWamp{$wmpB} ;
        (defined $B) || die "cannot find wamp batch $wmpB in waspBs\n" ;

        # calculate the wasp2 poolnum
        # default value $wmpPoolnum is OK for the normal batches,
        # and for some plates of strange batches.
        my $poolnum = $wmpPoolnum ;
        # For special designs, some of the regular WASP2 pools are
        # split into 2 and the second half is appended at the end of the
        # normal design, so add 169 to these second halves
        if ( ($wmpP eq "01F") || ($wmpP eq "01H") || ($wmpP eq "01J") || ($wmpP eq "01L") ||
             (($wmpP eq "02B") && ($wmpB == 7)) || (($wmpP eq "02D") && ($wmpB == 17)) )
        {
            $poolnum += 169 ;
        }
        # For the following 2, the split pools are at the end, so just add 75
        # (they don't start at 0 like the above)
        elsif (($wmpP eq "11J") || ($wmpP eq "11L"))
        {
            $poolnum += 75 ;
        }

        # calculate coords in wasp2x4 plate from $wmpW: wamp wells are
        # numbered down the columns (8 rows per column), and each wamp
        # row/column maps to a pair of wasp2x4 rows/columns
        my $wmpCol = 1 + int(($wmpW - 1) / 8) ;
        my $wmpRow = 1 + (($wmpW - 1) % 8) ;
        my $R = $wmpRow * 2 + $rowDec ;
        my $C = $wmpCol * 2 + $colDec ;

        # OK, save
        $plate2pool[($P-1)*384 + ($C-1)*16 + ($R-1)] = "$B,$poolnum" ;
    }
}




# sanity: the number of filled (defined) entries in plate2pool should
# be 169*40 + 94*4 + 19*2 + 75*2 == 7324; the other entries stay
# undefined, they correspond to empty wells (eg bottom right corner
# of every plate)
{
    my $filled = scalar(grep { defined($_) } @plate2pool) ;
    ($filled == 7324) ||
        die "plate2pool has $filled elements, should be 7324\n" ;
}


###############################################################################

# OK, print mapping between WASP2x4 plates+wells and WASP2 batch+poolnumber
# format will be: "<plate>,<col>,<row>,<batch>,<poolnum>"
# Lines are written plate by plate, then column by column, then row by
# row; wells without an entry in @plate2pool are skipped.
# The output file must not exist yet; open and close failures are fatal,
# so that a write error cannot go unnoticed.
my $waspCtoPfile = "coords2pools.WASP2x4" ;
(-f $waspCtoPfile) &&
    die "$waspCtoPfile already exists, remove or rename it\n" ;
open(my $c2pFh, ">", $waspCtoPfile) ||
    die "cannot open $waspCtoPfile for writing: $!\n" ;
print {$c2pFh} "# Mapping of WASP2x4 plates+wells to WASP2 batch+poolnumber\n" ;
print {$c2pFh} "Plate,Col,Row,Batch,Poolnum\n" ;
foreach my $P (1..20)
{
    foreach my $C (1..24)
    {
        foreach my $R (1..16)
        {
            # $entry is "<batch>,<poolnum>", or undef for an empty well
            my $entry = $plate2pool[($P-1)*384 + ($C-1)*16 + ($R-1)] ;
            (defined $entry) || next ;
            print {$c2pFh} "$P,$C,$R,", $entry, "\n" ;
        }
    }
}

# buffered write errors only show up when closing
close($c2pFh) || die "error while closing $waspCtoPfile: $!\n" ;


