#!/usr/bin/env perl

# Created: 03/11/2006
# Author: NTM

#
# Copyright (C) Nicolas Thierry-Mieg, 2009.
#
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


# use NTM_mappingGlobal and our knowledge of who went where
# in the superposition process, to generate separate files
# for each set of WASPs, with the mappings of variables
# within each batch; and also with the mappings of
# WASP plate+well coordinates to batch+poolnums.
# We have 2 scripts, one for WASP2 and one for WASP6;
# some parts are identical but it was simpler to make
# 2 separate files.
# I initially wrote the WASP6 script, and then copied
# and adapted it for WASP2.

# files are created in current dir; this script writes
# var2orf.WASP6 and coords2pools.WASP6 (the WASP2 script
# writes the corresponding .WASP2 files).
# content is:
# - possible comment lines starting with '#'
# - one header line with the column names
# - comma-separated mappings:
#   - var2orf has <batch>,<var>,<platenum>@<well>
#     where batch starts at 1, var starts at 0, platenum is the
#     ORFeome plate number, and well is [A-H]\d\d;
#     var will usually end at 1013 for WASP6 and at 337 for WASP2,
#     but some batches may be smaller.
#   - coords2pools has <plate>,<col>,<row>,<batch>,<poolnum>
#     where poolnum starts at 0 and all others start at 1.

use strict ;
use warnings ;

# Debug verbosity: set $debug to 0 for no extra output. A value
# greater than 1 makes the script print, on stdout, the list of WAMP
# batches that compose each WASP6 batch.
my $debug = 0 ;


###############################################################################

# WAMP batches are numbered from 1 to 75, see NTM_mappingGlobal.
# These numbers correspond to WAMP plates 01A-09A (batches 1-5),
# 01B-09B (batches 6-10), etc... for the first 60 batches; and
# then plates 10A-13A (batches 61 and 62), 10B-13B (batches 63-64),
# 10C-13C (batches 65-66); and finally 10D-11D (batch 67), 10E-11E
# (batch 68), up to 10L-11L (batch 75).
# Read the mappings for each wamp batch from NTM_mappingGlobal, and
# save them in @wampVarMapping: variable $V of batch $B corresponds
# to ORF $wampVarMapping[169*($B-1) + $V],
# where $B varies from 1 to 75 and $V from 0 to 168.
my @wampVarMapping ;
# Also save mapping between wamp plates+wells and wamp batch+poolnum
# $wampPoolMapping{"$wmpP:$W"} == "$wmpB:$poolnum", where:
# $wmpP ranges from "01A" to "11L", including up to 13 for A-C;
# $W ranges from 1 to 96;
# $wmpB ranges from 1 to 75
# $poolnum ranges from 0 to 168
my %wampPoolMapping ;

my $mappingGlobal = "NTM_mappingGlobal" ;
# 3-argument open on a lexical handle: the file name can never be
# mistaken for an open mode, and the system error is reported on failure.
open (my $mapFH, '<', $mappingGlobal) ||
    die "cannot open $mappingGlobal for reading: $!\n" ;
{
    # row letters of a 96-well plate, indexed by (row number - 1)
    my @rowLetters = ("A".."H") ;
    # batch currently being read. NOTE: it is only set by the
    # "Mapping of preys to variables" header, so the pool lines of a
    # batch must come after that header and before the next one.
    my $batch ;
    # plate, well and poolnum where the current run of consecutive pools
    # starts (set by the "first pool" / "starts a new plate" lines)
    my ($prevWmpP,$prevW,$prevPoolnum) ;
    while(my $line = <$mapFH>)
    {
	chomp $line ;
	if ($line =~ /^Mapping of preys to variables for batch (\d+):$/)
	{
	    $batch = $1 ;
	    next ;
	}
	elsif ($line =~ /^variable (\d+) is: (\d\d\d\d\d),(\d+)$/)
	{
	    my ($var,$plate,$well) = ($1,$2,$3) ;
	    # reject input that would silently corrupt the mapping
	    (defined $batch) ||
		die "while reading $mappingGlobal, variable line found before any batch header:\n$line\n" ;
	    # each batch owns exactly 169 slots, a larger $var would
	    # land in the slots of the next batch
	    ($var <= 168) ||
		die "while reading $mappingGlobal, variable number $var is not in 0..168:\n$line\n" ;
	    # a well above 96 would otherwise produce a column above 12
	    # (eg A13), and well 0 would produce no row letter at all
	    (($well >= 1) && ($well <= 96)) ||
		die "while reading $mappingGlobal, well $well is not in 1..96:\n$line\n" ;

	    # $well is a number in 1..96 counted column by column (8 wells
	    # per column), build $wellTxt of the form C04
	    my $col = 1 + int(($well-1) / 8) ;
	    my $row = $rowLetters[($well-1) % 8] ;
	    my $wellTxt = sprintf("%s%02d", $row, $col) ;

	    # save in wampVarMapping
	    (defined $wampVarMapping[169*($batch-1) + $var]) &&
		die "trying to save wampVarMapping[169*($batch-1) + $var] but it's already defined\n" ;
	    $wampVarMapping[169*($batch-1) + $var] = "$plate"."@"."$wellTxt" ;
	    next ;
	}
	elsif (($line =~ /^first pool \((0)\) for batch is located at (\d\d[A-L]),(\d+)$/) ||
	       ($line =~ /^pool (\d+) starts a new plate, at (\d\d[A-L]),(\d+)$/))
	{
	    # start of a run of pools held in consecutive wells of one
	    # plate: just remember where it starts
	    ($prevPoolnum,$prevWmpP,$prevW) = ($1,$2,$3) ;
	}
	elsif (($line =~ /^pool (\d+) completes a plate, at (\d\d[A-L]),(\d+)$/) ||
	       ($line =~ /^last pool \((168)\) for batch is located at (\d\d[A-L]),(\d+)$/))
	{
	    # end of the current run of pools
	    my ($poolnum,$wmpP,$W) = ($1,$2,$3) ;
	    (defined $batch) ||
		die "while reading $mappingGlobal, pool line found before any batch header:\n$line\n" ;
	    (defined $prevWmpP) ||
		die "while reading $mappingGlobal, end of a run of pools found without its start:\n$line\n" ;
	    ($wmpP ne $prevWmpP) && 
		die "when saving wampPoolMapping, wmpP ($wmpP) ne prev ($prevWmpP)\n" ;
	    # pools and wells must advance in lockstep within the run
	    (($poolnum - $prevPoolnum) != ($W - $prevW)) &&
		die "when saving wampPoolMapping, ($poolnum - $prevPoolnum) != ($W - $prevW)\n" ;
	    # save all intermediate values in wampPoolMapping
	    my $PNtmp = $prevPoolnum ;
	    foreach my $Wtmp ($prevW..$W)
	    {
		$wampPoolMapping{"$wmpP:$Wtmp"} = "$batch:$PNtmp" ;
		$PNtmp++ ;
	    }
	}
	elsif (($line =~ /^\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#/) ||
	       ($line =~ /^batch \d+: working with design/) ||
	       ($line =~ /^Mapping of pools for batch \d+:/) ||
	       ($line =~ /^$/))
	{
	    # skip all these lines
	    next ;
	}
	else
	{
	    die "while reading $mappingGlobal, cannot parse line:\n$line\n" ;
	}
    }
}
close($mapFH) ;

# sanity check, wampVarMapping should have a value for every index 
# from 0 to 12674 (==75*169-1)
($#wampVarMapping != 12674) && die "last index in wampVarMapping is wrong\n" ;
foreach my $i (0..$#wampVarMapping)
{
    # test for holes first: matching and printing an undefined element
    # would only produce "uninitialized value" warnings and an error
    # message with an empty value
    (defined $wampVarMapping[$i]) ||
	die "wampVarMapping has no value at index $i\n" ;
    # every value must look like <5-digit ORFeome plate>@<well>, eg 11001@C04
    ($wampVarMapping[$i] =~ /^\d\d\d\d\d@[A-H]\d\d$/) ||
	die "bad wampVarMapping value at index $i: $wampVarMapping[$i]\n" ;
}

# and wampPoolMapping should have a total of 12675 entries
# (75 batches of 169 pools each)
(scalar(keys(%wampPoolMapping)) != 12675) &&
    die "wampPoolMapping has bad number of entries\n" ;


###############################################################################

# The WAMP batches were assembled to obtain the WASP6
# batches by superposing up to 6 WAMP batches, as
# shown in Superposing_9.xls.
# This info is summarized in @waspBs, as follows:
# $waspBs[<wasp6 batch num>] = "b1:b2:b3:b4:b5:b6:", where b1..b6 
# are the WAMP batch numbers for that wasp6 batch (there can be
# less than 6 numbers if the WASP6 is composed of fewer wamps).
# The wasp batch numbers are somewhat arbitrary (whereas the
# wamp batch numbers are given in NTM_mappingGlobal).
# The only fixed info is that there are 13 WASP6 batches.
# I am choosing now what they represent, in terms of the plates
# and wells that contain these WASP pools. This choice is
# implemented in the following construction of @waspBs.
my @waspBs ;
# WASP6 batches 1 to 10 come from the regular first 60 WAMP batches.
# Odd batches (1,3,5,7,9) are the left half (plates 01A-01F, 02A-02F,
# ..., 09A-09F) and even batches (2,4,6,8,10) are the right half
# (plates 01G-01L, 02G-02L, ..., 09G-09L).
# Each pass of this loop builds one odd batch and the following even one.
for my $half (1..5)
{
    my $oddB = 2 * $half - 1 ;
    my $evenB = 2 * $half ;

    # left half: wamp batches $half, $half+5, ..., $half+25
    (defined $waspBs[$oddB]) && die "when building waspBs, waspBs[$half * 2 - 1] already defined, dying\n" ;
    $waspBs[$oddB] = join("", map { my $wmpb = $half + 5*$_ ; "$wmpb:" } (0..5)) ;

    # right half: same wamp batches shifted by 30
    (defined $waspBs[$evenB]) && die "when building waspBs, waspBs[$half * 2] already defined, dying\n" ;
    $waspBs[$evenB] = join("", map { my $wmpb = 30 + $half + 5*$_ ; "$wmpb:" } (0..5)) ;
}

# The last 3 wasp6 batches follow a different scheme:
# batch 11 is the left half and batch 12 the right half of the
# wamp batches 61,63,65,67-75;
# batch 13 has the 3 batches held mostly in plates 12A-12C and 13A-13C.
@waspBs[11,12,13] = ("61:63:65:67:68:69:",
		     "70:71:72:73:74:75:",
		     "62:64:66:") ;


# optional dump of the composition of every WASP6 batch
if ($debug > 1)
{
    print "WASP6 batch i is composed of following WAMP batches:\n" ;
    print "index $_: $waspBs[$_]\n" foreach (1..$#waspBs) ;
}

###############################################################################

# OK, print WASP6 var to ORF mappings.
# Output lines are "<wasp6 batch>,<var>,<ORF>": the variables of a wasp6
# batch are those of its wamp batches, concatenated in the order given
# by $waspBs[$B] and renumbered from 0.
my $waspVtoOfile = "var2orf.WASP6" ;
# never overwrite the result of a previous run
(-f $waspVtoOfile) && die "$waspVtoOfile already exists, remove or rename it\n" ;
# 3-argument open on a lexical handle, reporting the system error
open(my $v2oFH, '>', $waspVtoOfile) ||
    die "cannot open $waspVtoOfile for writing: $!\n" ;
print $v2oFH "# Mapping of variables to ORFs, for each WASP6 batch\n" ;
print $v2oFH "Batch,Variable,ORF\n" ;

foreach my $B (1..13)
{
    my @wamps = split(/:/, $waspBs[$B]) ;
    # variable number within the wasp6 batch
    my $var = 0 ;
    foreach my $wmpB (@wamps)
    {
	foreach my $wmpV (0..168)
	{
	    print $v2oFH "$B,$var,", $wampVarMapping[169*($wmpB-1) + $wmpV], "\n" ;
	    $var++ ;
	}
    }

    # sanity: batches 1-12 are made of 6 wamp batches (6*169 variables),
    # batch 13 of only 3 (3*169 variables)
    ($B < 13) && ($var != 1014) && 
	die "when writing var2ORF, for batch $B, $var != 1014\n" ;
    ($B == 13) && ($var != 507) && 
	die "when writing var2ORF, for batch 13, $var != 507\n" ;
}
# buffered write errors (eg disk full) only show up when closing
close($v2oFH) || die "error while writing $waspVtoOfile: $!\n" ;


###############################################################################

# now generate a mapping between WASP6 plates+wells and 
# WASP6 batch+poolnumber
# format will be: "<plate>,<col>,<row>,<batch>,<poolnum>" where:
# <plate> is in 1..6;
# <col> is in 1..24;
# <row> is in 1..16;
# <batch> is in 1..13;
# <poolnum> is in 0..168.

# first fill plate2pool (so we can fill it out of order),
# content will then be printed in the correct order to a file.
# "$batch,$poolnum" is stored in $plate2pool[($P-1)*384 + ($C-1)*16 + ($R-1)]
my @plate2pool ;

# we have to do this "plate by plate" rather than batch by batch

# for each wasp6 batch, we choose one representative wamp batch:
# simply take the first wmpB of $waspBs[$B].
# Build the reverse lookup (representative wamp batch -> wasp6 batch)
# once, rather than scanning @waspBs again for every single well.
my %wmpB2waspB ;
foreach my $i (1..$#waspBs)
{
    ($waspBs[$i] =~ /^(\d+):/) || die "cannot split waspBs\n" ;
    # keys are numified so that the lookup compares batch numbers
    # numerically; the lowest wasp6 batch wins if a wamp batch ever
    # appeared first in two wasp6 batches
    (exists $wmpB2waspB{$1 + 0}) || ($wmpB2waspB{$1 + 0} = $i) ;
}

# wasp6 plate 6 is special: quadrant of the 384-well plate that received
# each wamp plate, as [row offset, column offset] where 0 means odd and
# 1 means even rows/columns.
# 11A-11F went into .A (odd rows, odd columns);
# 11G-11L went into .C and 12A-12C went into .B, BUT on 22/01/07
# Xiaofeng rebuilt 384 WASP6 plates from the WASP6-in-96-format plates
# and he switched B and C: so 11G is in odd rows + even columns, and
# 12A is in even rows + odd columns;
# 13A-13C went into .D (even rows, even columns).
my %plate6Offsets = (
    "11A" => [0,0],
    "11G" => [0,1],
    "12A" => [1,0],
    "13A" => [1,1],
    ) ;

foreach my $wmpPnum (1..13)
{
    # calculate wasp6 plate $P: wamp plate numbers 1-10 go two by two
    # into wasp6 plates 1-5, and 11-13 all go into wasp6 plate 6
    my $P = ($wmpPnum <= 10) ? int(($wmpPnum + 1) / 2) : 6 ;

    # 2-digit form used in the wamp plate names (the loop variable
    # itself is left untouched)
    my $wmpPnumTxt = sprintf("%02d", $wmpPnum) ;

    foreach my $wmpPlet ("A","G")
    {
	# plates 12 and 13 only exist for letters A-C
	($wmpPnum > 11) && ($wmpPlet eq "G") && next ;

	my $wmpP = "$wmpPnumTxt$wmpPlet" ;

	# find the quadrant of the wasp6 plate that received this wamp plate
	my ($rowOffset,$colOffset) ;
	if ($P <= 5)
	{
	    # first 5 wasp6 plates are identically built:
	    # odd wamp numbers went in .A and .B, ie odd rows
	    $rowOffset = (($wmpPnum % 2) == 1) ? 0 : 1 ;
	    # "A" wamps went in .A and .C, ie odd columns
	    $colOffset = ($wmpPlet eq "A") ? 0 : 1 ;
	}
	elsif (defined $plate6Offsets{$wmpP})
	{
	    ($rowOffset,$colOffset) = @{$plate6Offsets{$wmpP}} ;
	}
	else
	{
	    die "dealing with wasp6 plate 6, but bad wmpP $wmpP\n" ;
	}

	foreach my $wmpW (1..96)
	{
	    # wells that hold no pool have no entry
	    (defined $wampPoolMapping{"$wmpP:$wmpW"}) || next ;

	    my ($wmpB,$poolnum) = split(/:/,$wampPoolMapping{"$wmpP:$wmpW"}) ;
	    # the poolnum is the same in wasp6,
	    # now find the wasp6 batch $B
	    my $B = $wmpB2waspB{$wmpB + 0} ;
	    (defined $B) || die "cannot find wamp batch $wmpB in waspBs\n" ;

	    # coords in the wamp plate: wells are numbered column by
	    # column, with 8 wells per column
	    my $wmpCol = 1 + int(($wmpW - 1) / 8) ;
	    my $wmpRow = 1 + (($wmpW - 1) % 8) ;

	    # coords in the wasp6 plate: each wamp well expands to a 2x2
	    # square, the offsets choose the position inside that square
	    my $R = $wmpRow * 2 - 1 + $rowOffset ;
	    my $C = $wmpCol * 2 - 1 + $colOffset ;

	    # OK, save (two wamp wells must never land in the same place)
	    my $index = ($P-1)*384 + ($C-1)*16 + ($R-1) ;
	    (defined $plate2pool[$index]) &&
		die "plate2pool already has a value for wasp6 plate $P col $C row $R, cannot save $wmpP:$wmpW there\n" ;
	    $plate2pool[$index] = "$B,$poolnum" ;
	}
    }
}

# sanity: plate2pool should have 169*13==2197 non-empty entries
# (empty entries occur for empty wells, eg bottom right corner of every plate)
{
    my $cnt = grep { defined $_ } @plate2pool ;
    ($cnt != 2197) && die "plate2pool has $cnt elements, should be 2197\n" ;
}


###############################################################################

# OK, print mapping between WASP6 plates+wells and WASP6 batch+poolnumber
# format will be: "<plate>,<col>,<row>,<batch>,<poolnum>"
# Wells are printed plate by plate, column by column, then row by row;
# wells without a pool are skipped.
my $waspCtoPfile = "coords2pools.WASP6" ;
# never overwrite the result of a previous run
(-f $waspCtoPfile) && 
    die "$waspCtoPfile already exists, remove or rename it\n" ;
# 3-argument open on a lexical handle, reporting the system error
open(my $c2pFH, '>', $waspCtoPfile) || 
    die "cannot open $waspCtoPfile for writing: $!\n" ;
print $c2pFH "# Mapping of WASP6 plates+wells to WASP6 batch+poolnumber\n" ;
print $c2pFH "Plate,Col,Row,Batch,Poolnum\n" ;
foreach my $P (1..6)
{
    foreach my $C (1..24)
    {
	foreach my $R (1..16)
	{
	    # "<batch>,<poolnum>" of this well, undefined for an empty well
	    my $pool = $plate2pool[($P-1)*384 + ($C-1)*16 + ($R-1)] ;
	    (defined $pool) || next ;
	    print $c2pFH "$P,$C,$R,$pool\n" ;
	}
    }
}

# buffered write errors (eg disk full) only show up when closing
close($c2pFH) || die "error while writing $waspCtoPfile: $!\n" ;


