#!/usr/bin/env perl

# Author: NTM

#
# Copyright (C) Nicolas Thierry-Mieg, 2009.
#
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA


# take a single arg: a dir containing:
# - <bait>.Decoded subdirs, each corresponding to one bait and 
#   holding the decoding results for each batch of this bait,
# AND/OR
# - <bait>.sig.Decoded files, corresponding to individual batches.
#
# In each subdir and/or for each sig.Decoded file, create a new
# file with the same name but with $suffix appended, and enriched with:
# - next to variable numbers, the corresponding ORFeome ID;
# - next to pool numbers, the corresponding plate+well coordinates.
# Both of these infos are taken from the Mappings/ subdir.


use warnings ;
use strict ;

# Suffix appended to the name of each input file to build the name of
# the corresponding enriched output file (see addNamesToFile below).
my $suffix = ".Named" ;


#####################################################################

# Files holding the variable-to-ORF mappings are named $var2orfFile<2|6>,
# ie the WASP type (2 or 6) is appended to this prefix.
my $var2orfFile = "Mappings/var2orf.WASP" ;

# NOTE: WASP2_384 var2orf is the same as WASP2
# parse and save var2orf mappings:
# $var2orf2[($B-1)*338 + $V] holds the orf name for var $V of
# batch $B in WASP2 or WASP2_384 (same);
# $var2orf6[($B-1)*1014 + $V] idem for WASP6.
my @var2orf2 = () ;
my @var2orf6 = () ;

foreach my $type (2,6)
{
    my $file = "$var2orfFile$type" ;

    # Select the destination array and its per-batch stride once per
    # file, so that a single code path fills both mappings.
    my ($var2orf, $varsPerBatch) =
        ($type == 2) ? (\@var2orf2, 338) : (\@var2orf6, 1014) ;

    open(my $v2o, '<', $file) ||
        die "cannot open $file: $!\n" ;

    # skip first line, test second
    <$v2o> ;
    my $line = <$v2o> ;
    # a truncated file has no second line: fail clearly instead of
    # running chomp and the regex on an undefined value
    (defined $line) ||
        die "while reading $file, second line is missing\n" ;
    chomp $line ;
    ($line =~ /^Batch,Variable,ORF$/) ||
        die "while reading $file, second line is wrong:\n$line\n" ;

    while ($line = <$v2o>)
    {
        chomp $line ;
        ($line =~ /^(\d+),(\d+),(\d\d\d\d\d@[A-H]\d\d)$/) ||
            die "while reading $file, cannot parse line:\n$line\n" ;
        my ($B,$V,$orf) = ($1,$2,$3) ;

        # the index formula assumes batches are numbered from 1; batch 0
        # would give a negative index that wraps around the array
        ($B >= 1) ||
            die "while reading $file, batch numbers must start at 1:\n$line\n" ;

        my $index = ($B-1)*$varsPerBatch + $V ;
        # each (batch,var) pair may be defined only once
        (defined $var2orf->[$index]) &&
            die "while reading $file, found $orf for var $V of batch $B, but it's already defined as ", $var2orf->[$index], "\n" ;
        $var2orf->[$index] = $orf ;
    }
    close($v2o) ;
}

#####################################################################

# files holding coordinates (plate+well) of each pool are $C2P2, $C2P2_384, and
# $C2P6orig / $C2P6switched (orig for first 4 baits and ForPaper, switched for Next12)
my $C2P2 = "Mappings/coords2pools.WASP2x16" ;
my $C2P2_384 = "Mappings/coords2pools.WASP2x4" ;
my $C2P6orig = "Mappings/coords2pools.WASP6.original" ;
my $C2P6switched = "Mappings/coords2pools.WASP6.BandCswitched" ;

# parse and save:
# $pool2coord2[($B-1)*263 + $poolnum] holds coords (eg "P3@03x17" for
# plate 3, row 03, column 17) of pool $poolnum of batch $B in
# plates WASP2x16.
# $pool2coord2_384[($B-1)*263 + $poolnum] idem for WASP2_384.
# $pool2coord6<orig|switched>[($B-1)*169 + $poolnum] idem for WASP6.
my @pool2coord2 = () ;
my @pool2coord2_384 = () ;
my @pool2coord6orig = () ;
my @pool2coord6switched = () ;

# One entry per mapping file: [ file name, destination array, stride
# (number of index slots reserved per batch) ]. Every file is parsed by
# the same code below.
my @coordSources = (
    [ $C2P2,         \@pool2coord2,         263 ],
    [ $C2P2_384,     \@pool2coord2_384,     263 ],
    [ $C2P6orig,     \@pool2coord6orig,     169 ],
    [ $C2P6switched, \@pool2coord6switched, 169 ],
    ) ;

foreach my $source (@coordSources)
{
    my ($file, $pool2coord, $poolsPerBatch) = @$source ;

    open(my $p2c, '<', $file) ||
        die "cannot open $file: $!\n" ;

    # skip first line, test second
    <$p2c> ;
    my $line = <$p2c> ;
    # a truncated file has no second line: fail clearly instead of
    # running chomp and the regex on an undefined value
    (defined $line) ||
        die "while reading $file, second line is missing\n" ;
    chomp $line ;
    ($line =~ /^Plate,Col,Row,Batch,Poolnum$/) ||
        die "while reading $file, second line is wrong:\n$line\n" ;

    while ($line = <$p2c>)
    {
        chomp $line ;
        ($line =~ /^(\d+),(\d+),(\d+),(\d+),(\d+)$/) ||
            die "while reading $file, cannot parse line:\n$line\n" ;
        my ($P,$C,$R,$B,$poolnum) = ($1,$2,$3,$4,$5) ;

        # the index formula assumes batches are numbered from 1; batch 0
        # would give a negative index that wraps around the array
        ($B >= 1) ||
            die "while reading $file, batch numbers must start at 1:\n$line\n" ;

        # build the coordinate string "P<plate>@<row>x<col>", prefixing
        # rows and columns below 10 with a 0
        ($R < 10) && ($R = "0$R") ;
        ($C < 10) && ($C = "0$C") ;
        my $coord = 'P' . $P . '@' . $R . 'x' . $C ;

        my $index = ($B-1)*$poolsPerBatch + $poolnum ;
        # each (batch,pool) pair may be defined only once per file
        (defined $pool2coord->[$index]) &&
            die "while reading $file, found $coord for pool $poolnum of batch $B, but it's already defined as ", $pool2coord->[$index], "\n" ;
        $pool2coord->[$index] = $coord ;
    }
    close($p2c) ;
}


#####################################################################

# sub to deal with one file
# one arg: the file (with full path)
#
# Writes a copy of the file to "<file>$suffix" in which:
# - lines of the form "<int> (cost <int>)" (variables) become
#   "<int> (==<ORF>) (cost <int>)";
# - lines holding just an int (pools) become "<int> (==<coords>)";
# - all other lines are copied unchanged.
# The WASP type and batch are taken from the file name, which must contain
# ".WASP<2|2_384|6>.batch<N>."; for WASP6 it must also contain
# "T<3 digits>[-_]s<digits>[-_]" (bait number and series), used to choose
# between the original and the switched WASP6 pool mapping.
# If the output file already exists the input is skipped with a warning.
# Dies if the file name cannot be parsed or a file cannot be opened or
# written; a var or pool with no known mapping gets a warning and an empty
# name. Always returns 1 (callers rely on a true value).
sub addNamesToFile
{
    (@_ != 1) && die "addNamesToFile needs one arg\n" ;
    my $infile = $_[0] ;
    my $outfile = $infile.$suffix ;

    # if $outfile exists, skip file with warning
    if (-e $outfile)
    {
        warn "$outfile already exists, skipping $infile\n" ;
        return(1) ;
    }

    # Parse and validate everything coming from the file name BEFORE
    # creating $outfile: dying after the output has been opened would
    # leave an empty $outfile behind, and the next run would then skip
    # this input as "already exists".
    ($infile =~ /\.WASP(\d)\.batch(\d+)\./) ||
        ($infile =~ /\.WASP(2_384)\.batch(\d+)\./) ||
        die "cannot find WASP type and batch in $infile\n" ;
    my ($type,$batch) = ($1,$2) ;

    # the first pattern accepts any digit: reject unknown types here, so
    # that pool lines can never silently fall back to the WASP6 mapping
    (($type eq "2") || ($type eq "2_384") || ($type eq "6")) ||
        die "in addNames: illegal type $type, debug me!\n" ;
    # the mapping indexes assume batches numbered from 1; batch 0 would
    # give negative indexes that wrap around the arrays
    ($batch >= 1) ||
        die "in addNames: illegal batch $batch in $infile\n" ;

    # $wasp6switched is true iff the current bait uses a switched
    # WASP6 mapping; will be ignored if this is WASP2
    my $wasp6switched = 0 ;
    # find bait and series, to see if this uses switched WASP6
    if ($type eq "6")
    {
        ($infile =~ /T(\d\d\d)[-_]s(\d+)[-_]/) ||
            (die "\n\nERROR addNames WASP6: cannot find baitnum and series in $infile\n\n") ;
        my ($baitnum,$ser) = ($1, $2) ;
        # baits that use the switched mapping in series 1 to 4
        my %switchedBaits = map { $_ => 1 }
            (196, 330, 370, 381, 382, 385, 386, 387, 392, 469, 474, 479) ;
        if ( ($ser <= 4) && $switchedBaits{$baitnum} )
        {
            # warn "WARNING addNames: using WASP6 mapping with switched B and C in plate 6 for T$baitnum, series $ser\n" ;
            $wasp6switched = 1 ;
        }
    }

    # choose the lookup tables and their per-batch strides once, rather
    # than on every line (WASP2_384 shares the WASP2 var2orf mapping)
    my ($var2orf, $varsPerBatch) =
        ($type eq "6") ? (\@var2orf6, 1014) : (\@var2orf2, 338) ;
    my ($pool2coord, $poolsPerBatch) ;
    if ($type eq "2")
    {
        ($pool2coord, $poolsPerBatch) = (\@pool2coord2, 263) ;
    }
    elsif ($type eq "2_384")
    {
        ($pool2coord, $poolsPerBatch) = (\@pool2coord2_384, 263) ;
    }
    elsif ($wasp6switched)
    {
        ($pool2coord, $poolsPerBatch) = (\@pool2coord6switched, 169) ;
    }
    else
    {
        ($pool2coord, $poolsPerBatch) = (\@pool2coord6orig, 169) ;
    }

    open(my $in, '<', $infile) ||
        die "cannot open $infile for reading: $!\n" ;
    open(my $out, '>', $outfile) ||
        die "cannot open $outfile for writing: $!\n" ;

    while (my $line = <$in>)
    {
        chomp $line ;
        if ($line =~ /^(\d+)( \(cost \d+\))$/)
        {
            # this is a var
            my ($var,$rest) = ($1,$2) ;
            my $orf = $var2orf->[($batch-1)*$varsPerBatch + $var] ;
            if (! defined $orf)
            {
                # keep going, but say precisely what is missing
                warn "WARNING addNames: no ORF known for var $var of batch $batch (WASP$type) in $infile\n" ;
                $orf = "" ;
            }
            print $out "$var (==$orf)$rest\n" ;
        }
        elsif ($line =~ /^(\d+)$/)
        {
            # just an int on a line: this is a pool
            my $poolnum = $1 ;
            my $coord = $pool2coord->[($batch-1)*$poolsPerBatch + $poolnum] ;
            if (! defined $coord)
            {
                # keep going, but say precisely what is missing
                warn "WARNING addNames: no coords known for pool $poolnum of batch $batch (WASP$type) in $infile\n" ;
                $coord = "" ;
            }
            print $out "$poolnum (==$coord)\n" ;
        }
        else
        {
            # all other lines just get copied
            print $out "$line\n" ;
        }
    }

    close($in) ;
    # buffered write errors (eg disk full) only show up at close
    close($out) ||
        die "cannot close $outfile after writing: $!\n" ;
    return(1) ;
}


#####################################################################

# Main driver: scan the directory given as sole argument for entries whose
# name ends with .Decoded. Plain files are annotated directly; directories
# are scanned one level deep for .Decoded files, each of which is annotated.

if (scalar(@ARGV) != 1)
{
    die "requires one arg: a dir (eg Decodings/) holding individual files and/or subdirs with decoding results\nFiles and subdirs, as well as files in these subdirs, must end with .Decoded to be dealt with\n" ;
}

my ($decDir) = @ARGV ;

# collect the top-level entries of interest
opendir(my $topDir, $decDir) ||
    die "cannot opendir $decDir\n" ;
my @allBaits = grep { /\.Decoded$/ } readdir($topDir) ;
closedir($topDir) ;

foreach my $bait (@allBaits)
{
    my $baitPath = "$decDir/$bait" ;

    # an individual batch file: annotate it and move on
    if (-f $baitPath)
    {
        addNamesToFile($baitPath) ;
        next ;
    }

    # anything else must be a per-bait subdir
    unless (-d $baitPath)
    {
        die "$decDir/$bait is neither a file nor a dir?? Dying\n" ;
    }

    warn "\nDealing with $decDir/$bait/\n" ;

    opendir(my $baitDir, $baitPath) ||
        die "cannot opendir $decDir/$bait\n" ;
    my @batchFiles = grep { /\.Decoded$/ } readdir($baitDir) ;
    closedir($baitDir) ;

    foreach my $batchFile (@batchFiles)
    {
        addNamesToFile("$baitPath/$batchFile") ;
    }
}
