/*----------------------------------------------------------------*
 *
 * File : solvexpClosure.c
 * Author : NTM
 * Created : 12/11/04
 *
 *
 * Copyright (C) Nicolas Thierry-Mieg, 2006.
 *
 *
 * This file is part of InterPool, written by 
 * Nicolas Thierry-Mieg (CNRS, France) Nicolas.Thierry-Mieg@imag.fr
 *
 * InterPool is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * InterPool is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with InterPool; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *-----------------------------------------------------------------*/

#include <stdio.h> // for printing
#include <stdlib.h> // for memory access


#include "types.h" /* MOT */
#include "signa.h" /* setOfSigs */
#include "closure.h" /* everything */
#include "unitClosures.h" /* everything */
#include "conflicts.h" /* enrichSig, costOfConflictingPosWeak */
#include "distance.h" /* DIST_XXX */
#include "orderVectors.h" /* everything */
#include "design.h" /* maxInterBetweenVars, maxPoolsPerVar */

#include "solvexpClosure.h"


/* SANITY: when defined, extra (and expensive) sanity checks are compiled
   in, e.g. the checkUnits() call at the top of
   updateBestClosuresRecSubstract. Should be undef'd in production builds.
   To enable: uncomment the #define below and remove the #undef. */
//#define SANITY
#undef SANITY

/* DEBUG: produce extra output on stderr. Several levels are possible
   (1, 2, ...): some messages are only printed when DEBUG > 1 or DEBUG > 2.
   If SANITY is on, DEBUG adds extra output to the sanity checks. */
//#define DEBUG 1
#undef DEBUG



/* Initial number of entries requested for the orderVectors sortedByScore
   and sortedByCost.
   This is just a performance tweak: the vectors get extended on demand,
   each time by asking fillByScore/fillByCost for ORDERSIZEINIT more
   entries than the current size.
   50 seems like a good value. */
#define ORDERSIZEINIT 50


/************************************************************************
 ******************* LOCAL FUNCTIONS ************************************
 ************************************************************************/

/////////////// DECLARATIONS ///////////////

/*!
  \brief Return the largest negPoolCost that a (substracted) unit may
  have for inclusion-invalidation to still be attempted on it.
  \param tabpool, n, nbPools are forwarded unchanged to
  maxInterBetweenVars().
  \return the threshold, computed as
  minDistNeg() * maxInterBetweenVars(tabpool, n, nbPools).
*/
static int maxNegPCForInclInval(MOT* tabpool, int n, int nbPools) ;

/*!
  \brief Perform an advanced test, by anticipating several moves in
  advance, to see if we can prune a node and all its younger
  brothers (and their descendance) in updateBestClosuresRecSubstract.

  \param sortedByScore vector of unitNums sorted by decreasing score;
  \param sortedByCost vector of unitNums sorted by increasing negPoolCosts;
  \param myUnits the unit closures to examine (only valid ones are used);
  \param minScore minimum score that a union of units must attain to
  have a chance of making a best score;
  \param maxCost max negPoolsCost that a union of units may have to
  still have a chance of producing a best score;
  \param tabpool, n, nbPools are forwarded to maxInterBetweenVars()
  and maxPoolsPerVar().
  \return TRUE if no union of units in myUnits has any chance of
  producing a best score (ie the whole current search space can be
  pruned), FALSE otherwise.

  NOTE:
  - myUnits MUST be substracted (both the scores, and the negPoolCosts)!
  - this function can extend sortedByScore and sortedByCost (ie fill
  unitNums and update size), and it also updates sortedByCost->firstValid
  and sortedByScore->firstValid. No one else should touch firstValid.
*/
static bool smartPrunable(orderVector* sortedByScore, orderVector* sortedByCost,
			  unitClosures* myUnits, int minScore, int maxCost,
			  MOT* tabpool, int n, int nbPools) ;


/*!
  \brief update bestClosuresP by adding to previousClosureP all
  possible sets of unit closures from unitClosuresP whose index
  is in firstValidUnit..lastValidUnit.
  This is the rather simple Rec algorithm, should be dependable
  and therefore useful for cross-validation. Much faster than
  solvexpNaive but much slower than RecSubstracted.

  \param bestClosuresP best closures found until now, WILL GET UPDATED;
  \param bestScore score that must be at least equalled (used for pruning);
  \param unitClosuresP the set of unit closures;
  \param firstValidUnit, lastValidUnit indexes (in unitClosuresP) of the
  first and last units that should be considered;
  \param previousClosureP the closure built in the parent node, on
  which we are going to grow;
  \param costOfConfPW cost of changing all conflicting pos and weak
  pools (used for pruning);
  \param nbPools passed on to the closure-handling helpers.
  \return the new best score.
*/
static int updateBestClosuresRec(setOfClosures* bestClosuresP, int bestScore,
				 unitClosures* unitClosuresP,
				 int firstValidUnit, int lastValidUnit,
				 closure* previousClosureP, int costOfConfPW,
				 int nbPools) ;


/*!
  \brief Variation on updateBestClosuresRec, which substracts
  the unit closures.
  unitClosuresP holds unit closures, which MUST BE substracted.
  previousClosureP is the closure built in the parent node; the
  function grows it with the remaining valid units (the parameter is
  named as in the definition, which keeps "current" for the closure
  it builds locally).
  NOTE: *bestClosuresP gets modified; but so does unitClosuresP->valid!
  Make sure you don't use unitClosuresP after calling this function, or
  remember to make a copy of the valid vector before calling, and restore
  it after the call.
*/

static int updateBestClosuresRecSubstract(setOfClosures* bestClosuresP, int bestScore,
					  unitClosures* unitClosuresP,
					  closure* previousClosureP, int costOfConfPW,
					  MOT* tabpool, int n, int nbPools) ;


/*!
  \brief Find the highest-scoring closures when performing simulations 
  (using the known maxDist to a coherent interpretation, since we know what
  errors were introduced).
  The strategy is different from decoding a real experiment, because
  we know the number of errors present: therefore there must exist a closure
  of score at least costOfConfPW-maxDist, and we can limit the search
  to closures which at least equal that score.
  This function does the preparatory work, then calls the recursive
  search selected by method:
  if method==1, use regular updateBestClosuresRec;
  if method==2, use updateBestClosuresRecSubstract.

  \return the highest-scoring set of closures.
*/
static setOfClosures* findBestClosuresSim(unitClosures* unitClosuresP, int costOfConfPW,
					  int maxDist, int method, 
					  MOT* tabpool, int n, int nbPools) ;


/*!
  \brief Find the highest-scoring closures when performing real decodings
  of experimental observations.
  This differs slightly from findBestClosuresSim, because we don't know
  the number of errors a priori... Therefore it is crucial to find
  high-scoring closures as fast as possible, whereas findBestClosuresSim
  doesn't really care that much.

  If method==3, use regular updateBestClosuresRec;
  if method==4, use updateBestClosuresRecSubstract.

  \return the highest-scoring set of closures.
*/
static setOfClosures* findBestClosuresReal(unitClosures* unitClosuresP, int costOfConfPW,
					   int method, MOT* tabpool, int n, int nbPools) ;


//////////////// BODIES ///////////////////

/*!
  Return the max negPoolCost of a (substracted) unit such that
  inclusion-invalidation should be attempted.
  The idea is that trying inclusion-invalidation has a substantial
  overhead (see substractNegPoolsFromUnits and 
  substractNegPoolsFromUnitsInclInval), so we want to avoid it
  except if it has reasonable chances of invalidating units.
  Clearly, if the unit is large these chances go down.
*/
static int maxNegPCForInclInval(MOT* tabpool, int n, int nbPools)
{
  /* The threshold below attempts inclusion-invalidation as soon as
     there is a chance that it will invalidate someone.
     Smaller values could be used; larger ones are useless and would
     only slow down decoding. */

  /* presumably the smallest cost of changing one negative pool
     (counterpart of maxDistNeg()) -- to be confirmed in distance.h */
  int perPoolCost = minDistNeg() ;
  /* max number of pools that can contain any 2 variables */
  int sharedPools = maxInterBetweenVars(tabpool, n, nbPools) ;

  /* For testing the effect of inclusion-invalidation: returning -1
     instead of the product disables inclusion-invalidation entirely. */
  return(perPoolCost * sharedPools) ;
}


/*!
  Perform an advanced test, by anticipating several moves in
  advance, to see if we can prune a node and all its younger
  brothers (and their descendance) in updateBestClosuresRecSubstract.

  sortedByScore: vector of unitNums sorted by decreasing score;
  sortedByCost: vector of unitNums sorted by increasing negPoolCosts;
  both vectors will get extended if necessary.
  minScore: minimum score that a union of units must attain to have
  a chance of making a best score (when merged with the current 
  closure in caller). Typically, should be called with 
  minScore==bestScore-previousClosureP->score.
  maxCost: max NegPoolsCost of a union of units, for this
  union to have any chance of producing a best score. Typically, should be 
  called with maxCost = costOfConfPW - bestScore - previous->negCost.

  \return: TRUE if no union of units in myUnits has any chance of
  producing a best score (ie we can prune the whole current search space).

  Algorithm:
  1. find the smallest p such that the union of p units may have
  a score >= minScore. This relies on the fact that the score
  of a union cannot be much larger than the sum of their scores
  (the "not much" can be bounded using maxInterBetweenVars).
  2. Calculate a lower bound NCLB(p) on the negPoolsCost of the 
  union of p units; if NCLB(p) > maxCost, the union of less than p units
  cannot be useful according to point 1 above and the union of at least
  p units is too large to be of any use: we can prune.

  NOTE:
  - myUnits MUST be substracted (both the scores, and the negPoolCosts)!
  - this function can extend sortedByScore and sortedByCost (ie fill
  unitNums and update size), and it also updates sortedByCost->firstValid
  and sortedByScore->firstValid. No one else should touch firstValid.
*/
static bool smartPrunable(orderVector* sortedByScore, orderVector* sortedByCost,
                          unitClosures* myUnits, int minScore, int maxCost,
                          MOT* tabpool, int n, int nbPools)
{
  /* Design-wide constants used by both bounds:
     pairOverlap: max number of pools that can contain any 2 variables;
     poolsPerVar: max number of pools that can contain any single variable;
     negFlipCost: maximal cost to change the interpretation of a single
     negative pool. */
  int pairOverlap = maxInterBetweenVars(tabpool, n, nbPools) ;
  int poolsPerVar = maxPoolsPerVar(tabpool, n, nbPools) ;
  int negFlipCost = maxDistNeg() ;

  /* ---- Locate the first valid unit in sortedByScore ----
     Start at the remembered firstValid and walk forward, extending the
     vector whenever we run past its end. */
  int scoreIdx = sortedByScore->firstValid ;
  for (;;)
    {
      while (scoreIdx >= sortedByScore->size)
        {
          if (! fillByScore(sortedByScore, myUnits, (sortedByScore->size + ORDERSIZEINIT)))
            /* nothing could be added: no valid units at all. This
               shouldn't happen, but pruning is the safe answer. */
            return(TRUE) ;
        }
      if (myUnits->valid[sortedByScore->unitNums[scoreIdx]])
        break ;
      scoreIdx++ ;
    }
  /* remember it for the next call */
  sortedByScore->firstValid = scoreIdx ;

  /* ---- STEP 1: smallest number of units whose union may reach minScore ----
     unionSize: we KNOW that the union of fewer than unionSize unit
     closures cannot attain minScore.
     scoreBound: upper bound on the score of the union of the unionSize
     best-scoring valid units. */
  int unionSize = 0 ;
  int scoreBound = 0 ;
  do
    {
      unionSize++ ;

      /* scoreIdx must designate an existing entry: extend if needed */
      while (scoreIdx >= sortedByScore->size)
        {
          if (! fillByScore(sortedByScore, myUnits, (sortedByScore->size + ORDERSIZEINIT)))
            /* no more valid units: fewer than unionSize units cannot
               make a best score, and unionSize valid units don't even
               exist (only unionSize-1 do). Prune. */
            return(TRUE) ;
        }

      /* the entry exists but may be invalid: skip forward to the next
         valid one, extending the vector when its end is reached */
      while (! myUnits->valid[sortedByScore->unitNums[scoreIdx]])
        {
          scoreIdx++ ;
          if ((scoreIdx >= sortedByScore->size) &&
              (! fillByScore(sortedByScore, myUnits, (sortedByScore->size + ORDERSIZEINIT))))
            /* same reasoning as above: not enough valid units */
            return(TRUE) ;
        }

      /* Adding one more unit raises the bound by its own score, plus at
         most the cost of the pools it shares with the units already
         taken. That overlap is at most
         min((unionSize-1)*pairOverlap, poolsPerVar-1) pools
         (poolsPerVar-1 because only conflicting pools count), each
         worth at most negFlipCost. */
      scoreBound += myUnits->scores[sortedByScore->unitNums[scoreIdx]] ;
      {
        int sharedPools = (unionSize - 1) * pairOverlap ;
        if (sharedPools >= poolsPerVar - 1)
          sharedPools = poolsPerVar - 1 ;
        scoreBound += sharedPools * negFlipCost ;
      }

      scoreIdx++ ;
    }
  while (scoreBound < minScore) ;

  /* ---- STEP 2: lower bound on the negPoolsCost of a union of unionSize units ----
     Merging fewer than unionSize units is useless (step 1). We now
     compute costBound, a lower bound on the negPoolsCost of the union
     of unionSize units, from the unionSize cheapest valid units:
     starting from the most expensive of them, each cheaper unit
     contributes its own cost minus the largest possible overlap with
     the units already counted, as long as that contribution is
     positive. */

  /* find the first valid unit in sortedByCost (some entries can have
     been invalidated since they were inserted) */
  int costIdx = sortedByCost->firstValid ;
  for (;;)
    {
      while (costIdx >= sortedByCost->size)
        {
          if (! fillByCost(sortedByCost, myUnits, (sortedByCost->size + ORDERSIZEINIT)))
            {
              /* step 1 guarantees at least unionSize valid units */
              fprintf(stderr, "in smartPrunable, cannot extend sortedByCost (first run), impossible!\n") ;
              exit(1) ;
            }
        }
      if (myUnits->valid[sortedByCost->unitNums[costIdx]])
        break ;
      costIdx++ ;
    }
  sortedByCost->firstValid = costIdx ;

  /* advance until unionSize valid units have been seen; the first one
     is already counted */
  int nbSeen = 1 ;
  while (nbSeen < unionSize)
    {
      costIdx++ ;
      while (costIdx >= sortedByCost->size)
        {
          if (! fillByCost(sortedByCost, myUnits, (sortedByCost->size + ORDERSIZEINIT)))
            {
              /* step 1 guarantees at least unionSize valid units */
              fprintf(stderr, 
                      "in smartPrunable, cannot extend sortedByCost (run %d), impossible!\n",
                      nbSeen) ;
              exit(1) ;
            }
        }
      if (myUnits->valid[sortedByCost->unitNums[costIdx]])
        nbSeen++ ;
    }

  /* costIdx now designates the unionSize-th cheapest valid unit: its
     cost is the starting value of the bound */
  int costBound = myUnits->negPoolCosts[sortedByCost->unitNums[costIdx]] ;

  /* nbCounted: number of units already accounted for in costBound */
  int nbCounted = 1 ;
  while (nbCounted < unionSize)
    {
      /* step back to the previous valid unit */
      costIdx-- ;
      while (! myUnits->valid[sortedByCost->unitNums[costIdx]])
        costIdx-- ;

      int gain = myUnits->negPoolCosts[sortedByCost->unitNums[costIdx]]
        - nbCounted*pairOverlap*negFlipCost ;
      if (gain <= 0)
        /* cheaper units cannot improve the bound either: stop here */
        break ;

      costBound += gain ;
      /* the bound may already be sufficient to prune */
      if (costBound > maxCost)
        return(TRUE) ;

      nbCounted++ ;
    }

  /* costBound is as tight as we can make it: prune iff it exceeds maxCost */
  return((costBound > maxCost) ? TRUE : FALSE) ;
}


/*!
  This function implements the search algorithm for finding the highest-scoring
  closures, descending from node *previousClosureP and merging it with each
  remaining valid unit closure (ie, whose index is >= firstValidUnit).
  This algorithm just passes unitClosuresP to its children: it doesn't
  copy, substract, or sort the unit closures, and never invalidates anybody.
  This makes it "dumb" (it never knows that a unit is invalid until trying to
  merge it with previousClosureP), but still pretty fast (because no expensive copying
  or substracting ever happens). It's much faster than solvexpNaive, and much simpler 
  than RecSubstracted; therefore it's very useful for cross-validation.
  It can be called from RecSubstracted (it uses "valid" information if present,
  and doesn't mind that the units have been substracted).
  However, it should never call RecSubstracted, which expects units to be
  at least substracted.
  NOTE 04/07/2006: I remember that at some point I had this type of interaction
  between RecSubstracted and Rec, but I no longer do that. I think RecSubstracted
  now really blows away Rec, and there's probably no need to mix the 2 methods.
  It does remain a possibility, though (still compatible in the direction 
  RecSubstracted->Rec).
  *bestClosuresP holds the best previously found closures and bestScore is the
  score to use for pruning (it is the best score found until now or a score which
  we know can be attained).
  This function updates *bestClosuresP if any child of current node is a best-scoring
  closure, and returns the new best score (which is identical to bestScore if
  children of current node do not improve on it).
  Importantly, *bestClosuresP is the only argument that can get modified. All other 
  args are copied if necessary.

  \param bestClosuresP holds a pointer to the best closures found until now, WILL 
  GET UPDATED!;
  \param bestScore is the score that must be at least equalled (generally it will be
  equal to bestClosuresP->allClosures[0].score, but not always: when performing
  simms we know the errors and can therefore calculate a score that we know can
  be reached. In this context, bestScore can be set while *bestClosuresP
  is empty).
  \param *unitClosuresP is the set of unit closures.
  \param firstValidUnit, lastValidUnit are the indexes (in unitClosuresP) of
  the first and last units that should be considered.
  \param *previousClosureP is the (non-unit) closure built in the parent node, 
  on which we are going to grow;
  \param costOfConfPW cost of changing all conflicting pos and weak pools 
  (used for pruning);

  \return the new best score ==max(bestScore, best score of current node's children).

  Algorithm is:
  for i = firstValidUnit..lastValidUnit do
    (0. build a fresh currentClosure from previousClosure to avoid 
        modifying the latter)
    1/2. if (unit[i] invalid) continue;
    1. merge thisunitClosure->negPools and thisunitClosure->faintPools
       with currentClosure->negPools.
    2. PRUNING: 
       if currentClosure->negPoolsCost  >  costOfConfPW - bestScore, 
          we know that this node and all of its descendants cannot attain 
	  the best score: prune;
       it is possible to use a cost-of-closure cutoff, a la MAXPROF but probably
          dynamic (to stay exact and complete), and build a border when that cost is
	  reached. But this would probably go into a different function with Border 
	  in its name.
    3. finish updating currentClosure (with the posPools+weakPools and score).
    4. update *bestClosuresP and bestScore if needed.
    5. updateBestClosuresRec(currentClosure, i+1, ...)
       (note that since *bestClosuresP gets updated, each iteration can use a
       different and better bestClosures)
  end for;
*/
static int updateBestClosuresRec(setOfClosures* bestClosuresP, int bestScore,
                                 unitClosures* unitClosuresP,
                                 int firstValidUnit, int lastValidUnit,
                                 closure* previousClosureP, int costOfConfPW,
                                 int nbPools)
{
#ifdef DEBUG
  fprintf(stderr, 
          "\nentering updateBestClosuresRec: firstValidUnit==%d, lastValidUnit==%d\n", 
          firstValidUnit, lastValidUnit) ;
#endif /* DEBUG */

  /* scratch closure, allocated once and re-filled for every candidate:
     it receives the union of *previousClosureP and the candidate unit */
  closure* childP = buildEmptyClosure(nbPools) ;

  int candidate ;
  int nextStart ;
  for (candidate = firstValidUnit ; candidate <= lastValidUnit ; candidate++)
    {
      /* units flagged invalid are simply skipped */
      if (! unitClosuresP->valid[candidate])
        continue ;

      /* first half of the union; this is enough to know
         childP->negPoolsCost, which is all the pruning test needs */
      beginUnionWithUnit(childP, previousClosureP, unitClosuresP, candidate, nbPools) ;

      /* PRUNING: when negPoolsCost exceeds costOfConfPW - bestScore,
         neither this node nor any of its descendants can reach
         bestScore, so no recursion is needed.
         NOTE: when the bestScore received initially is already large
         (typical in simulations, where a reachable score is known up
         front), such a unit could in principle be discarded before any
         recursive call. As written, younger brothers never retry it,
         but older brothers will have tested it, uselessly, in their
         descendance. Marking such units invalid for all descendants
         (and restoring them before returning) would avoid this; that
         idea is close to the RecSubstracted algorithm. */
      if (childP->negPoolsCost > (costOfConfPW - bestScore))
        {
#ifdef DEBUG
          if (DEBUG > 2)
            fprintf(stderr, "pruning closure number %d\n", candidate) ;
#endif /* DEBUG */
          continue ;
        }

      /* FOR BORDER: checking cost-of-closure and building border should go here */

      /* not pruned: complete the union, then see whether it is a new best */
      finishUnionWithUnit(childP, previousClosureP, unitClosuresP, candidate, nbPools) ;
      bestScore = updateBestSetOfClosures(childP, bestClosuresP, bestScore, nbPools) ;

      /* Children grow childP with the units that follow the candidate.
         Leading invalid units can be skipped right away (this function
         never invalidates anyone, so they were already invalid on
         entry). */
      nextStart = candidate + 1 ;
      while ((nextStart <= lastValidUnit) && (! unitClosuresP->valid[nextStart]))
        nextStart++ ;

      /* recurse only if at least one valid unit remains; bestScore and
         *bestClosuresP may improve, which benefits later iterations */
      if (nextStart <= lastValidUnit)
        bestScore = updateBestClosuresRec(bestClosuresP, bestScore, unitClosuresP,
                                          nextStart, lastValidUnit,
                                          childP, costOfConfPW, nbPools) ;
    }

  freeClosure(childP) ;

#ifdef DEBUG
  fprintf(stderr, 
          "\nreturning from updateBestClosuresRec where firstValidUnit==%d and lastValidUnit==%d\n", 
          firstValidUnit, lastValidUnit) ;
  fprintf(stderr, "bestScore=%d\n", bestScore) ;
#endif /* DEBUG */

  return(bestScore) ;
}


/*!
  \brief Variation on updateBestClosuresRec, which substracts
  the unit closures.
  bestClosuresP holds the best closures found up to now, 
  including *previousClosureP.
  unitClosuresP holds unit closures, which MUST BE substracted.
  Any unit whose valid is FALSE will not be considered.

  Algorithm is the following:
    for i=0..nbValid-1 do
       j = sortedByScore[i];
       if (unit[j] invalid) continue;
       if smartPrunable, break;
       invalidate unit[j];
       currentClosure.(neg+faint)Pools = union(previousClosure.(neg+faint)Pools, unit[j].(neg+faint)Pools) ;
       if (currentClosure.negPoolsCost is prunable) 
          continue
       else
          currentClosure.(pos+weak)Pools = union(previousClosure.(pos+weak)Pools, unit[j].(pos+weak)Pools) ;
	  copy valid units in unitClosures;
	  substract currentClosure from copiedUnits, enriching currentClosure.posPools
	     with units whose negPools become empty, and invalidating as many
	     copiedUnits as possible (and in fact trashing them as much as possible).
	  if currentClosure is a best, updatebestSetOfClosures.
	  if some valid units remain in copiedUnits:
	     bestScore = recurse(copiedUnits, currentClosureP) ;
    return(bestScore).

  NOTE: *bestClosuresP gets modified; but so does unitClosuresP->valid!
  Make sure you don't use unitClosuresP after calling this function, or
  remember to make a copy of the valid vector before calling, and restore
  it after the call.
*/

static int updateBestClosuresRecSubstract(setOfClosures* bestClosuresP, int bestScore,
					  unitClosures* unitClosuresP,
					  closure* previousClosureP, int costOfConfPW,
					  MOT* tabpool, int n, int nbPools)
{
  int nbInitValid = unitClosuresP->nbValid ;
#ifdef SANITY
  if (nbInitValid == 0)
    {
      /* we could just return bestScore outside SANITY, but it's better to 
	 test nbValid before calling updateBestClosuresRecSubstract, so we die instead */
      fprintf(stderr, "entering updateBestClosuresRecSubstract with nbValid==0! Dying.") ;
      exit(1) ;
    }
  /* expensive sanity checking of unitClosuresP content */
  checkUnits(unitClosuresP, nbPools) ;
#endif /* SANITY */

#ifdef DEBUG
  fprintf(stderr,
	  "\nentering updateBestClosuresRecSubstract: nbOfClosures==%d, nbValid==%d\n", 
	  unitClosuresP->nbOfClosures, nbInitValid) ;
#endif /* DEBUG */

  /* allocate sortedByScore and sortedByCost once and for all.
     Won't be needing more than nbInitValid */
  orderVector* sortedByScore = buildVector(nbInitValid) ;
  orderVector* sortedByCost = buildVector(nbInitValid) ;

  /* fill them both up with ORDERSIZEINIT elements. */
  if ( (! fillByScore(sortedByScore, unitClosuresP, ORDERSIZEINIT)) ||
       (! fillByCost(sortedByCost, unitClosuresP, ORDERSIZEINIT)) )
    {
      /* either ORDERSIZEINIT==0 (stupid), or unitClosuresP contains
	 no valid units, shouldn't have called updateBestClosuresRecSubstract */
      fprintf(stderr, "in RecSubstract, cannot fill byScore/byCost.\n") ;
      fprintf(stderr, "probable cause: no valid units (number:%d).\n", nbInitValid) ;
      exit(1) ;
    }

  
  /* allocate memory for currentClosure once and for all */
  closure* currentClosureP = buildEmptyClosure(nbPools) ;

  /* begin main loop: pick each remaining unit closure in turn, in order
     of decreasing score. Since any initially invalid unit will never
     make it into sortedByScore, we can stop at nbInitValid.
     We can't use unitClosuresP->nbValid however, because this can decrease
     due to inclusion-invalidation; but if a unit is invalidated AFTER it
     made it into sortedByScore, then this unit stays in sortedByScore forever... */
  int orderIndex ;
  for (orderIndex = 0 ; orderIndex < nbInitValid ; orderIndex++)
    {
      if (orderIndex == sortedByScore->size)
	/* sortedByScore too small. Extend it with ORDERSIZEINIT 
	   additional units */
	if (! fillByScore(sortedByScore, unitClosuresP, (sortedByScore->size + ORDERSIZEINIT)))
	  {
	    /* no more valid units. */
#ifdef DEBUG
	    fprintf(stderr, 
		    "for index %d: fillByScore can't extend, no more valid units.\n",
		    orderIndex) ;
#endif /* DEBUG */	      
	    break;
	  }
      
      /* OK, next unit to examine is: */
      int unitNum = sortedByScore->unitNums[orderIndex] ;

#ifdef SANITY
      /* pre-condition: we must have 0 <= unitNum < unitClosuresP->nbOfClosures */
      if ((unitNum < 0) || (unitNum >= unitClosuresP->nbOfClosures))
	{
	  fprintf(stderr, 
		  "in RecSubstract: unitNum (%d) >= nbOfClosures (%d) or negative!\n",
		  unitNum, unitClosuresP->nbOfClosures) ;
	  exit(1) ;
	}
#endif /* SANITY */

      /* all units in sortedByScore->unitNums[0..sortedByScore->size-1] 
	 were initially valid, but this can have changed with inclusion-invalidation */
      if (! unitClosuresP->valid[unitNum])
	continue ;

      /* Before even trying to pick unit unitNum:
	 If all remaining units have 'small' scores, we can sometimes totally 
	 prune the rest of this search sub-tree, ie break immediately. */
      {
	int minScore = bestScore - previousClosureP->score ;
	int maxCost = costOfConfPW - bestScore - previousClosureP->negPoolsCost ;
	if (smartPrunable(sortedByScore, sortedByCost, unitClosuresP,
			  minScore, maxCost, tabpool, n, nbPools))
	  {
#ifdef DEBUG
	    fprintf(stderr, "break-pruning with smartPrunable at orderIndex %d\n", orderIndex) ;
#endif /* DEBUG */
	    break ;
	  }
      }

      /* Oh well, couldn't smartPrune... pick unit unitNum: it can be invalidated 
	 in all children and in younger brothers */
      unitClosuresP->valid[unitNum] = FALSE ;
      unitClosuresP->nbValid-- ;

      /* begin filling currentClosureP with the union of previous and unitNum */
      beginUnionWithUnit(currentClosureP, previousClosureP, unitClosuresP, unitNum, nbPools) ;
      
      /* Can we prune? at most, currentClosureP can make all initially
	 conflicting pos or weak pools non-conflicting. Therefore the
	 highest score that it can attain is:
	 costOfConfPW - currentClosureP->negPoolsCost.
	 NOTE: this test is very similar to the test in 
	 substractNegPoolsFromUnits, but since bestScore can have
	 improved (compared to what it was in the father) due to
	 older brothers and their descendance, it should still be
	 useful to test here.
	 It is also almost included in smartPrunable (with p==0),
	 except that smartPrunable uses the unit with smallest NegPoolsCost
	 whereas this test uses the NPC of the unit with the largest score (whose
	 NPC can be larger, and hence this test can succeed even if smartPrunable
	 failed).
      */
      if ((costOfConfPW - currentClosureP->negPoolsCost) < bestScore)
      	{
	  /* yes! no need for recursive call, current node and all its 
	     descendants are bad.
	     We must still examine brothers, so don't break: just continue */
#ifdef DEBUG
	  fprintf(stderr, 
		  "continue-pruning after beginUnion at orderIndex %d, costOfConfPW=%d, ",
		  orderIndex, costOfConfPW) ;
	  fprintf(stderr,
		  "bestScore=%d, current negcost is %d, previous was %d, unit negCost is %d\n",
		  bestScore, currentClosureP->negPoolsCost, previousClosureP->negPoolsCost, 
		  unitClosuresP->negPoolCosts[unitNum]) ;
#endif /* DEBUG */
	  continue ;
	}

      else
	{
	  /* finish building currentClosureP */
	  finishUnionWithUnit(currentClosureP, previousClosureP, unitClosuresP, unitNum, nbPools) ;
	  
	  /* if all remaining units are invalid, don't substract
	     the units: just update best and return */
	  if (unitClosuresP->nbValid == 0)
	    {
#ifdef DEBUG
	      int oldBestScore = bestScore ;
#endif /* DEBUG */
	      bestScore = updateBestSetOfClosures(currentClosureP, bestClosuresP, bestScore, nbPools) ;
#ifdef DEBUG
	      fprintf(stderr, 
		      "breaking at index %d without substract, no more valid units.", 
		      orderIndex) ;
	      if (bestScore != oldBestScore)
		fprintf(stderr, " updating bestScore from %d to %d.", oldBestScore, bestScore) ;
	      fprintf(stderr, "\n") ;
#endif /* DEBUG */
	      break ; /* from main "for orderIndex" loop: we could just "continue"
			 but this just gains a little time, since we know that
			 if (! unitClosuresP->valid[unitNum]) will fail 
			 for all remaining unitNum's. */
	    }

	  else
	    {
	      /* some valid units remain:
		 substract currentClosureP from units to obtain newUnits. */
	      
	      /* maxNegPoolCost: max cost that any (substracted) unit closure's 
		 negPools+faintPools can have to be of any use when merged with 
		 currentClosureP. The negPoolsCost of the union would be 
		 currentClosureP->negPoolsCost + unit.negPoolsCost,
		 so if this is > (costOfConfPW - bestScore) we know that this unit 
		 will get pruned in all descendants of current node.
		 substractNegPoolsFromUnits will therefore invalidate any such units.
		 Note that we must still try pruning on the current node (code
		 above), in case an older brother improved the best score (compared
		 to what was known in the father, where substractNegPoolsFromUnits
		 was called), but this allows to limit the size of copied units in 
		 all children.*/
	      int maxNegPoolCost = costOfConfPW - bestScore - currentClosureP->negPoolsCost ;
	      
	      /* now do the actual copying and substracting of negPools, and use
		 maxNegPoolCost to invalidate (ie, in newUnitsP: trash!) as many 
		 units as possible.
		 pre-condition to substractNeg*: nbValid>0, is verified
		 due to the if (unitClosuresP->nbValid == 0) above */
	      unitClosures* newUnitsP ;

	      /* only perform inclusion invalidation if current (substracted) unit
		 is small (ie, cost at most maxNegPCForInclInval(tabpool,n,nbPools)).
		 This is just a performance tweak: results are identical, whether using
		 inclusion invalidation or not */
	      if (unitClosuresP->negPoolCosts[unitNum] <= maxNegPCForInclInval(tabpool,n,nbPools))
		{
#ifdef DEBUG
		  if (DEBUG > 1)
		    fprintf(stderr, 
			    "calling substractNegPoolsFromUnitsInclInval on %d valid units\n",
			    unitClosuresP->nbValid) ;
#endif /* DEBUG */
		  newUnitsP = substractNegPoolsFromUnitsInclInval(currentClosureP, unitClosuresP, 
								  unitNum, maxNegPoolCost, nbPools) ;
		}
	      else
		{
		/* no inclusion-invalidation, use regular substract */
#ifdef DEBUG
		  if (DEBUG > 1)
		    fprintf(stderr, 
			    "calling substractNegPoolsFromUnits on %d valid units\n",
			    unitClosuresP->nbValid) ;
#endif /* DEBUG */
		  newUnitsP = substractNegPoolsFromUnits(currentClosureP, unitClosuresP, 
							 maxNegPoolCost, nbPools) ;
		}
	      
#ifdef SANITY
#ifdef DEBUG
	      fprintf(stderr, "checking newUnits after substractNeg.\n") ;
#endif /* DEBUG */
	      checkUnits(newUnitsP, nbPools) ;
#endif /* SANITY */

	      /* substract currentClosureP->posPools from each valid unit's posPools,
		 and invalidate any unit whose posPools becomes empty */
	      substractPosPoolsFromUnits(currentClosureP, newUnitsP, nbPools) ;
	      
	      /* at this point,  currentClosureP holds a complete closure (ie it's score
		 is exactly known, its posPools cannot be extended).
		 In addition, it is fully substracted from every valid unit in newUnitsP.
		 Finally, as many units as possible have been invalidated. 
		 The last things to do are save it if it is a best, and
		 then examine the children. */

#ifdef DEBUG
	      int oldBestScore = bestScore ;
#endif /* DEBUG */
	      bestScore = updateBestSetOfClosures(currentClosureP, bestClosuresP, bestScore, nbPools) ;
#ifdef DEBUG
	      if (bestScore != oldBestScore)
		fprintf(stderr, " updating bestScore from %d to %d.\n", oldBestScore, bestScore) ;
#endif /* DEBUG */
	      
	      if (newUnitsP->nbValid != 0)
		bestScore = updateBestClosuresRecSubstract(bestClosuresP, bestScore, newUnitsP,
							   currentClosureP, costOfConfPW, 
							   tabpool, n, nbPools) ;
#ifdef DEBUG
	      else
		/* no recursive call if no more valid units */
		fprintf(stderr, 
			"At index %d: best updated, no more valid units: no recursive call.\n",
			orderIndex) ;
#endif /* DEBUG */

	      /* newUnitsP got modified by updateBestClosuresRecSubstract, but we
		 don't need it anymore so that's OK: we want to free it anyways */
	      freeUnitClosures(newUnitsP) ;
	    }
	}
    }

  freeClosure(currentClosureP) ;
  freeVector(sortedByScore) ;
  freeVector(sortedByCost) ;

#ifdef DEBUG
  fprintf(stderr, 
	  "\nreturning from updateBestClosuresRecSubstract where nbOfClosures==%d, nbValid==%d\n",
	  unitClosuresP->nbOfClosures, nbInitValid) ;
  fprintf(stderr, "new bestScore=%d\n", bestScore) ;
#endif /* DEBUG */

  return(bestScore) ;
}


/*!
  Simulation-mode search for the highest-scoring closures.

  In a simulation the errors were introduced by us, so maxDist (the
  maximal distance to a coherent interpretation) is known. Consequently
  some closure of score >= costOfConfPW-maxDist must exist, and the
  search only has to consider closures reaching at least that score.
  This is what distinguishes this function from the decoding of a real
  experiment.

  This function only sets things up (initial best set, initial best
  score, empty starting closure) and then calls the recursive search
  selected by method:
  - method==1: regular updateBestClosuresRec;
  - method==2: updateBestClosuresRecSubstract.
  Any other value of method prints an error on stderr and exits.

  Returns the highest-scoring set of closures.

  NOTE: if method==2, unitClosuresP->valid gets modified. Make sure
  you don't use it afterwards, or save it before and restore after.
*/
static setOfClosures* findBestClosuresSim(unitClosures* unitClosuresP, int costOfConfPW,
					  int maxDist, int method, 
					  MOT* tabpool, int n, int nbPools)
{
  /* set of best closures found so far: starts out empty */
  setOfClosures* foundP = buildEmptySetOfClosures() ;

  /* the recursion starts from the empty closure (used as previousClosure) */
  closure* rootClosureP = buildEmptyClosure(nbPools) ;

  /* The score to reach initially is costOfConfPW - maxDist.
     The empty closure is itself always a candidate solution (it is the
     signature where every positive conflicting pool becomes negative).
     Its score is 0, so when the initial score to reach is <= 0 it can
     serve as initial best closure: let updateBestSetOfClosures decide. */
  int scoreToBeat = updateBestSetOfClosures(rootClosureP, foundP, 
					    costOfConfPW - maxDist, nbPools) ;

  switch (method)
    {
    case 1:
      /* plain recursive search, over units 0 .. nbOfClosures-1 */
      scoreToBeat = updateBestClosuresRec(foundP, scoreToBeat, unitClosuresP,
					  0, unitClosuresP->nbOfClosures - 1,
					  rootClosureP, costOfConfPW, nbPools) ;
      break ;

    case 2:
      /* substracting search: unitClosuresP comes straight from 
	 buildUnitClosures, hence all units are valid and already sorted */
      scoreToBeat = updateBestClosuresRecSubstract(foundP, scoreToBeat, unitClosuresP,
						   rootClosureP, costOfConfPW, 
						   tabpool, n, nbPools) ;
      break ;

    default:
      fprintf(stderr, "in findBestClosuresSim, method==%d is unknown.\n", method) ;
      exit(1) ;
    }

  /* the starting closure is no longer needed; the caller owns foundP */
  freeClosure(rootClosureP) ;
  return(foundP) ;
}

/*!
  Real-decoding search for the highest-scoring closures, ie for
  decoding actual experimental observations.

  Contrary to findBestClosuresSim, the number of errors is not known
  beforehand. It is therefore crucial to reach high-scoring closures
  as early as possible in the search, something findBestClosuresSim
  doesn't care much about.

  The recursive search is selected by method:
  - method==3: regular updateBestClosuresRec;
  - method==4: updateBestClosuresRecSubstract.
  Any other value of method prints an error on stderr and exits.

  Returns the highest-scoring set of closures.

  Algorithm: for now each recursive method is simply run without any
  extra control, starting from the empty closure (whose score is 0).
  Effects that remain to be tested:
  1. inclusion-invalidation (play with INCL_INVAL_MINUNITS);
  2. sorting in updateBestClosuresRecSubstract;
  3. possibility: start with some value (>0) as initialBestScore and
  leave bestClosuresP empty (ie don't place the empty closure in it);
  then search, with the standard algorithms, for all closures whose 
  score is >= initialBestScore. If any closure is found we are 
  guaranteed to hold all best-scoring closures: return. Otherwise
  (bestClosuresP comes back empty), lower initialBestScore and try
  again... An expectedNumberOfErrors parameter could provide the
  starting point, and 2 or 3 could be substracted from it each time
  nothing comes out (repeating until something is found).
  The drawback is that whenever bestClosuresP comes back empty, all
  the work is wasted and the search must start over. Saving a "border"
  of the currently explored search space might avoid this, although
  the substracted algo could be a problem (since pruned units are 
  completely gone...).

  NOTE: if method==4, unitClosuresP->valid gets modified. Make sure
  you don't use it afterwards, or save it before and restore after.
*/
static setOfClosures* findBestClosuresReal(unitClosures* unitClosuresP, int costOfConfPW,
					   int method, MOT* tabpool, int n, int nbPools)
{
  /* set of best closures found so far: starts out empty */
  setOfClosures* foundP = buildEmptySetOfClosures() ;

  /* the recursion starts from the empty closure (used as previousClosure) */
  closure* rootClosureP = buildEmptyClosure(nbPools) ;

  /* The empty closure is always a candidate solution (it is the signature
     where every positive conflicting pool becomes negative), and its
     score is 0. Seeding the update with the dummy score -1 therefore 
     makes the empty closure the initial best. */
  int scoreToBeat = updateBestSetOfClosures(rootClosureP, foundP, -1, nbPools) ;

  /* reject unsupported methods before launching any search */
  if ((method != 3) && (method != 4))
    {
      fprintf(stderr, "in findBestClosuresReal, method==%d is unknown.\n", method) ;
      exit(1) ;
    }

  if (method == 4)
    /* substracting search: unitClosuresP comes straight from 
       buildUnitClosures, hence all units are valid and already sorted */
    scoreToBeat = updateBestClosuresRecSubstract(foundP, scoreToBeat, unitClosuresP,
						 rootClosureP, costOfConfPW, 
						 tabpool, n, nbPools) ;
  else
    /* method==3: plain recursive search, over units 0 .. nbOfClosures-1 */
    scoreToBeat = updateBestClosuresRec(foundP, scoreToBeat, unitClosuresP,
					0, unitClosuresP->nbOfClosures - 1,
					rootClosureP, costOfConfPW, nbPools) ;

  /* the starting closure is no longer needed; the caller owns foundP */
  freeClosure(rootClosureP) ;
  return(foundP) ;
}



/************************************************************************
 ******************* EXPORTED FUNCTIONS *********************************
 ************************************************************************/


/*!
  \brief Compute every nearest coherent interpretation of the observed
  signature observedSig, and return them in a setOfSigs.

  Steps:
  - 1. Enrich observedSig, ie identify its conflicting pools;
  - 2. If there is something to fix, build the unit closures (closures of
  single vars), one for each conflicting var (a var appearing in at least
  one conflicting pos pool);
  - 3. Explore the set of parts of the set of unit closures, and keep the 
  parts with maximal score.

  The nearest coherent signatures are derived from these maximal-score 
  unions of closures as follows: the interpretation of each negative pool
  in the union is changed; then the interpretation of any remaining
  conflicting pos pool is changed.

  When the cost of the conflicting pos/weak pools is 0, observedSig is
  already coherent and the returned set only holds its canonical
  interpretation.

  method==1 or 2: use findBestClosuresSim, with Rec or RecSubstracted;<br>
  method==3 or 4: use findBestClosuresReal (with Rec or RecSubstracted).

  maxDist: max distance to a coherent interpretation, when known
  (ie in Sim mode (method==1 or 2), where we know how many errors were 
  introduced); if method==3 or 4 this arg is ignored.
*/
setOfSigs solvexpClosure(MOT* tabpool, signature* observedSig, 
			 int maxDist, int method, int n)
{
#ifdef SANITY
  /* SANITY mode performs very expensive checks: warn the user loudly */
  fprintf(stderr, "WARNING: solvexpClosure.c has SANITY mode on.\n") ;
  fprintf(stderr, "Remember to turn it off if everything is OK (very expensive sanity checks)\n\n") ;
#endif /* SANITY */

  setOfSigs result ; /* filled on either path below, then returned */

  /* enriched copy of the observation: conflicting pools are tagged
     SIG_xxxCONF, where the observed sig only has SIG_xxx */
  signature* enrichedP = enrichSig(tabpool, observedSig, n) ;
  
  /* what it would cost to change all conflicting pos/weak pools */
  int costOfConfPW = costOfConflictingPosWeak(enrichedP) ;

  if (costOfConfPW != 0)
    {
      /* some pools are conflicting: search for the best closures */
      unitClosures* unitsP = buildUnitClosures(tabpool, enrichedP, n) ;
      int nbPools = observedSig->nbPools ;

      /* exploration, ie find all closures of maximal score:
	 methods 3 and 4 go to findBestClosuresReal, anything else goes to
	 findBestClosuresSim (which takes maxDist into account) */
      setOfClosures* bestP = 
	((method == 3) || (method == 4))
	? findBestClosuresReal(unitsP, costOfConfPW, method, tabpool, n, nbPools)
	: findBestClosuresSim(unitsP, costOfConfPW, maxDist, method, tabpool, n, nbPools) ;
      
      /* the units may have been altered by the search, but they are
	 of no further use anyways */
      freeUnitClosures(unitsP) ;

      /* get rid of duplicates among the best closures... */
      removeDuplicateClosures(bestP, nbPools) ;

      /* ...and turn what remains into interpretations */
      result = allClosuresToSigs(bestP, costOfConfPW, enrichedP) ;

      freeSetOfClosures(bestP) ;
    }

  else
    {
      /* no conflicting pos pool, so observedSig is coherent: the answer
	 is simply its canonical interpretation, as sole member of the set */
      signature* canonP = canonicalInterp(observedSig) ;
      result = buildEmptySetOfSigs() ;
      addSigToSet(&result, canonP) ;

      /* distance is as set by buildEmptySetOfSigs, and nbOfSigs (initially
	 0) was incremented by addSigToSet. 
	 canonP must NOT be freed here: addSigToSet doesn't copy it! */
    }

  /* clean up and return */
  freeSig(enrichedP) ;

  return(result) ;
}

