/*----------------------------------------------------------------*
 *
 * File : simulObservation.c
 * Author : NTM
 * Created : 28/06/06
 *
 *
 * Copyright (C) Nicolas Thierry-Mieg, 2006.
 *
 *
 * This file is part of InterPool, written by 
 * Nicolas Thierry-Mieg (CNRS, France) Nicolas.Thierry-Mieg@imag.fr
 *
 * InterPool is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * InterPool is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with InterPool; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *-----------------------------------------------------------------*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h> /* memset */

#include "types.h" /* MOT */
#include "pools.h" /* pool, getpoolvalue */
#include "myrand.h" /* myrandom */
#include "signa.h" /* signature, set/getsigvalue, allocSig */
#include "distance.h" /* DIST_XXX */
#include "errorModel.h" /* XX_FRAC */

#include "simulObservation.h"


#undef DEBUG 
/* #define DEBUG for more output */


/************************************************************************
 ******************* LOCAL FUNCTIONS ************************************
 ************************************************************************/

/////////////// DECLARATIONS ///////////////

/*!
  \brief Build and return the expected noiseless interpretation, 
  given a list of positive variables.
  The pools are stored in tabpool, see pools.h for format.<br>
  posvars is a vector, it holds the number of positive variables at 
  index 0, followed by the (sorted) list of positive variables (starting 
  at index 1). It is built by simulateVars/2 in varia.h.

  \param tabpool  the pools (format: see pools.h).
  \param posvars  positive variables, count at index 0 then the variables.
  \param n        handed as-is to pool() to locate a pool in tabpool
                  (presumably the number of variables -- confirm in pools.h).
  \param nbPools  number of pools; also the size given to allocSig().
  \return the signature obtained from allocSig(nbPools), with SIG_POS set
  for every pool that contains at least one positive variable.
*/
static signature* buildSig(MOT* tabpool, int* posvars, int n, int nbPools) ;


//////////////// BODIES ///////////////////


/*!
  \brief Compute the noiseless signature expected for a given set of
  positive variables.

  \param tabpool  the pools (format: see pools.h).
  \param posvars  vector of positive variables: posvars[0] is their number,
                  posvars[1] .. posvars[posvars[0]] are the variables
                  (built by simulateVars/2 in varia.h).
  \param n        handed as-is to pool() to locate a pool in tabpool
                  (presumably the number of variables -- confirm in pools.h).
  \param nbPools  number of pools; the signature is allocated for that many.
  \return the signature obtained from allocSig(nbPools).

  Every pool is visited in turn. The positive variables are scanned until
  one of them is found in the pool, in which case the pool is marked
  SIG_POS and the scan stops. A pool holding no positive variable is left
  untouched: allocSig is expected to have initialized it to SIG_NEG.
*/
static signature* buildSig(MOT* tabpool, int* posvars, int n, int nbPools)
{
  signature* expected = allocSig(nbPools) ;
  int poolNum ;

  for (poolNum = 0 ; poolNum < nbPools ; poolNum++)
    {
      MOT* thisPool = pool(tabpool, poolNum, n) ;
      // posvars[0] is the count, so the variables sit at 1..lastIndex
      const int lastIndex = posvars[0] ;
      int found = 0 ;
      int idx = 1 ;

      // stop scanning as soon as one positive variable is in the pool
      while (!found && (idx <= lastIndex))
        {
          int candidate = posvars[idx] ;

          /* CAVEAT: using 11 instead of something like POOL_POS! */
          if (getpoolvalue(thisPool, candidate) == 11)
            found = 1 ;

          idx++ ;
        }

      if (found)
        setsigvalue(expected, poolNum, SIG_POS) ;
      /* else: nothing to do, the entry keeps its initial SIG_NEG value */
    }

  return expected ;
}


/*!
  \brief Number of false positives that show up as STRONG
  (i.e. POS, as opposed to WEAK).

  \param falsePos  total number of false positive observations.
  \return falsePos * PN_FRAC, with 0.5 added before truncation to int.
*/
int falseStrong(int falsePos)
{
  const int strongCount = (int)(falsePos * PN_FRAC + 0.5) ;

  return strongCount ;
}

/*!
  \brief Number of false positives that show up as WEAK.

  \param falsePos  total number of false positive observations.
  \return whatever is left of falsePos once the false STRONG
  (see falseStrong) have been taken out.
*/
int falseWeak(int falsePos)
{
  const int strongCount = falseStrong(falsePos) ;

  return falsePos - strongCount ;
}

/*!
  \brief Number of false negatives that show up as FAINT.

  \param falseNeg  total number of false negative observations.
  \return whatever is left of falseNeg once the false NONE
  (see falseNone) have been taken out.
*/
int falseFaint(int falseNeg)
{
  const int noneCount = falseNone(falseNeg) ;

  return falseNeg - noneCount ;
}

/*!
  \brief Number of false negatives that show up as NONE
  (i.e. NEG, as opposed to FAINT).

  \param falseNeg  total number of false negative observations.
  \return falseNeg * NP_FRAC, with 0.5 added before truncation to int.
*/
int falseNone(int falseNeg)
{
  const int noneCount = (int)(falseNeg * NP_FRAC + 0.5) ;

  return noneCount ;
}


/************************************************************************
 ******************* EXPORTED FUNCTIONS *********************************
 ************************************************************************/

/*!
  \brief Build and return a simulated observation, given:
  - the (simulated) positive vars;
  - the numbers of false positives and false negatives;
  - the fractions of observations that should be POS or WEAK
  among true positives on one hand and false positives on the
  other, and idem for NEG and FAINT wrt true and false negatives.
  These fractions are the xx_FRAC constants (errorModel.h is included
  for them). <br>

  \param tabpool  the pools (format: see pools.h).
  \param n        handed as-is to buildSig (and from there to pool()).
  \param nbPools  number of pools, must not be negative.
  \param posvars  positive variables: count at index 0, then the variables.
  \param falsePos number of truly negative pools to report as positive;
                  must be in 0..(number of truly negative pools).
  \param falseNeg number of truly positive pools to report as negative;
                  must be in 0..(number of truly positive pools).
  \return the signature built by buildSig, modified in place. The caller
  presumably has to release it -- check signa.h for the matching call.

  All the constraints above are checked; on violation (or if memory runs
  out) a message is printed on stderr and the program exits with status 1.

  The false positives are split into POS and WEAK with falseStrong /
  falseWeak (exact counts), the false negatives into NEG and FAINT with
  falseNone / falseFaint (exact counts). The remaining, correct pools
  are then independently downgraded to WEAK / FAINT using PP_FRAC and
  NN_FRAC as probabilities.

  The returned signature is not enriched (conflicts aren't tagged):
  it resembles as much as possible what we expect to obtain when
  performing an experiment.
*/
signature* buildObservation(MOT* tabpool, int n, int nbPools,
			    int* posvars, int falsePos, int falseNeg)
{
  /* Negative counts make no sense. Reject them right away with a clear
     message, rather than letting them slip through the "too many errors"
     test and trip the final sanity check (or the allocation) instead. */
  if ((nbPools < 0) || (falsePos < 0) || (falseNeg < 0))
    {
      fprintf(stderr,
	      "in buildObservation, nbPools, falsePos and falseNeg must not be negative\n") ;
      exit(1) ;
    }

  /* absolute number of false observations of each level, these are
     counted down while the errors are being placed */
  int falseS = falseStrong(falsePos) ;
  int falseW = falseWeak(falsePos) ;
  int falseN = falseNone(falseNeg) ;
  int falseF = falseFaint(falseNeg) ;

  // start from the noiseless interpretation
  signature* mySig = buildSig(tabpool, posvars, n, nbPools) ;


  ///////////////////////////////////////////
  // check that the constraints are satisfied
  ///////////////////////////////////////////

  // count the truly positive and truly negative pools
  int realPos = 0 ;
  int realNeg = 0 ;

  int absPoolNum ;
  for (absPoolNum=0; absPoolNum<nbPools; absPoolNum++)
    {
      int sigvalue = getsigvalue(mySig, absPoolNum);
      if (sigvalue == SIG_NEG)
	realNeg++ ;
      else if (sigvalue == SIG_POS)
	realPos++ ;
      else
	{
	  fprintf(stderr,
		  "in buildObservation, a signature value is neither SIG_NEG nor SIG_POS.\n") ;
	  fprintf(stderr, "This should not happen. value is %d\n", sigvalue) ;
	  exit(1);
	}
    }

  /* there must be enough negative pools to host the false positives and
     enough positive pools to host the false negatives (this also
     guarantees that the random search below terminates) */
  if ((falsePos > realNeg) || (falseNeg > realPos))
    {
      fprintf(stderr, "in buildObservation, too many errors!\n") ;
      exit(1) ;
    }


  ///////////////////////////////////////////
  // randomly pick the pools to change
  ///////////////////////////////////////////

  /* changedSig[abspoolnum] is FALSE while the pool is untouched and TRUE
     once an error has been placed on it, so that no pool is changed twice.
     calloc zero-fills the vector, i.e. every entry starts out FALSE (the
     zero value, as the tests on changedSig[...] below rely on).
     At least one element is requested because an allocation of size 0 is
     allowed to return NULL, which must not be mistaken for a failure. */
  bool* changedSig = calloc((nbPools > 0) ? (size_t)nbPools : 1,
			    sizeof *changedSig) ;
  if (changedSig==NULL)
    {
      fprintf(stderr, "in buildObservation, no more memory for changedSig\n") ;
      exit(1) ;
    }

  /* current algo is naive, we randomly pick in the whole sig
     and see if pool can be changed. We could instead build separate
     vectors with the realPos and realNeg pools, and randomly
     pick in these.
     This shouldn't be necessary except if almost all pools of a type 
     (pos or neg) are erroneous, but then the whole experiment is 
     probably useless...
  */
  int totalerrors = falsePos + falseNeg ;
  while (totalerrors>0)
    {
      int falsepool = myrandom(nbPools) ; // random int in {0,..,nbPools-1}

      // never change the same pool twice
      if (changedSig[falsepool])
	continue ;

      int poolsig = getsigvalue(mySig, falsepool) ;

      if ((poolsig==SIG_NEG) && ((falseS+falseW) > 0))
	{
	  // negative pool becomes a false positive, strong ones first
	  if (falseS>0)
	    {
	      setsigvalue(mySig,falsepool,SIG_POS) ;
	      falseS-- ;
#ifdef DEBUG
	      printf("false strong: absolute pool number %d\n",falsepool);
#endif /* DEBUG */
	    }
	  else // falseW > 0
	    {
	      setsigvalue(mySig,falsepool,SIG_WEAK) ;
	      falseW-- ;
#ifdef DEBUG
	      printf("false weak: absolute pool number %d\n",falsepool);
#endif /* DEBUG */
	    }
	}
      else if ((poolsig==SIG_POS) && ((falseN+falseF) > 0))
	{
	  // positive pool becomes a false negative, "none" ones first
	  if (falseN>0)
	    {
	      setsigvalue(mySig,falsepool,SIG_NEG) ;
	      falseN-- ;
#ifdef DEBUG
	      printf("false none: absolute pool number %d\n",falsepool);
#endif /* DEBUG */
	    }
	  else // falseF > 0
	    {
	      setsigvalue(mySig,falsepool,SIG_FAINT) ;
	      falseF-- ;
#ifdef DEBUG
	      printf("false faint: absolute pool number %d\n",falsepool);
#endif /* DEBUG */
	    }
	}
      else
	// no error of the kind this pool could host is left, draw again
	continue ;

      // an error was placed on falsepool: remember it and count it
      changedSig[falsepool] = TRUE;
      totalerrors--;
    }
  
  /* SANITY: ok, totalerrors is 0, falseS etc should also be 0, check */
  if ((falseS!=0) || (falseW!=0) || (falseF!=0) || (falseN!=0))
    {
      fprintf(stderr, "in buildObservation, a falseX doesn't end at 0, problem!\n") ;
      exit(1) ;
    }


  ///////////////////////////////////////////
  // finally, split the true pos into strong and weak, idem for negs
  ///////////////////////////////////////////

  /* in this case we just use PP_FRAC and NN_FRAC as probabilities,
     we don't do exact counts.
     This is just easier to implement and shouldn't be a problem.

     The alternative would be to use exact values as follows:
     int trueStrong = (int)((realPos - falseNeg) * PP_FRAC + 0.5) ;
     int trueWeak = realPos - falseNeg - trueStrong ;
     int trueNone = (int)((realNeg - falsePos) * NN_FRAC + 0.5) ;
     int trueFaint = realNeg - falsePos - trueNone ;
  */

  /* a draw in {0,..,99} at or above the threshold downgrades the pool;
     the thresholds don't depend on the pool, compute them once */
  const int strongThreshold = (int)(100*PP_FRAC) ;
  const int noneThreshold = (int)(100*NN_FRAC) ;

  for (absPoolNum=0; absPoolNum<nbPools; absPoolNum++)
    {
      // erroneous pools already have their final value
      if (changedSig[absPoolNum])
	continue ;

      /* one draw per correct pool, whatever its value */
      int randVal = myrandom(100) ;
      int sigvalue = getsigvalue(mySig,absPoolNum) ;
      if ((sigvalue==SIG_POS) && (randVal>=strongThreshold))
	setsigvalue(mySig,absPoolNum,SIG_WEAK) ;
      else if ((sigvalue==SIG_NEG) && (randVal>=noneThreshold))
	setsigvalue(mySig,absPoolNum,SIG_FAINT) ;
    }

  free(changedSig) ;

  return(mySig) ;

}


/*!
  \brief Distance between a simulated observation (as built by
  buildObservation) and the ideal noiseless interpretation it was
  derived from.

  \param falsePos  number of false positives in the observation.
  \param falseNeg  number of false negatives in the observation.
  \return the sum of the numbers of false strong / weak / none / faint
  observations, each weighted by its DIST_XXX cost.

  Only the error counts matter, so the signature itself is not needed.
*/
int distToNoiseless(int falsePos, int falseNeg)
{
  /* how the false positives split up */
  const int nbStrong = falseStrong(falsePos) ;
  const int nbWeak = falseWeak(falsePos) ;

  /* how the false negatives split up */
  const int nbNone = falseNone(falseNeg) ;
  const int nbFaint = falseFaint(falseNeg) ;

  return ( DIST_POS * nbStrong + DIST_WEAK * nbWeak +
           DIST_NEG * nbNone + DIST_FAINT * nbFaint ) ;
}


