/*----------------------------------------------------------------*
 *
 * File : validation.c
 * Author : NTM
 * Created : 15/09/04
 *
 *
 * Copyright (C) Nicolas Thierry-Mieg, 2006.
 *
 *
 * This file is part of InterPool, written by 
 * Nicolas Thierry-Mieg (CNRS, France) Nicolas.Thierry-Mieg@imag.fr
 *
 * InterPool is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * InterPool is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with InterPool; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *-----------------------------------------------------------------*/

#include <stdlib.h> /* free, bsearch, exit, atoi, strtod */
#include <string.h> /* memset */
#include <stdio.h> /* printf and friends */

#include "types.h" /* MOT */
#include "distance.h" /* DIST_XXX */
#include "jobs.h" /* definition of JobIdentSim datatype */
#include "pools.h" /* STD, getpoolvalue */
#include "design.h" /* fillDesign */
#include "varia.h" /* simulateVars */
#include "signa.h" /* equalSigs, setOfSigs, freeSetOfSigs */
#include "signaIO.h" /* writeSig */
#include "fonc.h" /* compInts (for bsearch), optimalq, firstDivisor */
#include "myrand.h" /* plantSeed */
#include "simulObservation.h" /* buildObservation, distToNoiseless, falseXXX */
#include "solvexpNaive.h" /* solvexpNaive */
#include "solvexpClosure.h" /* solvexpClosure */

#include "validation.h"


/* Compile-time debug switch for this file.
   When DEBUG is defined, additional trace output is produced (on stderr
   by equalSetsOfSigs, in the output file by validation), and each
   generated observation signature is saved to its own file.
   To enable it, comment out the #undef below and uncomment the #define. */
#undef DEBUG 
//#define DEBUG




/************************************************************************
 ******************* LOCAL FUNCTIONS ************************************
 ************************************************************************/

/********************** DECLARATIONS ************************************/

/*!
  \brief Test if two sets of signatures are equal (setwise: order may differ).

  \param sigs1,sigs2 the 2 sets of sigs to compare.

  \return TRUE if both sets have the same nbOfSigs and distance and hold
  the same sigs, FALSE otherwise.
*/
static bool equalSetsOfSigs(setOfSigs sigs1, setOfSigs sigs2) ;



/************************ BODIES ***************************************/


/*!
  Test if two sets of signatures are equal.
  This function is useful for cross-validating the various solvexp methods.

  The two sets are considered equal when they have the same nbOfSigs, the
  same distance, and every sig of sigs1 can be paired with a distinct,
  identical (according to equalSigs) sig of sigs2. Two empty sets with the
  same distance are equal.

  \param sigs1,sigs2: the 2 sets of sigs

  \return TRUE if the 2 sets hold exactly the same sigs (although the order
  may be different), FALSE otherwise.

  NOTE: this function wants signature contents to be absolutely identical!
  This means that if a solvexp method returns a setOfSigs in which some
  sigs have values that shouldn't appear in interpretations (eg SIG_xxxCONF
  or SIG_WEAK), it will say that the sigs are different.
  In general, to avoid problems, all solvexp methods should return 
  sets of proper interpretations, ie sigs where pospools are marked SIG_POS 
  and negpools are marked SIG_NEG.

  NOTE: the program exits with status 1 if the bookkeeping array cannot
  be allocated, or if the final sanity check fails.
*/
static bool equalSetsOfSigs(setOfSigs sigs1, setOfSigs sigs2)
{
  int nbSigs = sigs1.nbOfSigs ;
  int distance = sigs1.distance ;

  if (nbSigs != sigs2.nbOfSigs)
    {
#ifdef DEBUG
      fprintf(stderr, 
              "In equalSetsOfSigs, the two sets have different nbOfSigs values: %d and %d\n",
              nbSigs, sigs2.nbOfSigs) ;
#endif /* DEBUG */
      return(FALSE) ;
    }

  if (distance != sigs2.distance)
    {
#ifdef DEBUG
      fprintf(stderr, 
              "In equalSetsOfSigs, the two sets have different distance values: %d and %d\n",
              distance, sigs2.distance) ;
#endif /* DEBUG */
      return(FALSE) ;
    }

  // the 2 sets have identical distance and nbOfSigs, compare the allSigs
#ifdef DEBUG
  fprintf(stderr, 
          "OK, sets of sigs both have %d sigs at distance %d\n",
          nbSigs, distance) ;
#endif

  if (nbSigs == 0)
    {
      /* Both sets are empty: nothing to match, they are equal.
         Returning here also avoids a zero-sized allocation, which is
         allowed to return NULL and would be mistaken for an
         out-of-memory condition below. */
      return(TRUE) ;
    }

  signature** allSigs1 = sigs1.allSigs ;
  signature** allSigs2 = sigs2.allSigs ;

  /* The sigs can be in different orders in the 2 sets.
     To avoid trying to use the same sig several times, we
     mark the sigs from sigs2 that have already been matched
     to a sig of sigs1.
     This is done using sig2Used: calloc zero-fills it, so every
     entry starts out as "not used yet".
  */
  bool* sig2Used = calloc((size_t)nbSigs, sizeof *sig2Used) ;
  if (sig2Used == NULL)
    {
      fprintf(stderr, "In equalSetsOfSigs, no more mem for sig2Used\n") ;
      exit(1) ;
    }

  int indexSig1 ;
  for (indexSig1=0; indexSig1<nbSigs; indexSig1++)
    {
      signature* currentSig1 = allSigs1[indexSig1] ;

#ifdef DEBUG
      fprintf(stderr, "examining sig number %d from first set\n", indexSig1) ;
#endif

      /* compare currentSig1 to each sig from allSigs2, until:
         - an equal sig2 is found (then mark this sig2 as Used and try 
           the next sig1) ;
         - no sig2 is equal to currentSig1: cleanup and return FALSE.
      */
      bool currentSig1Found = FALSE ;
      int indexSig2 ;
      for (indexSig2=0; indexSig2<nbSigs; indexSig2++)
        {
          if (sig2Used[indexSig2])
            {
              // this sig2 already used, try the next one
              continue ;
            }

          if ( equalSigs(currentSig1, allSigs2[indexSig2]) )
            { // OK, sigs are equal
#ifdef DEBUG
              fprintf(stderr, "OK, sig number %d from second set is identical\n", indexSig2) ;
#endif
              sig2Used[indexSig2] = TRUE ;
              currentSig1Found = TRUE ;
              break ;
            }
        }

      if (!currentSig1Found)
        {
#ifdef DEBUG
          fprintf(stderr, "problem: cannot find this sig in second set\n") ;
#endif
          free(sig2Used) ;
          return(FALSE) ;
        }
    }

  /* if we get here, it means all sigs1 have been found: the sets are equal */

  {
    // sanity check: every sig2 should have been matched exactly once
    int i ;
    for (i=0; i<nbSigs; i++)
      {
        if (!sig2Used[i])
          {
            fprintf(stderr, "in equalSetsOfSigs, at the end sig2Used has a FALSE value! DEBUG ME!\n") ;
            exit(1) ;
          }
      }
  }

  free(sig2Used) ;
  return(TRUE) ;
}



/************************************************************************
 ******************* EXPORTED FUNCTIONS *********************************
 ************************************************************************/




/*!
  Using 2 (different) solvexp methods, perform a 
  batch of simulations as specified in thisJob, and compare the
  obtained nearest coherent signatures instead of outputting the 
  simm results.

  This function is largely inspired by simulation (in simulation.c).
  The beginning of the work is identical... we just don't finish
  the simulation job, and instead compare signatures to each other
  rather than to the simulated positives. The schema is:
  - 1. build the pools
  - 2. build a simulated VV
  - 3. generate the corresponding observation, possibly with noise
  - 4. solve this observation using 2 (different) solvexp methods
     (and therefore obtain 2 sets of  nearest coherent interpretations)
  - 5. compare these 2 sets of sigs, and output comparison result.
  
  NOTE: as in simulation/2, this function is charged with opening 
  and closing an output stream, using the outFileName stored in thisJob.

  \param thisJob holds all info relevant to current simm job.
  \param method1, method2 choose the solvexp methods you want to compare.
  Current valid values are: 
    - 1: use solvexpNaive
    - 2: use solvexpClosure (with method==1, ie findBestClosuresSim with Rec)
    - 3: use solvexpClosure (with method==2, ie findBestClosuresSim with RecSubstracted)
    - 4: use solvexpClosure (with method==3, ie findBestClosuresReal with Rec)
    - 5: use solvexpClosure (with method==4, ie findBestClosuresReal with RecSubstracted)
*/
void validation(JobIdentSim *thisJob, int method1, int method2)
{

  // check that the methods are valid and different
  if ( (method1 < 1) || (method1 > 5) || (method2 < 1) || (method2 > 5) || (method1==method2) )
    {
      fprintf(stderr, 
	      "validation called with unsupported or identical methods: %d %d! Exiting now\n",
	      method1, method2) ;
      exit(1) ;
    }

  int n = thisJob->n ;
  int nbPools = thisJob->nbPools ;
  char *designFile = thisJob->designFileName ;
  int nbPosVars = thisJob->nbPosVars ;
  int falsePos = thisJob->falsePos ;
  int falseNeg = thisJob->falseNeg ;
  int nsim = thisJob->nsim ;
  char *randomGenMethod = thisJob->randomGenMethod ;
  unsigned int seed = thisJob->seed ;
  char *outFileName = thisJob->outFileName ;

  /* create an output stream for writing */
  FILE *outStream ;

  /* OUTDIR is made in doValidation.c. For parallel version
     we might have to move it here? */


  /* make sure the output file doesn't already exist */
  outStream = fopen(outFileName, "r");
  if (outStream != NULL)
    {
      fprintf(stderr,"In validation: cannot write to %s, file already exists\n", outFileName);
      exit(1);
    }

  /* open stream for writing */
  outStream = fopen(outFileName, "w");
  if (outStream == NULL)
    {
      fprintf(stderr,"In simulation: cannot open %s for writing\n", outFileName);
      exit(1);
    }

  /* header of output file: print common information */

  /* number of errors generated */
  int falseS = falseStrong(falsePos) ;
  int falseW = falseWeak(falsePos) ;
  int falseN = falseNone(falseNeg) ;
  int falseF = falseFaint(falseNeg) ;

  fprintf(outStream, "Comparing methods %d and %d\n", method1, method2) ;
  fprintf(outStream, "Currently the methods numbers mean:\n") ;
  fprintf(outStream, "\t1: solvexpNaive,\n") ;
  fprintf(outStream, "\t2 to 5: solvexpClosure, more precisely:\n") ;
  fprintf(outStream, "\t\t2: closureSim with Rec algo,\n") ;
  fprintf(outStream, "\t\t3: closureSim with RecSubstracted algo,\n") ;
  fprintf(outStream, "\t\t4: closureReal with Rec algo,\n") ;
  fprintf(outStream, "\t\t5: closureReal with RecSubstracted algo.\n") ;
  fprintf(outStream, "using design file=%s\n", designFile) ;
  fprintf(outStream, "nb of posVars=%d, nb of falseStrong=%d, falseWeak=%d,",nbPosVars,falseS,falseW);
  fprintf(outStream, "  nb of falseFaint=%d, falseNone=%d\n",falseF,falseN);
  fprintf(outStream, "using costs: NEG==%d, FAINT==%d, WEAK==%d, POS==%d\n\n",
	  DIST_NEG, DIST_FAINT, DIST_WEAK, DIST_POS) ;
  fprintf(outStream, "performing %d simulations\n", nsim) ;
  fprintf(outStream, "using as random generator: %s, with seed: %u\n", randomGenMethod, seed) ;
  fprintf(outStream, "\n") ; /* final newline: end of header */


  /* plant seed */
  plantSeed(seed) ;
    
  {
    /* build pools */
    MOT* tabpool =  buildPools(designFile, n, nbPools) ;

    int i ; /* loop var */
    for (i=0; i<nsim; i++)
      {
#ifdef DEBUG
	fprintf(outStream, "starting sim number %d\n", i+1) ;
#endif /*DEBUG*/
	
	//build a simulated VV
	int* posvars = simulateVars(n, nbPosVars) ;
	
	//build the corresponding noisy observation
	signature* mySig = buildObservation(tabpool, n, nbPools,
					    posvars, falsePos, falseNeg) ;

#ifdef DEBUG
	/* print the sig in a file */
	char outFileNameSig[OUTFILELENGTH] ;
	int numchars = snprintf(outFileNameSig, OUTFILELENGTH, "%s.%i", outFileName, i);
	// make sure file name was not too long
	if ((numchars < 0) || (numchars >= OUTFILELENGTH))
	  {
	    fprintf(stderr, "in validation, error building outFileNameSig: too long?\n");
	    exit(1);
	  }
	writeSig(outFileNameSig, mySig);
#endif /* DEBUG */

	setOfSigs sigsMethod1, sigsMethod2 ;

	/* obtain nearest coherent signatures with method1 */
	if (method1==1)
	  {
	    sigsMethod1 = solvexpNaive(tabpool, mySig, n) ;
	  }
	else if ((method1>=2) && (method1<=5))
	  {
	    int method = method1 - 1 ;
	    /* maxDist is only used for method1 2 and 3, solvexpClosure discards it
	       if method1==4 or 5 */
	    int maxDist = distToNoiseless(falsePos, falseNeg) ;
	    sigsMethod1 = solvexpClosure(tabpool, mySig, maxDist, method, n) ;
	  }
	else
	  {
	    /* shouldn't happen, valid methods are checked above */
	    fprintf(stderr, 
		    "in validation: method1 (%d) is invalid, but was NOT checked at the beginning! FIX ME!\n", 
		    method1) ;
	    exit(1) ;
	  }

	/* obtain nearest coherent signatures with method2 */
	if (method2==1)
	  {
	    sigsMethod2 = solvexpNaive(tabpool, mySig, n) ;
	  }
	else if ((method2>=2) && (method2<=5))
	  {
	    int method = method2 - 1 ;
	    /* maxDist is only used for method1 2 and 3, solvexpClosure discards it
	       if method1==4 or 5 */
	    int maxDist = distToNoiseless(falsePos, falseNeg) ;
	    sigsMethod2 = solvexpClosure(tabpool, mySig, maxDist, method, n) ;
	  }
	else
	  {
	    /* shouldn't happen, valid methods are checked above */
	    fprintf(stderr, 
		    "in validation: method2 (%d) is invalid, but was NOT checked at the beginning! FIX ME!\n", 
		    method2) ;
	    exit(1) ;
	  }

	if ( equalSetsOfSigs(sigsMethod1, sigsMethod2) )
	  {
	    fprintf(outStream, "OK, deduced signatures are identical\n\n") ;
	  }
	else
	  {
	    /* sigs are different! one method is wrong, seek-and-debug!! */
	    fprintf(outStream, 
		    "ERROR: in simm number %d, signatures are different! do some debugging...\n\n",
		    i+1) ;
	  }

	freeSig(mySig) ; /* mySig no longer needed, free it */
	free(posvars) ;
	freeSetOfSigs(sigsMethod1) ;
	freeSetOfSigs(sigsMethod2) ;
      }
    free(tabpool);
  }
  
  /* print file trailer (useful for parsing the file) */
  fprintf(outStream, "___DONE___\n") ;

  fclose(outStream) ;
}



