/*----------------------------------------------------------------*
 *
 * File : decodeObservedSig.c
 * Author : NTM
 * Created : 01/04/05
 *
 *
 * Copyright (C) Nicolas Thierry-Mieg, 2006.
 *
 *
 * This file is part of InterPool, written by 
 * Nicolas Thierry-Mieg (CNRS, France) Nicolas.Thierry-Mieg@imag.fr
 *
 * InterPool is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * InterPool is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with InterPool; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *-----------------------------------------------------------------*/

#include <stdlib.h>
#include <string.h> /* strlen */
#include <stdio.h> /* printf and friends */
#include <time.h> /* time */

#include "types.h" /* MOT, ...? */
#include "jobs.h" /* JobIdentReal */
#include "pools.h" /* getpoolvalue */
#include "design.h" /* fillDesign */
#include "signa.h" /* getsigvalue, setOfSigs, freeSetOfSigs */
#include "signaIO.h" /* readSig */
#include "solveSigSimple.h" /* solveSigSimple */
#include "solvexpNaive.h" /* solvexpNaive */
#include "solvexpClosure.h" /* solvexpClosure */
#include "sigsToVVs.h" /* buildDeducedVV */
#include "distance.h" /* DIST_XXX */

#include "decodeObservedSig.h"

/* length of the buffer handed to ctime_r in printDecodingResults.
   POSIX documents that ctime_r needs room for at least 26 bytes,
   so 30 leaves a little slack. */
#define DATELENGTH 30

/* values used for the error vectors (see findErrors, mergeErrors, etc...).
   These are used exclusively in this file, values don't matter
   except that they should be ints and all be different. 
   The meaning of each value is:
   - ERRORS_OK: pool is "true" (true pos or true neg in every coherent sig)
   - ERRORS_FP: pool is false positive (according to every coherent sig)
   - ERRORS_FN: pool is false negative (according to every coherent sig)
   - ERRORS_AMBI: pool is ambiguous (has several different values in the coherent sigs).
*/
#define ERRORS_OK 1
#define ERRORS_FP 2
#define ERRORS_FN 3
#define ERRORS_AMBI 4


/************************************************************************
 ******************* LOCAL FUNCTIONS ************************************
 ************************************************************************/

/********************** DECLARATIONS ************************************/

/*!
  \brief Return the "cost" (lower is better) of interpreting variable var 
  as positive, given the observation observedSig.
  The cost adds DIST_NEG (resp. DIST_FAINT) for every pool of tabpool that 
  contains var and is observed as SIG_NEG (resp. SIG_FAINT).
  Exits with an error message if one of those pools holds a SIG_XXXCONF 
  value in observedSig.
*/
static int varCost (int var, signature* observedSig, MOT* tabpool, int n) ;


/*!
  \brief mySig is an interpretation and observedSig an observation.
  Return a vector of nbPools ints (allocated here), using ERRORS_XX values
  to state whether each pool is FP, FN, OK, or AMBI.
  The vector is obtained with malloc: the caller must free() it.
  Precondition: mySig must have only SIG_POS or SIG_NEG; and
  observedSig cannot have any SIG_XXXCONF values. This is checked
  (the program exits with an error message on violation).
*/
static int* findErrors(signature* mySig, signature* observedSig) ;


/*!
  \brief given a set of coherentSigs, build and return an error vector,
  using the ERRORS_XX values.
  Returns the merged error vector (allocated here, to be released 
  with free() by the caller), or NULL (if coherentSigs is empty).
*/
static int* mergeErrors(setOfSigs coherentSigs, signature* observedSig) ;


/*!
  \brief print to outStream (which must be open) the lists of 
  positive and ambiguous variables stored in thisVV.
  Each variable is printed along with its cost, as computed by varCost
  using observedSig, tabpool and n.
*/
static void printVV(MOT* thisVV, signature* observedSig, 
		    MOT* tabpool, int n, FILE* outStream) ;


/*!
  \brief print to outStream(which must be open) 
  the lists of FPs, FNs and AMBI pools according to errorVec (which
  uses the ERRORS_XX values).
  The header of the FP list also reports the total cost of the FP pools 
  (DIST_POS or DIST_WEAK per pool, depending on its value in observedSig).
*/
static void printErrors(int* errorVec, signature* observedSig, 
			int nbPools, FILE* outStream) ;


/*!
  \brief build the merged deduced VV and the merged errors 
  corresponding to coherentSigs, and print them to 
  thisJob.outFileName; if there are several coherentSigs, 
  also print the VVs + erroneous pools (compared to the observedSig) 
  for each nearest coherent sig.
  Sigs in coherentSigs are decoded sigs (ie interpretations), so they
  should have only SIG_POS or SIG_NEG.
  observedSig is an observation, so it should only hold SIG_POS, SIG_WEAK, 
  SIG_FAINT and SIG_NEG values (no SIG_XXXCONF values).
  This function takes care of opening and closing the stream
  for outFileName. It exits with an error message if that file 
  already exists or cannot be opened for writing.
*/
static void printDecodingResults(JobIdentReal* thisJob, signature* observedSig, 
				 setOfSigs coherentSigs, MOT* tabpool) ;


/************************ BODIES ***************************************/


/*!
  Confidence score of variable var (lower is better): the price of 
  claiming that var is positive although some of the pools that contain 
  it were observed negative.
  Every pool of tabpool that contains var contributes DIST_NEG when it 
  is observed SIG_NEG and DIST_FAINT when it is observed SIG_FAINT; 
  pools observed with other plain values contribute nothing.
  Typically, var has been decoded positive and this is its cost.
  \param var the variable to score.
  \param observedSig the observed sig; it may hold any plain value
  (pos/weak/faint/neg) but must not be enriched: finding a SIG_XXXCONF 
  value in a pool containing var is fatal (message on stderr, exit(1)).
  \param tabpool the pools.
  \param n forwarded to pool() to locate each pool in tabpool.
  \return the accumulated cost.
*/
static int varCost (int var, signature* observedSig, MOT* tabpool, int n)
{
  int total = 0 ;
  int poolCount = observedSig->nbPools ;

  for (int p = 0; p < poolCount; p++)
    {
      MOT* currentPool = pool(tabpool, p, n) ;

      /* pools that do not contain var cannot contradict it */
      if (getpoolvalue(currentPool, var) != 11)
        continue ;

      int observed = getsigvalue(observedSig, p) ;

      if (observed == SIG_NEG)
        {
          total += DIST_NEG ;
        }
      else if (observed == SIG_FAINT)
        {
          total += DIST_FAINT ;
        }
      else if ((observed == SIG_NEGCONF) || (observed == SIG_FAINTCONF)
               || (observed == SIG_WEAKCONF) || (observed == SIG_POSCONF))
        {
          /* SANITY: an observation must never be enriched */
          fprintf(stderr, "in varCost: observation is enriched. Illegal.\n") ;
          exit(1) ;
        }
    }

  return total ;
}


/*!
  \brief Compare an interpretation (mySig) with the observation 
  (observedSig), pool by pool.
  The result is a freshly malloc'ed vector holding one ERRORS_XX value 
  per pool of observedSig:
  - ERRORS_OK when both agree (decoded POS and observed POS/WEAK, or 
    decoded NEG and observed NEG/FAINT);
  - ERRORS_FN when the pool is decoded POS but observed NEG/FAINT;
  - ERRORS_FP when the pool is decoded NEG but observed POS/WEAK.
  ERRORS_AMBI is never produced here.
  The caller is responsible for free()ing the returned vector.
  Preconditions (both are checked, violation is fatal): mySig holds only 
  SIG_POS or SIG_NEG, and observedSig holds only SIG_POS, SIG_WEAK, 
  SIG_FAINT or SIG_NEG.
*/
static int* findErrors(signature* mySig, signature* observedSig)
{
  int poolCount = observedSig->nbPools ;
  int* verdicts = malloc(poolCount * sizeof *verdicts) ;
  if (verdicts == NULL)
    {
      fprintf(stderr,"in findErrors, no more memory for errorVec\n");
      exit(1);
    }

  for (int p = 0; p < poolCount; p++)
    {
      int decoded = getsigvalue(mySig, p) ;
      int observed = getsigvalue(observedSig, p) ;

      /* a decoding result can only be POS or NEG */
      if ((decoded != SIG_POS) && (decoded != SIG_NEG))
        {
          fprintf(stderr, "in findErrors: a coherent sig has an illegal value.\n") ;
          exit(1) ;
        }

      /* an observation is either positive-like or negative-like */
      int seenPositive = (observed == SIG_POS) || (observed == SIG_WEAK) ;
      int seenNegative = (observed == SIG_NEG) || (observed == SIG_FAINT) ;
      if (!seenPositive && !seenNegative)
        {
          fprintf(stderr, 
                  "in findErrors, observedSig has illegal value for pool %d.\n", p) ;
          exit(1) ;
        }

      if (decoded == SIG_POS)
        /* true positive, or else false negative */
        verdicts[p] = seenPositive ? ERRORS_OK : ERRORS_FN ;
      else
        /* true negative, or else false positive */
        verdicts[p] = seenNegative ? ERRORS_OK : ERRORS_FP ;
    }

  return verdicts ;
}


/*!
  \brief Combine the per-pool verdicts of all sigs in coherentSigs into 
  a single error vector (ERRORS_XX values).
  The vector starts as findErrors() of the first sig; each further sig 
  is then compared to it, and any pool on which that sig contradicts 
  the verdict recorded so far becomes ERRORS_AMBI. A pool therefore 
  keeps ERRORS_OK / ERRORS_FP / ERRORS_FN only if every sig agrees.

  \param coherentSigs a set of signatures (typically, the set of nearest 
  coherent signatures obtained after resolving an observed sig).
  \param observedSig the observed sig.
  \return the merged vector (allocated here through findErrors, to be 
  free()d by the caller), or NULL when coherentSigs holds no sig.

  preconditions: coherent sigs should be interpretations (only SIG_POS 
  or SIG_NEG); and observedSig should be an observation (no SIG_XXXCONF).
  An illegal value in a coherent sig is fatal. Illegal observed values 
  are caught by findErrors on the first sig, so they are silently 
  skipped in the merging loop.
*/
static int* mergeErrors(setOfSigs coherentSigs, signature* observedSig)
{
  int poolCount = observedSig->nbPools ;
  int sigCount = coherentSigs.nbOfSigs ;

  /* no signatures in coherentSigs (strange?): nothing to merge */
  if (sigCount <= 0)
    return NULL ;

  /* seed the result with the verdicts of the first sig */
  int* merged = findErrors(coherentSigs.allSigs[0], observedSig) ;

  /* then confront it with every remaining sig */
  for (int s = 1; s < sigCount; s++)
    {
      for (int p = 0; p < poolCount; p++)
        {
          int previous = merged[p] ;
          int decoded = getsigvalue(coherentSigs.allSigs[s], p) ;
          int observed = getsigvalue(observedSig, p) ;

          int seenPositive = (observed == SIG_POS) || (observed == SIG_WEAK) ;
          int seenNegative = (observed == SIG_NEG) || (observed == SIG_FAINT) ;

          if (decoded == SIG_POS)
            {
              if (seenPositive)
                {
                  /* TP for this sig: only clashes with an earlier FP */
                  if (previous == ERRORS_FP)
                    merged[p] = ERRORS_AMBI ;
                }
              else if (seenNegative)
                {
                  /* FN for this sig: only clashes with an earlier TN */
                  if (previous == ERRORS_OK)
                    merged[p] = ERRORS_AMBI ;
                }
              /* any other observed value was already rejected by findErrors */
            }
          else if (decoded == SIG_NEG)
            {
              if (seenNegative)
                {
                  /* TN for this sig: only clashes with an earlier FN */
                  if (previous == ERRORS_FN)
                    merged[p] = ERRORS_AMBI ;
                }
              else if (seenPositive)
                {
                  /* FP for this sig: only clashes with an earlier TP */
                  if (previous == ERRORS_OK)
                    merged[p] = ERRORS_AMBI ;
                }
              /* any other observed value was already rejected by findErrors */
            }
          else
            {
              /* a decoding result can only be POS or NEG */
              fprintf(stderr, "in mergeErrors: a coherent sig has an illegal value.\n") ;
              exit(1) ;
            }
        }
    }

  return merged ;
}


/*!
  \brief helper for printVV: scan variables 0..n-1 and, for each one 
  whose value in thisVV equals wanted, print a line "var (cost c)" to 
  outStream, where c is varCost() of that variable.
*/
static void printVarsHavingValue(MOT* thisVV, int wanted, signature* observedSig, 
                                 MOT* tabpool, int n, FILE* outStream)
{
  for (int v = 0; v < n; v++)
    {
      if (getpoolvalue(thisVV, v) != wanted)
        continue ;

      int cost = varCost(v, observedSig, tabpool, n) ;
      fprintf(outStream, "%d (cost %d)\n", v, cost) ;
    }
}


/*!
  \brief Write to outStream (which must be open) two titled lists: 
  the positive variables of thisVV (value 11), then its ambiguous 
  variables (value 01, an octal literal, i.e. 1), each variable being 
  followed by its cost as computed by varCost.
  A blank line terminates the output.
  \param thisVV the vector of variable values to report.
  \param observedSig, tabpool, n are only used to compute the costs.
*/
static void printVV(MOT* thisVV, signature* observedSig, 
                    MOT* tabpool, int n, FILE* outStream)
{
  fprintf(outStream, "POSITIVE VARIABLES and costs:\n") ;
  printVarsHavingValue(thisVV, 11, observedSig, tabpool, n, outStream) ;

  fprintf(outStream, "\nAMBIGUOUS VARIABLES and costs:\n") ;
  printVarsHavingValue(thisVV, 01, observedSig, tabpool, n, outStream) ;

  fprintf(outStream, "\n") ;
}


/*!
  \brief helper for printErrors: print to outStream, one per line, 
  the absolute number of every pool whose entry in errorVec is verdict.
*/
static void printPoolsWithVerdict(int* errorVec, int nbPools, 
                                  int verdict, FILE* outStream)
{
  for (int p = 0; p < nbPools; p++)
    {
      if (errorVec[p] == verdict)
        fprintf(outStream, "%d\n", p) ;
    }
}


/*!
  \brief Write to outStream (which must be open) three titled lists 
  of absolute pool numbers, in this order: false positives, false 
  negatives, ambiguous pools, as flagged in errorVec (ERRORS_XX values).
  The title of the false positive list also gives their total cost: 
  DIST_POS for each FP pool observed SIG_POS plus DIST_WEAK for each 
  FP pool observed SIG_WEAK. An FP pool with any other observed value 
  is a fatal inconsistency (message on stderr, exit(1)).
  A blank line terminates the output.
  \param errorVec one ERRORS_XX value per pool.
  \param observedSig the observation, used only for the FP cost.
  \param nbPools number of entries in errorVec.
*/
static void printErrors(int* errorVec, signature* observedSig, 
                        int nbPools, FILE* outStream)
{
  int lastPool = nbPools - 1 ;

  /* the total FP cost goes in the title, so it must be computed 
     before anything is printed */
  int totalFpCost = 0 ;
  for (int p = 0; p < nbPools; p++)
    {
      if (errorVec[p] != ERRORS_FP)
        continue ;

      int observed = getsigvalue(observedSig, p) ;
      if (observed == SIG_POS)
        {
          totalFpCost += DIST_POS ;
        }
      else if (observed == SIG_WEAK)
        {
          totalFpCost += DIST_WEAK ;
        }
      else
        {
          /* an FP pool is necessarily observed POS or WEAK */
          fprintf(stderr, "in printErrors: obsVal is %d, debug me!\n", observed) ;
          exit(1) ;
        }
    }

  /* false positives */
  fprintf(outStream, 
          "FALSE POSITIVE (POS OR WEAK) POOLS (absPoolNum, between 0 and %d): total cost %d\n",
          lastPool, totalFpCost) ;
  printPoolsWithVerdict(errorVec, nbPools, ERRORS_FP, outStream) ;

  /* false negatives */
  fprintf(outStream, 
          "\nFALSE NEGATIVE (NEG OR FAINT) POOLS (absPoolNum, between 0 and %d):\n",
          lastPool) ;
  printPoolsWithVerdict(errorVec, nbPools, ERRORS_FN, outStream) ;

  /* ambiguous pools */
  fprintf(outStream, 
          "\nAMBIGUOUS POOLS (absPoolNum, between 0 and %d):\n",
          lastPool) ;
  printPoolsWithVerdict(errorVec, nbPools, ERRORS_AMBI, outStream) ;

  fprintf(outStream, "\n") ;
}


/*!
  \brief build the merged deduced VV and the merged errors 
  corresponding to coherentSigs, and print them to 
  thisJob.outFileName; if there are several coherentSigs, 
  also print the VVs + erroneous pools (compared to the observedSig) 
  for each nearest coherent sig.
  Sigs in coherentSigs are decoded sigs (ie interpretations), so they
  should have only SIG_POS or SIG_NEG.
  observedSig is an observation, so it should only hold SIG_POS, SIG_WEAK, 
  SIG_FAINT and SIG_NEG values (no SIG_XXXCONF values).
  This function takes care of opening and closing the stream
  for outFileName.

  The output file is made of: a header (input file, design file, date, 
  costs in use); then either a "NO solutions" line or the merged VV and 
  merged errors; then, if there are several coherent sigs, one section 
  per sig; and finally the trailer line "___DONE___".

  Every failure is fatal (message on stderr, exit(1)): the output file 
  already exists or cannot be opened, the date cannot be obtained, the 
  results are inconsistent, or the output could not be written and 
  closed properly.

  \param thisJob provides n, nbPools and the design/input/output file names.
  \param observedSig the observation that was decoded.
  \param coherentSigs the decoding result (sigs, their number and distance).
  \param tabpool the pools.
*/
static void printDecodingResults(JobIdentReal* thisJob, signature* observedSig, 
                                 setOfSigs coherentSigs, MOT* tabpool)
{
  int n = thisJob->n ;
  int nbPools = thisJob->nbPools ;
  char* designFileName = thisJob->designFileName ;
  char* inFileName = thisJob->inFileName ;
  char* outFileName = thisJob->outFileName ;

  FILE* outStream ;
  
  /****************** preparatory work for I/O ******************/
  /**************************************************************/

  /* assume OUTDIR exists. It should be mkdir'ed in the main (doDecoding.c) */
  
  /* make sure the output file doesn't already exist: 
     being able to open it for reading means it is there */
  outStream = fopen(outFileName, "r");
  if (outStream != NULL)
    {
      fprintf(stderr,"In printDecodingResults: cannot write to %s, file already exists\n", outFileName);
      exit(1);
    }
  /* open stream for writing */
  outStream = fopen(outFileName, "w");
  if (outStream == NULL)
    {
      fprintf(stderr,"In printDecodingResults: cannot open %s for writing\n", outFileName);
      exit(1);
    }
  
  /********* examine coherentSigs and print merged VVs **********/
  /**************************************************************/

  int nbOfSigs = coherentSigs.nbOfSigs ;
  int distance = coherentSigs.distance ;

  /* find current date for header */
  time_t currentTime = time(NULL) ;
  if (currentTime == (time_t)-1)
    {
      fprintf(stderr, "in printDecodingResults, error calling time: time is unavailable on this system?\n") ;
      exit(1) ;
    }
  char myDate[DATELENGTH] ;
  if (NULL == ctime_r(&currentTime, myDate))
    {
      fprintf(stderr, "in printDecodingResults, cannot convert currentTime to string.\n") ;
      exit(1) ;
    }

  /* header of output file */
  fprintf(outStream, "decoding observed signature from file %s\n", inFileName) ;
  fprintf(outStream, "using design file %s\n", designFileName) ;
  fprintf(outStream, "decoded %s", myDate) ;
  /* NOTE: myDate ends with \n, no need to add another */
  fprintf(outStream, "using costs: NEG==%d, FAINT==%d, WEAK==%d, POS==%d\n\n",
          DIST_NEG, DIST_FAINT, DIST_WEAK, DIST_POS) ;

  /* build merged VV and errors */
  MOT* mergedDeducedVV = buildDeducedVV(coherentSigs, tabpool, n) ;

  if (mergedDeducedVV == NULL)
    { /* no solutions were found */
      fprintf(outStream, "NO solutions found upto distance %d\n\n", distance-1) ;
      
      if (nbOfSigs != 0) // sanity check
        {
          fprintf(stderr, 
                  "in printDecodingResults: mergedDeducedVV is NULL but nbOfSigs is %d (should be 0)! DEBUG ME!\n",
                  nbOfSigs) ;
          exit(1) ;
        }
    }

  /* else, print merged VV and corresponding merged errors */
  else
    {
      int* mergedErrors = mergeErrors(coherentSigs, observedSig) ;
      if (mergedErrors == NULL)
        {
          /* solutions according to mergedDeducedVV but not to mergedErrors! */
          fprintf(stderr, "in printDecodingResults, inconsistency between the 2 mergings!\n") ;
          exit(1) ;
        }

      fprintf(outStream, "%d solutions found, at distance %d\n\n", nbOfSigs, distance) ;
      
      printVV(mergedDeducedVV, observedSig, tabpool, n, outStream) ;
      printErrors(mergedErrors, observedSig, nbPools, outStream) ;

      /* discard mergedDeducedVV and mergedErrors */
      free(mergedDeducedVV) ;
      free(mergedErrors) ;
    }
  
  
  /******* print VV and fp's/fn's for each coherent ***********
   ******* sig if there are several ***************************/
  if (nbOfSigs > 1)
    {
      int sigNum ;
      for (sigNum = 0; sigNum < nbOfSigs; sigNum++)
        {
          signature* thisSig = coherentSigs.allSigs[sigNum] ;
          
          /* calculate corresponding VV */
          MOT* thisVV = solveSigSimple(tabpool, thisSig, n) ;
          /* we know thisSig is coherent because it went through buildDeducedVV... 
             but just in case: */
          if (thisVV == NULL)
            {
              fprintf(stderr, "in printDecodingResults, sig is not coherent, impossible!\n") ;
              exit(1) ;
            }
          /* print (solutions are numbered from 1 in the output) */
          fprintf(outStream, "\n#######################################################\n") ;
          fprintf(outStream, "SOLUTION NUMBER %d:\n\n", sigNum+1) ;
          
          printVV(thisVV, observedSig, tabpool, n, outStream) ;

          int* errorVec = findErrors(thisSig, observedSig) ;
          printErrors(errorVec, observedSig, nbPools, outStream) ;

          /* clean up */
          free(thisVV) ;
          free(errorVec) ;

          /* we could also (or instead) call writeSig to produce a
             set of "corrected sig files" that would be compatible
             with my current cgi web interface...
             but I'll skip that for now; it's probably easier to
             do that sort of thing in perl anyways. */
        }
    }
  
  
  /* print file trailer (useful for parsing) */
  fprintf(outStream, "\n___DONE___\n") ;

  /* The stream is buffered: a write error (disk full, I/O error...) may 
     only show up in the stream's error flag or when fclose flushes the 
     last buffer. Check both, so that a truncated result file never 
     goes unnoticed. The stream is closed in any case. */
  int writeFailed = (ferror(outStream) != 0) ;
  if (fclose(outStream) != 0)
    writeFailed = 1 ;
  if (writeFailed)
    {
      fprintf(stderr, "In printDecodingResults: error while writing to %s, results may be incomplete\n", outFileName) ;
      exit(1) ;
    }
}

/************************************************************************
 ******************* EXPORTED FUNCTIONS *********************************
 ************************************************************************/




/*
  Decode one real observed signature and write the results to the 
  output file named in thisJob: first the merged deduced positive and 
  ambiguous variables (merged over all nearest coherent sigs), then, 
  for each nearest coherent sig, its positives and ambis along with 
  the absolute pool numbers of its erroneous pools.

  The steps of one decoding are:
  - 1. build the pools from the design file;
  - 2. read the observed signature (format as produced by my cgi web script);
  - 3. solve that signature with the solver selected by mode, which 
       yields a set of nearest coherent signatures;
  - 4. hand everything to printDecodingResults, which builds the merged 
       deduced VV for this setOfSigs and writes it to file, followed by 
       the VVs + erroneous pools for each nearest coherent sig;
  - 5. release the pools, the observed sig and the set of sigs.

  We use the JobIdentReal structure from jobs.h to store most parameters.<br>
  NOTE: the output stream is opened and closed on behalf of this function 
  (by printDecodingResults), using the outFileName stored in thisJob.

  \param thisJob: holds all info relevant to current decoding job.
  \param mode: choose what solver you want to use. Current modes are:
         - 1: use solvexpNaive
         - 4: use solvexpClosure (findBestClosuresReal with Rec)
         - 5: use solvexpClosure (findBestClosuresReal with RecSubstracted)
         If unsure, use mode 5. 
         Any other value is fatal (message on stderr, exit(1)).
*/
void decodeObservedSig(JobIdentReal *thisJob, int mode)
{
  /* refuse unsupported modes before doing any work: 
     only 1, 4 and 5 are implemented */
  if ((mode != 1) && (mode != 4) && (mode != 5))
    {
      fprintf(stderr, "decodeObservedSig called with unsupported mode %d! Exiting now\n", mode) ;
      exit(1) ;
    }

  /* outFileName is not needed here, printDecodingResults reads it from thisJob */
  int n = thisJob->n ;
  int nbPools = thisJob->nbPools ;

  /* build the pools, then read and check the signature file */
  MOT* tabpool = buildPools(thisJob->designFileName, n, nbPools) ;
  signature* observedSig = readSig(thisJob->inFileName, nbPools) ;

  /* decode the observed sig with the requested solver */
  setOfSigs nearestSigs ;
  if (mode == 1)
    {
      nearestSigs = solvexpNaive(tabpool, observedSig, n) ;
    }
  else if ((mode == 4) || (mode == 5))
    {
      /* solvexpClosure's method is mode-1 (3 or 4); with these methods 
         it discards its nbOfErrors argument, so a bogus 0 is passed */
      nearestSigs = solvexpClosure(tabpool, observedSig, 0, mode - 1, n) ;
    }
  else
    {
      /* unreachable as long as the check at the top matches the modes handled here */
      fprintf(stderr, "in decodeObservedSig: mode is invalid, but was NOT checked at the beginning! FIX ME!\n") ;
      exit(1) ;
    }

  /* print results */
  printDecodingResults(thisJob, observedSig, nearestSigs, tabpool) ;

  /* clean up */
  free(tabpool) ;
  freeSig(observedSig) ;
  freeSetOfSigs(nearestSigs) ;
}

