/*---------------------------------------------------------------*
 *
 * File : signaIO.c
 * Author : NTM
 * Created : 19/09/06
 *
 *
 * Copyright (C) Nicolas Thierry-Mieg, 2006.
 *
 *
 * This file is part of InterPool, written by 
 * Nicolas Thierry-Mieg (CNRS, France) Nicolas.Thierry-Mieg@imag.fr
 *
 * InterPool is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * InterPool is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with InterPool; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *---------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h> /* isdigit */
#include <string.h> /* strcmp */
#include <time.h> /* time */

#include <expat.h> /* expat XML parser */

#include "types.h" /* bool */
#include "xml.h" /* xml tags */
#include "signa.h" /* signature, allocSig, set/getsigvalue */

#include "signaIO.h"

/* Size in bytes of the blocks read from the sig file and fed to expat.
   Also used as the initial size and the growth increment of the
   text accumulator (myCharData.text). */
#define BUFFSIZE        8192

/* Size of the buffer handed to ctime_r in writeSig
   (POSIX requires room for at least 26 bytes). */
#define DATELENGTH 30


/* Replace the #undef below by the commented-out #define to get
   parsing traces on stderr. */
//#define DEBUG
#undef DEBUG

/************************************************************************
 ******************* LOCAL FUNCTIONS ************************************
 ************************************************************************/

/////////////// DECLARATIONS ///////////////

/* State shared by the three expat handlers below; readSig registers
   one instance as the parser's user data. */
typedef struct
{
  int len ; /* number of chars accumulated in text for the current element,
	       or -1 if we are not inside an element holding pool numbers */
  int maxLen ; /* size (in bytes) of the memory allocated for text */
  char* text ; /* accumulated character data, 0-terminated by charHandler */
  signature* mySig ; /* signature being filled while parsing */
  int nbPools ; /* expected number of pools: parsed pool numbers are checked against it */
} myCharData ;


/* expat start-element callback: checks the version attribute of the
   signature element, and sets len to 0 (accumulate text) or -1 (ignore
   text) depending on the tag. data is a myCharData*. */
static void XMLCALL startTagHandler(void* data, const XML_Char* tag, const XML_Char** attr) ;

/* expat end-element callback: if text was being accumulated, parses it
   as a list of pool numbers and stores the value matching the tag for
   each of them. data is a myCharData*. */
static void XMLCALL endTagHandler(void* data, const XML_Char* tag) ;

/* expat character-data callback: appends the received chars to the
   accumulator when len is not -1. data is a myCharData*. */
static void XMLCALL charHandler(void* data, const XML_Char* str, int length) ;



//////////////// BODIES ///////////////////
/* expat start-element callback.
   data: the myCharData registered as user data.
   tag:  name of the element being opened.
   attr: NULL-terminated array of name/value pairs.

   On the signature element, the version attribute must be present and
   equal to SIG_V20, otherwise we die. For every element, len is set
   to 0 when the element holds pool numbers (POS, WEAK, FAINT or NEG),
   and to -1 otherwise so that its text gets ignored. */
static void XMLCALL startTagHandler(void* data, const XML_Char* tag, const XML_Char** attr)
{
  myCharData* state = (myCharData*)data ;

  if (strcmp(tag, SIGNATURE_TAG) == 0)
    {
      /* walk the name/value pairs until the version attribute shows up */
      const XML_Char** pair = attr ;
      while ((pair[0] != NULL) && (strcmp(pair[0], SIG_VERSION_TAG) != 0))
        pair += 2 ;

      if (pair[0] == NULL)
        {
          fprintf(stderr, "Signature file doesn't have version number! Dying.\n") ;
          exit(1) ;
        }
      if (strcmp(pair[1], SIG_V20) != 0)
        {
          fprintf(stderr,
                  "Signature file is wrong version (expecting %s, found %s)\n",
                  SIG_V20, pair[1]) ;
          exit(1) ;
        }
    }

  /* only the four value elements carry text worth saving;
     -1 tells charHandler to drop whatever text comes next */
  state->len = ( (strcmp(tag, POS_TAG) == 0)
                 || (strcmp(tag, WEAK_TAG) == 0)
                 || (strcmp(tag, FAINT_TAG) == 0)
                 || (strcmp(tag, NEG_TAG) == 0) ) ? 0 : -1 ;

#ifdef DEBUG
  fprintf(stderr, "new start tag %s, len is now %d\n", tag, state->len) ;
#endif
}


/* expat end-element callback.
   data: the myCharData registered as user data.
   tag:  name of the element being closed.

   If text was being accumulated (len != -1), it is parsed as a
   whitespace-separated list of pool numbers, and each listed pool
   gets the sig value matching the tag. We die if a pool number is
   out of range, if a pool appears twice, or if the text holds
   anything else than numbers and whitespace.

   pre-condition: myData->mySig must be initialized to SIG_POSCONF
   for every pool.
*/
static void XMLCALL endTagHandler(void* data, const XML_Char* tag)
{
  myCharData* myData = (myCharData*)data ;

  if (myData->len != -1)
    {
      /* An empty element (eg <POS></POS>) never triggers charHandler,
	 so text would still hold the previous element's content (or
	 uninitialized memory). Make sure text ends where len says. */
      myData->text[myData->len] = '\0' ;
    }

#ifdef DEBUG
  fprintf(stderr, "entering end tag %s, len is %d, text is %s\n", 
	  tag, myData->len, myData->text) ;
#endif

  if (myData->len == -1)
    /* we were not accumulating text, nothing to do */
    return ;

  int sigValue ;
  if (strcmp(tag, POS_TAG) == 0)
    sigValue = SIG_POS ;
  else if (strcmp(tag, WEAK_TAG) == 0)
    sigValue = SIG_WEAK ;
  else if (strcmp(tag, FAINT_TAG) == 0)
    sigValue = SIG_FAINT ;
  else if (strcmp(tag, NEG_TAG) == 0)
    sigValue = SIG_NEG ;
  else
    {
      /* we should never have myData->len != -1 with other tags! */
      fprintf(stderr, "in endTagHandler, len is %d but tag is %s\n",
	      myData->len, tag) ;
      exit(1) ;
    }

  const char* begin = myData->text ;
  while (1) 
    {
      char* end ;
      /* keep the value as a long until it is range-checked: casting
	 to int first could wrap a huge number into the valid range.
	 On overflow strtol saturates to LONG_MIN/LONG_MAX, which the
	 range checks below reject as well. */
      long pool = strtol(begin, &end, 10) ;
      if (begin == end)
	// there were no more digits
	break ;
      if (pool < 0)
	{
	  fprintf(stderr, "in readSig, pool number %ld is negative\n", pool) ;
	  exit(1) ;
	}
      if (pool >= myData->nbPools)
	{
	  fprintf(stderr, 
		  "in readSig, pool number %ld larger than nbPools (%d)\n",
		  pool, myData->nbPools) ;
	  exit(1) ;
	}
#ifdef DEBUG
      fprintf(stderr, "setting pool %ld to value %d\n", pool, sigValue) ;
#endif
      /* sanity check: each pool should have only one value.
	 Rely on the fact that mySig was initialized to SIG_POSCONF. */
      if (getsigvalue(myData->mySig, (int)pool) != SIG_POSCONF)
	{
	  fprintf(stderr, 
		  "in readSig, pool number %ld has two different values!\n",
		  pool) ;
	  exit(1) ;
	}
      setsigvalue(myData->mySig, (int)pool, sigValue) ;
      begin = end ;
    }

  /* strtol stopped: only trailing whitespace may remain, anything
     else is garbage that would otherwise silently hide the pools
     listed after it */
  while (isspace((unsigned char)*begin))
    begin++ ;
  if (*begin != '\0')
    {
      fprintf(stderr, 
	      "in readSig, unexpected character '%c' in the pool list of tag %s\n",
	      *begin, tag) ;
      exit(1) ;
    }

  /* OK, reset len to -1 */
  myData->len = -1 ;
}


/* expat character-data callback.
   data:   the myCharData registered as user data.
   str:    the chars received (NOT 0-terminated).
   length: number of chars in str.

   When we are inside an element holding pool numbers (len != -1),
   the chars are appended to myData->text, which is extended as
   needed and kept 0-terminated. Otherwise the chars are ignored.
   expat may deliver the text of a single element in several calls. */
static void XMLCALL charHandler(void* data, const XML_Char* str, int length)
{
  myCharData* myData = (myCharData*)data ;
#ifdef DEBUG
  fprintf(stderr, "entering char data, len is %d\n", myData->len) ;
#endif

  if (myData->len == -1)
    // we are not in a tag holding sig data, just ignore it
    return ;

  /* room needed: what we already have + the new chars + terminating \0 */
  int needed = myData->len + length + 1 ;
  if (needed > myData->maxLen)
    {
      /* extend myData->text, by as many BUFFSIZE steps as it takes:
	 a single step is not enough if length exceeds BUFFSIZE */
      int newMaxLen = myData->maxLen ;
      while (newMaxLen < needed)
	newMaxLen += BUFFSIZE ;

      /* don't overwrite text before knowing that realloc succeeded */
      char* newText = realloc(myData->text, newMaxLen) ;
      if (newText == NULL)
	{
	  fprintf(stderr, "in charHandler while parsing sig file, cannot extend myData->text\n") ;
	  exit(1) ;
	}
      myData->text = newText ;
      myData->maxLen = newMaxLen ;
    }

  if (length > 0)
    memcpy(myData->text + myData->len, str, (size_t)length) ;
  myData->len += length ;
  // add terminating \0
  myData->text[myData->len] = '\0' ;
}



/************************************************************************
 ******************* EXPORTED FUNCTIONS *********************************
 ************************************************************************/


/*!
  \brief Read an observation from a file in version 2.0;
  build and return the corresponding signature.

  The format for the file (version 2.0) is described by an XML dtd,
  available in a DTD subdir somewhere... (hopefully!)
*/
signature* readSig(char* sigfile, int nbPools)
{
  FILE* file = fopen(sigfile, "r") ;
  if (file==NULL)
    {
      fprintf(stderr, "in readSig, cannot open file %s\n", sigfile) ;
      exit(1) ;
    }

  signature* mySig = allocSig(nbPools) ;
  {
    /* allocSig inits to 0. we re-initialize to a dummy value 
       for sanity check later. We use SIG_POSCONF, given that 
       sigfiles cannot have any conflicts tagged.
       This dummy value is used to make sure every pool is
       present in the sigfile (at the end of this function),
       and also that no pool has two different values in the
       sigfile (in endTagHandler). */
    int i;
    for (i=0; i<nbPools; i++)
      setsigvalue(mySig, i, SIG_POSCONF);
  }

  /* buffer: for reading the file one block at a time */
  char buffer[BUFFSIZE] ;

  /* myData: used as an accumulator when reading the list of pools
     of a given value (eg POS).
     Allocate initially BUFFSIZE chars, but will grow if necessary. */
  myCharData* myData = (myCharData*)malloc(sizeof(myCharData)) ;
  if (myData==NULL)
    {
      fprintf(stderr, "in readSig, no memory for myData\n") ;
      exit(1) ;
    }
  myData->len = -1 ;
  myData->maxLen = BUFFSIZE ;
  myData->text = (char*)malloc(BUFFSIZE*sizeof(char)) ;
  if (myData->text == NULL)
    {
      fprintf(stderr, "in readSig, no memory for myData->text\n") ;
      exit(1) ;
    }
  myData->mySig = mySig ;
  myData->nbPools = nbPools ;

  XML_Parser parser = XML_ParserCreate(NULL) ;
  if (! parser) 
    {
      fprintf(stderr, "In readSig, couldn't allocate memory for parser\n") ;
      exit(1) ;
    }
  
  XML_SetUserData(parser, myData) ;
  XML_SetElementHandler(parser, startTagHandler, endTagHandler) ;
  XML_SetCharacterDataHandler(parser, charHandler) ;

  int done = 0 ;
  do
    {
      int len = fread(buffer, 1, BUFFSIZE, file) ;
      if (ferror(file))
	{
	  fprintf(stderr, "In readSig, read error\n") ;
	  exit(1) ;
	}
      done = feof(file) ;

      if (XML_Parse(parser, buffer, len, done) == XML_STATUS_ERROR)
	{
	  fprintf(stderr, "Parse error at line %lu:\n%s\n",
		  (unsigned long)XML_GetCurrentLineNumber(parser),
		  XML_ErrorString(XML_GetErrorCode(parser))) ;
	  /* cast to unsigned long because XML_GetCurrentLineNumber
	     returns various types depending on version */
	  exit(-1) ;
	}
    }
  while (! done) ;

  free(myData->text) ;
  free(myData) ;
  XML_ParserFree(parser) ;
  fclose(file) ;

  /* sanity check: every pool must have some value */
  {
    int pool ;
    for(pool = 0; pool < nbPools ; pool++)
      {
	if (getsigvalue(mySig, pool) == SIG_POSCONF)
	  {
	    fprintf(stderr, "in readSig, pool %d has no value!\n", pool) ;
	    exit(1) ;
	  }
      }
  }

  return(mySig) ;
}


/* Helper for readSigV1: check that value, fully read from sigfile, is
   a legal observed sig value, and save it in mySig for pool absPoolNum.
   Dies on an illegal value. */
static void saveSigV1Value(signature* mySig, int absPoolNum, int value, const char* sigfile)
{
  if ((value != SIG_NEG) && (value != SIG_POS) 
      && (value != SIG_FAINT) && (value != SIG_WEAK))
    {
      fprintf(stderr, "in readSigV1 with file %s, illegal sig value: %d\n", sigfile, value) ;
      exit(1) ;
    }
  setsigvalue(mySig, absPoolNum, value) ;
}


/*!
  \brief Read an observation from a file in the obsolete v1.x format;
  build and return the corresponding signature.

  Format for the file is:
  1. any empty line and any line starting with # is skipped (comments)
  2. each pool's signature value appears on a separate line (the
  acceptable values are SIG_NEG, SIG_FAINT, SIG_WEAK and SIG_POS, as 
  #defined in signa.h).
  They must appear in ascending absolute pool number, ie using STD the 
  first k values are those of layer 0, the next k values are layer 1, etc...

  The total number of values must be == nbPools: we die if the file
  holds too many or too few values. The last value is accepted even
  if it is not followed by a newline.

  \param sigfile name of the file to read.
  \param nbPools expected number of values in the file.
  \return the signature, obtained from allocSig.

  Any problem (file cannot be opened or read, unexpected char, illegal
  value, wrong number of values) is reported on stderr and terminates
  the program.
*/
signature* readSigV1(char* sigfile, int nbPools)
{
  FILE* file = fopen(sigfile, "r") ;
  if (file==NULL)
    {
      fprintf(stderr, "in readSigV1, cannot open file %s\n", sigfile) ;
      exit(1) ;
    }

  signature* mySig = allocSig(nbPools) ;

  /* fill mySig->sig */

  int absPoolNum=0 ; /* number of values saved so far == pool of the next value */
  int value = 0 ; /* value of current pool */
  int currentchar ;
  bool newLine = TRUE ; /* TRUE iff next char begins a new line */
  bool inComment = FALSE ; /* TRUE iff we are in an end-of-line comment */
  /* invariant: when both newLine and inComment are FALSE, we are in 
     the middle of a value (at least one digit was read) */
  
  while((currentchar = getc(file)) != EOF)
    {
      if (inComment)
	{
	  if (currentchar == '\n')
	    {
	      /* newline: current comment line is finished */
	      inComment = FALSE ;
	      newLine = TRUE ;
	    }
	  /* in any case, get the next char without further action */
	}

      else if (newLine)
	{
	  if (currentchar == '#')
	    {
	      /* beginning a comment line */
	      inComment = TRUE ;
	      newLine = FALSE ;
	    }
	  else if (currentchar == '\n')
	    {
	      /* empty line, just skip it */
	    }
	  else if (isdigit(currentchar))
	    {
	      /* first digit of a sig value */
	      /* make sure we don't already have nbPools values */
	      if (absPoolNum >= nbPools)
		{
		  fprintf(stderr, "in readSigV1, too many sig values in file %s\n",
			  sigfile) ;
		  exit(1) ;
		}
	      value = (int)(currentchar - '0') ;
	      newLine = FALSE ;
	    }
	  else
	    {
	      fprintf(stderr, 
		      "in readSigV1, unexpected char %c at the beginning of a line in file %s\n",
		      currentchar, sigfile) ;
	      exit(1) ;
	    }
	}
      
      else
	{
	  /* we are not in a comment line, and not at the beginning of a new line */
	  if (isdigit(currentchar))
	    {
	      /* next digit of a sig value */
	      value = value*10 + (int)(currentchar - '0') ;
	    }
	  else if (currentchar == '\n')
	    {
	      /* current value is fully read, check it and save */
	      saveSigV1Value(mySig, absPoolNum, value, sigfile) ;
	      newLine = TRUE ;
	      absPoolNum++ ;
	    }
	  else
	    {
	      fprintf(stderr, 
		      "in readSigV1, unexpected char %c in the middle of a non-comment line in file %s\n",
		      currentchar, sigfile) ;
	      exit(1) ;
	    }
	}
    }

  /* getc returns EOF on errors too: don't mistake one for end-of-file */
  if (ferror(file))
    {
      fprintf(stderr, "in readSigV1, read error on file %s\n", sigfile) ;
      exit(1) ;
    }

  if ((!newLine) && (!inComment))
    {
      /* the file ended in the middle of a value (no final newline):
	 that value is complete, don't lose it */
      saveSigV1Value(mySig, absPoolNum, value, sigfile) ;
      absPoolNum++ ;
    }
  
  fclose(file) ;

  /* "too many values" was caught while reading, catch "too few" here */
  if (absPoolNum != nbPools)
    {
      fprintf(stderr, 
	      "in readSigV1, found %d sig values in file %s but expected %d\n",
	      absPoolNum, sigfile, nbPools) ;
      exit(1) ;
    }

  return(mySig);
}


/*!
  \brief Save a signature to a file. Format is identical 
  to readSig (currently v2.0).
*/
void writeSig(char* sigfile, const signature* mySig)
{
  FILE* file;

  /* make sure the file doesn't already exist */
  file = fopen(sigfile, "r");
  if (file != NULL)
    {
      fprintf(stderr,"In writeSig: cannot write to %s, file already exists\n",sigfile);
      exit(1);
    }
  
  /* open file for writing */
  file = fopen(sigfile, "w");
  if (file == NULL)
    {
      fprintf(stderr,"In writeSig: cannot open %s for writing\n",sigfile);
      exit(1);
    }

  int nbPools = mySig->nbPools ;

  /* we first fill one vector of absPoolNums for each sig value */

  int* tabPOS = (int*)malloc(nbPools*sizeof(int));
  if(tabPOS==NULL)
    {
      fprintf(stderr,"in writeSig, no more memory for tabPOS\n");
      exit(1);
    }
  int* tabWEAK = (int*)malloc(nbPools*sizeof(int));
  if(tabWEAK==NULL)
    {
      fprintf(stderr,"in writeSig, no more memory for tabWEAK\n");
      exit(1);
    }
  int* tabFAINT = (int*)malloc(nbPools*sizeof(int));
  if(tabFAINT==NULL)
    {
      fprintf(stderr,"in writeSig, no more memory for tabFAINT\n");
      exit(1);
    }
  int* tabNEG = (int*)malloc(nbPools*sizeof(int));
  if(tabNEG==NULL)
    {
      fprintf(stderr,"in writeSig, no more memory for tabNEG\n");
      exit(1);
    }

  int nbPOS = 0;
  int nbWEAK = 0;
  int nbFAINT = 0;
  int nbNEG = 0;
  {
    int absPoolNum ;
    for (absPoolNum=0; absPoolNum < nbPools; absPoolNum++)
      {
	int value = getsigvalue(mySig, absPoolNum) ;

	if (value==SIG_POS)
	  tabPOS[nbPOS++] = absPoolNum ;
	else if (value==SIG_WEAK)
	  tabWEAK[nbWEAK++]=absPoolNum;
	else if (value==SIG_FAINT)
	  tabFAINT[nbFAINT++]=absPoolNum;
	else if (value==SIG_NEG)
	  tabNEG[nbNEG++]=absPoolNum;
	else
	  {
	    // conflicting values cannot appear in sigfiles
	    fprintf(stderr, "in writeSig: illegal value in mySig: %d\n", value) ;
	    exit(1) ;
	  }
      }


    /* find local date */
    time_t currentTime = time(NULL) ;
    if (currentTime == (time_t)-1)
      {
	fprintf(stderr, "in writeSig, error calling time: time is unavailable on this system?\n") ;
	exit(1) ;
      }
    char myDate[DATELENGTH] ;
    if (NULL == ctime_r(&currentTime, myDate))
      {
	fprintf(stderr, "in writeSig, cannot convert currentTime to string.\n") ;
	exit(1) ;
      }


    /* now fill the file */

    fprintf(file, "<%s %s=\"%s\">\n", SIGNATURE_TAG, SIG_VERSION_TAG, SIG_V20);
    fprintf(file, "   <%s>\n",HEADER_TAG);
    fprintf(file, "      <%s>", DESCRIPTION_TAG) ;
    fprintf(file, "         signature file generated by writeSig, from the InterPool package\n") ;
    fprintf(file, "      </%s>", DESCRIPTION_TAG) ;
    fprintf(file, "      <%s>\n", DATE_TAG) ;
    /* note: myDate ends with \n, no need to add it */
    fprintf(file, "         %s", myDate) ;
    fprintf(file, "      </%s>\n", DATE_TAG) ;
    fprintf(file, "   </%s>\n",HEADER_TAG);
    /* end of header */

    fprintf(file, "   <%s>\n\n", VALUES_TAG);

    if (nbPOS!=0)
      {
	fprintf(file, "      <%s>\n         ", POS_TAG);
	int i ;
	for (i=0 ; i<nbPOS; i++)
	  {
	    fprintf(file, "%i ", tabPOS[i]);
	  }
	fprintf(file, "\n      </%s>\n\n", POS_TAG);
      }
    if (nbWEAK!=0)
      {
	fprintf(file, "      <%s>\n         ", WEAK_TAG);
	int i ;
	for (i=0 ; i<nbWEAK; i++)
	  {
	    fprintf(file, "%i ", tabWEAK[i]);
	  }
	fprintf(file, "\n      </%s>\n\n", WEAK_TAG);
      }
    if (nbFAINT!=0)
      {
	fprintf(file, "      <%s>\n         ", FAINT_TAG);
	int i ;
	for (i=0 ; i<nbFAINT; i++)
	  {
	    fprintf(file, "%i ", tabFAINT[i]);
	  }
	fprintf(file, "\n      </%s>\n\n", FAINT_TAG);
      }
    if (nbNEG!=0)
      {
	fprintf(file, "      <%s>\n         ", NEG_TAG);
	int i ;
	for (i=0 ; i<nbNEG; i++)
	  {
	    fprintf(file, "%i ", tabNEG[i]);
	  }
	fprintf(file, "\n      </%s>\n\n", NEG_TAG);
      }

    fprintf(file, "   </%s>\n\n", VALUES_TAG);
    fprintf(file, "</%s>\n", SIGNATURE_TAG);
  }

  fclose(file);

  free(tabPOS);
  free(tabWEAK);
  free(tabFAINT);
  free(tabNEG);
}

