/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef PHRED_TABLE_READER_H
#define PHRED_TABLE_READER_H

#include "454/qual/PhredTableBase.h"

/// Constructs a table of thresholds and associated quality scores.
///
/// \class PhredTableReader
///
/// Note that by default the constructor will create a compacted
/// table for fast lookup if one does not already exist.
///
/// Uses the Phred algorithm (Ewing B, Green P. Genome Res. 1998
/// Mar;8(3):186-94).  Once the table is constructed, can be used to
/// look up bases with the appropriate predictor scores and get the
/// quality.  Space efficiency is not ideal--each tuple stores its
/// entire list of indices--but time efficiency is fairly good. In
/// particular, the algorithm used is linear in the number of bins,
/// making large tables (millions of entries) feasible.
///
/// If data are outside the range of the phredtable, we assign them
/// to the closest bin in that table (that is, out of range for a 
/// parameter means we give that parameter the maximum value in the
/// table). This is done with CapValues().
/// We keep track of these overflows with Overflow() and
/// ResetOverflow().
///
/// There are now several Lookup() methods. The fastest one is
/// CompactLookup(). VeryFastLookup() is only partially implemented 
/// because we figured out that CompactLookup() was faster.
///
/// Added Compact() and CompactLookup() to accelerate lookup even more:
/// the idea is to do all the lookups for all possible bin combinations
/// once and store that, so that we only have to do a single indexing into 
/// a long lookup table for each base. In fact, we need to transform the bins
/// vector into a ulonglong with FindTuple, so it's a bit more involved than
/// that, but it still shortens lookup time a lot.
/// 
/// One might want to cut this up into  several different classes 
/// with different lookup methods, but this works for now.


class PhredTableReader: public PhredTableBase {

  friend class PhredTableWriter;

public:
  /// Static lookup counters. They are only declared here; what exactly
  /// they count is determined by the implementation file.
  /// NOTE(review): the names suggest outer/inner loop iteration counts
  /// for the plain and Fast lookups -- confirm against the .cpp.
  static longlong outerLookupCounter;
  static longlong innerLookupCounter;
  static longlong outerFLookupCounter;
  static longlong innerFLookupCounter;

  /// Constant defined out of line.
  /// NOTE(review): the name suggests an encoding used for the entries of
  /// compactedTable_ -- confirm against the .cpp.
  static const unsigned char ZERO_BECOMES_CHAR_0;

private:
  friend class TestPhredTable;
  vec<vec<int> > firstBin_; ///< used in FastLookup
  vec< vec< vec<int> > > validLines_; ///< used in VeryFastLookup
  vec<unsigned char> compactedTable_; ///< used in CompactLookup
  vec<float> maxBin_; ///< Per-predictor upper limit; CapValues() clamps inputs to it.
  /// Overflow counter for data outside training range.
  /// Mutable because it is incremented from the const method CapValues().
  mutable ulonglong overflow_;

public:


  /// Read in a PhredTable.
  ///
  /// \param fname        name of the file holding the table.
  /// \param verboseSetup verbosity flag for the setup phase.
  /// \param h            predictor parameter handler; defaults to a freshly
  ///                     allocated NullPredictorHandler.
  ///
  /// NOTE(review): an earlier version of this comment said "the second
  /// parameter should be set to true if we have just recreated a
  /// previously existing table, otherwise the lookups will be messed up".
  /// That does not match the current second parameter (verboseSetup) and
  /// looks stale -- confirm before relying on it.
  /// NOTE(review): the default handler is created with `new` in the default
  /// argument; who deletes it is not visible in this header -- confirm
  /// ownership.
  PhredTableReader(const String & fname, bool verboseSetup = false,
                   PredictorParameterHandler * h = new NullPredictorHandler());

  virtual ~PhredTableReader();

  /// Reset the overflow counter to 0.
  void ResetOverflow() { overflow_ = 0; }

  /// Read the overflow counter.
  ulonglong Overflow() const { return overflow_; }

  /// Number of predictors derived from the table: the size of the first
  /// row of m_table minus 2.
  /// NOTE(review): indexes m_table[0] without checking that the table is
  /// non-empty -- presumably guaranteed by construction; confirm.
  virtual int npredictors() const { return m_table[0].size() - 2; }

  /// Alias for CompactLookup.
  int Lookup(vec<float> & values) const { return CompactLookup(values); }

  // NOTE(review): all lookup methods take `values` by non-const reference;
  // presumably the values are clamped in place via CapValues() -- confirm
  // in the implementation file.

  /// Look up a set of values for the error predictors in the lookup table.
  /// Returns the quality score.
  int SlowLookup(vec<float> & values) const;

  /// Accelerate lookup by figuring out the best possible table line
  /// for each predictor and starting our search at the worst of those.
  int FastLookup(vec<float> & values) const;

  /// Accelerate lookup by presorting all the possible lines for each
  /// predictor bin value into a vector and doing a merge-search for a
  /// line that is present in all of the vectors (presumably the
  /// per-predictor lists held in validLines_).
  /// Its helper FindNextLines() is documented as not implemented.
  int VeryFastLookup(vec<float> & values) const;

  /// Read the qual from a 1-D vector.
  int CompactLookup(vec<float> & values)  const;

  /// Change an old-style table into one with header line and compacted table.
  ///
  /// Reads the old-format table from fname, builds a reader from it,
  /// compacts it, and then writes the table followed by the compacted
  /// table back to the same file name.
  static void RetrofitOldTable(const String & fname) {
    PhredTableBase base;
    {
      // Inner scope: the input stream is gone before fname is reopened
      // for output below.
      Ifstream(is, fname);
      base.OldReadPhredTable(is);
    }
    PhredTableReader reader(base);
    reader.Compact();
    Ofstream(os, fname);
    reader.PrintPhredTable(os);
    reader.WriteCompacted(os);
  }


private:


  /// Private constructor for use by PhredTableWriter.
  /// Simply copies the internal table and initializes.
  PhredTableReader(const PhredTableBase & base);

  /// Save the table into a one-D vector.
  void Compact();

  /// Write out compactedTable_ in a mostly human-readable format.
  void WriteCompacted(ostream & os);

  /// Read in compactedTable_ from our mostly human-readable format.
  /// If the read fails (e.g. a new file), compact ourselves.
  void ReadCompacted(istream & is);

  /// Initialize the various internal vectors from m_table.
  void Init(bool verboseSetup = false);

  /// Initialize binMapper_ from m_table.
  void InitBinMapper();

  /// Check that the reader has been set up: there is at least one
  /// predictor and binMapper_ has exactly one entry per predictor.
  /// Const because it is a pure query, so const methods may call it too.
  /// NOTE(review): the previous comment referred to "SetupFastLookup",
  /// which is not declared in this header.
  bool isInited() const {
    return (npredictors() > 0 && npredictors() == binMapper_.isize());
  }


  /// Ensure that the values are no larger than maxBin_.
  /// Each value above its limit is replaced by the limit and counted
  /// once in overflow_.
  /// `values` is indexed for every entry of maxBin_ without a size check,
  /// so it must hold at least maxBin_.isize() elements.
  void CapValues(vec<float> & values) const {
    for (int i=0; i != maxBin_.isize(); ++i) {
      if (values[i] > maxBin_[i]) {
        values[i] = maxBin_[i];
        ++overflow_;
      }
    }
  }

  /// Helper for VeryFastLookup, not implemented.
  void FindNextLines(vec<int> & nextlines,
                     const vec<vec<vec<int> > > & validLines,
                     const vec<unsigned int> & bins) const;


};


#endif //PHRED_TABLE_READER_H
