/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#include "454/qual/PhredTableBase.h"
#include "math/Functions.h"

/// Constructs a table of thresholds and associated quality scores.
///
/// \class PhredTableWriter
///
/// Uses the Phred algorithm (Ewing B, Green P. Genome Res. 1998
/// Mar;8(3):186-94).  Once the table is constructed, it can be used to
/// look up bases with the appropriate predictor scores and get the
/// quality.  Space efficiency is not ideal--each tuple stores its
/// entire list of indices--but time efficiency is fairly good. In
/// particular, the algorithm used is linear in the number of bins,
/// making large tables (millions of entries) feasible.
///
///
/// Note that there is no good reason why these methods are written
/// inline, now that the class is stable; we should move them to a .cc
/// file.  

class PhredTableWriter: public PhredTableBase {

private:
  friend class TestPhredTable;
  vec<const vec<float> *> predictors; ///< predictor information
  ///< correct and wrong for individual predictors
  vec<vec<pair<int,int> > > predProjection_; 
  vec<vec<float> > thresholds; ///< threshold information
  vec<String> predNames; ///< names for predictor columns
  const int MIN_BASES; ///< minimum number of bases per bin.

public:

  class Tuple;

  PhredTableWriter(int minBases = 5000): MIN_BASES(minBases) {}

  virtual ~PhredTableWriter();

  /// Number of predictors calculated from thresholds vector.
  virtual int npredictors() const { return thresholds.size(); }

  ///Write a complete PhredTable, with compacted vector at the end.
  void WritePhredTable(const String & filename, PredictorParameterHandler * h =
		       new NullPredictorHandler()) const;

  /** Run the Phred algorithm to make a table of useful threshold values.

      \param predictors: 2-D matrix of predictor values, each column
      (represented as a pointer to a vec<float>) corresponding to one
      predictor.  Number of predictors must equal size of thresholds
      vector.
      
      \param thresholds: For each predictor, a vector of the
      thresholds chosen.  The number of thresholds does not need to be
      equal for all predictors.  The thresholds within each vec will
      get sorted and uniqued.
      
      \param incorrect: For each sample, a vector holding 0 if
      correct, something else otherwise.  Length must equal the length
      of each predictor column.
  */
  void MakePhredTable(const vec<float> & incorrect);

  /// Add a continuous predictor with nThresholds thresholds.
  void addPredictor(const vec<float> &newpredictor, int nThresholds,
		    const String & name = "")
  {
    findThresholds(newpredictor, nThresholds);
    cout << "Using " << thresholds.back().size() << " bins ";
    predictors.push_back(&newpredictor);
    predNames.push_back(name);
  }

  /// Add a continuous predictor with predetermined thresholds.  
  void addPredictor(const vec<float> &newpredictor,
		    const vec<float> &threshold,
		    const String & name = "")
  {
    thresholds.push_back(threshold);
    cout << "Using " << thresholds.back().size() << " bins ";
    predictors.push_back(&newpredictor);
    predNames.push_back(name);
  }

  /// Add a discrete predictor bo be binned into targetBins bins.
  void addDiscretePredictor(const vec<float> &newpredictor, int targetBins,
			    const String & name = "");

  /// Print out the error rates along each individual predictor
  void PrintPredErrorRates(ostream & os) const;

private:

  /// Subtract the bins at or below o from x.
  void subtractIfNotLE(Tuple &x, const Tuple &o, const vec<Tuple> &v) const;

  /// Zero x if it is below o.
  void zeroIfLE(Tuple &x, const Tuple &o) const ;

  ///Load data from predictors and incorrect into the appropriate Tuple.
  void LoadCorrectWrong(vec<Tuple> & allCombos,
			const vec<float> & incorrect) const;

  /// Project the n-dimensional table onto each predictor. 
  void Project(vec<Tuple> & allCombos) ;

  /// Initialize binMapper_ from the thresholds information.
  void InitBinMapper();

  ///Create all Tuples of thresholds, in the order implied by 
  /// FindIndices and FindTuple
  void CreateCombos(vec<vec<float> > & thresholds,
		    vec<Tuple> & allCombos);


  /// Return the row number of the best tuple.
  /// That's the one with the highest qual score and, in case of ties, 
  /// the highest number of bases. If that is still a tie, take the one
  /// with the highest sum of indices.
  int FindBestRow(vec<Tuple> & allCombos) const;

  ///Sum up all the correct and wrong bases "under" each Tuple.
  ///"Under" means that the bin containing those bases must have
  ///thresholds all of which are equal to or lower than the thresholds
  ///of the Tuple of interest. So we also count the bases in the Tuple
  ///of interest.  We use the principle of inclusion and exclusion to
  ///accelerate the computation.
  void CalculateSums(vec<Tuple> & allCombos) const;

  void PrintAllCombos(ostream &out, const vec<Tuple > & allCombos) const {
    copy(allCombos.begin(), allCombos.end(),
	 ostream_iterator<Tuple>(out,"\n"));
  }

  void findThresholds(const vec<float> &newpredictor, int nThresholds);


public: 

  /**Helper class for managing the tuples of thresholds.
     \class Tuple

     Represents an entry in several n-dimensional arrays.  Remembers
     its indices, the number of correct and wrong bases in that bin,
     and the sum of correct and wrong bases in that bin and all bins
     lower to it, where lower means that each of the thresholds is
     not greater, as in operator<.

     Invariants: The size (number of indices) of the tuple never changes.
     If the indices are changed the various data values are set to 0.
  */
  ///Key method is operator< to help find positions quickly for 
  ///loading the data.
  class Tuple {
    friend class TestPhredTable;
    vec<unsigned int> m_indices;

  public:
    int correct; ///< in this bin: should not change
    int wrong; ///< in this bin: should not change
    int sumCorrect;///< in this bin and all below it: changes often
    int sumWrong;///< in this bin and all below it: changes often

    explicit Tuple(unsigned int n) : m_indices(n) { Clear(); }

    Tuple(const vec<unsigned int> & indices) : m_indices(indices.size())
    {  SetIndices(indices.begin(), indices.end()); }

    template <typename FwdIt>
    Tuple(FwdIt first, FwdIt last) : m_indices(distance(first, last)) {
      SetIndices(first, last);
    }
    
    ///Set threshold vector and set all counts to 0.
    void SetIndices(const vec<unsigned int> & indices) {
      SetIndices(indices.begin(), indices.end());
    }

    ///Set threshold vector and set all counts to 0.
    template <typename FwdIt>
    void SetIndices(FwdIt first, FwdIt last) {
      AssertEq(distance(first, last), longlong(size()));
      copy(first, last, m_indices.begin());
      ClearData();
    }

    /// Clear both indices and counts
    void Clear() {
      fill(m_indices.begin(), m_indices.end(), 0);
      ClearData();
    }      

    /// Clear counts only
    void ClearData() {
      correct = wrong = sumCorrect = sumWrong = 0;
    }

    /// Length of indices vector
    unsigned int size() const { return m_indices.size(); }

    /// Indices vector
    const vec<unsigned int> & indices() const { return m_indices; }

    /// Index i
    unsigned int operator[](int i) const {
      return m_indices[i];
    }

    /// Index i, mutable
    unsigned int & operator[](int i) {
      return m_indices[i];
    }

    /// Sum of indices of the tuple.  Used for tiebreaking in FindBestRow.
    unsigned int sumIndices() const {
      return accumulate(m_indices.begin(), m_indices.end(), 0);
    }

    ///Compare indices.  Note that this is not a complete ordering!
    bool operator<(const Tuple & o) const {
      return ((*this)<=o) && !((*this)==o); 
    }

    ///Compare indices. Note that this is not a complete ordering!
    bool operator<=(const Tuple & o) const {
      AssertEq(size(), o.size());
      for (unsigned int i=0; i != size(); ++i) {
	if (m_indices[i] > o.m_indices[i]) return false;
      }
      return true;
    }

    ///Compare indices. Note that this is not a complete ordering!
    bool operator>(const Tuple & o) const {
      return (o < *this);
    }

    ///Compare indices for exact match.
    bool operator==(const Tuple & o) const {
      AssertEq(size(), o.size());
      return (m_indices == o.m_indices);
    }

    int QualScore() const {
      return ::QualScore(sumCorrect, sumWrong);
    }
    
    int SumBases() const {
      return sumCorrect + sumWrong;
    }

    friend ostream & operator<< (ostream & os, const Tuple & t) {
      copy(t.m_indices.begin(), t.m_indices.end(),
	   ostream_iterator<int>(os," "));
      os << ": " << t.correct << " " << t.wrong << " " 
	 << t.sumCorrect << " " << t.sumWrong;
      if (t.SumBases()>0)
	os << " " << t.QualScore();
      return os;
    }

  };

/***** End of inner class Tuple ***********************/


};
