/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef QUAL_INFO_H
#define QUAL_INFO_H

#include <limits>

#include "lookup/LookAlign.h"
#include "Basevector.h"
#include "Qualvector.h"
#include "math/HoInterval.h"

///Store and print information about how well quality scores work.
///
/// \class QualInfo
///
/// The internal vectors are indexed by quality score from 0 to 99.
class QualInfo {
private:
  vec<longlong > count_; ///< Total number of bases of this quality
  vec<longlong > errors_; ///< Total number of errors at this quality.
  longlong m_, i_, d_;

  struct Bin { 
    int start, end;
    longlong count, errs;

    Bin(int s, int e, longlong c, longlong err):
      start(s), end(e), count(c), errs(err) {}

    double ErrorRate() const { 
      return 0 == count ? numeric_limits<double>::quiet_NaN() 
	: double(errs)/count; 
    }

    bool operator<(const Bin & o) {
      return ErrorRate() < o.ErrorRate();
    }
  };


public:

  enum errType { MIS = 0x1, INS= 0x2, DEL= 0x4, ALL = MIS | INS | DEL }; 

  QualInfo(): count_(100,0), errors_(100,0), m_(0), i_(0), d_(0) {}

  void AddMutation() { ++m_; }
  void AddInsertion() { ++i_; }
  void AddDeletion() { ++d_; }
  void PrintErrors( ostream & os);

  ///Add one piece of data: qual score and whether base was correct.
  void AddInfo(unsigned char qual, bool correct) {
    ++count_[qual];
    if (!correct) ++errors_[qual];
  }

  ///Print summary of quals, grouping into bins of size binsize.
  ///Produces columns as described by BinHeaders().
  void PrintBins(ostream & out, int binsize = 1, int lowest=0,
      int highest = 40);

  ///Print three columns: predicted q, proportion of total bases, actual q
  ///Puts text headers in the first line. Intended as easily matlab-readable.
  void DataForGraph(ostream & out, int lowest=0, int highest = 40);

  /// Return the amount of variance explained by an ideal regression line.
  /// Note that this can be negative if the trend of the data is far from
  /// the regression line!
  float R2ideal(int start=0, int end=-1);

  /// Return the proportion of bases with actual quality >= q
  /// Do not count any bases for which there is a higher bin with quality
  /// lower than q.
  float ActualQGreater(int q=30);

  /// Column headings for PrintBins.
  const char * BinHeaders() const {
    return "bin\tcount\terrors\terrRate\tqual\t95%CI\tCumqual\t%>=qual\n";
  }

  ///Add data from an alignment to this QualInfo.
  void AddAlignment(const look_align_plus & la, 
		    const basevector & read,
		    const qualvector & qual,
		    const basevector & ref,
		    errType etype = ALL,
		    int EXCLUDE_ENDS = 3);

private:
  ///helper for PrintBins: print data for one bin.
  void PrintBin(ostream & out, int start, int end, longlong totcount,
		longlong toterrs, double cumulativePercent, double cumulativeQual);

  /// Calculate the percent of bases at or above a certain quality.
  void CumulativePercent(const vec<Bin> & bins, vec<double>& cumPercent, vec<double>&cumQual);
  
};

#endif //QUAL_INFO_H
