/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////


#ifndef EVALUATE_FLOW_H
#define EVALUATE_FLOW_H

/// This class helps to evaluate the "goodness" of a flow.
///
/// \class EvaluateFlow
///
/// The class has been reduced to have only static functions 
/// that do not depend on anything in 454/flowdata now that we
/// are using the SFF format files.

/*

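A calculation of the cutoff based on Mahalanobis distances shows that the
cutoff between two distributions is the mean of their centers, each weighted
by the opposite distribution's sigma (it is the point x at which
(x - mu[0]) / sigma[0] == (mu[1] - x) / sigma[1]):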

cutoff = (sigma[1]*mu[0] + sigma[0] * mu[1]) / (sigma[0]+sigma[1]);
*/


#include "Vec.h"
#include "math/Functions.h"

#include <functional>

class EvaluateFlow {
 public:
  /// Number of passes of means adjustment.
  static const int MAX_PASSES = 10;

  /// Value for the case where the means start off in the wrong order.
  /// Only declared here; the definition is not visible in this header.
  static const int BAD_KEY;

  /// Value for the case where the means end up in the wrong order.
  /// Only declared here; the definition is not visible in this header.
  static const int BAD_MEANS;

  /// Computes a score from a set of normal distributions.
  /// NOTE(review): the implementation is not visible in this header; the
  /// name suggests it measures how much the distributions overlap -- confirm.
  static float OverlapScore(const vec<normal_distribution>& dists);

  /// First step of the 1-D clustering expectation-maximization iteration.
  /// Given a set of clusters defined by cutoffs in the data, compute their
  /// means and standard deviations.
  template <typename ForwardIter>
  static void CalcDistributions(ForwardIter start, ForwardIter end,
                                vec<normal_distribution>& distributions,
                                const vec<double>& cutoffs);

  /// Second step of the 1-D clustering expectation-maximization iteration.
  /// Given distributions, calculate the next set of cutoffs.
  static void ComputeCutoffs(const vec<normal_distribution>& distributions,
                             vec<double>& cutoffs);

 private:
  /// A vector needs at least this many elements to estimate means or stdevs.
  static const int MIN_DATA;

  /// The mean standard error of the change in the means vector must be less
  /// than this number times the mean of the key.
  static const double MEANS_CHANGE_THRESHOLD;
};

/// First step of the 1-D clustering expectation-maximization iteration:
/// estimate one normal distribution per cluster.
///
/// Each value in [begin, end) is assigned to the first index j (scanning
/// j = 0, 1, ...) for which value < cutoffs[j]. A value that is not below
/// any cutoff is not counted in any cluster. For every cluster the mean and
/// the population standard deviation (divisor n, not n - 1) of its members
/// are written to distributions[j]; a cluster without members gets
/// mu_ = sigma_ = 0.
///
/// \param begin          start of the values to cluster (read as double).
/// \param end            one past the last value.
/// \param distributions  output; resized to cutoffs.size() and overwritten.
/// \param cutoffs        exclusive upper bound of each cluster, tested in
///                       index order.
template<class ForwardIter>
void EvaluateFlow::CalcDistributions(ForwardIter begin, ForwardIter end, 
                                     vec<normal_distribution> & distributions, 
                                     const vec<double> & cutoffs) {
  const int S = cutoffs.size();
  distributions.resize(S);
  vec<int> counts(S,0);
  vec<double> sum(S,0.0), sumsq(S,0.0);

  // Accumulate count, sum and sum of squares for the cluster of each value.
  for (ForwardIter iter = begin; iter != end; ++iter) {
    const double x = *iter;
    for (int j = 0; j != S; ++j) {
      if (x < cutoffs[j]) {
        sum[j] += x;
        sumsq[j] += x*x;
        ++counts[j];
        break; // only the first matching cluster receives the value
      }
    }
  }

  for (int i = 0; i != S; ++i) {
    if (counts[i]) {
      distributions[i].mu_ = sum[i] / counts[i];
      // E[x^2] - mu^2 can come out slightly negative through floating-point
      // cancellation when the members are (almost) identical; taking sqrt of
      // that would store NaN, so a negative variance is treated as zero.
      const double variance = sumsq[i] / counts[i] - 
                              distributions[i].mu_ * distributions[i].mu_;
      distributions[i].sigma_ = (variance < 0.0) ? 0.0 : sqrt(variance);
    } else {
      distributions[i].mu_ = distributions[i].sigma_ = 0.0;
    }
  }
}

#endif //EVALUATE_FLOW_H
