/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

/// \file SffInfo.cc This class contains and gives access to a table
/// of information about flows.  See SffInfo.h.

#include "454/sff/SffInfo.h"
#include "CoreTools.h"
#include "454/flowdata/AnalysisInfo.h"
#include "454/EvaluateFlow.h"
#include "ParseSet.h"
#include "454/sff/SffTypes.h"
#include "454/sff/SffRead.h"
#include "454/qual/PhredTableReader.h"
#include "Pair.h"


#include <cmath>
#include <sstream>
#include <algorithm>

using namespace sff;

// Default values of the predictor settings.  These are static members, so a
// single set of values is shared by all users of SffPredictorParameters;
// SetPredictorParameters() overwrites them from "name value" pairs.
int SffPredictorParameters::cafieValue_ = 0;/// Selects what SffRead::PushCafie() pushes.
int SffPredictorParameters::otherWidth_ = 10;/// Window width used by SffRead::PushOther().
int SffPredictorParameters::otherType_ = 2;/// Selects what SffRead::PushOther() pushes.
int SffPredictorParameters::flowalign_ = 0;/// Selects the definition used by SffRead::CacheFlowAlign().
int SffPredictorParameters::flowalignRadius_ = 2;/// Mean-window radius in SffRead::CacheFlowAlign() (flowalign_ == 2).
int SffPredictorParameters::flowalignMaxRadius_ = 1;/// See SffRead::Max3FlowAlign();
int SffPredictorParameters::posType_ = 0;/// See SffRead::PosForQuality();
int SffPredictorParameters::posStart_ = 35;/// See SffRead::PosForQuality();

void SffPredictorParameters::SetFromFile(const String & fname) {
  Ifstream(is, fname);
  String params;
  params.Slurp(is);
  SetPredictorParameters(params);
}
  
/// Parse whitespace-separated "name value" pairs from \p params and store
/// each value in the matching static setting.  Names are matched
/// case-insensitively (both tokens are lower-cased before use).
/// Calls FatalErr on an unknown parameter name, and also on a trailing
/// parameter name that has no value (previously such a name was silently
/// ignored).
void SffPredictorParameters::SetPredictorParameters(const String & params) {
  istringstream is(params);
  String parameter, value;
  // Extract the two tokens separately so that "name without value" can be
  // told apart from a clean end of input.
  while (is >> parameter) {
    if (!(is >> value)) {
      FatalErr("Missing value for parameter " << parameter);
    }
    parameter.ToLower();
    value.ToLower();
    if ("cafie" == parameter) { cafieValue_ = value.Int(); }
    else if ("otherwidth" == parameter) { otherWidth_ = value.Int(); }
    else if ("othertype" == parameter) { otherType_ = value.Int(); }
    else if ("flowalign" == parameter) { flowalign_ = value.Int(); }
    else if ("flowalignradius" == parameter) { flowalignRadius_ = value.Int(); }
    else if ("flowalignmaxradius" == parameter) { flowalignMaxRadius_ = value.Int(); }
    else if ("postype" == parameter) { posType_ = value.Int(); }
    else if ("posstart" == parameter) { posStart_ = value.Int(); }
    else FatalErr("Unknown parameter type " << parameter);
  }
}

/// Serialize the current settings as tab-separated "name<TAB>value<TAB>"
/// pairs, in a fixed order.  The names differ from those accepted by
/// SetPredictorParameters() only in letter case, which that function ignores.
String SffPredictorParameters::GetPredictorParameters() const {
  ostringstream out;
  out << "cafie" << "\t" << cafieValue_ << "\t";
  out << "otherwidth" << "\t" << otherWidth_ << "\t";
  out << "othertype" << "\t" << otherType_ << "\t";
  out << "flowalign" << "\t" << flowalign_ << "\t";
  out << "flowalignRadius" << "\t" << flowalignRadius_ << "\t";
  out << "flowalignMaxRadius" << "\t" << flowalignMaxRadius_ << "\t";
  out << "postype" << "\t" << posType_ << "\t";
  out << "posstart" << "\t" << posStart_ << "\t";
  return out.str();
}

SffRead::SffRead(SffInfo & info, int index): index_(index), info_(info)
{
  // Cache pointers to the per-read tables owned by info, so that the
  // accessors of this class do not need to look them up again.

  // Variable-length, per-read data.
  vbases = &info.GetMutableBasevecVector(SffInfo::BASES);
  vquals = &info.GetMutableQualvecVector(SffInfo::QUALS);
  vname = &info.GetMutableStringVector(SffInfo::NAME);
  vflows = &info.GetMutableShortvecVector(SffInfo::FLOWS);
  vcorrespondence = &info.GetMutableShortvecVector(SffInfo::CORR);

  // One clip position per read.
  vclipQualLeft = &info.GetMutableShortVector(SffInfo::CLIP_QUAL_LEFT);
  vclipQualRight = &info.GetMutableShortVector(SffInfo::CLIP_QUAL_RIGHT);
  vclipAdapLeft = &info.GetMutableShortVector(SffInfo::CLIP_ADAP_LEFT);
  vclipAdapRight = &info.GetMutableShortVector(SffInfo::CLIP_ADAP_RIGHT);

  // Debugging aid (disabled): dump the size of every table.
#if 0
  PRINT(vbases->size());
  PRINT(vquals->size());
  PRINT(vname->size());
  PRINT(vflows->size());
  PRINT(vcorrespondence->size());
  PRINT(vclipQualLeft->isize());
  PRINT(vclipQualRight->isize());
  PRINT(vclipAdapLeft->isize());
  PRINT(vclipAdapRight->isize());
#endif

  // Every table must hold exactly size() entries.
  ForceAssertEq(longlong(size()), vbases->size());
  ForceAssertEq(longlong(size()), vquals->size());
  ForceAssertEq(longlong(size()), vname->size());
  ForceAssertEq(longlong(size()), vflows->size());
  ForceAssertEq(longlong(size()), vcorrespondence->size());
  ForceAssertEq(size(), vclipQualLeft->size());
  ForceAssertEq(size(), vclipQualRight->size());
  ForceAssertEq(size(), vclipAdapLeft->size());
  ForceAssertEq(size(), vclipAdapRight->size());
}

/// Score how well separated the signal distributions of the flows in
/// [start, endflow) are.
/// \param start     index of the first flow to use.
/// \param endflow   one past the last flow to use; 0 means "all flows".
/// \param maxPasses number of refinement passes; each pass recomputes the
///                  distributions from the current cutoffs and then the
///                  cutoffs from the new distributions.
/// \return EvaluateFlow::BAD_MEANS as soon as two consecutive distributions
///         do not have strictly increasing means or one of them has a zero
///         sigma; otherwise EvaluateFlow::OverlapScore() of the final
///         distributions.
float SffRead::SeparationScore(unsigned int start, unsigned int endflow,
    unsigned int maxPasses) {
  if (0 == endflow) endflow = Flows().size();//default value
  AssertLt(start, endflow);
  AssertLe(longlong(endflow), Flows().size());
  // Initial cutoffs between the distributions; refined on every pass.
  vec<double> cutoffs;
  cutoffs.push_back(50, 150);
  vec<normal_distribution> dists;
  for (unsigned int pass=0; pass != maxPasses; ++pass) {
    EvaluateFlow::CalcDistributions(Flows().begin() + start, 
        Flows().begin() + endflow, 
        dists, cutoffs);
    //check that the distributions found make sense.
    for (int j=0; j < dists.isize()-1; ++j) {
      if (dists[j].mu_ >= dists[j+1].mu_ 
          || 0 == dists[j].sigma_ 
          || 0 == dists[j+1].sigma_)
        return EvaluateFlow::BAD_MEANS;
    }
    EvaluateFlow::ComputeCutoffs(dists, cutoffs);
  }
  return EvaluateFlow::OverlapScore(dists);
}

void SffRead::CacheFlowAlign() {
  flowErrors_.resize(Flows().size());
  for (int i=0; i != Flows().size(); ++i) {
    flowErrors_[i] = (50 - abs(50 - (Flows()[i] % 100) ) ) / 100.0;
  }

  const int B = Bases().size();
  flowAlignCache_.resize(B);

  if (SffPredictorParameters::flowalign_==2) { // Really different definition of local flow align
    vec<float> tmp(flowErrors_.size());
    MeanWindow(flowErrors_.begin(), flowErrors_.end(), tmp.begin(),
	       SffPredictorParameters::flowalignRadius_);
    for (int b=0; b != B; ++b) {
      flowAlignCache_[b] = tmp[Corr()[b]];
    }
  } else {
    int startBase, endBase, startFlow, endFlow;
    for (int b=0; b != B; ++b) {
      startBase = max(0,b-1); // initial base positions are neighbors
      endBase = min(b+1,B-1);
      if (SffPredictorParameters::flowalign_==1) { // if variant definition:
	// Linear search for bases different from current base in both directions 
	while (startBase>0 && Bases()[startBase]==Bases()[b])
	  --startBase;
	while (endBase<B-1 && Bases()[endBase]==Bases()[b])
	  ++endBase;
      }
      startFlow = Corr()[startBase];
      endFlow = 1 + Corr()[endBase];
      AssertGt(endFlow, startFlow);
      float sum = accumulate(flowErrors_.begin() + startFlow,
			     flowErrors_.begin() + endFlow,
			     0.0);
      flowAlignCache_[b] = sum/(endFlow-startFlow);
    }
  }
  //PRINT(flowErrors_);
  //PRINT(flowAlignCache_);
}

/// Append the predictor(s) selected by SffPredictorParameters::cafieValue_
/// for \p base to \p values:
///   0: Ie(base)                      1: CarryForward(base)
///   2: their sum                     3: their maximum
///   4: both, Ie first then CarryForward
/// Any other setting is a fatal error.
void SffRead::PushCafie(vec<float> & values, int base) {
  const int mode = SffPredictorParameters::cafieValue_;
  if (0 == mode) {
    values.push_back(Ie(base));
  } else if (1 == mode) {
    values.push_back(CarryForward(base));
  } else if (2 == mode) {
    values.push_back(Ie(base) + CarryForward(base));
  } else if (3 == mode) {
    values.push_back(max(Ie(base),CarryForward(base)));
  } else if (4 == mode) {
    values.push_back(Ie(base));
    values.push_back(CarryForward(base));
  } else {
    FatalErr("Unknown type in SffRead::PushCafie: " 
             << SffPredictorParameters::cafieValue_);
  }
}

/// Append the optional extra predictor selected by
/// SffPredictorParameters::otherType_ for \p base to \p values:
///   0: nothing
///   1: MaxFlowAlign(base, otherWidth_)
///   2: MeanFlowAlign(base, otherWidth_)
/// Any other setting is a fatal error.
void SffRead::PushOther(vec<float> & values, int base) {
  const int type = SffPredictorParameters::otherType_;
  if (0 == type) {
    return;
  }
  if (1 == type) {
    values.push_back(MaxFlowAlign(base, SffPredictorParameters::otherWidth_));
  } else if (2 == type) {
    values.push_back(MeanFlowAlign(base, SffPredictorParameters::otherWidth_));
  } else {
    FatalErr("Unknown type in SffRead::PushOther: " 
             << SffPredictorParameters::otherType_);
  }
}

/// Replace the contents of \p values with the predictor vector for \p base.
/// Order: PosForQuality, Max3FlowAlign, negated separation score, Homopol,
/// then whatever PushCafie() and PushOther() append.
void SffRead::GetPredictors(vec<float> & values, int base) {
  // The separation score does not depend on the base: compute it on the
  // first call and reuse the cached (negated) value afterwards.
  if (sepCache_.empty()) {
    const float separation = SeparationScore(FlowClips().first,
                                             FlowClips().second);
    sepCache_.push_back(-separation);
  }

  values.clear();
  values.push_back(PosForQuality(base));
  values.push_back(Max3FlowAlign(base));
  values.push_back(sepCache_[0]);
  values.push_back(Homopol(base));
  PushCafie(values, base);
  PushOther(values, base);
}

/// Recompute the quality of every base of this read from the table \p p.
/// For each base the predictor vector from GetPredictors() is looked up
/// with p.CompactLookup(), SUBTRACT is subtracted from the result, and the
/// value is clamped to at most MAX and then to at least MIN.
/// If the clip interval of the read is empty, all qualities are set to 1.
///
/// The subtraction and clamping are done in int: doing them in unsigned char
/// would wrap around whenever the looked-up quality is smaller than SUBTRACT,
/// turning a very low quality into MAX instead of MIN.
void SffRead::SetQualsFrom(const PhredTableReader & p, unsigned char MIN, 
                           unsigned char MAX, int SUBTRACT) {
  AssertEq(Quals().size(), Bases().isize());

  //If the read is clipped to nothing, set all quals to 1.
  pair<int,int> c=Clips();
  if (c.first == c.second) {
    fill(Quals().begin(), Quals().end(), (unsigned char)(1));
    return;
  }

  vec<float> values;
  for (int b=0; b != Bases().isize(); ++b) {
    GetPredictors(values, b);
    // Narrow the lookup result to unsigned char first (as before), then
    // widen to int so the subtraction cannot wrap.
    int q = static_cast<unsigned char>(p.CompactLookup(values));
    q -= SUBTRACT;
    //copy(values.begin(), values.end(), ostream_iterator<float>(cout, " "));
    //PRINT(int(q));
    q = min(q, int(MAX));
    q = max(q, int(MIN));
    // q now lies between two unsigned char bounds, so this cannot truncate.
    Quals()[b] = static_cast<unsigned char>(q);
#ifdef FIRST_100
    if (Index() >= 100) exit(0);
    if (b >= Clips().first && b < Clips().second) {
      cout << b << " " << as_base(Bases()[b]) << " ";
      copy(values.begin(), values.end(), ostream_iterator<float>(cout," "));
      cout << "\n";
    }
#endif
  }
}
    
/// Translate all four clip positions of this read (adapter right/left,
/// quality right/left) so that they refer to the bases described by
/// \p newCorr.  Each clip is converted independently by FixClip().
void SffRead::FixClips(const serfvec<unsigned short> & newCorr) {
  // Adapter clips.
  FixClip(ClipAdapRight(), newCorr);
  FixClip(ClipAdapLeft(), newCorr);
  // Quality clips.
  FixClip(ClipQualRight(), newCorr);
  FixClip(ClipQualLeft(), newCorr);
}

void SffRead::FixClip( short & clip, const serfvec<unsigned short> & newCorr) {
  if (sff::NO_CLIP == clip) return;
  if (Corr().size() == clip) { clip = newCorr.size(); return; }
  int flow = Corr()[clip];
  int firstBaseInFlow = lower_bound(Corr().begin(), Corr().end(), flow)
    - Corr().begin();
  int baseInHomopol = clip - firstBaseInFlow;
  int newbase = lower_bound(newCorr.begin(), newCorr.end(), flow) 
    - newCorr.begin();
  //if newbase is at the end, then put clip at the end.
  // This can happen if we have cut
  //a lot of Ns off the end of the basevector, for example.
  if (newCorr.size() <= newbase) { clip = newCorr.size(); return; }

  //PRINT5(flow,firstBaseInFlow,baseInHomopol,newbase,clip);
  //if (newbase - 1 > 0) PRINT(newCorr[newbase -1]);
  //PRINT(newCorr[newbase]);
  //if (newbase + 1 < newCorr.size()) PRINT(newCorr[newbase +1]);
  clip = newbase + baseInHomopol; 
}
		     
		     

/// Largest FlowAlign() value over the bases within \p width positions of
/// \p base (window clamped to the read); never less than 0.
float SffRead::MaxFlowAlign(int base, int width) {
  const int start = max(0, base - width);
  const int end = min(Bases().isize(), base + width + 1);
  AssertLt(start, end);
  float ret = 0;
  int pos = start;
  while (pos != end) {
    const float current = FlowAlign(pos);
    if (ret < current) ret = current;
    ++pos;
  }
  return ret;
}


/// Mean of FlowAlign() over the bases within \p width positions of \p base
/// (window clamped to the read).
float SffRead::MeanFlowAlign(int base, int width) {
  const int start = max(0, base - width);
  const int end = min(Bases().isize(), base + width + 1);
  AssertLt(start, end);
  const int count = end - start;
  float total = 0;
  for (int offset = 0; offset != count; ++offset) {
    total += FlowAlign(start + offset);
  }
  return total / count;
}


/// Print a read: its name, its bases, then a table of flow values with four
/// flows per row.  The first column of each row is the index of the row's
/// first flow.  Flows beyond the last complete group of four are not printed.
ostream & operator<<(ostream & os, SffRead & r) {
  os << r.Name() << "\n"; 
  r.Bases().Print(os);
  os << "\n";

  const int BASES=4;
  const int CYCLES = r.Flows().size() / BASES;
  os << "cycle\tT\tA\tC\tG\n";
  for (int cycle = 0; cycle != CYCLES; ++cycle) {
    const int firstFlow = cycle * BASES;
    os << firstFlow << "\t";
    // Each value is written as a float followed by a tab.
    for (int flow = firstFlow; flow != firstFlow + BASES; ++flow) {
      os << static_cast<float>(r.Flows()[flow]) << "\t";
    }
    os << "\n";
  }
  return os;
}


