/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "CoreTools.h"
#include "454/BaseFlow.h"
#include "math/Functions.h"
#include "VecTemplate.h"
#include "454/flowdata/FlowOrder.h"
#include "454/flowdata/FlowVectorOps.h"
#include "454/flowdata/FlowDataBroker.h"
#include "454/flowdata/FlowVector.h"
#include "454/BaseCaller.h"
#include "FetchReads.h"
#include "assembly/PrefetchStrategy.h"
#include "PrintAlignment.h"

#include <string>

//========================== BaseFlowOrder methods =======================//
/// Stream insertion for a BaseFlowOrder: writes whatever
/// b.FlowOrderString() returns to out and hands the stream back.
ostream &operator<< (ostream &out, const BaseFlowOrder &b)
{
  return out << b.FlowOrderString();
}

/// Stream extraction for a BaseFlowOrder: extracts a String from in and,
/// only when the stream is still good afterwards, rebuilds b from it.
/// After a failed extraction b is left untouched.
istream &operator>> (istream &in, BaseFlowOrder &b)
{
  String text;
  if (in >> text) {
    b = BaseFlowOrder(text);
  }
  return in;
}

/// Two flow orders are equal when all four entries of perm agree.
bool operator==(const BaseFlowOrder &a, const BaseFlowOrder &b)
{
  for (int slot = 0; slot < 4; ++slot) {
    if (!(a.perm[slot] == b.perm[slot]))
      return false;
  }
  return true;
}

//========================== BaseFlowRef methods =======================//

/// Definition of the static member BaseFlowRef::FLOW_ORDER, constructed from
/// the string "TACG" (presumably the default flow order -- confirm against
/// the class declaration).
BaseFlowOrder BaseFlowRef::FLOW_ORDER("TACG");

/// Build a self-owned BaseFlowRef from a flow order and a basevector.
/// The bases are copied. The flow values are the vector returned by
/// FlowsFromBases(), and the base-to-flow indices are the int vector that
/// FlowsFromBases() fills through its last argument.
BaseFlowRef::BaseFlowRef(const BaseFlowOrder & flowOrder, const basevector & bases):
  F_(new serfvec<float>),
  B_(new basevector(bases)),
  M_(new serfvec<flow_index>),
  self_owned_(True),
  floworder_(flowOrder)
{
  vec<int> flowOfBase;
  vec<float> flowValues =
    FlowsFromBases(FlowOrder(flowOrder.FlowOrderString()), bases, 0.0,
                   &flowOfBase);

  // Append element by element to the freshly allocated (empty) vectors.
  for (vec<float>::iterator fv = flowValues.begin(); fv != flowValues.end(); ++fv)
    F_->push_back(*fv);
  for (vec<int>::iterator idx = flowOfBase.begin(); idx != flowOfBase.end(); ++idx)
    M_->push_back(*idx);
}

/// Build a self-owned BaseFlowRef from a flow order and a string of bases,
/// giving every run of identical consecutive bases one flow whose height is
/// the run length.
///
/// The flow vector starts at the flow that reads bases[0]: flows of the
/// order that come before it are skipped, not stored as zeros. Between two
/// runs, every flow of the order that does not match the next base is stored
/// as a 0. M() receives, for each base, the index of the flow it belongs to.
///
/// An empty bases string produces empty flow and index vectors.
BaseFlowRef::BaseFlowRef(const FlowOrder & order, const String & bases):
  F_(new serfvec<float>),
  B_(new basevector),
  M_(new serfvec<flow_index>),
  self_owned_(True),
  floworder_(order.CycleString())
{
  B().SetFromString(bases);
  const int N = bases.size();
  // With no bases there is no bases[0] to look for in the flow order, so
  // the whole walk is skipped and F() / M() stay empty.
  if (N > 0) {
    int o=-1;
    // Skip o ahead to the first base in bases without adding flows
    while (order[++o] != bases[0])
      ;
    // Now process normally: one outer iteration per run of identical bases.
    for (int b=0; b!= N;) {
      int height=1;
      // Every base of the run maps to the flow about to be appended.
      M().push_back(F().size());
      while (++b != N && bases[b] == order[o]) {
        ++height;
        M().push_back(F().size());
      }
      F().push_back(height);
      if (b!=N) {
        // Advance through the order to the next base, recording a zero
        // flow for each position that does not match it.
        while (order[++o] != bases[b])
          F().push_back(0);
      }
    }
  }
  AssertEq(M().size(), longlong(bases.size()));
}

void BaseFlowRef::MakeIdealFrom( const BaseFlowRef& x ) {
  DeepCopy(x);
  for (int i=0; i != F_->size(); ++i) {
    (*F_)[i] = round((*F_)[i]);
  }
}

int BaseFlowRef::Dots() {
  int dots = 0;
  for (int i=1; i < M_->size(); ++i) {
    if ( ((*M_)[i] - (*M_)[i-1]) > 3 ) ++dots;
  } 
  return dots;
}
  
void BaseFlowRef::SetToReverseOf( const BaseFlowRef& x ) {    
  ForceAssert(x.floworder_.IsCyclicPalindrome());
  floworder_ = x.floworder_;
  F( ).SetToReverseOf( x.F( ) );
  B( ).ReverseComplement( x.B( ) );
  B( ).resize( NBases( ) );
  M( ).resize( NBases( ) );
  for ( int i = 0; i < NBases( ); i++ )
    M( NBases( ) - i - 1 ) = NFlows( ) - x.M(i) - 1;
}

/// Fill quals with one byte per base, derived from FlowAtBase(): a flow
/// below 10 is stored as flow*10, anything else as the flow itself, in both
/// cases converted to unsigned char.
void BaseFlowRef::FlowAsQuals(serfvec<unsigned char> & quals) {
  quals.resize(B_->size());
  const int nquals = quals.size();
  for (int pos = 0; pos < nquals; ++pos) {
    const float flow = FlowAtBase(pos);
    if (flow < 10)
      quals[pos] = static_cast<unsigned char>(flow*10);
    else
      quals[pos] = static_cast<unsigned char>(flow);
  }
}
  
/// Convenience overload: returns the vector produced by
/// FlowAsQuals(serfvec<unsigned char>&) by value.
serfvec<unsigned char> BaseFlowRef::FlowAsQuals() {
  serfvec<unsigned char> quals;
  this->FlowAsQuals(quals);
  return quals;
}

/// Print the bases aligned against themselves via PrintVisualAlignment,
/// passing FlowAsQuals() as the accompanying per-base values.
void BaseFlowRef::PrintAligned(ostream & os) {
  // A fake alignment of the bases to themselves, starting at 0/0.
  // NOTE(review): the two one-element vectors are presumably align's
  // (gaps, lengths) arguments -- confirm against align's constructor.
  const avector<int> zeroEntry(1, 0);
  const avector<int> wholeLength(1, B_->size());
  align selfAlign(0, 0, zeroEntry, wholeLength);
  PrintVisualAlignment(False, os, *B_, *B_, selfAlign, this->FlowAsQuals());
}


/// Remove the first KEY_BASES bases (the key) together with the flows that
/// belong only to them.
///
/// If the first base after the key sits in the same flow as the last key
/// base, that flow is kept and its value is reduced by 1 instead of being
/// dropped. Base-to-flow indices are shifted down by the number of flows
/// removed.
///
/// A KEY_BASES of zero or less leaves the object unchanged. Otherwise the
/// read must hold more than KEY_BASES bases (ForceAssert).
void BaseFlowRef::TrimKey(int KEY_BASES)
{
  // Nothing to trim; without this guard (*M_)[KEY_BASES-1] below would
  // index before the first base.
  if (KEY_BASES <= 0) return;

  ForceAssertGt(M_->size(), KEY_BASES);
  int lastkeyflow = (*M_)[KEY_BASES-1];
  int firstflow = 1+lastkeyflow;
  if ((*M_)[KEY_BASES]==lastkeyflow) { // read begins with same base ending key
    (*F_)[lastkeyflow] -= 1.0f; // so remove it from the corresponding flow
    firstflow=lastkeyflow; // and don't get rid of that flow
  }
  // Flowvector: Shorten by firstflow flows
  copy(F_->begin()+firstflow, F_->end(), F_->begin());
  F_->resize(F_->size()-firstflow);
  // basevector: Shorten by KEY_BASES bases
  basevector temp = *B_;
  B_->SetToSubOf(temp, KEY_BASES, temp.size()-KEY_BASES);
  // Correspondence: Shorten by KEY_BASES and reduce flow indices by firstflow
  for (int i=KEY_BASES; i<M_->size(); ++i)
    (*M_)[i-KEY_BASES] = (*M_)[i]-firstflow;
  M_->resize(M_->size()-KEY_BASES);
}

	
/// Stream insertion for a BaseFlowRef: a header with the offset, then the
/// bases (via basevector::Print), then the flow vector and the index vector.
ostream &operator<< (ostream &out, const BaseFlowRef &b)
{
  const int offset = int(b.Offset());
  out << "BaseFlowRef with offset " << offset << " and bases ";
  b.B().Print(out);
  out << b.F();
  out << b.M();
  return out;
}

/// Two BaseFlowRefs are equal when flow order, flows, bases and
/// base-to-flow indices all compare equal (checked in that order).
bool operator== (const BaseFlowRef &a, const BaseFlowRef &b)
{
  if (!(a.floworder_ == b.floworder_)) return false;
  if (!(*(a.F_) == *(b.F_))) return false;
  if (!(*(a.B_) == *(b.B_))) return false;
  return (*(a.M_) == *(b.M_));
}

/// True when a and b have the same flow order and bases, and for every base
/// the flow value that base maps to is identical in both. Flows that no
/// base maps to are not compared.
bool EqualPositiveFlows (const BaseFlowRef &a, const BaseFlowRef &b)
{
  if (!(a.floworder_ == b.floworder_))
    return false;
  if (!(*(a.B_) == *(b.B_)))
    return false;

  const unsigned int nbases = a.B_->size();
  for (unsigned int base = 0; base < nbases; ++base) {
    const float flowA = (*a.F_)[(*a.M_)[base]];
    const float flowB = (*b.F_)[(*b.M_)[base]];
    if (flowA != flowB)
      return false;
  }
  return true;
}

/// Append zero flows until Offset() plus the number of flows is a multiple
/// of 4.
void BaseFlowRef::PadEndToFlowCycle()
{
  for (;;) {
    if ((Offset() + F().size()) % 4 == 0)
      break;
    F().push_back(0);
  }
}

/// Add zeros to the beginning of the flow vector as needed to make Offset()==0.
void BaseFlowRef::PadBeginToFlowCycle()
{
  const int pad = Offset();
  // basevector: no change 
  // Correspondence: Increase flow indices by pad 
  for (int i=0; i<M_->size(); ++i) {
    (*M_)[i] += pad;
  }
  // Flowvector: Add pad flows, all 0
  F_->resize(F_->size()+pad);
  copy_backward(F_->begin(), F_->end()-pad, F_->end());
  fill(F_->begin(), F_->begin()+pad, 0);
}

float Score( const BaseFlowRef& x1, const BaseFlowRef& x2, 
	     int pos1, int pos2, Bool verbose, int length) {
  longlong end1 = length + longlong(pos1);//make sure to avoid overflow!
  end1 = min(longlong(x1.NFlows()), end1+1);
  ForceAssertLt( pos1, x1.NFlows() );
  ForceAssertLt( pos2, x2.NFlows() );
  ForceAssert(x1.Floworder() == x2.Floworder());
  float score = 0.0;
  int count = 0;
  float err;
  for ( ; pos1< end1 && pos2<x2.NFlows(); ++pos1, ++pos2) {
    err = Abs( x1.F(pos1) - x2.F(pos2) );    
    score += err;
    ++count;
    if (verbose) {    
      PRINT6( pos1, pos2, x1.F(pos1), x2.F(pos2), end1, err );
    }
  }
  score /= float(count);
  if (verbose) PRINT4(score, length, x1.NFlows(), x2.NFlows());
  return score;    
}


//========================== VecBaseFlow methods =======================//


void VecBaseFlow::Reverse( ) {    
  for ( int i = 0; i < N( ); i++ ) {    
    F(i).ReverseMe( );
    B(i).ReverseComplement( );
    static serfvec<flow_index> Mi;
    Mi = M(i);
    for ( int j = 0; j < (int) B(i).size( ); j++ )
      M(i)[ B(i).size( ) - j - 1 ] = F(i).size( ) - Mi[j] - 1;    
    }    
}

/// Load flows, bases and base-to-flow indices from h + ".flowf",
/// h + ".fastb" and h + ".indexb". The flow order is read from
/// h + ".floworder" only when that regular file exists; otherwise
/// floworder_ keeps its current value (backwards compatibility).
void VecBaseFlow::Read( const String& h ) {
  F_.ReadAll( h + ".flowf" );
  B_.ReadAll( h + ".fastb" );
  M_.ReadAll( h + ".indexb" );

  const String floworderName = h + ".floworder";
  if (!IsRegularFile(floworderName))
    return;
  Ifstream(in, floworderName );
  in >> floworder_;
}

void VecBaseFlow::Write( const String& h ) {    
  F_.WriteAll( h + ".flowf" );
  B_.WriteAll( h + ".fastb" );
  M_.WriteAll( h + ".indexb" );   
  Ofstream(out, h + ".floworder" );
  out << floworder_;
}

///Helper method for ReadFromFastaAndFlowb
int FindIn(const basevector & in, const basevector & query) {
  string inString = in.ToString();
  string queryString = query.ToString();
  string::size_type pos = inString.find(queryString);
  if (pos == string::npos) return -1;
  else return int(pos);
}
	
      
void VecBaseFlow::ReadFromFastaAndFlowb(const String & fasta, 
					const String & flowbPrefix,
					const String & otherFlowbPrefix) {
  /// Get the flows and index them by name.
  FlowDataBroker broker;
  broker.ReadData(flowbPrefix);
  broker.RemovePFlows();
  floworder_ = BaseFlowOrder(broker.GetFlowOrder().CycleString());

  FlowDataBroker * otherflows = 0;
  if (!otherFlowbPrefix.empty()) {
    otherflows = new FlowDataBroker();
    otherflows->ReadData(otherFlowbPrefix);
    otherflows->RemovePFlows();
  }

  int MAX_FLOWS=broker.size();
  vec<int> rindex(MAX_FLOWS, -1);
  String name;
  for (int i=0; i != MAX_FLOWS; ++i) {
    name = broker.GetFlowRead(i).GetName().SafeBefore("_");
    int n = name.Int();
    if (n >= rindex.isize()) {
      rindex.resize(n+100, -1);
    }
    rindex[n] = i;
  }

  ///Get the reads into our B_ member
  vecqualvector q;
  vecString names;
  FetchReads(B_,q,&names, 0, fasta);
  const int N = B_.size();
  F_.Reserve(N*broker.GetFlowRead(0).GetFlowVector().size(), N);
  M_.Reserve(B_.rawsize() , N);

  ///Get the flows into our F_ member: find the FlowVector by name.
  vec<int> correspondence;
  const int KEY_SIZE = 4;
  BaseCaller caller(broker.GetFlowOrder());
  for (int i=0; i != N; ++i) {
    BaseFlowRef bf;
    int index = rindex[names[i].Int()];
    AssertGe(index, 0);
    const FlowVector & fvec = broker.GetFlowRead(index).GetFlowVector();
    caller.GetBasesWithTrivialCutoffs(fvec, *bf.B_, &correspondence);
    //check to see that the above did not fail:
    if (bf.B_->size() == 0) {
      // Didn't find the read bases at the called bases
	B_[i].Print(cout, "B_[i]");
	FatalErr("Called empty basevector at index" 
		 << i << " name " << names[i] 
		 << "\n did you use a normalized flowb file?");
    }

    copy(correspondence.begin(), correspondence.begin() + bf.B_->size(), 
	 back_inserter(*bf.M_));

    /// replace the flows used for calling with other flows. This is done
    /// usually only for research purposes.
    if (otherflows) {
      FlowRead r = otherflows->GetFlowRead(names[i]);
      if (!r.IsValid()) { //probably lost this while basecalling,
	//set everything to 0.
	B_[i].Setsize(0);
	F_.push_back_reserve(serfvec<float>());
	M_.push_back_reserve(serfvec<flow_index>());
	continue;
      }
      const FlowVector & otherfvec = r.GetFlowVector();
      for (int j=0; j <= bf.M_->back(); ++j) {
	bf.F_->push_back(otherfvec(j));
      }
    }
    else {
      for (int j=0; j <= bf.M_->back(); ++j) {
	bf.F_->push_back(fvec(j));
      }
    }
    bf.TrimKey(KEY_SIZE);    //trim off the key

    // Check basevector from calling against basevector from fasta file.
    // If latter has been trimmed, remove the flows & correspondences.
    if (*bf.B_ != B_[i]) {
      int start = FindIn(*bf.B_, B_[i]);
      if (0 != start) {
	// Didn't find the read bases at the called bases
	bf.B_->Print(cout, "bases");
	B_[i].Print(cout, "B_[i]");
	FatalErr("they should match at index" << i << " name " << names[i]);
      }
      // Resize to the length from the read bases
      bf.M_->resize(B_[i].size());
      bf.F_->resize(bf.M_->back()+1);
      // Now check to see if some of the bases on the very last flow have 
      // been trimmed, and adjust the flows accordingly.
      if (round(bf.F_->back()) > 1) {
	int height = int(round(bf.F_->back()));
	int end = B_[i].size()-1;
	const char endbase = B_[i][end];
	while (endbase == B_[i][end] && end > 0) --end;
	if (height != B_[i].isize()-end-1) {
	  bf.F_->back() -=  height - (B_[i].size()-end-1);
	}
      }
    }
    // Add to accumulating results
    F_.push_back_reserve(*bf.F_);
    M_.push_back_reserve(*bf.M_);
  }
}

/// Read a fasta file and construct ideal flows
void VecBaseFlow::ConstructFromFasta(const String & fasta)
{
  // Load bases
  FetchReads( B_, 0, fasta );
  for (int i=0; i<B_.size(); ++i) {
    BaseFlowRef tmp(floworder_, B_[i]);
    F_.push_back_reserve(tmp.F());
    M_.push_back_reserve(tmp.M());
  }
}


//========================== AlignBaseFlow methods =======================//

float AlignBaseFlow::Score( const VecBaseFlow& v, const VecBaseFlow& vr ) {    
  int id1 = Id1( ), id2 = Id2( );
  int offset = Offset( );
  int low = Max( 1, offset );
  int high = Min( v.NFlows(id1) - 1, v.NFlows(id2) - 1 + offset );
  ForceAssertLt( low, high );
  float score = 0.0;
  if ( !Rc2( ) )
    {    for ( int i = low; i < high; i++ )
      score += Abs( v.F(id1)[i] - v.F(id2)[i-offset] );    }
  else
    {    for ( int i = low; i < high; i++ )
      score += Abs( v.F(id1)[i] - vr.F(id2)[i-offset] );    }
  return score / float(high-low);    
}

int AlignBaseFlow::Overlap( int nflows1, int nflows2 ) {    
  return Min( nflows1, nflows2 + (int) Offset( ) ) 
    - Max( 0, (int) Offset( ) );    
}

// Expands the project's BINARY2_DEF macro for AlignBaseFlow.
// NOTE(review): presumably this emits the binary read/write definitions
// for the type -- confirm against the macro's definition.
BINARY2_DEF( AlignBaseFlow );

