// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology


/// \file BaseFlow.h Defines the BaseFlow family of classes,
/// BaseFlowRef, VecBaseFlow, and AlignBaseFlow.
///
/// A VecBaseFlow stores a "vec<BaseFlowRef>" efficiently.  The class
/// has input and output operations to files h.{flowf,fastb,indexb},
/// where h is an arbitrary string header.
///
/// An AlignBaseFlow is an alignment between two members of a VecBaseFlow, the
/// second of which may be reverse-complemented.  The alignment itself is 
/// represented by an offset, which is "pos1 - pos2" for the alignment between the
/// first flowvector and the second flowvector (or its reverse complement).

#ifndef BASE_FLOW_H
#define BASE_FLOW_H

#include "CoreTools.h"
#include "Basevector.h"
#include "454/flowdata/FlowOrder.h"

class FlowDataBroker;

/// A BaseFlowRef points to data representing a flowgram, base calls for it, 
/// and the correspondence between the two.  Thus the class consists of 
/// three pointers, pointing to:
///
/// 1. a flowgram F, represented as a vec<float>, which is to have the 
/// following features:
/// - it is normalized;
/// - it has no key or P flows;
/// - it follows the standard flow order TACG, but is not required to 
/// start with T.  This allows for trimming and for reverse complementation 
/// [which makes (TACG)^n into CG (TACG)^(n-1) TA].
///
/// 2. a basevector B, which contains the base calls.
///
/// 3. a vec<flow_index> M, which maps each position on B to a
/// position on F.
///
/// The class has shallow copy semantics, and one must call DeepCopy() to make
/// a deep copy.

/// [Note that flow_index is currently defined to be an int, which
/// makes BaseFlow useful for translating long sequences to flowspace
/// but is not space-efficient for storing many short reads.  If we
/// ever need to be more space-efficient, we will have to somehow split
/// the definition of "flow_index" into a wide and a narrow variant.]

/// Position of a flow within a flowgram F; the element type of the map M.
typedef int flow_index;

// Helper class that encapsulates the flow order (a 4-character string such
// as "TACG") and the associated offset computation, including a lookup
// table for accelerating that computation.
struct BaseFlowOrder {
  // Map from base code to position in the flow order (both in range 0 to 3);
  // computed from the flow order by init() and used to speed up Offset().
  char perm[4];

  // Build the table from a 4-character flow order string.
  BaseFlowOrder(const String & order)
  {
    init(order);
  }

  // Build the table from the cycle string of a FlowOrder.
  BaseFlowOrder(const FlowOrder & flowOrder)
  {
    init(flowOrder.CycleString());
  }

  // (Re)compute perm from order.  Asserts (ForceAssertEq) that order has
  // exactly 4 characters; perm[as_char(order[i])] is set to i.
  void init(const String & order) {
    ForceAssertEq(order.size(), 4);
    for (int i=0; i<4; ++i) {
      perm[as_char(order[i])] = i;
    }
  }


  // Offset: in the flow order TACG, return 0 if floworder position of
  // firstBase == matchingFlow (i.e. T and 0, A and 1, C and 2, G and
  // 3).  Return 1 if floworder position of firstBase is 1 greater
  // than matchingFlow modulo 4 (T and 3, A and 0, C and 1, G and 2).  Etc.
  // The result is always in the range 0 to 3.
  // This class is optimized for this calculation.
  char Offset(char firstBase, flow_index matchingFlow) const
  {
    ForceAssertLt(firstBase, 4); // TODO: Remove once this works!
    ForceAssertLt(matchingFlow, 4); // TODO: Remove once this works!
    // The built-in % truncates toward zero, so a negative difference
    // (e.g. position 0 minus flow 3) would yield -3 instead of the
    // documented 1.  Fold negative remainders back into 0..3.
    const int remainder = (*(perm + firstBase) - matchingFlow) % 4;
    return static_cast<char>(remainder < 0 ? remainder + 4 : remainder);
  }

  // FlowOrderString: Return the string associated with the flow
  // order, rebuilt by inverting perm.  This is slower than Offset(),
  // i.e. don't use this in tight loops.
  String FlowOrderString() const
  {
    String order(4);
    for (int i=0; i<4; ++i) {
      order[perm[i]] = as_base(i);
    }
    return order;
  }

  // IsCyclicPalindrome: Return true if some cyclic permutation of the
  // rc of the flow order string equals the flow order string.  It
  // turns out that the 8 cases that fail are ACTG, AGTC, CAGT, CTGA,
  // GACT, GTCA, TCAG, TGAC, i.e. all the cyclic shifts of ACTG and
  // AGTC, so the test is implemented as "not one of those 8 strings".

  bool IsCyclicPalindrome() const
  {
    String order = FlowOrderString();
    return !(order=="ACTG" || order=="AGTC" || order=="CAGT" || order=="CTGA"
             || order=="GACT" || order=="GTCA" || order=="TCAG" || order=="TGAC");
  }
};

// Stream output, stream input and equality comparison for BaseFlowOrder.
// These are declarations only; the definitions are not in this header.
ostream &operator<< (ostream &out, const BaseFlowOrder &b);
istream &operator>> (istream &in, BaseFlowOrder &b);
bool operator==(const BaseFlowOrder &a, const BaseFlowOrder &b);

class BaseFlowRef {

  friend class BaseFlowTester; //for access to internals while testing.
  friend class VecBaseFlow;
  
public:

  static BaseFlowOrder FLOW_ORDER; // Default flow order

  ///Default constructor
  BaseFlowRef( ) :
    F_(new serfvec<float>), 
    B_(new basevector),
    M_(new serfvec<flow_index>),
    self_owned_(True),
    floworder_(BaseFlowRef::FLOW_ORDER) { }

  ///Shallow copy.
  BaseFlowRef( const BaseFlowRef &x):
    F_(x.F_), B_(x.B_), M_(x.M_),
    self_owned_(False), floworder_(x.floworder_) { }

  ~BaseFlowRef( ) { Destroy(); }

  void Destroy() {
    if (self_owned_) {    
      delete F_;
      delete B_;
      delete M_;    
    }
  }

  ///Construct a BaseFlowRef with synthetic flows from a basevector.
  BaseFlowRef(const BaseFlowOrder & flowOrder, const basevector & bases);

  ///Construct a BaseFlowRef with synthetic flows from a string of bases.
  BaseFlowRef(const FlowOrder & flowOrder, const String & bases);

  BaseFlowRef( serfvec<float>& F, basevector& B, serfvec<flow_index>& M,
	       const BaseFlowOrder & flowOrder = BaseFlowRef::FLOW_ORDER) 
    : F_(&F), B_(&B), M_(&M), self_owned_(False), floworder_(flowOrder)
  {
    AssertEq( (int) B.size( ), M.size( ) );
    AssertGe( B.size( ), 1 );    
  }

  const serfvec<float>& F( ) const { return *F_; }
  serfvec<float>& F( ) { return *F_; }

  const float& F( int i ) const { return F( )[i]; }
  float& F( int i ) { return F( )[i]; }

  /// Offset: return the offset of F(0) in the flow order.
  /// For example, in the flow order TACG, return 0 if F(0) corresponds to a
  /// T, 1 if it corresponds to an A, 2 for C, 3 for G.
  char Offset( ) const { return floworder_.Offset( B(0), M(0) ); }

  const basevector& B( ) const { return *B_; }
  basevector& B( ) { return *B_; }

  unsigned char B( int i ) const { return B( )[i]; }

  const serfvec<flow_index>& M( ) const { return *M_; }
  serfvec<flow_index>& M( ) { return *M_; }

  const flow_index& M( int i ) const { return M( )[i]; }
  flow_index& M( int i ) { return M( )[i]; }

  float FlowAtBase( int i ) const { return F( M(i) ); }

  int NFlows( ) const { return F_->size( ); }
  int NBases( ) const { return B_->size( ); }

  /// Create an "ideal" base flow where all flow heights are integers.
  void MakeIdealFrom( const BaseFlowRef& x );

  /// Count the number of "dots": 3 or more 0 flows in a row;
  int Dots();

  /// Set *this to the reverse complement of x.
  void SetToReverseOf( const BaseFlowRef& x );

  ///Return the flow values as qual scores.
  /// We multiply them by 10 if they are < 10, leave them alone otherwise,
  /// so that they fit in two characters.
  /// This is used so we can print them in PrintVisualAlignment.
  serfvec<unsigned char> FlowAsQuals();

  ///Return the flow values as qual scores by multiplying them by 10.
  /// We multiply them by 10 if they are < 10, leave them alone otherwise,
  /// so that they fit in two characters.
  /// This is used so we can print them in PrintVisualAlignment.
  void FlowAsQuals(serfvec<unsigned char> & quals);

  /// Print the flow values above each base; uses FlowAsQuals. 
  /// We multiply the flow values by 10 if they are < 10, 
  /// leave them alone otherwise,
  /// so that they fit in two characters.
  void PrintAligned(ostream & os);

  // Trims the first KEY_BASES from read.  DOES NOT CHECK what bases
  // are trimmed.  Does account properly for reads that begin with the
  // same base that the key ends with.
  void TrimKey(int KEY_BASES);

  void swap(BaseFlowRef &x)
  {
    ::swap(F_, x.F_);
    ::swap(B_, x.B_);
    ::swap(M_, x.M_);
    ::swap(self_owned_, x.self_owned_);
    ::swap(floworder_, x.floworder_);
  }

  ///Turn *this into a self-owned deep copy of x.
  void DeepCopy(const BaseFlowRef &x)
  {
    Destroy();
    F_ = new serfvec<float>(*x.F_);
    M_ = new serfvec<flow_index>(*x.M_);
    B_ = new basevector(*x.B_);
    floworder_ = x.floworder_;
    self_owned_ = True;
  }

  BaseFlowRef & operator=(const BaseFlowRef &x) {
    BaseFlowRef temp(x);
    swap(temp);
    return *this;
  }
  
  /// Check that flow order and all 3 data vectors are the same.
  friend bool operator== (const BaseFlowRef &a, const BaseFlowRef &b);

  /// Check that the bases are the same and that all positive flows match
  friend bool EqualPositiveFlows (const BaseFlowRef &a, const BaseFlowRef &b);

  /// Add zeros to the end of the flow vector as needed to bring it
  /// out to an even number of flows.
  void PadEndToFlowCycle();
  /// Add zeros to the beginning of the flow vector as needed to make Offset()==0.
  void PadBeginToFlowCycle();

  /// Access the floworder information
  BaseFlowOrder Floworder() const { return floworder_; }
private:

  serfvec<float>* F_;
  basevector* B_;
  serfvec<flow_index>* M_;
  Bool self_owned_;
  BaseFlowOrder floworder_;

};

/// Stream output for a BaseFlowRef (declaration only; not defined in this
/// header).
ostream &operator<< (ostream &out, const BaseFlowRef &b);

/// Score( x1, x2, pos1, pos2, verbose, length ): suppose base pos1 on x1
/// is positioned over base pos2 on x2.  For the corresponding alignment in
/// flowspace, compute sum_i( |x1.F[i] - x2.F[i]| ), as i ranges from pos1 to
/// the closest of pos1 + length or the end of either x1 or x2.
/// Note that pos1 + length is _included_ in the alignment.
/// length defaults to INT_MAX and verbose defaults to False.
/// NOTE(review): verbose presumably enables diagnostic output -- confirm
/// against the implementation.

float Score( const BaseFlowRef& x1, const BaseFlowRef& x2, 
	     int pos1, int pos2, Bool verbose = False,
	     int length = INT_MAX);


/// VecBaseFlow: efficient storage for what is conceptually a "vec<BaseFlow>".
/// The flowgrams, base calls and base-to-flow maps of all entries are kept
/// in three parallel containers that share a single flow order.  The class
/// can be read from and written to the files h.{flowf,fastb,indexb}, where
/// h is an arbitrary string header.
class VecBaseFlow {

public:

  /// Create an empty collection using the given flow order (by default
  /// BaseFlowRef::FLOW_ORDER).
  explicit VecBaseFlow( BaseFlowOrder floworder = BaseFlowRef::FLOW_ORDER)
    : floworder_(floworder)
  { }

  /// Create a collection holding copies of F, B and M, interpreted with
  /// the given flow order.
  VecBaseFlow( const vecvec<float>& F,
               const vecbasevector& B,
               const vecvec<flow_index>& M ,
               BaseFlowOrder floworder)
    : F_(F), B_(B), M_(M), floworder_(floworder)
  { }

  /// Number of entries (taken from the flowgram container).
  int N( ) const
  {
    return F_.size( );
  }

  // ---- whole-container access ----

  /// All flowgrams.
  vecvec<float>& F( )             { return F_; }
  const vecvec<float>& F( ) const { return F_; }

  /// All base calls.
  vecbasevector& B( )             { return B_; }
  const vecbasevector& B( ) const { return B_; }

  /// All base-to-flow maps.
  vecvec<flow_index>& M( )             { return M_; }
  const vecvec<flow_index>& M( ) const { return M_; }

  // ---- per-entry access ----

  /// Flowgram of entry i.
  serfvec<float>& F( int i )             { return F_[i]; }
  const serfvec<float>& F( int i ) const { return F_[i]; }

  /// Base calls of entry i.
  basevector& B( int i )             { return B_[i]; }
  const basevector& B( int i ) const { return B_[i]; }

  /// Base-to-flow map of entry i.
  serfvec<flow_index>& M( int i )             { return M_[i]; }
  const serfvec<flow_index>& M( int i ) const { return M_[i]; }

  /// A non-owning BaseFlowRef that refers to the data of entry i and uses
  /// this collection's flow order.
  BaseFlowRef operator[](int i)
  {
    return BaseFlowRef( F(i), B(i), M(i), floworder_ );
  }

  /// Number of flows in entry i.
  int NFlows( int i ) const
  {
    return F(i).size( );
  }

  /// Offset of the first flow of entry i within the flow order, computed
  /// from the entry's first base and the flow that base maps to.
  char Offset( int i ) const
  {
    return floworder_.Offset( B(i)[0], M(i)[0] );
  }

  void Reverse( );

  /// Read from / write to the files with header h.
  void Read( const String& h );
  void Write( const String& h );


  /// Read from a fasta file and a normalized flowb file, matching entries
  /// by checking the names in both against each other.
  /// The flowb file may contain more entries than the fasta file, but this
  /// method asserts if any sequence in the fasta file is missing from the
  /// flowb file.
  /// Sometimes the normalized flows used for basecalling should be
  /// replaced by something else once the correspondence has been worked
  /// out; in that case pass the flowb prefix of those other flows as the
  /// third parameter.
  void ReadFromFastaAndFlowb(const String & fasta,
                             const String & flowbPrefix,
                             const String & otherFlowbPrefix = "");

  /// Read a fasta file and construct ideal flows
  void ConstructFromFasta(const String & fasta);

private:

  vecvec<float> F_;          // flowgrams, one per entry
  vecbasevector B_;          // base calls, one per entry
  vecvec<flow_index> M_;     // base-to-flow maps, one per entry
  BaseFlowOrder floworder_;  // flow order shared by all entries
};

/// An AlignBaseFlow is an alignment between two members of a VecBaseFlow, the
/// second of which may be reverse-complemented.  The alignment itself is 
/// represented by an offset, which is "pos1 - pos2" for the alignment between the
/// first flowvector and the second flowvector (or its reverse complement).
/// All alignments are assumed to start at the beginning of the flowvectors and
/// end at the end, and have no indels.
class AlignBaseFlow {

public:

  AlignBaseFlow( ) { }

  AlignBaseFlow( int id1, int id2, Bool rc2, short offset )
    : id1_(id1), id2_(id2), rc2_(rc2), offset_(offset) { }

  int Id1( ) const { return id1_; }
  int Id2( ) const { return id2_; }

  void SetId1( int id1 ) { id1_ = id1; }
  void SetId2( int id2 ) { id2_ = id2; }

  Bool Rc2( ) const { return rc2_; }

  void SetRc2( Bool rc2 ) { rc2_ = rc2; }

  int Offset( ) const { return offset_; }

  void SetOffset( short offset ) { offset_ = offset; }

  ///Length of alignment.
  int Length( const VecBaseFlow & v1, const VecBaseFlow& v2 ) {
    return Offset() > 0 
      ? min(v1.F(id1_).size() - Offset(), int(v2.F(id2_).size()))
      : min(int(v1.F(id1_).size()), v2.F(id2_).size() + Offset());
  }

  int StartOn1() { return std::max(Offset(),0); }
  int StartOn2() { return std::max(-Offset(),0); }

  ///Return a score measuring how good the alignment is.
  ///The score is measured as the average of the absolute values of the
  /// differences between the flow values of the two flow vectors.
  float Score( const VecBaseFlow& v, const VecBaseFlow& vr );

  /// overlap from beginning of the alignment up to nflows1 on 1 
  /// or nflows2 on 2, whichever comes first.
  int Overlap( int nflows1, int nflows2 );

  /// Flip: exchange order of read ids and adjust offset accordingly.

  void Flip( int nflows1, int nflows2 ) {    
    swap( id1_, id2_ );
    if ( !Rc2( ) ) offset_ = -offset_;
    else offset_ += nflows2 - nflows1;    
  }

  friend Bool operator<( const AlignBaseFlow& a1, const AlignBaseFlow& a2 ) {    
    if ( a1.Id1( ) < a2.Id1( ) ) return True;
    if ( a1.Id1( ) > a2.Id1( ) ) return False;
    if ( a1.Id2( ) < a2.Id2( ) ) return True;
    if ( a1.Id2( ) > a2.Id2( ) ) return False;
    if ( a1.Rc2( ) < a2.Rc2( ) ) return True;
    if ( a1.Rc2( ) > a2.Rc2( ) ) return False;
    if ( a1.Offset( ) < a2.Offset( ) ) return True;
    return False;    
  }

  friend Bool operator==( const AlignBaseFlow& a1, const AlignBaseFlow& a2 ) {    
    return a1.id1_ == a2.id1_ 
      && a1.id2_ == a2.id2_ 
      && a1.rc2_ == a2.rc2_
      && a1.offset_ == a2.offset_;    
  }
          
private:

  int id1_, id2_;
  Bool rc2_;
  short offset_;

};

#endif
