// Copyright (c) 2004 Broad Institute/Massachusetts Institute of Technology
#ifndef PATHWALK
#define PATHWALK


#include "CoreTools.h"
#include "Feudal.h"
#include "ReadPairing.h"

#include "paths/KmerPath.h"
#include "paths/AlignAndMerge.h"

#include <set>
#include <functional>


// A PathWalk holds a KmerPath being walked, and remembers
// which gap-free block of kmers ("block") our improvement attempt is
// centered on.
//
// A KmerPath is heavy, so these should live in containers where they
// don't get moved around (e.g. a list), and any violent treatment
// should be done to iterators to them.

class PathWalk {
public:
  // Construct a walk over a copy of `path`.
  //   path            - the KmerPath to walk; copied into m_path.
  //   workingBlockIdx - index of the block the improvement attempt is
  //                     centered on.  It may equal the number of blocks,
  //                     in which case Unimprovable() reports true.
  //   containsCloser  - the value later reported by ContainsCloser().
  // Each construction also consumes the next value of the static counter
  // s_count as this walk's m_id.
  PathWalk( const KmerPath &path, int workingBlockIdx,
            bool containsCloser = false )
    : m_path( path ), 
      m_containsCloser( containsCloser ), 
      m_index( workingBlockIdx ), 
      m_id( s_count++ ),
      m_totalKmers(0)
  {
    // Pre-calculate the sizes of the blocks, as these values are used
    // frequently in the comparison function.
    //
    // A block is a run of consecutive sequence segments; its size is the
    // sum of their Length() values.  Every non-sequence segment closes
    // the current block (possibly with size 0) and has its segment index
    // recorded in m_gapIdxs.
    int kmerBlockSize = 0;
    for( int segIdx = 0; segIdx < path.NSegments(); ++segIdx )
    {
      if ( path.isSeq( segIdx ) ) 
        kmerBlockSize += path.Length( segIdx );
      else 
      {
        m_gapIdxs.push_back( segIdx );
        m_blockSizes.push_back( kmerBlockSize );
	m_totalKmers += kmerBlockSize;
        kmerBlockSize = 0;
      }
    }
    // Close the final block, so m_blockSizes always holds exactly one
    // more entry than m_gapIdxs.
    m_blockSizes.push_back( kmerBlockSize );
    m_totalKmers += kmerBlockSize;
    
    // The working index may be at most one past the last block.
    // NOTE(review): no lower bound is checked here -- presumably callers
    // never pass a negative index; confirm.
    ForceAssertLe( m_index, (int) m_blockSizes.size() );
  }

 private:
  KmerPath m_path;        // private copy of the path being walked
  vec<int> m_blockSizes;  // kmer count of each block, in path order
  vec<int> m_gapIdxs;     // segment index of each non-sequence segment;
                          // entry k is the gap between blocks k and k+1

  // NOTE: This is mutable, so that it can be changed for a PathWalk
  // which is an element of a set of such.  Philosophically this is
  // defensible because we consider two PathWalks to be == even if this
  // set differs.  This does mean that the set MUST NOT be consulted 
  // by operator< !!
  mutable set<int> mm_readsRequired;

  bool m_containsCloser;  // as passed to the constructor
  int m_index;            // index of the current working block
  int m_id;               // value of s_count when this walk was constructed
  int m_totalKmers;       // sum of all entries of m_blockSizes

  // Counter used to hand out m_id values; only declared here, so its
  // definition must live in some other translation unit.
  static int s_count;

 public:
  // Read-only access to the walked path and the working block index.
  const KmerPath &  GetPath()  const { return m_path; }
  int               GetIndex() const { return m_index; }
  

  // Block statistics precomputed by the constructor.  GetKmerBlockSize
  // indexes m_blockSizes directly with blockIdx.
  int  GetKmerBlockSize( int blockIdx ) const { return m_blockSizes[blockIdx]; }
  int  GetNumKmerBlocks() const { return m_blockSizes.size(); }
  int  GetNumKmers() const { return m_totalKmers; }

  // NOTE: The following const methods change mm_readsRequired.  Pay attention!

  // Add a single read id to the required set.
  void RequireRead( int id ) const { mm_readsRequired.insert( id ); }

  // Replace the required set with its intersection with other's set.
  void IntersectRequiredReadsWith( const PathWalk& other ) const {
    set<int> newRequired;
    set_intersection( mm_readsRequired.begin(), mm_readsRequired.end(),
		      other.mm_readsRequired.begin(), other.mm_readsRequired.end(),
		      inserter(newRequired, newRequired.begin()) );
    mm_readsRequired = newRequired;
  }

  // Add every read id required by other to this walk's required set
  // (i.e. the set becomes the union of the two).
  void RequireReadsFrom( const PathWalk &other ) const
  {
    copy( other.mm_readsRequired.begin(), other.mm_readsRequired.end(), 
          inserter( mm_readsRequired, mm_readsRequired.end() ) );
  }

  // True if id is currently in the required set.
  bool RequiresRead( int id ) const { return ( mm_readsRequired.count( id ) > 0 ); }

  // ContainsCloser reports the flag given at construction.  Unimprovable
  // is true exactly when the working index is one past the last block.
  bool ContainsCloser() const { return m_containsCloser; }
  bool Unimprovable() const { return m_index == (int) m_blockSizes.size(); }

  // Could we merge in some otherPath without adding any new kmers?
  // (Declared only; the definition is not in this header.)
  bool CanAddCloser( const KmerPath& closer,
		     int minGoodPathLength, 
		     int maxGoodPathLength,
		     vec<PathWalk>& new_closed_walks,
		     bool DEBUG=false ) const;

  // The "stable part" of a PathWalk goes from its beginning
  // to the last sequence segment before the current working block.
  // We expect this part not to change much, though it is possible
  // that a merge will move the current working block backwards and
  // into previously stable territory.

  /// StablePartCmp returns -1,0,+1 for dictionary-order <,=,>
  // of stable parts of two PathWalks (strcmp style).

  // The two intended uses were (1) selectively deleting things
  // when the heap exploded, and (2) doing a somewhat depth-first
  // ordering for paths which have already diverged.  Neither of these
  // seemed to work well, so this code is entirely unused.

  friend int StablePartCmp( const PathWalk &lhs, const PathWalk &rhs) {
    const KmerPath &p1 = lhs.m_path;
    const KmerPath &p2 = rhs.m_path;
    
    // Number of gaps to pass before the comparison is declared equal:
    // the smaller of the two working indices.
    // NOTE(review): when this starts at 0 the pre-decrement below yields
    // -1 and never equals 0, so the entire paths get compared -- confirm
    // whether an empty stable part was meant to compare equal at once.
    int nochange_blocks = min( lhs.GetIndex(), rhs.GetIndex() );
    int seg, segmax = min(p1.NSegments(), p2.NSegments());

    for( seg=0; seg < segmax; seg++ ) {
      // If only one path has a gap, it is lex earlier
      if( p1.isGap(seg) ^ p2.isGap(seg) )
	return( p2.isGap(seg) - p1.isGap(seg) );
      // If we reach the end of the stable region, they're equal
      if( p1.isGap(seg) && --nochange_blocks==0 )
	return 0;
      if( p1.Segment(seg) < p2.Segment(seg) ) return -1; 
      if( p2.Segment(seg) < p1.Segment(seg) ) return +1;
    }
    // We reached the end of one (or both) paths.
    // If one ended before the other, it is lex earlier
    if( p1.NSegments() > seg ) return +1;
    if( p2.NSegments() > seg ) return -1;
    return 0;
  }

  // Boolean conveniences built on StablePartCmp.
  friend bool StablePartLt( const PathWalk &lhs, const PathWalk &rhs) {
    return( StablePartCmp(lhs,rhs) < 0 );
  }
  friend bool StablePartEq( const PathWalk &lhs, const PathWalk &rhs) {
    return( StablePartCmp(lhs,rhs) == 0 );
  }

    

  // This operator< is for the benefit of a priority queue, so
  // "largest" in this context means "most in need of examination".
  // In this case, we want to return the answer to the question:
  // should we examine rhs before we examine lhs, i.e. is lhs less
  // important than rhs?
  //
  // The decision is made in stages; the first stage that distinguishes
  // the two walks determines the result:
  //   1. a walk that contains a closer is "less" than one that does not;
  //   2. blocks up to and including the working block are compared by
  //      size, with the gap in front of each block (after the first)
  //      compared first by Length() and then by Minimum();
  //   3. if one walk still has blocks left within its limit and the
  //      other does not, the one with blocks left is "less";
  //   4. the paths are compared segment by segment, and the first
  //      differing segment decides;
  //   5. the path with more segments is "less".
  // mm_readsRequired is never consulted.

  // Suitable for use on containers for iterators to PWCs, via functor
  // dereferenced_compare in STLExtensions.h

  friend bool operator<( const PathWalk &lhs, const PathWalk &rhs) 
  {
    // Stage 1: closed walks sort below open ones.
    if ( lhs.m_containsCloser && ! rhs.m_containsCloser ) return true;
    if ( rhs.m_containsCloser && ! lhs.m_containsCloser ) return false;
    
    // Consider the lengths of the blocks in each path.  Walks with
    // longer initial blocks should be lower priority.  If the initial
    // block is the same length, look at the next one, and so on.
    const KmerPath &p1 = lhs.m_path;
    const KmerPath &p2 = rhs.m_path;
    
//     // EXPERIMENTAL CHANGE:
//     // First compare stable parts, in reverse lex order.
//     // If they differ, we should work on the lex earlier path first,
//     // since it might be a truncation of the lex later one.
//     switch( StablePartCmp(lhs, rhs) ) {
//     case -1: return false;
//     case +1: return true;
//     default: ; // do the old comparison routine below.
//     }
//     // remove -- doesn't seem to work well right now.

    // Only blocks 0..working index (inclusive) take part in stage 2,
    // clamped to the number of blocks that actually exist.
    int blockLimit1 = min<int>( lhs.m_blockSizes.size(), lhs.GetIndex() + 1 );
    int blockLimit2 = min<int>( rhs.m_blockSizes.size(), rhs.GetIndex() + 1 );
    
    /* 
    int blockSum1 = 0;
    for ( int ii = 0; ii < blockLimit1; ++ii )
      blockSum1 += lhs.m_blockSizes[ ii ];
    
    int blockSum2 = 0;
    for ( int ii = 0; ii < blockLimit2; ++ii )
      blockSum2 += rhs.m_blockSizes[ ii ];
    
    if ( blockSum1 > blockSum2 ) return true;
    if ( blockSum1 < blockSum2 ) return false;
    */

    // Stage 2.  The two indices advance in lockstep, so they always
    // hold the same value; they differ only in which limit they are
    // tested against.
    int blockIdx1 = 0, blockIdx2 = 0;
    int blockSize1 = 0, blockSize2 = 0;
    while ( blockIdx1 < blockLimit1 &&
            blockIdx2 < blockLimit2 )
    {
      if ( blockIdx1 > 0 )
      {
        // Segment indices of the gap immediately preceding this block.
        int gapIdx1 = lhs.m_gapIdxs[blockIdx1-1];
        int gapIdx2 = rhs.m_gapIdxs[blockIdx2-1];
        
        // If we're here, all the blocks to this point are the same
        // size.  If the gaps differ, we can return.
        
	// THIS MIGHT BE BAD: a merge can increase a gap's stretchiness
        
        if ( p1.Length(gapIdx1) < p2.Length(gapIdx2) ) return true;
        if ( p1.Length(gapIdx1) > p2.Length(gapIdx2) ) return false;
        if ( p1.Minimum(gapIdx1) < p2.Minimum(gapIdx2) ) return true;
        if ( p1.Minimum(gapIdx1) > p2.Minimum(gapIdx2) ) return false;
      }
      
      blockSize1 = lhs.m_blockSizes[blockIdx1];
      blockSize2 = rhs.m_blockSizes[blockIdx2];
      
      // Leave the loop with the differing sizes still in blockSize1/2.
      if ( blockSize1 != blockSize2 )
        break;
      
      ++blockIdx1, ++blockIdx2;
    }
    
    // If the last blocks examined had different sizes, the path with
    // the longer block has lower priority.
    // (If the loop ran to a limit instead, the two sizes are equal and
    // neither test fires.)
    
    if ( blockSize1 > blockSize2 ) return true;
    if ( blockSize1 < blockSize2 ) return false;
    
    // If the first N kmer blocks are all the same size, but one path
    // has more segments left, it's lower priority.
    
    // THIS MIGHT BE BAD: the left endpt of the merged-in path can be in
    // an earlier block (which we don't want to go back to!)
    
    
    // Stage 3.
    if ( blockIdx1 <  blockLimit1 &&
         blockIdx2 == blockLimit2 )
      return true;
    
    if ( blockIdx1 == blockLimit1 &&
         blockIdx2 <  blockLimit2 )
      return false;
    
    // At this point, both paths have the same number of blocks and
    // they're of identical sizes. We now have to compare the kmer
    // intervals themselves so that identical paths (from the kmer
    // point of view) are grouped together.
    
    // Stage 4.  Note that the first segment pair that is not == always
    // ends the comparison: every path through the body below returns.
    const int minNSegments = min( p1.NSegments(), p2.NSegments() );
    
    for( int i=0; i<minNSegments; i++ )
      if( ! (p1.Segment(i)==p2.Segment(i)) ) 
      {
        // Gap versus sequence: the walk holding the sequence is "less".
        if (p1.isGap(i) && p2.isSeq(i) ) return false;
        if (p1.isSeq(i) && p2.isGap(i) ) return true;
        if (p1.isGap(i) && p2.isGap(i) )
        {
          // Two gaps: order by Length(), then by Minimum(); gaps that
          // agree on both are treated as not-less.
          if ( p1.Length(i) < p2.Length(i) ) return true;
          if ( p1.Length(i) > p2.Length(i) ) return false;
          if ( p1.Minimum(i) < p2.Minimum(i) ) return true;
          if ( p1.Minimum(i) > p2.Minimum(i) ) return false;
          return false;
        }
        // Both p1.Segment(i) and p2.Segment(i) are sequence.
        // Smaller Start() is "less"; on equal Start() the direction is
        // reversed, so the larger Stop() is "less".
        if ( p1.Start(i) < p2.Start(i) ) return true;
        if ( p1.Start(i) > p2.Start(i) ) return false;
        if ( p1.Stop(i)  < p2.Stop(i)  ) return false;
        if ( p1.Stop(i)  > p2.Stop(i)  ) return true;
        return false;
      }
    
    // If we get here, these are == (not <) up to minNSegments.  If
    // one has more segments (i.e. is longer), it is of lower
    // priority.
    if ( p1.NSegments() != p2.NSegments() )
      return ( p1.NSegments() > p2.NSegments() );
    
    // If we get here, these are of equal priority (lhs is not lower than rhs).
    return false;
  }
  
  // Alternate ordering functor.  Like operator<, a walk containing a
  // closer sorts below one that does not.  After that the paths are
  // compared segment by segment using the segments' own operator<, with
  // the operands swapped (lhs is "less" when rhs's segment is smaller),
  // and finally the path with more segments is "less".  Block sizes and
  // the working index play no part.
  struct LexComparator
    : public binary_function<PathWalk,PathWalk,bool>
  {
    bool operator() ( const PathWalk& lhs, const PathWalk& rhs ) const
    {
      if ( lhs.ContainsCloser() && ! rhs.ContainsCloser() ) return true;
      if ( rhs.ContainsCloser() && ! lhs.ContainsCloser() ) return false;
    
      const KmerPath& p1 = lhs.GetPath();
      const KmerPath& p2 = rhs.GetPath();
      
      const int minNSegments = min( p1.NSegments(), p2.NSegments() );
      
      // The first differing segment decides the result.
      for( int i=0; i<minNSegments; i++ )
        if( ! (p1.Segment(i)==p2.Segment(i)) ) 
          return ( p2.Segment(i) < p1.Segment(i) );
      
      // If we get here, these are == (not <) up to minNSegments.  If
      // one has more segments (i.e. is longer), it is of lower
      // priority.
      if ( p1.NSegments() != p2.NSegments() )
        return ( p1.NSegments() > p2.NSegments() );
      
      return false;
    }
  };

  // Two walks are == when they have the same working index and the same
  // path.
  // This does not (and must not!) check the mutable mm_readsRequired
  // NOTE(review): m_containsCloser is not compared here although
  // operator< does take it into account -- confirm this is intended.
  friend bool operator==(const PathWalk &lhs, const PathWalk &rhs) {
    return ( lhs.GetIndex() == rhs.GetIndex() &&
             lhs.GetPath() == rhs.GetPath() );
  }

  // Print a one-line description of the walk: a parenthesized summary
  // (kmer count from the path's KmerCount(), number of blocks, summed
  // minimum and maximum gap sizes, "closed" or "open"), followed by the
  // path's segments.  After the segments of each block comes
  // "(N kmers)" with that block's size, and a "+" if that block is the
  // working block.
  friend ostream & operator<<( ostream &out, const PathWalk &pwc )
  {
    // Total the minimum and maximum sizes over all gap segments.
    int minGaps = 0;
    int maxGaps = 0;
    for(int i=0; i<pwc.m_path.NSegments(); i++)
      if ( pwc.m_path.isGap( i ) )
      {
        minGaps += pwc.m_path.Minimum( i );
        maxGaps += pwc.m_path.Maximum( i );
      }
    
    out << "(" << pwc.m_path.KmerCount() << " kmers in "
        << pwc.m_blockSizes.size() << " blocks, "
        << minGaps << "-" << maxGaps << " kmers in gaps, "
        << ( pwc.ContainsCloser() ? "closed" : "open" ) << "): ";

    // currentIdx tracks which block the segments being printed belong
    // to; reaching a gap means that block has just ended.
    int currentIdx = 0;
    for(int i=0; i<pwc.m_path.NSegments(); i++) {
      if ( pwc.m_path.isGap( i ) )
      {
        out << "(" << pwc.m_blockSizes[ currentIdx ] << " kmers)";
        if ( currentIdx++ == pwc.m_index )
          out << "+";
      }
      out << pwc.m_path.Segment(i);
    }
    // The final block has no gap after it, so annotate it here.
    out << "(" << pwc.m_blockSizes[ currentIdx ] << " kmers)";
    if ( currentIdx == pwc.m_index )
      out << "+";
    
    return out;
  }
};


// A global function to insert a new PathWalk into a set<PathWalk>.
// If a matching PathWalk already exists, this modifies it so that it
// ends up with the intersection of the mm_readsRequired of the two.

// Since mm_readsRequired is mutable, we can do this in-place.  If it
// weren't, we'd delete the old one and insert an almost-identical new one.

template <class PathWalkSet>
pair<typename PathWalkSet::iterator,bool>
PathWalkSetInsert( PathWalkSet& thePathWalkSet, const PathWalk& to_be_added ) {
  typedef typename PathWalkSet::iterator Iter;
  typedef pair<Iter,bool> Result;

  // Locate the run of elements equivalent to to_be_added under the
  // container's ordering.
  const pair<Iter,Iter> matches = thePathWalkSet.equal_range( to_be_added );
  const Iter existing = matches.first;

  // An equivalent walk is already stored: narrow its required reads to
  // those shared with the newcomer and report that nothing was inserted.
  if ( existing != matches.second ) {
    existing->IntersectRequiredReadsWith( to_be_added );
    return Result( existing, false );
  }

  // Nothing equivalent yet: insert, using the end of the (empty) range
  // as the position hint.
  return Result( thePathWalkSet.insert( matches.second, to_be_added ), true );
}


// A function object so you can for_each the above:
template <class PathWalkSet>
struct PathWalkSetInserter
  : public unary_function<PathWalk&, void>
{
 private:
  // The container every call inserts into; held by reference.
  PathWalkSet& m_destination;

 public:
  // Bind the functor to the container it should insert into.
  PathWalkSetInserter( PathWalkSet& thePathWalkSet )
    : m_destination( thePathWalkSet )
  {}

  // Forward one PathWalk to PathWalkSetInsert on the bound container;
  // the returned iterator/flag pair is discarded.
  void operator() ( const PathWalk& to_be_added )
  {
    PathWalkSetInsert( m_destination, to_be_added );
  }
};

// An alternate comparator (like operator<) for minimum-length,
// appropriate for searches that try to keep all the paths at the same
// physical length.  Again, "largest" means "most in need of
// examination", so longer paths are "less than" shorter paths.

struct GreaterMinLength {
  // Returns true when a's path has a strictly greater MinLength() than
  // b's path, i.e. a compares as "less" than b under this ordering.
  //
  // The call operator is const-qualified so the functor can be invoked
  // through a const comparator object, which standard ordered containers
  // and container adaptors are allowed to do.  The functor holds no
  // state, so this does not affect existing callers.
  bool operator()(const PathWalk &a, const PathWalk &b) const
  {
    return a.GetPath().MinLength() > b.GetPath().MinLength();
  }
};


#endif
