// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology

#ifndef PATHS_KMERPATHBREADTHSEARCHER_H
#define PATHS_KMERPATHBREADTHSEARCHER_H

// This ought to be changed to use the Extender, but for now it is
// just cut-and-pasted from WalkOneInsert.
// #include "paths/KmerPathExtender.h"

#include "Vec.h"

#include "paths/AlignAndMerge.h"
#include "paths/KmerBaseBroker.h"
#include "paths/PathWalk.h"
#include "paths/PathWalkHeap.h"
#include "paths/KmerPath.h"
#include "paths/WalkPlot.h"
#include "paths/NegativeGapValidator.h"

#include <set>
#include <algorithm>  // for heaps


/// KmerPathBreadthSearcher encapsulates the breadth-first search algorithm
/// for walking inserts in kmer-space.

class KmerPathBreadthSearcher
{
public:
  KmerPathBreadthSearcher( const vecKmerPath &paths,
			   const vecKmerPath &paths_rc,
			   const vec<tagged_rpint> &pathsDB,// should use Jon's
			   const NegativeGapValidator* ngv, // class instead
                           AbstractPathWalkHeap* p_heap,
			   unsigned int heap_size_limit = 200,
			   WalkPlotter *p_walkPlotter = 0,
			   WalkPlotter *p_eachPlotter = 0,
			   int verbosity = 0 )
    : m_paths( paths ),
      m_paths_rc( paths_rc ),
      m_pathsDB( pathsDB ),
      mp_ngv( ngv ),
      mp_heap( p_heap ),
      m_heap_size_limit( heap_size_limit ),
      mp_walkPlotter( p_walkPlotter ),
      mp_eachPlotter( p_eachPlotter ),
      m_plotDeadEnds( false ),
      m_verbosity( verbosity ),
      m_min_perfect_match( 1 ),
      m_abort_extension_explosions( true ),
      m_use_ngv_in_merge( true ),
      m_use_perfect_matches_only( false )
  {}

  void SetHeapSizeLimit( int limit ) { m_heap_size_limit = limit; }

  void SetPlotDeadEnds( bool plotDeadEnds ) { m_plotDeadEnds = plotDeadEnds; }

  void SetMinPerfectMatch( int mpm ) { m_min_perfect_match = mpm; }

  void SetUseNGVinMerge( bool use ) { m_use_ngv_in_merge = use; }

  void SetUsePerfectMatchesOnly( bool use ) { m_use_perfect_matches_only = use; }

  void SetAbortExtensionExplosions( bool abort ) 
    { m_abort_extension_explosions = abort; }


  struct Result {
    set<KmerPath> closures;
    set<KmerPath> explosion;
    unsigned int maxHeapSize;
    // For backwards-compatibility:
    bool heapExploded() { return (!explosion.empty() || extensionExploded); }
    bool foundClosure() { return !closures.empty(); }
    // Did we find so many extensions all at once that we aborted?
    // This could be folded into heap explosions; separating it is a
    // bit more informative.
    bool extensionExploded;
  };


  void FindClosures( const KmerPath& startingPath, // Where we really start
		     const KmerPath& closingPath,  //   almost unused (rehab)
                     const int startingPathId,     //   "don't use this path"
                     const int closingPathId,      // Where we really end
                     const int minAcceptableLength,
                     const int maxAcceptableLength,
                     Result &result );

  // If you're walking the whole insert, this abbreviated form will do:
  /// Find all walks from read id1 to RC of read id2
  /// of lengths between minLen and maxLen.

  void FindClosures( int id1, int id2, int minLen, int maxLen, Result &result )
  { FindClosures( m_paths[id1], m_paths_rc[id2], id1, id2, minLen, maxLen, result ); }



private:
  // A simple structure indicating an intersection between two paths,
  // i.e. two paths, with a segment on each that overlap.  Some
  // additional identification is passed on for ease of debugging (the
  // id and rc of the second path, as the first path is always the
  // partial walk).
  
  struct PathIntersection
  {
  public:
    PathIntersection() {}
    
    PathIntersection( const KmerPath *path1, const KmerPath *path2,
		      const int seg1, const int seg2, 
		      const int path2Id, const bool path2Rc,
		      const bool possibleCloser )
      : mp_path1( path1 ), mp_path2( path2 ), 
	m_seg1( seg1 ), m_seg2( seg2 ), 
	m_pathId( path2Id ), m_rc( path2Rc ),
	m_closer( possibleCloser )
    {}

    const KmerPath * Path1() const { return mp_path1; }
    const KmerPath * Path2() const { return mp_path2; }

    int Seg1() const { return m_seg1; }
    int Seg2() const { return m_seg2; }

    int PathId() const { return m_pathId; }
    bool Rc() const { return m_rc; }

    bool IsPossibleCloser() const { return m_closer; }

    // The goal of this operator< is that two PathIntersections
    // that might actually represent the same path alignment -- but
    // not idential because they are looking at different path segments 
    // within the same gap-free block -- end up next to each other.
    // Perhaps foolishly, let's have operator== be this equiv rel'n.
    
    // Two such intersections must have the same segoff (segment offset):
    int segoff() const { return( m_seg1 - m_seg2 ); }

    bool operator<( const PathIntersection &other ) const
    {
      if ( this->mp_path1 < other.mp_path1 ) return true;
      if ( this->mp_path1 > other.mp_path1 ) return false;
      if ( this->mp_path2 < other.mp_path2 ) return true;
      if ( this->mp_path2 > other.mp_path2 ) return false;
      if ( this->m_rc     < other.m_rc     ) return true;
      if ( this->m_rc     > other.m_rc     ) return false;
      if ( this->m_closer < other.m_closer ) return true;  // I think these two 
      if ( this->m_closer > other.m_closer ) return false; // will never get used
      // If these all match, order so that == things (below) will be adjacent:
      if ( this->segoff() < other.segoff() ) return false;
      if ( this->segoff() > other.segoff() ) return true;
      if ( this->m_seg1 < other.m_seg1 ) return true;
      return false;
    }

    // WARNING: This tests for EQUIVALENCE, not equality!
    // If pi1 == pi2 then they might have different m_seg's,
    // if the segments in between are identical non-gaps.
    bool operator==( const PathIntersection &other ) const {
      if( this->mp_path1 != other.mp_path1  ||
	  this->mp_path2 != other.mp_path2  ||
	  this->m_rc     != other.m_rc      ||
	  this->m_closer != other.m_closer  ||
	  this->segoff() != other.segoff() )
	return false;
      for( int i = min(m_seg1, other.m_seg1); 
	   i <= max(m_seg1, other.m_seg1); i++ )
	if( mp_path1->isGap(i) || 
	    ! (mp_path1->Segment(i) == mp_path2->Segment(i-segoff())) )
	  return false;
      return true;
    }

  private:
    const KmerPath *mp_path1, *mp_path2;
    int m_seg1, m_seg2;
    int m_pathId;
    bool m_rc;
    bool m_closer;
  };  // end struct PathIntersection
  
  // The private helper functions that should be replaced with
  // appropriate wrapping around the Extension class:
private:
  bool FindExtensions( const PathWalk &currentWalk, 
		       set<PathWalk> &extensions, 
		       const int startingPathId,
		       const int closingPathId,
		       set<KmerPath> &closures,
		       const int minAcceptablePathLength,
		       const int maxAcceptablePathLength,
		       bool DEBUG_GAP_SIZES=False );

  void FindIntersections( const PathWalk &currentWalk,
			  const int currentSegIdx,
			  vec<PathIntersection> &intersections,
			  const int startingPathId,
			  const int closingPathId );

  void FindPerfectExtensions( const KmerPath& basePath,
                              const KmerPath& otherPath,
                              const int basePathSeg,
                              const int otherPathSeg,
                              vec<MergedKmerPath>& merges );


private:
  const vecKmerPath &m_paths;
  const vecKmerPath &m_paths_rc;
  const vec<tagged_rpint> &m_pathsDB;
  const NegativeGapValidator* mp_ngv;
  AbstractPathWalkHeap* mp_heap;
  unsigned int m_heap_size_limit;
  WalkPlotter *mp_walkPlotter;
  WalkPlotter *mp_eachPlotter;
  bool m_plotDeadEnds;
  int m_verbosity;
  int m_min_perfect_match;
  bool m_rehabilitate_heap_explosions;
  bool m_abort_extension_explosions;
  bool m_use_ngv_in_merge;
  bool m_use_perfect_matches_only;
};


#endif
