// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology

#include "paths/KmerPathBreadthSearcher.h"

// KmerPathBreadthSearcher encapsulates the breadth-first search algorithm
// for walking inserts in kmer-space.

// Find all the paths that intersect the current path at the current
// segment, ignoring the starting path, and flagging the intersections
// found as possible closers if the intersecting path is the rc of the
// closing path.
void KmerPathBreadthSearcher::FindIntersections( const PathWalk &currentWalk,
						 const int currentSegIdx,
						 vec<PathIntersection> &intersections,
						 const int startingPathId,
						 const int closingPathId )
{
  const KmerPath &currentPath = currentWalk.GetPath();

  vec<longlong> rpintIdxs;
  Contains( m_pathsDB, currentPath.Segment( currentSegIdx ), rpintIdxs );

  for ( unsigned int i = 0; i < rpintIdxs.size(); ++i )
  {
    int rpintIdx = rpintIdxs[i];
    
    int pathId = m_pathsDB[ rpintIdx ].PathId();
    int pathPos = m_pathsDB[ rpintIdx ].PathPos();
    bool rc = ( pathId < 0 );
    if ( rc ) pathId = -pathId - 1;
    
    // Skip already used paths.
    //if ( currentWalk.ContainsRead( pathId ) ) 

    if ( pathId == startingPathId )
      continue;
    
    // Skip closing path unless it's rc.
    if ( pathId == closingPathId && ! rc )
      continue;
    
    const KmerPath *p_otherPath = ( rc ? &m_paths_rc[ pathId ] : &m_paths[ pathId ] );
    bool isClosingPath = ( rc && pathId == closingPathId );
    
    intersections.push_back( 
      PathIntersection( &currentPath, 
                        p_otherPath,
                        currentSegIdx, 
                        pathPos,
                        pathId, rc, 
                        isClosingPath ) );
  }
}


void 
KmerPathBreadthSearcher::FindPerfectExtensions( const KmerPath& basePath,
                                                const KmerPath& otherPath,
                                                const int basePathSeg,
                                                const int otherPathSeg,
                                                vec<MergedKmerPath>& merges )
{
  pair<KmerPathLoc,KmerPathLoc> locPair = 
    CreateAlignedLocs( basePath, otherPath,
                       basePathSeg, otherPathSeg );

  pair<KmerPathLoc,KmerPathLoc> leftScanPair = locPair;
  if ( ! ScanLeftPerfectMatchGaps( leftScanPair.first, leftScanPair.second ) )
    return;

  pair<KmerPathLoc,KmerPathLoc> rightScanPair = locPair;
  if ( ! ScanRightPerfectMatchGaps( rightScanPair.first, rightScanPair.second ) )
    return;

  MergedKmerPath merger;
  
  // If the new read extends the base path to the left, start with that bit.
  if ( ! ( leftScanPair.second == otherPath.Begin() ) )
  {
    otherPath.CopySubpath( otherPath.Begin(), leftScanPair.second, merger.path );
    basePath.CopySubpathNoFirstKmer( basePath.Begin(), basePath.End(), merger.path );
  }
  // Otherwise, just copy the base path.
  else
  {
    merger.path = basePath;
  }

  // Copy the new kmers from the read, if necessary.
  if ( ! ( rightScanPair.second == otherPath.End() ) )
    otherPath.CopySubpathNoFirstKmer( rightScanPair.second, otherPath.End(), merger.path );
  
  merger.left_end.first = leftScanPair.second.GetIndex() - otherPath.Begin().GetIndex();
  merger.left_end.second = leftScanPair.first.GetIndex() - basePath.Begin().GetIndex();

  merger.right_end.first = otherPath.End().GetIndex() - rightScanPair.second.GetIndex();
  merger.right_end.second = basePath.End().GetIndex() - rightScanPair.first.GetIndex();

  merger.given = merger.left_end.first + locPair.first.GetIndex();
  
  merges.push_back( merger );
}

// Find the extensions of the current walk.
//
// extensions is cleared on entry and then filled with the walks
// obtained by merging intersecting read paths into currentWalk.  If no
// merge is accepted and currentWalk.GetIndex() is still below
// currentWalk.GetNumKmerBlocks(), a copy of the walk advanced to the
// next kmer block is inserted instead.
//
// If currentWalk is already unimprovable, nothing is added to
// extensions; its path is inserted into closures if the walk contains
// the closer.
//
// Parameters:
//   startingPathId, closingPathId -- passed on to FindIntersections; the
//       rc of closingPathId is the path that closes a walk.
//   minAcceptablePathLength -- a merge with a possible closer is dropped
//       if the result's MaxLength() is below this.
//   maxAcceptablePathLength -- any merge is dropped if the result's
//       MinLength() exceeds this.
//   DEBUG_GAP_SIZES -- forwarded to MergePaths.
//
// Returns true if something was found: either the walk was unimprovable
// (extensions is then empty), or at least one extension was produced.
// Returns false for a dead end (no extension at all).
//
// Should change this to a wrapper around a call to the Extender class.
// (This version has lots of selection of good vs bad extensions built in;
// the Extender will return everything, and we need to evaluate them.)
//
// This is the old version, cut-and-pasted from WalkOneInsert.

bool KmerPathBreadthSearcher::FindExtensions( const PathWalk &currentWalk,
                                              set<PathWalk> &extensions,
                                              const int startingPathId,
                                              const int closingPathId,
                                              set<KmerPath> &closures,
                                              const int minAcceptablePathLength,
                                              const int maxAcceptablePathLength,
                                              bool DEBUG_GAP_SIZES )
{
  extensions.clear();

  const KmerPath &currentPath = currentWalk.GetPath();
  int currentPathKmerCount = currentPath.KmerCount();

  if ( currentWalk.Unimprovable() )
  {
    if ( currentWalk.ContainsCloser() )
    {
      if ( m_verbosity > 1 )
        cout << "Found closure." << endl;
      closures.insert( currentPath );
    }

    return true; // found something (even though extensions is empty())
  }

  // Find the intersections of read paths with the segment of the walk
  // we're currently working on.
  vec<PathIntersection> intersections;

  if ( m_use_perfect_matches_only )
  {
    FindIntersections( currentWalk, currentPath.NSegments()-1, intersections,
                       startingPathId, closingPathId );

    // Look for intersections with closer not involving last segment
    // of current walk (in case we walked right past it).
    const KmerPath* p_closer = &(m_paths_rc[closingPathId]);
    for ( int seg = 0; seg < currentPath.NSegments()-1; ++seg )
      if ( currentPath.Segment(seg).Contains( p_closer->FirstSegment().Start() ) )
        intersections.push_back( PathIntersection( &currentPath, p_closer,
                                                   seg, 0,
                                                   closingPathId, true, true ) );
  }
  else
  {
    // Gaps separate kmer blocks: count down gaps until we reach the
    // block numbered currentWalk.GetIndex(), collect intersections for
    // every sequence segment in it, and stop at the gap that ends it.
    int targetKmerBlock = currentWalk.GetIndex();
    // Only consulted by the disabled de-duplication code further down.
    int current_block_segments = 0;
    for ( int segIdx = 0; segIdx < currentPath.NSegments(); ++segIdx )
    {
      if ( currentPath.isGap( segIdx ) )
      {
        if ( --targetKmerBlock < 0 )
          break;
      }

      else // currentPath.isSeq( segIdx )
        if ( targetKmerBlock == 0 ) {
          FindIntersections( currentWalk, segIdx, intersections,
                             startingPathId, closingPathId );
          current_block_segments++;
        }
    }
  }

  if ( m_verbosity > 2 )
    PRINT( currentWalk );

  // Records that some merge brought in the closer; not read afterwards.
  bool addedCloser = false;

  int currentBlockSize = currentWalk.GetKmerBlockSize( currentWalk.GetIndex() );

  // intersections may contain many redundant items:
  // the same alignment, anchored by different matching
  // segments in a single gap-free block.  Avoid extra work...
  // Nuts -- for some reason this seems to hurt more than it helps.
//   if( current_block_segments > 1 )
//     sort( intersections.begin(), intersections.end() );

  for ( unsigned int i = 0; i < intersections.size(); ++i )
  {
//     if ( i>0 && intersections[i]==intersections[i-1] )
//       continue;

    PathIntersection &thisIntersect = intersections[i];

    if ( m_verbosity > 5 )
    {
      PRINT4( thisIntersect.PathId(), thisIntersect.Rc(),
              *(thisIntersect.Path2()), thisIntersect.Seg2() );
    }


    // Path1 is the current walk's path, Path2 the intersecting read.
    vec<MergedKmerPath> mergers;
    if ( m_use_perfect_matches_only )
      this->FindPerfectExtensions( *(thisIntersect.Path1()), *(thisIntersect.Path2()),
                                   thisIntersect.Seg1(), thisIntersect.Seg2(),
                                   mergers );
    else
      MergePaths( *(thisIntersect.Path1()), *(thisIntersect.Path2()),
                  thisIntersect.Seg1(), thisIntersect.Seg2(),
                  mergers,
                  m_min_perfect_match,
                  (m_use_ngv_in_merge ? mp_ngv : NULL),
                  DEBUG_GAP_SIZES );

    for ( unsigned int mergerIdx = 0; mergerIdx < mergers.size(); ++mergerIdx )
    {
      MergedKmerPath &merger = mergers[ mergerIdx ];

      if ( m_verbosity > 4 )
        PRINT( merger );

      // The result always starts where the old path started: never keep
      // anything the read added to the left of it.
      // NOTE(review): GetKmer()'s return type is not visible here;
      // confirm that int is wide enough to hold a kmer number.
      int firstKmerInOldPath = thisIntersect.Path1()->Begin().GetKmer();
      int startSegIdx = merger.left_end.first;
      KmerPathLoc start( merger.path, startSegIdx );
      start.SetKmer( firstKmerInOldPath );
      KmerPathLoc stop;
      if ( ! currentWalk.ContainsCloser() )
      {
        // We haven't brought in the closer yet
        if ( thisIntersect.IsPossibleCloser() )
        {
          // If we're merging in the closer, we want to trim to the
          // end of the closer (path2).
          int lastKmerInCloser = thisIntersect.Path2()->End().GetKmer();
          int endSegIdx = merger.path.NSegments() - merger.right_end.second - 1;
          stop = KmerPathLoc( merger.path, endSegIdx );
          stop.SetKmer( lastKmerInCloser );
        }
        else
        {
          // If we're merging in a non-closing read, we want to trim
          // to the end of the max of the two paths.
          stop = merger.path.End();
        }
      }
      else
      {
        // We've already brought in the closer, so we want the end of the first path.
        int lastKmerInOldPath = thisIntersect.Path1()->End().GetKmer();
        int endSegIdx = merger.path.NSegments() - merger.right_end.first - 1;
        stop = KmerPathLoc( merger.path, endSegIdx );
        stop.SetKmer( lastKmerInOldPath );
      }

      int mergerFirstAffected = max( merger.left_end.first, merger.left_end.second );
      // This is the leftmost place where merging happened.  But if this
      // is preceded by a gap, that gap may have shrunk, so decrement this!
      if( mergerFirstAffected > start.GetIndex()
          && merger.path.isGap( mergerFirstAffected-1 ) )
        --mergerFirstAffected;

      // Find the first segment where the merger is genuinely different:
      while( mergerFirstAffected < merger.given &&
             ( thisIntersect.Path1()->Segment(mergerFirstAffected - start.GetIndex())
               == merger.path.Segment(mergerFirstAffected) ) )
        mergerFirstAffected++;

      // If we have an NGV, check the negative gaps,
      // and restrict them where possible.
      // (Only examine merger.path from mergerFirstAffected on.)
      if( mp_ngv != NULL ) {
        if( ! mp_ngv->MakeValid( merger.path, mergerFirstAffected ) ) {
          if( m_verbosity > 4 ) {
            cout << "Merger rejected due to illegal negative gap" << endl;
          }
          continue;
        }
      }

      KmerPath resultPath;
      merger.path.CopySubpath( start, stop, resultPath );

      // Don't let a read already in the Required list add k-mers.
      // It might add useful (but lost) gap information, though (!).
      if ( currentWalk.RequiresRead( thisIntersect.PathId() ) ) {
        if ( resultPath.KmerCount() > currentPathKmerCount ) {
          if ( m_verbosity > 4 ) {
            cout << "Not allowing required read " << thisIntersect.PathId()
                 << " to add kmers." << endl;
          }
          continue;
        }
        else if ( ! ( resultPath == currentPath ) ) {
          if ( m_verbosity > 4 ) {
            cout << "Required read " << thisIntersect.PathId()
                 << " re-contributing helpful gap information!" << endl;
          }
        }
      }

      // Translate merger-relative segment indices into resultPath ones.
      int resultFirstAffected = max(0,mergerFirstAffected - start.GetIndex());
      int resultGiven = merger.given - start.GetIndex();


      // skip things that are too long.  (Can't do this earlier, since
      // negative gap validation might increase MinLength.)
      if ( resultPath.MinLength() > maxAcceptablePathLength )
        continue;

      // Don't add the closer path if the result would be too short.
      if ( thisIntersect.IsPossibleCloser() &&
           resultPath.MaxLength() < minAcceptablePathLength )
        continue;


      // Convert the two segment indices into kmer block indices by
      // counting the gaps seen up to and including each segment.
      // NOTE(review): useThisBlock is never read in this function.
      bool useThisBlock = false;
      int givenBlockIdx = 0, firstAffectedBlockIdx = 0;
      int blockIdx = 0;
      for ( int resultSegIdx = 0; resultSegIdx < resultPath.NSegments(); ++resultSegIdx )
      {
        if ( resultPath.isGap( resultSegIdx ) )
          ++blockIdx;  // do this first, so you "round up" if firstAffected is a gap
        if ( resultSegIdx == resultGiven )
          givenBlockIdx = blockIdx;
        if ( resultSegIdx == resultFirstAffected )
          firstAffectedBlockIdx = blockIdx;
      }

      if ( m_verbosity > 6 )
        PRINT2( resultGiven, givenBlockIdx );

      if ( m_verbosity > 6 )
        PRINT2( resultFirstAffected, firstAffectedBlockIdx );

      // Before labelling this closed, check its length.
      // But if this check fails, we still allow it to incorporate the
      // closer path as an intermediate step.  Perhaps that's wrong.
      PathWalk newWalk( resultPath, firstAffectedBlockIdx,
                        currentWalk.ContainsCloser() || thisIntersect.IsPossibleCloser() );
      newWalk.RequireReadsFrom( currentWalk );
      newWalk.RequireRead( thisIntersect.PathId() );


      // Check whether the priority of the new walk is greater than the
      // priority of the current walk.
      if ( m_verbosity > 3 &&
           currentWalk < newWalk )
      {
        cout << endl;
        cout << "I added a read path to a walk and it got WORSE!"
             << "  -- but maybe this is OK"
             << endl;
        PRINT( *(thisIntersect.Path2()) );
        PRINT( merger );
        PRINT( currentWalk );
        PRINT( newWalk );
        cout << endl;
      }


      // The new walk must be lower priority than the currentWalk to keep it.
      // THIS WILL HAVE TO CHANGE IF THE PRIORITY FUNCTION ISN'T MONOTONIC --
      // and I think it shouldn't be.  But one thing at a time.
      if ( ! ( newWalk < currentWalk ) )
      {
        if ( m_verbosity > 5 )
        {
          PRINT( newWalk );
          cout << "No improvement." << endl;
        }
        continue;
      }

      if ( ! thisIntersect.IsPossibleCloser() &&
           ! m_use_perfect_matches_only )
        ForceAssertGe( newWalk.GetKmerBlockSize( givenBlockIdx ), currentBlockSize );

      if ( thisIntersect.IsPossibleCloser() &&
           ! currentWalk.ContainsCloser() )
        addedCloser = true;

      pair<set<PathWalk>::iterator,bool> result =
        PathWalkSetInsert( extensions, newWalk );

      if ( m_verbosity > 3 &&
           result.second == true )
        PRINT( newWalk );

    }
  }

  // If we can't find any extensions using the current gap-free
  // stretch, presume the gap is uncloseable and jump to the next
  // gap free stretch.
  if ( extensions.empty() )
  {
    if ( currentWalk.GetIndex() < currentWalk.GetNumKmerBlocks() )
    {
      if ( m_verbosity > 3 )
        cout << "Proceeding to next gap." << endl;
      PathWalk newWalk( currentPath, currentWalk.GetIndex() + 1,
                        currentWalk.ContainsCloser() );
      newWalk.RequireReadsFrom( currentWalk );
      PathWalkSetInsert( extensions, newWalk );
    }
  }

  // True means "found something", matching the early return above (a
  // closure was already reported there), so a dead end -- no extensions
  // at all -- must yield false.  Returning extensions.empty() here had
  // the sense inverted.
  return ! extensions.empty();
}



// The heart of the search from WalkOneInsert, m_utatis m_utandis
// (It is a sad day when your comments contain jokes with zero intended audience.)

void KmerPathBreadthSearcher::FindClosures( const KmerPath& startingPath, 
					    const KmerPath& closingPath,
					    const int startingPathId,
					    const int closingPathId,
					    const int minAcceptableLength,
					    const int maxAcceptableLength,
					    Result &result ) {
  // Initialize result
  result.closures.clear();
  result.explosion.clear();
  result.maxHeapSize = 0;
  result.extensionExploded = false; 

  if( mp_walkPlotter )
    mp_walkPlotter->Clear();

  set<PathWalk> extensions;
  set<KmerPath>& closures = result.closures;

  PathWalk startingWalk( startingPath, 0 );

  // Only do this if we seem to be starting a walk across a full insert
  if( startingPath == m_paths[startingPathId] )
    startingWalk.RequireRead( startingPathId );

  mp_heap->Clear();
  mp_heap->Push( startingWalk );

  unsigned int& maxHeapSize = result.maxHeapSize;
  maxHeapSize = mp_heap->Size();
  bool heapExploded = false;

  while ( mp_heap->Size() > 0 ) {
    if ( mp_eachPlotter ) {
      mp_eachPlotter->Clear();
      HeapPlotVisitor plotVisitor( mp_eachPlotter );

      mp_heap->Accept( plotVisitor );

      mp_eachPlotter->Plot();
    }

    const PathWalk& worstWalk = mp_heap->GetTop();

    bool extended = FindExtensions( worstWalk, extensions, 
				    startingPathId, closingPathId,
				    closures,
				    minAcceptableLength, maxAcceptableLength );
      
    // NB: closed paths return extended=True
    if( ! extended && mp_walkPlotter && m_plotDeadEnds) { 
      // hand the dead-end KmerPath in *worstWalk to the WalkPlotter
      mp_walkPlotter->Add( worstWalk.GetPath(), WalkPlotter::DEAD_END );
    }	
      
    PathWalk oldWorstWalk( worstWalk );  
    // EXPERIMENTAL: use for reference if heap explodes
    mp_heap->Pop();

    // EXPERIMENTAL: if this would overwhelm the pile, abort now?
    // This may make walking take much longer, since we keep going
    // where we otherwise would have exited due to heap explosion.
    if( m_abort_extension_explosions && 
	extensions.size() > m_heap_size_limit ) {
      heapExploded = true; // But these aren't passed to the plotter.
      result.extensionExploded = true;
      if( m_verbosity > 0 )
	cout << "Not adding these " << extensions.size() 
	     << " extensions -- too many!" << endl;
    }
    else
    {
      for ( set<PathWalk>::iterator iter = extensions.begin();
            iter != extensions.end(); ++iter )
        mp_heap->Push( *iter );
    }

    switch ( m_verbosity )
      {
      case 0:
	break;
      case 1:
	if ( mp_heap->Size() > maxHeapSize )
	  if ( maxHeapSize / 100 < mp_heap->Size() / 100 )
	    cout << "." << flush;
	break;
      case 2:
	if ( mp_heap->Size() > maxHeapSize )
	  cout << "Heap grew to " << mp_heap->Size() << endl;
	break;
      default:
	PRINT( mp_heap->Size() );
      }

    maxHeapSize = max<unsigned int>( maxHeapSize, mp_heap->Size() );

    // If the heap has exploded...
    if ( mp_heap->Size() > m_heap_size_limit ) 
      {
	if ( m_verbosity > 0 )
	  cout << "Heap exploded" << endl;

	heapExploded = true;

        HeapPathExtractor extractor( &result.explosion );
        extractor.SetCopyOpenPaths( true );
        extractor.SetCopyClosedPaths( false );
        mp_heap->Accept( extractor );

        if ( mp_walkPlotter )
        {
          HeapPlotVisitor plotVisitor( mp_walkPlotter );
          plotVisitor.SetPlotOpenPaths( true );
          plotVisitor.SetOpenType( WalkPlotter::HEAP_EXPLOSION );
          plotVisitor.SetPlotClosedPaths( false );

          mp_heap->Accept( plotVisitor );
        }

	// Delete the open paths -- we keep improving the closed ones
	mp_heap->RemoveIf( OpenPathWalkFilter() );

	if ( m_verbosity > 0 )
	  cout << "Found " << mp_heap->Size() << " closed paths."<< endl;

	// If the heap is still too large, break.

	if ( mp_heap->Size() > m_heap_size_limit )
	  break;
      } // end of handling heap explosion
  } // exits this loop when theWalkPile is empty or too large

  if( mp_walkPlotter ) {
    for ( set<KmerPath>::iterator closureIter = closures.begin();
	  closureIter != closures.end(); ++closureIter )
      mp_walkPlotter->Add( *closureIter, WalkPlotter::CLOSED );
  }

}
