// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology

#include "paths/PileReadsOnPath.h"

/// Take a path and find all reads which sit on it.
/// Fills in a vector of ReadOnPath records; each one holds a pair of
/// PathEmbeddings (into the read and into the path) plus the read id.
/// This must be passed a list<KmerPath> that it can add to for storage
/// of subpaths it creates when reads partially overhang the path.
/// Deleting this list will invalidate embeddings of overhanging reads.


//  Each time we find a read which might overhang on the left or right,
//  we save the details in a last_contact.  At the end we look for
//  pairs of matching LCs to find reads which overhang on both sides.
struct last_contact {
  int path_id;
  KmerPathLoc read_loc;
  int path_seg;

  last_contact( int pid, KmerPathLoc loc, int seg )
    : path_id(pid), read_loc(loc), path_seg(seg) { }

  // We will want to sort these by path_id.
  friend bool operator<(const last_contact& lhs, const last_contact& rhs)
  { return( lhs.path_id < rhs.path_id ); }

};


void ReadsOnPathPiler::PileReadsOnPath( const KmerPath& path,
					vec<ReadOnPath>& pile,
					list<KmerPath>& subpath_storage,
					bool allow_overhang ) const {

  // subpath_storage.back() is always available for use.
  KmerPath AVAIL;
  subpath_storage.push_back(AVAIL);

  vec<longlong> db_indices;

  vec<last_contact> left_overhangs;
  vec<last_contact> right_overhangs;

  for( int seg=0; seg < path.NSegments(); seg++ ) {
    if( path.isGap(seg) ) continue;

    // Search pathsDB for things that overlap this segment:
    Contains( pathsDB, path.Segment(seg), db_indices );

    for( uint i=0; i < db_indices.size(); i++ ) {

      const tagged_rpint& hit = pathsDB[db_indices[i]];
      const int id = hit.PathId();
      const KmerPath& read = ( id>=0 ? paths[id] : paths_rc[-id-1] );

      // Every completely embedded read must have a unique hit on its segment 0.
      if( hit.PathPos() == 0 ) {
	vec<PathEmbedding> embeddings;
	FindPathEmbeddings( read, path, embeddings, seg );
	// Push a ReadOnPath for this embedding onto the return pile:
	for( vec<PathEmbedding>::iterator emb = embeddings.begin();
	     emb != embeddings.end(); emb++ )
	  pile.push_back( ReadOnPath( IdentityEmbedding(*(emb->GetSubPath())),
				      *emb, id ) );
      }

      if( ! allow_overhang ) continue;

      // Find things that overhang on the left:
      if( (seg == 0                      // fall off
	   && hit.Start() <= path.Start(seg)
	   && !(hit.PathPos()==0 && hit.Start()==path.Start(seg)) )
	  ||
	  (read.isGap( hit.PathPos()-1 ) // jump off via a gap
	   && hit.Start() >= path.Start(seg)
	   && read.Maximum( hit.PathPos()-1 ) >=
	      path.MinLength(0,seg-1) + hit.Start() - path.Start(seg)) ) {
	KmerPathLoc start( read, hit.PathPos() );
	start.SetKmer( max( path.Start(seg), hit.Start() ) );
	KmerPathLoc stop = read.End();
	// Remeber this point of last contact, for later use:
	left_overhangs.push_back(last_contact( id, start, seg ));

	PathEmbedding sub_read = SubpathEmbedding( read, start, stop,
						   &subpath_storage.back() );
	vec<PathEmbedding> embeddings;
	FindPathEmbeddings( *sub_read.GetSubPath(), path, embeddings, seg );
	if( ! embeddings.empty() )
	  subpath_storage.push_back(AVAIL);  // old back() is now in use.
	// Push a ReadOnPath for each embedding onto the return pile:
	for( vec<PathEmbedding>::iterator emb = embeddings.begin();
	     emb != embeddings.end(); emb++ )
	  pile.push_back( ReadOnPath( sub_read, *emb, id ) );
	
      }
	
      // Find things that overhang on the right:
      if( (seg == path.NSegments()-1     // fall off
	   && hit.Stop() >= path.Stop(seg)
	   && !(hit.PathPos()==read.NSegments()-1 && hit.Stop()==path.Stop(seg)) )
	  ||
	  (read.isGap( hit.PathPos()+1 ) // jump off via a gap
	   && hit.Stop() <= path.Stop(seg)
	   && read.Maximum( hit.PathPos()+1 ) >=
	      path.MinLength(seg+1,path.NSegments()-1)
	      + path.Stop(seg) - hit.Stop() ) ) {
	KmerPathLoc start = read.Begin();
	KmerPathLoc stop( read, hit.PathPos() );
	stop.SetKmer( min( path.Stop(seg), hit.Stop() ) );
	// Remeber this point of last contact, for later use:
	right_overhangs.push_back(last_contact( id, stop, seg ));

	PathEmbedding sub_read = SubpathEmbedding( read, start, stop,
						   &subpath_storage.back() );
	vec<PathEmbedding> embeddings;
	FindPathEmbeddings( *sub_read.GetSubPath(), path, embeddings,-1,seg);
	if( ! embeddings.empty() )
	  subpath_storage.push_back(AVAIL);  // old back() is now in use.
	// Push a ReadOnPath for each embedding onto the return pile:
	for( vec<PathEmbedding>::iterator emb = embeddings.begin();
	     emb != embeddings.end(); emb++ )
	  pile.push_back( ReadOnPath( sub_read, *emb, id ) );
      }
    }
  }

  if( allow_overhang ) {
    // Now look through the vectors of left and right overhangs for any
    // reads which might hang off on both sides.  Presumably this is
    // impossible if the path is a full insert walk, but it can happen
    // if the path we're given is short, eg an edge of a HyperGraph.

    // This sorts by path_id:
    sort( left_overhangs.begin(), left_overhangs.end() );
    sort( right_overhangs.begin(), right_overhangs.end() );

    vec<last_contact>::iterator right_iter,
      left_point = left_overhangs.begin(), 
      right_point = right_overhangs.begin();

    while( left_point != left_overhangs.end() &&
	   right_point != right_overhangs.end() ) {
      // If we have a path_id match, process the top left_point and all
      // matching right_points.  (right_iter steps through them.)
      if( left_point->path_id == right_point->path_id )
	for( right_iter = right_point;
	     right_iter != right_overhangs.end() 
	       && right_iter->path_id == left_point->path_id;
	     right_iter++ ) {
	  if( right_point->read_loc < left_point->read_loc ) continue;
	  PathEmbedding sub_read = SubpathEmbedding( left_point->read_loc.GetPath(),
						     left_point->read_loc,
						     right_iter->read_loc,
						     &subpath_storage.back() );
	  vec<PathEmbedding> embeddings;
	  FindPathEmbeddings( *sub_read.GetSubPath(), path, embeddings,
			      left_point->path_seg, right_iter->path_seg );
	  if( ! embeddings.empty() )
	    subpath_storage.push_back(AVAIL);  // old back() is now in use.
	  // Push a ReadOnPath for each embedding onto the return pile:
	  for( vec<PathEmbedding>::iterator emb = embeddings.begin();
	       emb != embeddings.end(); emb++ )
	    pile.push_back( ReadOnPath( sub_read, *emb, left_point->path_id ) );
	}
      // Increment whichever pointer is pointing to the smaller path_id.
      // In case of a tie, increment left_point, since we might be working through
      // multiple left_points with the same collection of matching right_points.
      if( left_point->path_id <= right_point->path_id )
	left_point++;
      else
	right_point++;
    }
  }

  // The back of subpath_storage is an available (unused) spot.
  subpath_storage.pop_back();
}


void ConvertEmbeddingsToLocs( const vec<ReadOnPath>& ROPs,
			      vec<ReadOnSuperBaseVector>& SBVlocs,
			      const KmerBaseBroker* kbb ) {
  SBVlocs.clear();
  SBVlocs.reserve( ROPs.size() );

  for( vec<ReadOnPath>::const_iterator ROP = ROPs.begin(); 
       ROP != ROPs.end(); ROP++ ) {

    const KmerPathLoc path_begin = ROP->intoPath.SubStartLoc();
    const KmerPathLoc path_end   = ROP->intoPath.SubStopLoc();

    const int id = ROP->readId;

    const KmerPathLoc read_begin = ROP->intoRead.SubStartLoc();
    const KmerPathLoc read_end   = ROP->intoRead.SubStopLoc();

    const ReadOnSuperBaseVector rosbv( kbb->ToSequenceLocFirst( path_begin ),
				       kbb->ToSequenceLocLast( path_end ),
				       id,
				       kbb->ToReadBaseFirst( read_begin, id ),
				       kbb->ToReadBaseLast( read_end, id ) );
    SBVlocs.push_back( rosbv );
  }
}


    

