/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

// Define class HyperKmerPath, which is a directed graph whose edges are
// KmerPaths, and other ancillary classes.

#ifndef HYPER_KMER_PATH_H
#define HYPER_KMER_PATH_H

#include "Basevector.h"
#include "CoreTools.h"
#include "Equiv.h"
#include "graph/Digraph.h"
#include "graphics/BasicGraphics.h"
#include "paths/CompletedInsert.h"
#include "paths/KmerBaseBroker.h"
#include "paths/KmerPath.h"
#include "paths/NegativeGapValidator.h"
#include "paths/PileReadsOnPath.h"
#include "ReadPairing.h"

// Forward declarations, so that the classes below can name these types in
// member declarations before the full definitions have been seen.
class PointedSubPathPair;       // forward declaration (defined later in this file)
class MultiKmerPathLoc;         // forward declaration (defined later in this file)
class KmerPathSplitPoint;       // forward declaration (defined later in this file)
class KmerPathCorrespondence;   // forward declaration (defined later in this file)
class HyperSloppyReadPlacement; // forward declaration (defined later in this file)
class CompletedInsert;          // forward declaration (presumably defined in
                                // paths/CompletedInsert.h, included above)

/**
   Class: HyperKmerPath

   A graph in which each edge is a <KmerPath> ( and each node is just something that
   holds adjacent edges together ).  The graph is a factored representation of a set
   of <KmerPaths>.  The graph *may be disconnected* (i.e. have several connected components).
   Note that in this graph, there may be many different edges between a given
   pair of nodes ( or many different self-loops on a given node ).

   The nodes of this graph have no interpretation of their own; they're just places
   where the edges join.  Each edge, on the other hand, is a KmerPath.

   Some main uses of HyperKmerPaths:

   For each <long-insert pair>, a HyperKmerPath is built representing the possible
   <closures> of the pair.
   
   From each <neighborhood>, <LocalizeReads> constructs a HyperKmerPath representing
   the possible actual sequences of that neighborhood in the genome.  These are then
   joined into increasingly larger graphs until a graph representation of the entire
   assembly is built.

   See also <HyperBasevector>, which is a sequence-space equivalent of HyperKmerPath.
*/
class HyperKmerPath : public digraphE<KmerPath> {

     public:

     // ===========================================================================
     // ======================= INTEGRITY TESTS (partial) =========================
     // ===========================================================================

     // TestValid, TestKmersGood: partial consistency checks, implemented outside
     // this header.
     // NOTE(review): both return void, so failures are presumably reported by
     // asserting -- confirm against the implementation.

     void TestValid( ) const;
     void TestKmersGood( const KmerBaseBroker& kbb ) const;

     // ===========================================================================
     // ====================== CONSTRUCTORS AND THEIR KIN =========================
     // ===========================================================================

     // Constructor 1: empty HyperKmerPath.  K is set to 0 so that K( ) never
     // returns an indeterminate value; use SetK to supply the real value.

     HyperKmerPath( ) : K_(0) { }

     // Constructors 2ab: given a collection of KmerPaths, create a graph having
     // one edge and two vertices for each of the edge objects

     HyperKmerPath( int K, const vec<KmerPath>& p )
          : digraphE<KmerPath>( p, EDGES_SEPARATE ), K_(K) { }

     HyperKmerPath( int K, const vecKmerPath& p )
          : digraphE<KmerPath>( VecOfKmerPath( p ), EDGES_SEPARATE ), K_(K) { }

     // Constructor 3: given an equivalence relation on a bunch of KmerPaths, build
     // the HyperKmerPath having two vertices per equivalence class, with one edge
     // between those two vertices for each member of the equivalence class.

     HyperKmerPath( int K, const vecKmerPath& p, const equiv_rel& e )
          : digraphE<KmerPath>( VecOfKmerPath( p ), e ), K_(K) { }

     // Constructor 4: given a HyperKmerPath, and given a list of vertex indices,
     // create the HyperKmerPath having those vertices (with indices starting at 0,
     // but in the given order), and having all the edges that were between those
     // vertices.  Thus this is a "complete subgraph" constructor.

     HyperKmerPath( const HyperKmerPath& h, const vec<vrtx_t>& v )
          : digraphE<KmerPath>( static_cast< const digraphE<KmerPath>& >(h), v ),
          K_( h.K( ) ) { }

     // Constructor 5: from a vec<CompletedInsert>.

     HyperKmerPath( int K, const vec<CompletedInsert>& inserts );

     // Constructor 6: from a file.

     explicit HyperKmerPath( const String& filename );

     // Constructor 7: extract a given component from another HyperKmerPath.

     HyperKmerPath( const HyperKmerPath& h, int n )
          : digraphE<KmerPath>( static_cast< const digraphE<KmerPath>& >(h), n ),
          K_( h.K( ) ) { }

     // Constructor 8: from a HyperKmerPath h and an equivalence relation on its
     // vertices.  This identifies vertices according to the given equivalence
     // relation.

     HyperKmerPath( const HyperKmerPath& h, const equiv_rel& e )
          : digraphE<KmerPath>( static_cast< const digraphE<KmerPath>& >(h), e ),
          K_( h.K( ) ) { }

     // Constructor 9: from the disjoint union of some HyperKmerPaths.

     HyperKmerPath( int K, const vec<HyperKmerPath>& v )
     {    SetK(K);
          SetToDisjointUnionOf(v);    }

     // SetToDisjointUnionOf: clear a given HyperKmerPath and set it to the disjoint
     // union of a given collection of HyperKmerPaths.

     void SetToDisjointUnionOf( const vec<HyperKmerPath>& v );

     // Constructor 10: from K, a digraphE g, and some edge objects.  This ignores
     // the edge objects of g and puts in the new edge objects.

     template<class T>
     HyperKmerPath( int K, const digraphE<T>& g, const vec<KmerPath>& edges )
          : digraphE<KmerPath>( g, edges ), K_(K) { }

     // Constructor 11: from raw data.
     // NOTE(review): the base class is handed to_edge_obj BEFORE from_edge_obj,
     // i.e. in the opposite order from this constructor's own parameter list.
     // Confirm that this matches the parameter order of the digraphE constructor.

     HyperKmerPath( int K, const vec< vec<vrtx_t> >& from, const vec< vec<vrtx_t> >& to,
          const vec<KmerPath>& edges, const vec< vec<int> >& from_edge_obj,
          const vec< vec<int> >& to_edge_obj )
          : digraphE<KmerPath>( from, to, edges, to_edge_obj, from_edge_obj ),
          K_(K) { }

     // Initialize: overwrite this object with raw data.  Sets K and assigns the
     // adjacency tables, the edge-object index tables and the edge objects
     // directly from the arguments, replacing whatever was there.

     void Initialize( int K, const vec< vec<vrtx_t> >& from, const vec< vec<vrtx_t> >& to,
          const vec<KmerPath>& edges, const vec< vec<int> >& from_edge_obj,
          const vec< vec<int> >& to_edge_obj )
     {    K_ = K;
          from_ = from;
          to_ = to;
          FromEdgeObjMutable( ) = from_edge_obj;
          ToEdgeObjMutable( ) = to_edge_obj;
          EdgesMutable( ) = edges;    }

     // Constructor 12: from a collection of HyperKmerPaths and a set of
     // identifications between vertices in their disjoint union, each of which is
     // specified as ( (g1,v1), (g2,v2) ) where g1, g2 refer to HyperKmerPaths and
     // v1, v2 refer to vertices on those HyperKmerPaths.

     HyperKmerPath( int K, const vec<HyperKmerPath>& g,
          const vec< pair< pair<int,vrtx_t>, pair<int,vrtx_t> > >& joins )
     {    // Copy out the digraphE part of each input, since the base-class
          // Initialize works on plain digraphE objects.
          vec< digraphE<KmerPath> > gg( g.size( ) );
          for ( int i = 0; i < g.isize( ); i++ )
               gg[i] = digraphE<KmerPath>( g[i] );
          digraphE<KmerPath>::Initialize( gg, joins );
          SetK(K);    }

     // Constructor 13: from a given HyperKmerPath and a collection of subsets of
     // its edges, each of which is given the induced subgraph structure, which are
     // then merged into a disjoint union.

     HyperKmerPath( const HyperKmerPath& h, const vec< vec<int> >& C )
          : digraphE<KmerPath>( static_cast< const digraphE<KmerPath>& >(h), C ),
          K_( h.K( ) ) { }

     // K, SetK: read and set the value K stored with the graph.

     int K( ) const { return K_; }
     void SetK( int K ) { K_ = K; }

     // ===========================================================================
     // ============================= EDITORS =====================================
     // ===========================================================================

     // Reverse the component containing a given vertex.

     void ReverseComponent( vrtx_t v );

     // Remove components having less than a specified number of kmers.  For this,
     // the number of kmers in a component is defined to be the sum of the lengths
     // of its edges (which may not be the most useful definition).

     void RemoveSmallComponents( int min_kmers );

     // Reverse entire graph.

     void Reverse( );

     // ContractEmptyEdges: For each edge  v ------> w labelled with the
     // empty KmerPath, delete the edge and pull all edges of w into v.

     void ContractEmptyEdges( );

     // ReduceLoops: Wherever we have u ------> v <------> w ------> x, change
     // v <------> w into a self-loop at v and remove w.

     void ReduceLoops( );

     // RemoveWeakBits: Suppose that one of the following two situations is
     // encountered:
     // (a) there are edges X ---> Y, Y ---> Z of length >= 4kb, and an edge
     //     w: B ---> Y or w: Y ---> B of length <= 100, supported by only one read,
     //     and B is a source or sink, and there are no other edges involving Y;
     // (b) there are edges X ---> Y, Z ---> W of length >= 4kb, and edges
     //     w: Y ---> Z, k: Y ---> Z of length <= 100, such that w is supported by
     //     only one read, k is supported by at least three reads, and k has no
     //     gaps, and there are no other edges involving Y and Z.
     // Then edge w is deleted.

     void RemoveWeakBits( const vec<HyperSloppyReadPlacement>& locs );

     // MethodDecl: CanonicalizeEdges
     // Canonicalize the KmerPaths on all the edges.
     void CanonicalizeEdges();

     // Zipper: look for two edges that start at the same vertex and go to a
     // different vertex, and such that the edges agree at the beginning.
     // Merge up to the point where they disagree.  Ditto for reverse.
     //
     // INCOMPLETELY IMPLEMENTED.

     void Zipper( );

     // Compress edge objects, so that any adjacent and mergeable
     // KmerPathIntervals are merged.

     void CompressEdgeObjects( );

     // Remove edges which have no read support.

     void RemoveReadlessEdges( const vec<HyperSloppyReadPlacement>& locs );

     // ===========================================================================
     // ============================ PRINTERS =====================================
     // ===========================================================================

     // PrintSummary: generate one line per edge, e.g.
     //     35 --- 523 +/- 8 --> 16
     // would be outputted for an edge from vertex 35 to vertex 16 that has a
     // mean length in kmers of 523 and a variability of 8 (resulting from gaps).

     void PrintSummary( ostream& out ) const;

     // PrintSummaryPlus: like PrintSummary, but also
     // 1. Print kmer ranges for each edge.
     // 2. Organize by graph component and within component, by rough order.
     // 3. Optionally, show reads on paths.

     void PrintSummaryPlus( ostream& out,
          const vec<HyperSloppyReadPlacement>* locs = 0, KmerBaseBroker* kbb = 0,
          vec<read_pairing>* pairs = 0, vec<int>* pairs_index = 0,
          int max_reads_to_show_per = 0, Bool print_kmer_ranges = False,
          const vec<String>* edge_remarks = 0,
          Bool print_component_id_line = True,
          const vec<String>* component_remarks = 0,
          const vec<Bool>* component_remarks_only = 0 ) const;

     // Create a fasta file having one record per edge, with gaps replaced by Ns.

     void DumpFasta( const String& fn, const KmerBaseBroker& kbb ) const;

     // Method: DumpGraphML
     // Output the structure of the HyperKmerPath in a textual format that can be
     // easily read without reference to our code base.
     void DumpGraphML( const String& graphMLFileNam ) const;


     // PrintSummaryDOT: similar to PrintSummary but generate DOT output.
     // PrintSummaryDOT0: similar but don't label edges
     // PrintSummaryDOT0w: similar but don't label edges, do weight them, and
     // color-code:
     // < 100 kmers: gray
     // 100-1000 kmers: black
     // 1000-10000 kmers: red
     // > 10000 kmers: magenta

     void PrintSummaryDOT( ostream& out ) const;
     void PrintSummaryDOT0( ostream& out ) const;
     void PrintSummaryDOT0w( ostream& out, Bool label_contigs = True,
          Bool label_vertices = False, Bool label_edges = False,
          const vec<vrtx_t>* componentsToPrint = NULL ) const;

     // DisplayEdge: display the placements of reads on a given edge.  The optional
     // arguments start and stop are measured under the hypothesis that gaps are
     // assigned their minimum possible values.

     void DisplayEdge( vec<graphics_primitive>& picture, int e, int K,
          const vec<HyperSloppyReadPlacement>& locs, const KmerBaseBroker& kbb,
          int start = -1, int stop = -1 ) const;

     // Code to show links and merge using them.

     void ShowLinks( const vec<HyperSloppyReadPlacement>& locs,
          const vec<read_pairing>& pairs, const vec<int>& trims,
          Bool silent = False, Bool allow_short_edges = False );

     // ===========================================================================
     // =========================== BINARY I/O ====================================
     // ===========================================================================

     // Single-object binary I/O on a file descriptor; implemented elsewhere.

     friend void BinaryWrite( int fd, const HyperKmerPath& h );
     friend void BinaryRead( int fd, HyperKmerPath& h );

     // Binary I/O for a vec of HyperKmerPaths, forwarded to the generic
     // BinaryWriteComplex / BinaryReadComplex routines.

     friend void BinaryWrite( int fd, const vec<HyperKmerPath>& h )
     {    BinaryWriteComplex( fd, h );    }
     friend void BinaryRead( int fd, vec<HyperKmerPath>& h )
     {    BinaryReadComplex( fd, h );    }

     // ===========================================================================
     // ============================== OTHER ======================================
     // ===========================================================================

     // MakeEdgeDatabase: fill edgedb from the edges of this graph (implemented
     // elsewhere).
     // NOTE(review): presumably one tagged_rpint per kmer interval of each edge,
     // tagged with the edge id -- confirm against the implementation.

     void MakeEdgeDatabase( vec<tagged_rpint>& edgedb ) const;

     // EdgeLength: the KmerCount of the KmerPath on edge e.

     nkmers_t EdgeLength( edge_t e ) const { return EdgeObject(e).KmerCount( ); }

     // PathLengths: find the lengths of all directed paths from x to y, excluding
     // those which use the same vertex more than once.
     //
     // If x = -1, instead find lengths of all directed paths from a source to y.
     // If y = -1, instead find lengths of all directed paths from x to a sink.
     //
     // If maxpaths >= 0 and more than maxpaths paths are seen, fail by returning
     // False.

     Bool PathLengths( vrtx_t x, vrtx_t y, vec< pair<int,int> >& lengths,
          int maxpaths = -1, Bool include_gaps = True ) const;

     // GroupReadPlacements: The idea is that a single read may stretch across more
     // than one edge, and that thus the correct way to think about the placement is
     // as a sequence of placements along consecutive edges.  This code finds
     // plausible groups, but does not actually check that the groups are valid.

     void GroupReadPlacements( const vec<HyperSloppyReadPlacement>& locs,
          int nreads, vec< vec< vec<int> > >& locgrps ) const;

     // GetPath(EmbeddedSubPath): return the concatenation of the paths in the
     // subpath.  Only the argument is read, so this is usable on a const graph.

     KmerPath GetPath( const EmbeddedSubPath<KmerPath>& s ) const
     {    KmerPath answer;
          for ( int i = 0; i < s.NEdges( ); i++ )
               answer.Append( s.EdgeObject(i) );
          return answer;    }

     // AlignSubpaths.  Given a PointedSubPathPair, look for partial alignments
     // extending the matching kmers, and return the number naligns of alignments
     // found.  If naligns = 1, return details as follows:
     // - ends[0] and ends[1]: positions of leftmost and rightmost kmers of
     //   partial alignment on first and second sequences;
     // - M: merged path
     // - i0,...,im: positions of first sequence's vertices on M, if in
     //   the aligning part;
     // - j0,...,jn: positions of second sequence's vertices on M, if in
     //   the aligning part.
     // CHANGED: if naligns > 1, return details for the FIRST alignment found.

     void AlignSubpaths( const PointedSubPathPair& p, int& naligns,
          vec< pair<MultiKmerPathLoc,MultiKmerPathLoc> >& ends, KmerPath& M,
          vec<KmerPathSplitPoint>& i, vec<KmerPathSplitPoint>& j,
          const NegativeGapValidator& ngv ) const;

     // SharedKmers: find all pairs of edges i <= j which share a kmer, returning
     // a vec<KmerPathCorrespondence> to capture this info.  We do not
     // return multiple position pairs which are simple shifts of each other along
     // path segments.  Also, we do not return identity matches.

     void SharedKmers( vec<KmerPathCorrespondence>& shares, int min_improper );

     // De novo piler of reads on a HyperKmerPath.  The resulting locs are sorted.

     void PileReadsOnHyperPath( vec<HyperSloppyReadPlacement>& locs,
          const ReadsOnPathPiler& piler, const vec<read_pairing>& pairs,
          const vec<int>& pairs_index, const vec<int>& trims,
          const vec<CompletedInsert>& inserts );

     // HappyPlacement: implemented elsewhere.
     // NOTE(review): presumably tests whether placements locs[i1] and locs[i2] of
     // the two reads of pairing P are consistent with that pairing -- confirm
     // against the implementation, including the meaning of trims and comp.

     Bool HappyPlacement( int i1, int i2, const vec<HyperSloppyReadPlacement>& locs,
          const read_pairing& P, const vec<int>& trims, const vec<int>& comp );


     // MethodDecl: FindIsomorphicComponents
     // Find HyperKmerPath connected components isomorphic to at least one other
     // connected component.
     // Return the equivalence relation of the components, and the list of ids of
     // those components that have an isomorphic partner in the graph.
     // If you just want to know whether the graph has _any_ isomorphic components,
     // call with stopIfFoundOne==True.
     Bool FindIsomorphicComponents( equiv_rel& componentRelation,
          vec<vrtx_t>& isomorphicComponentReps,
          Bool stopIfFoundOne = False ) const;


     // ===========================================================================
     // ============================= PRIVATE =====================================
     // ===========================================================================

     private:

     int K_; // the K supplied at construction or via SetK; 0 if never set

};  // class HyperKmerPath


/// ComponentsAreIsomorphic:
///
/// Given two HyperKmerPaths and one vertex on each, check
/// whether the connected components containing those vertices
/// are isomorphic, with the vertices in correspondence.
/// That is, check whether there is a map from vertices of hkp1
/// to vertices of hkp2 so that corresponding vertices are joined
/// by edges labelled with the same KmerPaths.  (Only the vertices
/// in the connected components of the seed vertices, though.)
///
/// For this to be feasible (in polynomial time :-), we require
/// that the edges in/out of each vertex all bear distinct labels.
/// The function asserts if this is not the case.
///
/// There is an optional vec<vrtx_t>* argument, p_isomorphism.  If
/// the components are indeed isomorphic, then the vector it points
/// to will be filled with the isomorphism: v[i]=j means vertex i in
/// hkp1 matches up with vertex j in hkp2, and v[i]=-1 means vertex i
/// was not in the connected component of the seed vertex.  Pass NULL
/// (the default) if the isomorphism itself is not wanted.
///
/// This is a global function, not a member function.

bool ComponentsAreIsomorphic( const HyperKmerPath& hkp1, vrtx_t seed_vx_1,
			      const HyperKmerPath& hkp2, vrtx_t seed_vx_2,
			      vec<vrtx_t>* p_isomorphism = NULL );


// A HyperKmerPathLoc represents the location of a kmer on a HyperPath.  Note that
// the definition is NOT robust: changing the HyperKmerPath will invalidate the
// location.

class HyperKmerPathLoc {
     
     public:

     HyperKmerPathLoc( ) : v_(0), fv_(0) { }
     HyperKmerPathLoc( vrtx_t v, int fv, const KmerPathLocAlt& loc )
          : v_(v), fv_(fv), loc_(loc) { }

     vrtx_t Vertex( ) const { return v_; }
     int EdgeFromVertex( ) const { return fv_; }
     KmerPathLocAlt& LocOnEdge( ) { return loc_; }
     const KmerPathLocAlt& LocOnEdge( ) const { return loc_; }
     int Interval( ) const { return loc_.Interval( ); }
     int PosOnInterval( ) const { return loc_.PosOnInterval( ); }

     friend Bool operator<( const HyperKmerPathLoc& h1, const HyperKmerPathLoc& h2 )
     {    if ( h1.Vertex( ) < h2.Vertex( ) ) return True;
          if ( h1.Vertex( ) > h2.Vertex( ) ) return False;
          if ( h1.EdgeFromVertex( ) < h2.EdgeFromVertex( ) ) return True;
          if ( h1.EdgeFromVertex( ) > h2.EdgeFromVertex( ) ) return False;
          return h1.LocOnEdge( ) < h2.LocOnEdge( );    }

     private:

     vrtx_t v_;              // vertex
     int fv_;             // index of edge in from_[v]
     KmerPathLocAlt loc_; // position on edge

};

// A KmerPathSplitPoint marks a place where a KmerPath could be cut into two
// pieces (either of which may be null).  Cutting inside a gap is not allowed.
//
// The point is recorded as the number of complete kmer intervals lying to its
// left, plus the number (zero or more) of kmers of the partial kmer interval
// lying immediately to its left.

class KmerPathSplitPoint {

     public:

     // A default-constructed split point has both counts set to -1 and reports
     // itself as uninitialized.

     KmerPathSplitPoint( )
          : intervals_to_left_(-1),
            kmers_in_partial_to_left_(-1)
     { }

     KmerPathSplitPoint( int intervals_to_left, int kmers_in_partial_to_left )
          : intervals_to_left_(intervals_to_left),
            kmers_in_partial_to_left_(kmers_in_partial_to_left)
     { }

     // Initialized: true once the interval count is nonnegative.

     Bool Initialized( ) const { return !( intervals_to_left_ < 0 ); }

     int IntervalsToLeft( ) const { return intervals_to_left_; }
     int KmersInPartialToLeft( ) const { return kmers_in_partial_to_left_; }

     void SetIntervalsToLeft( int x ) { intervals_to_left_ = x; }

     // Order by complete intervals to the left, then by kmers in the partial
     // interval.

     friend Bool operator<( const KmerPathSplitPoint& s1,
          const KmerPathSplitPoint& s2 )
     {    if ( s1.intervals_to_left_ != s2.intervals_to_left_ )
          {    if ( s1.intervals_to_left_ < s2.intervals_to_left_ ) return True;
               return False;    }
          if ( s1.kmers_in_partial_to_left_ < s2.kmers_in_partial_to_left_ )
               return True;
          return False;    }

     friend Bool operator==( const KmerPathSplitPoint& s1,
          const KmerPathSplitPoint& s2 )
     {    if ( s1.intervals_to_left_ != s2.intervals_to_left_ ) return false;
          return s1.kmers_in_partial_to_left_ == s2.kmers_in_partial_to_left_;    }

     // Between: return the part of a KmerPath between two split points.  We don't
     // allow the split points to be equal.

     friend KmerPath Between( const KmerPath& p, const KmerPathSplitPoint& left,
          const KmerPathSplitPoint& right );

     // Printed as "<intervals to left>.<kmers in partial to left>".

     friend ostream& operator<<( ostream& out, const KmerPathSplitPoint& s )
     {    out << s.intervals_to_left_;
          out << ".";
          out << s.kmers_in_partial_to_left_;
          return out;    }

     private:

     int intervals_to_left_;
     int kmers_in_partial_to_left_;

};

// EdgeLabel generates a label corresponding to a KmerPath.  Declared here and
// defined elsewhere.
// NOTE(review): v and w are presumably the source and target vertices of the
// edge carrying p, and K the kmer size -- confirm against the implementation.

String EdgeLabel( vrtx_t v, vrtx_t w, const KmerPath& p, int K );

// Class: KmerPathCorrespondence
//
// A KmerPathCorrespondence pairs up two occurrences of the same kmer: it holds
// the index of one KmerPath and the position of a kmer on it, together with the
// index of another KmerPath and the position of a kmer on that one.
//
// Constructor arguments, for edge n = 1, 2:
//   idn         - index of the edge in digraphE::edges_;
//   fromn, ton  - the edge goes from vertex fromn to vertex ton;
//   from_indexn - index of the edge in from_[fromn];
//   ln          - position of the kmer on the edge.
//
// Typical usage: a KmerPathCorrespondence between two edges of a <HyperKmerPath>
// indicates that the two edges may potentially be merged to create a more compact
// representation of the closure set represented by the HyperKmerPath.
class KmerPathCorrespondence {

     public:

     // The default constructor does not set the integer fields.

     KmerPathCorrespondence( ) { }

     KmerPathCorrespondence( int id1, vrtx_t from1, int from_index1, vrtx_t to1,
          const KmerPathLocAlt& l1, int id2, vrtx_t from2, int from_index2, vrtx_t to2,
          const KmerPathLocAlt& l2 )
          : id1_(id1),
            id2_(id2),
            from1_(from1),
            from2_(from2),
            from_index1_(from_index1),
            from_index2_(from_index2),
            to1_(to1),
            to2_(to2),
            l1_(l1),
            l2_(l2)
     { }

     void Swap( );

     // Data for the first edge.

     int Id1( ) const { return id1_; }
     vrtx_t From1( ) const { return from1_; }
     int FromIndex1( ) const { return from_index1_; }
     vrtx_t To1( ) const { return to1_; }
     KmerPathLocAlt Pos1( ) const { return l1_; }

     // Data for the second edge.

     int Id2( ) const { return id2_; }
     vrtx_t From2( ) const { return from2_; }
     int FromIndex2( ) const { return from_index2_; }
     vrtx_t To2( ) const { return to2_; }
     KmerPathLocAlt Pos2( ) const { return l2_; }

     // Offset: position on path of the first kmer minus that of the second.

     int Offset( ) const { return l1_.PosOnPath( ) - l2_.PosOnPath( ); }

     friend Bool operator<( const KmerPathCorrespondence& c1,
          const KmerPathCorrespondence& c2 );

     private:

     int id1_, id2_;
     vrtx_t from1_, from2_;
     int from_index1_, from_index2_;
     vrtx_t to1_, to2_;
     KmerPathLocAlt l1_, l2_;

};

// A MultiKmerPath is a sequence of consecutive KmerPaths, stored as a
// vec<KmerPath>.  A MultiKmerPathLoc represents a position on a MultiKmerPath.

class MultiKmerPath : public vec<KmerPath> {

     public:

     MultiKmerPath( ) { }

     // Build from an EmbeddedSubPath by copying its edge objects, in order.

     MultiKmerPath( const EmbeddedSubPath<KmerPath>& a )
     {    for ( int edge = 0; edge < a.NEdges( ); ++edge )
          {    AddPath( a.EdgeObject(edge) );    }    }

     // AddPath: append one more KmerPath at the end.

     void AddPath( const KmerPath& p )
     {    push_back(p);    }

     // Merger concatenates the KmerPaths in a MultiKmerPath.  We deliberately
     // avoid concatenating abutting KmerPathIntervals because that would make it
     // harder to do accounting when using MultiKmerPaths.

     KmerPath Merger( ) const;

};

// A MultiKmerPathLoc is a position on a MultiKmerPath, given as an edge index,
// a segment index on that edge, and a kmer index on that segment.

class MultiKmerPathLoc {

     public:

     // Boring constructors.  Note that the default constructor leaves the three
     // fields unset.

     MultiKmerPathLoc( ) { }
     MultiKmerPathLoc( int edge, int segment_on_edge, int kmer_on_segment )
          : edge_(edge), segment_on_edge_(segment_on_edge),
          kmer_on_segment_(kmer_on_segment) { }
     MultiKmerPathLoc( int edge, const KmerPathLocAlt& l )
          : edge_(edge), segment_on_edge_( l.Interval( ) ),
          kmer_on_segment_( l.PosOnInterval( ) ) { }

     // The following constructor takes as input a MultiKmerPath and a position
     // on its merger (which is a KmerPath), and from that generates a position
     // on the MultiKmerPath.

     MultiKmerPathLoc( const MultiKmerPath& m, const KmerPathLoc& l );

     // PosOnMerger: the reverse translation, implemented elsewhere.
     // NOTE(review): merger is presumably expected to equal m.Merger( ) -- confirm.

     KmerPathLoc PosOnMerger( const MultiKmerPath& m, const KmerPath& merger ) const;

     // Kmer: the kmer at this location on m, computed as the start of the
     // addressed segment of the addressed edge plus the offset into that segment.
     // This only reads the location, so it is const and can be called through
     // const references.

     longlong Kmer( const MultiKmerPath& m ) const
     {    return m[ Edge( ) ].Segment( SegmentOnEdge( ) ).Start( )
               + KmerOnSegment( );    }

     // Access the data:

     int Edge( ) const { return edge_; }
     int SegmentOnEdge( ) const { return segment_on_edge_; }
     int KmerOnSegment( ) const { return kmer_on_segment_; }

     // Set data.

     void SetEdge( int e ) { edge_ = e; }
     void SetSegmentOnEdge( int s ) { segment_on_edge_ = s; }
     void SetKmerOnSegment( int k ) { kmer_on_segment_ = k; }

     void AddToSegmentOnEdge( int a ) { segment_on_edge_ += a; }

     // Test for ends.  The right end requires more data because this class doesn't
     // own enough information.

     Bool LeftEnd( ) const
     {    return Edge( ) == 0 && SegmentOnEdge( ) == 0 && KmerOnSegment( ) == 0;    }

     // RightEnd: true if this is the last kmer of the last segment of the last
     // edge of p.

     Bool RightEnd( const EmbeddedSubPath<KmerPath>& p ) const
     {    const KmerPath& kp = p.EdgeObject( Edge( ) );
          int lastseg = kp.NSegments( ) - 1;
          return Edge( ) == p.NEdges( ) - 1 && SegmentOnEdge( ) == lastseg
               && KmerOnSegment( ) == kp.Segment(lastseg).Length( ) - 1;    }

     // AtRightEndOfEdge: true if this is the last kmer of the last segment of
     // the current edge of m, whichever edge that is.

     Bool AtRightEndOfEdge( const MultiKmerPath& m ) const
     {    const KmerPath& km = m[ Edge( ) ];
          int lastseg = km.NSegments( ) - 1;
          return SegmentOnEdge( ) == lastseg
               && KmerOnSegment( ) == km.Segment(lastseg).Length( ) - 1;    }

     // Comparisons.  operator<= is implemented elsewhere; operator>= is defined
     // in terms of it.

     friend Bool operator<=(const MultiKmerPathLoc& l1, const MultiKmerPathLoc& l2);
     friend Bool operator>=( const MultiKmerPathLoc& l1, const MultiKmerPathLoc& l2 )
     {    return l2 <= l1;    }

     friend Bool operator==( const MultiKmerPathLoc& l1, const MultiKmerPathLoc& l2 )
     {    return l1.Edge( ) == l2.Edge( )
               && l1.SegmentOnEdge( ) == l2.SegmentOnEdge( )
               && l1.KmerOnSegment( ) == l2.KmerOnSegment( );    }

     // Printed as "<edge>.<segment on edge>.<kmer on segment>".

     friend ostream& operator<<( ostream& out, const MultiKmerPathLoc& l )
     {    return out << l.Edge( ) << "." << l.SegmentOnEdge( ) << "."
               << l.KmerOnSegment( );    }

     private:

     int edge_;
     int segment_on_edge_;
     int kmer_on_segment_;

};

// A FullMapToHyperKmerPath represents an alignment between a proper KmerPath p
// and a HyperKmerPath h, which sends every kmer in p to a kmer in h (and not to
// a "gap kmer").  The data is given as follows:
// 1. An EmbeddedSubPath e of h, describing the sequence of edges which are used,
// allowing for the same edge to be used more than once (although this may be a
// pathological case);
// 2. A vec<MultiKmerPathLoc> l, of length p.NSegments( ), which specifies for each
// segment of p which is not a gap segment, where its first kmer is mapped on e.
//
// AlignFullTo( p, h, m ) finds all the FullMapToHyperKmerPath objects from p[i]
// to h and puts them in m[i].

class FullMapToHyperKmerPath {

     public:

     FullMapToHyperKmerPath( ) { }

     // Store copies of the embedded subpath and of the per-segment locations.

     FullMapToHyperKmerPath( const EmbeddedSubPath<KmerPath>& e,
          const vec<MultiKmerPathLoc>& l )
          : e_(e),
            l_(l)
     { }

     friend void AlignFullTo( const vec<KmerPath>& p, const HyperKmerPath& h,
          vec< vec<FullMapToHyperKmerPath> >& m );

     private:

     EmbeddedSubPath<KmerPath> e_; // edges of h that are used
     vec<MultiKmerPathLoc> l_;     // per-segment image locations on e_

};

// A PointedSubPathPair consists of two EmbeddedSubPaths of a HyperKmerPath,
// and positions on them, referring to equal kmers.

class PointedSubPathPair {

     public:

     void TestValid( ) const; // incomplete test

     PointedSubPathPair( ) { }
     PointedSubPathPair( const EmbeddedSubPath<KmerPath>& a1, 
          const EmbeddedSubPath<KmerPath>& a2, const MultiKmerPathLoc& l1, 
          const MultiKmerPathLoc& l2 )
          : a1_(a1), a2_(a2), l1_(l1), l2_(l2)
     {    TestValid( );    }

     const EmbeddedSubPath<KmerPath>& Path1( ) const { return a1_; }
     const EmbeddedSubPath<KmerPath>& Path2( ) const { return a2_; }
     const EmbeddedSubPath<KmerPath>& Path( int i ) const 
     {    return ( i == 0 ? a1_ : a2_ );    }
     EmbeddedSubPath<KmerPath>& PathMutable( int i )
     {    return ( i == 0 ? a1_ : a2_ );    }

     const MultiKmerPathLoc& Loc1( ) const { return l1_; }
     const MultiKmerPathLoc& Loc2( ) const { return l2_; }
     const MultiKmerPathLoc& Loc( int i ) const
     {    return ( i == 0 ? l1_ : l2_ );    }
     MultiKmerPathLoc& LocMutable( int i )
     {    return ( i == 0 ? l1_ : l2_ );    }

     // Check for duplicated edges.

     Bool HasDups( ) const 
     {    return DuplicatedEdges( Path1( ) ) || DuplicatedEdges( Path2( ) );    }

     void PrintSummary( ostream& out, int i, int K ) const;
     void PrintSummary( ostream& out, int K ) const
     {    PrintSummary( out, 0, K );
          cout << "\n";
          PrintSummary( out, 1, K );    
          cout << "\n";    }

     private:

     EmbeddedSubPath<KmerPath> a1_, a2_;
     MultiKmerPathLoc l1_, l2_;

};

// We define a HyperSloppyReadPlacement, which is the HyperKmerPath analog of a 
// SloppyReadPlacement on a KmerPath (as defined in CompletedInsert.h).

class HyperSloppyPathEmbedding {

     public:

     // Default constructor.  It assigns nothing explicitly, so the two
     // "mapped" flags should be set before they are read.

     HyperSloppyPathEmbedding( ) { }

     // Construct from a mapped flag and a target location for each end.

     HyperSloppyPathEmbedding( Bool left_end_mapped, Bool right_end_mapped,
          const HyperKmerPathLoc& left_end_to,
          const HyperKmerPathLoc& right_end_to )
          : left_end_mapped_( left_end_mapped ),
            right_end_mapped_( right_end_mapped ),
            left_end_to_( left_end_to ),
            right_end_to_( right_end_to )
     {    }

     // Left end: mapped flag and target location (returned by value).

     Bool LeftEndMapped( ) const
     {    return left_end_mapped_;    }

     HyperKmerPathLoc LeftEndTo( ) const
     {    return left_end_to_;    }

     void SetLeftEndMapped( Bool b )
     {    left_end_mapped_ = b;    }

     void SetLeftEndTo( const HyperKmerPathLoc& l )
     {    left_end_to_ = l;    }

     // Right end: mapped flag and target location (returned by value).

     Bool RightEndMapped( ) const
     {    return right_end_mapped_;    }

     HyperKmerPathLoc RightEndTo( ) const
     {    return right_end_to_;    }

     void SetRightEndMapped( Bool b )
     {    right_end_mapped_ = b;    }

     void SetRightEndTo( const HyperKmerPathLoc& r )
     {    right_end_to_ = r;    }

     private:

     // Declaration order is kept as flags first, then locations.

     Bool left_end_mapped_;
     Bool right_end_mapped_;
     HyperKmerPathLoc left_end_to_;
     HyperKmerPathLoc right_end_to_;

};

class HyperSloppyReadPlacement {

     public:

     // Default constructor: no member is explicitly initialized, so an object
     // built this way should be assigned before its accessors are used.

     HyperSloppyReadPlacement( ) { }

     // Construct from a vertex id, an edge index (edge_from), a read id, a
     // pair id (negative means the read is unpaired, see Paired), an
     // orientation flag, and the embedding describing where the ends map.

     HyperSloppyReadPlacement( int vertex, int edge_from, int read_id, int pair_id, 
          Bool rc, const HyperSloppyPathEmbedding& e )
          : vertex_(vertex), edge_from_(edge_from), read_id_(read_id), 
          pair_id_(pair_id), rc_(rc), e_(e) { }

     int Vertex( ) const { return vertex_; }
     int EdgeFrom( ) const { return edge_from_; }

     int ReadId( ) const { return read_id_; }

     // A placement counts as paired exactly when its pair id is nonnegative.

     Bool Paired( ) const { return pair_id_ >= 0; }
     int PairId( ) const { return pair_id_; }

     // Orientation: Fw is simply the negation of Rc.

     Bool Rc( ) const { return rc_; }
     Bool Fw( ) const { return !rc_; }

     // Copy of the stored embedding.  Const-qualified, like every other
     // accessor here, so that it can be called on const placements.

     HyperSloppyPathEmbedding Embedding( ) const { return e_; }

     // Convenience forwards to the stored embedding.

     Bool LeftEndMapped( ) const { return e_.LeftEndMapped( ); }
     Bool RightEndMapped( ) const { return e_.RightEndMapped( ); }
     Bool EndsMapped( ) const { return LeftEndMapped( ) && RightEndMapped( ); }
     HyperKmerPathLoc LeftLoc( ) const { return e_.LeftEndTo( ); }
     HyperKmerPathLoc RightLoc( ) const { return e_.RightEndTo( ); }

     // Reverse the placement; the edge object is supplied as p.  Defined
     // elsewhere.

     void Reverse( const KmerPath& p );

     // Compute number of non-gap kmers covered by the placement.  You have to
     // provide the edge object as an argument.

     int KmersCovered( const KmerPath& p ) const;

     // Default ordering: by pair id, then by read id.

     friend Bool operator<( const HyperSloppyReadPlacement& p1,
          const HyperSloppyReadPlacement& p2 )
     {    if ( p1.PairId( ) < p2.PairId( ) ) return True;
          if ( p1.PairId( ) > p2.PairId( ) ) return False;
          return p1.ReadId( ) < p2.ReadId( );    }

     // Positional ordering: placements whose left end is unmapped come before
     // those whose left end is mapped; two mapped placements are ordered by
     // their left locations.  Two unmapped placements are equivalent, so the
     // result is False for them: a comparator handed to a sort must never
     // report an element as less than itself, nor two elements as each less
     // than the other.

     friend Bool cmp_pos( const HyperSloppyReadPlacement& p1,
          const HyperSloppyReadPlacement& p2 )
     {    if ( !p1.LeftEndMapped( ) && p2.LeftEndMapped( ) ) return True;
          if ( !p1.LeftEndMapped( ) || !p2.LeftEndMapped( ) ) return False;
          return p1.LeftLoc( ) < p2.LeftLoc( );    }

     private:

     int vertex_;
     int edge_from_;
     int read_id_;
     int pair_id_;
     Bool rc_;
     HyperSloppyPathEmbedding e_;

};

// Namespace-scope redeclaration of the cmp_pos friend that is defined inline in
// HyperSloppyReadPlacement.  A friend defined only inside a class is found solely
// by argument-dependent lookup, so this declaration is what allows the function
// to be named directly (for example, when it is passed as a comparator).

Bool cmp_pos( const HyperSloppyReadPlacement& p1,
     const HyperSloppyReadPlacement& p2 );

// Class: HyperBasevector
// 
// A HyperBasevector is a <kmer numbering>-independent representation of 
// a HyperKmerPath.  However, it is not independent of K.
class HyperBasevector : public digraphE<basevector> {

     public:

     // Default constructor: empty graph.  K is set to 0 so that K( ) never
     // reads an indeterminate value; callers are expected to SetK (or read the
     // object in) before relying on it.

     HyperBasevector( ) : K_(0) { }

     // Constructor: empty graph with the given K.

     HyperBasevector( int K ) : K_(K) { }

     // Constructor from a HyperKmerPath having no gaps:

     HyperBasevector( const HyperKmerPath& h, const KmerBaseBroker& kbb );

     // Constructor: given a collection of basevectors, create a graph having
     // one edge and two vertices for each of the edge objects

     HyperBasevector( int K, const vec<basevector>& p ) 
          : digraphE<basevector>( p, EDGES_SEPARATE ), K_(K)
     {    }

     // Constructor: form the disjoint union of some HyperBasevectors.  K is
     // taken from the argument and is in place before the union is built.
     
     HyperBasevector( int K, const vec<HyperBasevector>& v )
          : K_(K)
     {    SetToDisjointUnionOf(v);    }

     // SetToDisjointUnionOf: clear a given HyperBasevector and set it to the 
     // disjoint union of a given collection of HyperBasevectors.

     void SetToDisjointUnionOf( const vec<HyperBasevector>& v );

     int K( ) const { return K_; }
     void SetK( int K ) { K_ = K; }

     // Change K to newK.  Defined elsewhere.

     void LowerK( int newK );

     // Reverse entire graph.

     void Reverse( );

     // Size of the basevector on edge e, as reported by its size( ) method.

     int EdgeLength( int e ) const { return EdgeObject(e).size( ); }

     // Binary I/O on a file descriptor for a single object (defined
     // elsewhere) ...

     friend void BinaryWrite( int fd, const HyperBasevector& h );
     friend void BinaryRead( int fd, HyperBasevector& h );

     // ... and for a vec of them, forwarded to the generic "complex" helpers.

     friend void BinaryWrite( int fd, const vec<HyperBasevector>& h )
     {    BinaryWriteComplex( fd, h );    }
     friend void BinaryRead( int fd, vec<HyperBasevector>& h )
     {    BinaryReadComplex( fd, h );    }

     // Equality comparison.  Defined elsewhere.

     friend Bool operator==( const HyperBasevector& h1, const HyperBasevector& h2 );

     private:

     int K_;

};

#endif
