/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////


// ============================================================================
//
// This file defines the mastervec and serfvec classes, and specifies how to
// create other serfvec-like classes.
//
// ============================================================================


// ===========================================================================
//
/** Class to efficiently manage dynamically allocated memory for many vectors.
   @class mastervec

   PURPOSE: For a class X which uses dynamic storage, a vector<X> with 
   millions of  entries will require millions of separate memory allocations, 
   and consequently be inefficient in the use of both time and space.  Under 
   appropriate hypotheses on X, mastervec<X> solves this problem by causing 
   the dynamically allocated memory to be maintained in a single block, where 
   possible.  For a class T which is statically allocated, we define a vector 
   class serfvec<T> which behaves as X should, thereby making possible the 
   efficient storage of vector< vector<T> >, in the case where the outer 
   vector is very large.

 For example, on a system in which addresses occupy eight bytes, the storage
 cost for a vector< vector<T> > in which there are n vectors on the outside,
 and a total of k T's is
                   40 + (16 * n) + (k * sizeof(T))
 plus the cost of two memory allocations.

 ============================================================================


 ============================================================================

 HYPOTHESES: We require that an object of class X has a single contiguous 
static block of memory, and a single contiguous dynamic block of memory. 
It should have a default constructor which does not allocate memory.  It 
should be templatized 
 over a class A, with the property that its dynamic data is specifiable as an 
 array of A's.  (This is needed to avoid alignment faults.)  Moreover, the 
 following member functions must be provided:


 - A typedef for A:        typedef A   value_type;

 - Functions which specify the location and size of the object's static data.
 The size should depend only on the class X.  The "static data" as defined
 here need not refer to literally all the static data stored in memory, but
 must instead satisfy the requirements of SetExternal, below.

     - const char* StartOfStaticData( ) const;
     - const int SizeOfStaticData( ) const;
     .

 - Functions which specify the location and size of the object's dynamic data,
 measured in A's, and the amount of extra space which has been allocated.  If 
 both the size and extra space are zero, the location is not required to be 
 meaningful.

     - const A* StartOfDynamicData( ) const;
     - longlong SizeOfDynamicData( ) const;
     - longlong ExtraSpaceForDynamicData( ) const;
     .

 - Function which specifies the ownership type of the dynamic memory block:
 self-owned or externally-owned.  In the externally-owned case, the class
 must never try to delete the block.  

     - Bool SelfOwned( ) const;
     .

 - Function which builds an object by copying static data, copying a pointer 
 to dynamic data, and setting ownership to external.  It is required that 
 if an object's data is copied using {Start/SizeOf}{Static/Dynamic}Data, 
 it can be 
 recreated by passing the same data back to SetExternal.  Note that it is 
 possible that {Start/Sizeof}StaticData do not literally specify all of the 
 static data: all that matters is that all static data can be recreated. 
 This is exploited in serfvec, which has SizeOfStaticData = 0.

     - void SetExternal( const char* start_of_static_data,
                          A* pointer_to_dynamic_data,
                          longlong size_of_dynamic_data,
                          longlong extra_space_for_dynamic_data );

 - Shift the start of dynamic data by adding a1 - a2 to it.

     - void ShiftStartOfDynamicData( A* a1, A* a2 );

 - Return to ground state (as if created by default constructor).  If
 occupying external dynamic space, return it.  The destructor should
 be identical to this.

     - void Reinitialize( );

 - Reinitialize without returning dynamic space.

    - void Blank( );

 ==============================================================================


 ==============================================================================

 MASTERVEC CORE FUNCTIONS:

    There is the default constructor, and the n-object constructor:

         mastervec( );
         mastervec( int n );

    There is also a constructor which loads data from a given file into the 
    new mastervec:

         mastervec( const String &filename );

    This is equivalent to doing:

         mastervec mv;
         mv.ReadRange( filename, 0, MastervecFileObjectCount( filename ) );

    (See below for descriptions of ReadRange() and MastervecFileObjectCount().)

    A function which changes the space allocated for the raw data (dynamic 
    data), or changes the number of objects allocated.

         void Reserve( longlong raw_mem_size, int n_objects );

    A function which clears out all data/objects (but does not reduce memory
    usage).
 
         void clear( );

    Functions to destroy the contents of and deallocate all memory
    associated with a mastervec.

         void destroy( );
         void Destroy( mastervec& );

    A function which adds data to the mastervec, leaving a given amount of 
    extra space in the dynamic block for the object (measured in A's).  
    A function which adds data to the mastervec, but does not modify the 
    dynamic block.

         void push_back( const X& obj, int extra_space = 0 );
         void push_back_external( const X& obj );

    Return number of objects in mastervec; change number of objects in 
    mastervec; change allocation of objects in mastervec.

         int size( ) const;
         void resize( int n );
         void reserve( int n );

    Return size and capacity of dynamic data space (# of A's).

         longlong rawsize( );
         longlong rawsize(int from, int to);
         longlong rawsize(const vec<int> & entries)
         longlong rawcapacity( );

    Access the i'th object.

         X& operator[ ](int i) const;

    Swap the i'th and j'th objects without swapping their dynamic data.

         void SwapElements( int i, int j );

 =============================================================================
 ==========================================================================

 DISK STORAGE: A mastervec object may be stored on disk in one of two ways.
 One way uses three files, and the other way uses one file.  As the file is 
 created by successive writes, the three file approach is used.  Upon completion
 of the writes, the three files are joined to form one file.  However, it is 
 possible to read from either the three file version or the one file version.

 We describe now the details of the two approaches to disk storage.  This
 information should not be used outside Feudal.h and Feudal.cc.

 In the three file case, there is a main file, and two subsidiary files.  If the
 main file is named "main", then the two subsidiary files are named
 "main..offsets" and "main..static".

 File main in the three file case:
      n = number of entries ( int, 4 bytes )
      storage type ( int, 4 bytes; 1 means one file, 3 means three files )
      start of offsets ( longlong, set to 0 )
      start of static ( longlong, set to 0 )
      dynamic data

 File main..offsets:
      offsets of entry i in dynamic data ( (n+1) * sizeof(pointer) bytes )
      (The last entry is there to permit computation of all dynamic data sizes.)

 File main..static:
      static data ( n * SizeOfStaticData( ) bytes ).

 File main in the one file case:
      It is the concatenation of main, main..offsets, and main..static,
      with the correct values for start of offsets and start of static inserted.

 ==========================================================================

 MASTERVEC I/O:

   Read either a sorted list of entries from a file (Read), a range
   [from, to) of entries from a file (ReadRange), or exactly one
   entry from a file (ReadOne), appending if pre_resized = False,
   starting at index 0 if pre_resized = True.  For example, given
   the range [10,20) and an empty mastervec, the first element of
   the mastervec will be the 10th entry from the file, the second
   element will be the 11th entry, and so on, ending with the tenth
   element being the 19th entry.  In addition, the specified amount
   of extra space will be added to the dynamic block for each entry.
   The second form behaves poorly if it has to resize the dynamic
   memory block.  If pre_reserved = True, it is assumed that space
   has been reserved in advance.  If pre_resized = True, it is
   assumed that the vector has been resized in advance.
   
        void Read( const String& filename, const vec<int>& entries, int extra = 0,
             Bool pre_reserved = False, Bool pre_resized = False );
        void ReadRange( const String& filename, int from, int to, int extra = 0,
             Bool pre_reserved = False, Bool pre_resized = False );
        void ReadOne( const String& filename, int id, int extra = 0,
             Bool pre_reserved = False, Bool pre_resized = False );

   Read a range [from, to) of entries from a file, or a sorted list
   of entries from a file, maintaining their indices relative to the
   file.  For example, given the range [10,20), the first ten
   elements of the mastervec will be empty, the tenth element of the
   mastervec will be the 10th entry from the file, the eleventh
   element will be the 11th entry, and so on, ending with the
   nineteenth element being the 19th entry.  The size of the
   mastervec will be equal to the number of entries in the specified
   file, even if most of them are not read in.  (This accommodates
   reuse of a given mastervec over multiple subset reads.)  Behaves in
   all other respects as Read() does.

        void SparseRead( const String& filename, const vec<int>& entries,
             int extra = 0, Bool pre_reserved = False );
        void SparseReadRange( const String& filename, int from, int to,
             int extra = 0, Bool pre_reserved = False );

   Append a range [from, to) of mastervec entries to mastervec datafiles
   (filename, filename..offsets, filename..static), creating the files if need be.

        void Write( const String& filename, int from, int to ) const;

   In effect append mastervec datafiles (top, top..offsets, top..static)
   to the ends of mastervec datafiles (bottom, bottom..offsets, bottom..static).

        void ConcatenateMastervecData( const String& bottom, const String& top );

   Join mastervec files (filename, filename..offsets, filename..static) into 
   filename.

        void MergeMastervecFiles( const String& filename );

   Also for simplicity, there is a function to reserve space for and then
   read in an entire mastervec (from a single file), and a function to write out 
   and then merge (to a single file) an entire mastervec.

        void ReadAll( const String& filename, Bool append = False );
        void WriteAll( const String& filename );

   Get info from a mastervec file (in one or three file format):

        int MastervecFileObjectCount( const String& filename );
        longlong MastervecFileRawCount( const String& filename );

 =================================================================================

*/

// ===========================================================================
/**

@class serfvec

 SERFVEC: The serfvec<A> class is designed to behave like an STL vector, but
 differs in the following ways:

    It satisfies the requirements for a class X as specified above for mastervec.

    It does bounds-checking for references, unless NDEBUG is defined.

    It does not have all the member functions which the STL vector does.

    It should only be used for a class A which has no dynamic allocation, and
    which has a default constructor which does nothing.

    Added functionality:

         void serfvec::ReverseThis( serfvec& v );
         serfvec Reverse( const serfvec& v );
         void serfvec::SetToReverseOf( const serfvec& v );
         void serfvec::ReverseMe( );
         void serfvec::destroy( );
         void Destroy( serfvec& v );
         int Sum( const serfvec<unsigned char>& v );

 =========================================================================

*/



// ============================================================================
/**
@class vecvec
 CLASS VECVEC

 A vecvec<T> is the feudal representation for a vec< vec<T> >.

*/
// ===========================================================================


#ifndef FEUDAL
#define FEUDAL

#include <math.h>
#include <string.h>

#include <iterator>
#include <memory>

#include "String.h"
#include "Vec.h"

// For any class X in the mastervec template, the following functions must be 
// defined:
//
// typedef A value_type;
// const char* StartOfStaticData( ) const;
// const int SizeOfStaticData( ) const;
// const A* StartOfDynamicData( ) const;
// longlong SizeOfDynamicData( ) const;
// longlong ExtraSpaceForDynamicData( ) const;
// Bool SelfOwned( ) const;
// void SetExternal( const char*, A*, int, int );
// void ShiftStartOfDynamicData( A* a1, A* a2 );
// void Reinitialize( );
// void Blank( );

template<class X, class A>
class mastervec
{
public:

  typedef X                 value_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;

#if __GNUC__ > 2
  typedef mastervec<X,A>    vector_type;
  typedef __gnu_cxx::__normal_iterator<pointer, vector_type>
  iterator;
  typedef __gnu_cxx::__normal_iterator<const_pointer, vector_type>
  const_iterator;
#else
  typedef value_type*       iterator;
  typedef const value_type* const_iterator;
#endif

  typedef value_type&       reference;
  typedef const value_type& const_reference;
  typedef size_t            size_type;
  typedef ptrdiff_t         difference_type;

  typedef reverse_iterator<const_iterator>  const_reverse_iterator;
  typedef reverse_iterator<iterator>        reverse_iterator;

  mastervec( );

  explicit mastervec( int n );

  explicit mastervec( const String &filename ); // load the data in the given file

  ~mastervec();
     
  /// Iterator positioned at the first stored object.
  iterator begin()
  {
    return iterator( objects_ );
  }

  /// Read-only iterator positioned at the first stored object.
  const_iterator begin() const
  {
    return const_iterator( objects_ );
  }

  /// Iterator positioned one past the last stored object.
  iterator end()
  {
    X* const past_last = objects_ + objects_size_;
    return iterator( past_last );
  }

  /// Read-only iterator positioned one past the last stored object.
  const_iterator end() const
  {
    const X* const past_last = objects_ + objects_size_;
    return const_iterator( past_last );
  }

  /// Reverse iterator starting at the last stored object.
  reverse_iterator rbegin()
  {
    iterator forward_end = end();
    return reverse_iterator( forward_end );
  }

  /// Read-only reverse iterator starting at the last stored object.
  const_reverse_iterator rbegin() const
  {
    const_iterator forward_end = end();
    return const_reverse_iterator( forward_end );
  }

  /// Reverse iterator marking the position before the first stored object.
  reverse_iterator rend()
  {
    iterator forward_begin = begin();
    return reverse_iterator( forward_begin );
  }

  /// Read-only reverse iterator marking the position before the first object.
  const_reverse_iterator rend() const
  {
    const_iterator forward_begin = begin();
    return const_reverse_iterator( forward_begin );
  }

  /// First stored object.  No emptiness check is performed.
  reference front()
  {
    return *begin();
  }

  /// First stored object (read-only).  No emptiness check is performed.
  const_reference front() const
  {
    return *begin();
  }

  /// Last stored object.  No emptiness check is performed.
  reference back()
  {
    iterator last = end();
    --last;
    return *last;
  }

  /// Last stored object (read-only).  No emptiness check is performed.
  const_reference back() const
  {
    const_iterator last = end();
    --last;
    return *last;
  }

  /// raw_mem_size is measured in bytes.
  void Reserve( longlong raw_mem_size,
		int n_objects );

  void resize( int n );
     
  void reserve( int n );
     
  void clear( );
     
  void destroy( );

  /// Append obj, on the assumption that Reserve() has already made room.
  /// When it has not, room is made for this one object only, so a sequence
  /// of such calls is quadratic overall.
  /// The work is forwarded to push_back_reserve with a growth factor of 1.0,
  /// both to avoid duplicated code and so that a call such as
  /// this->push_back((*this)[i]) can be valid.
  void push_back( const X& obj, int extra_space = 0 )
  {
    const double growth_factor = 1.0;
    push_back_reserve( obj, extra_space, growth_factor );
  }
		  

  void push_back_external( const X& obj ) 
  {
    resize( objects_size_ + 1 );
    objects_[ objects_size_ - 1 ] = obj;   
  }

  ///Push back and enlarge if needed: multiply capacity by a factor "increase".
  void push_back_reserve( const X& obj,
			  int extra_space = 0, 
                          double increase = 1.3 ); 

  /**Append elements in range [from,to) from orig to this mastervec.
   * Precondition: 0 <= from <= to <= orig.size().
   * @param[in] extraRaw additional dynamic memory to be Reserved for future use
   * @param[in] extraSize additional space for objects to be Reserved for future use (?)
   * @param[in] orig the vector from which elements will be appended to this
   * @param[in] from beginning of range in \p orig to append
   * @param[in] to   end of range in \p orig to append
   * Since append is likely to cause a memory reallocation, the two default
   * parameters allow the programmer to make that reallocation larger if they
   * know they are going to need it later.
   */
  void Append( const mastervec & orig,
	       int from,
	       int to,
               longlong extraRaw = 0,
	       int extraSize = 0);

  /**Append all elements indexed by entries to this mastervec.
   * Precondition: all indices in entries are valid indices for orig.
   * See notes for append(orig, from, to).
   */
  void Append( const mastervec & orig,
	       const vec<int> & entries,
               longlong extraRaw = 0,
	       int extraSize = 0);

  /**Append entirety of orig to *this.
   */
  void Append( const mastervec & orig );

  /// Size of the dynamic data, measured in units of sizeof(A).
  longlong rawsize( ) const
  {
    return raw_data_size_;
  }

  ///dynamic data size for the object interval [from, to) measured in chars.
  ///Fast: O(1).
  /// measured in sizeof(A).
  longlong rawsize(int from, int to) const;

  ///dynamic data size for all objects whose indices are in entries, in chars.
  ///Slow; O(entries.size()).
  /// measured in sizeof(A).
  longlong rawsize(const vec<int> & entries) const;

  /// actual_rawsize: total of SizeOfDynamicData() over every stored object,
  /// measured in units of sizeof(A).
  longlong actual_rawsize( ) const
  {
    longlong total = 0;
    int idx = 0;
    while ( idx < size( ) )
    {
      const X& object = objects_[idx];
      total += object.SizeOfDynamicData( );
      ++idx;
    }
    return total;
  }

  /// Capacity of the dynamic data, measured in units of sizeof(A).
  longlong rawcapacity( ) const
  {
    return raw_data_capacity_;
  }

  /// Number of objects contained.
  int size( ) const
  {
    return objects_size_;
  }

  /// True when no objects are contained.
  bool empty() const
  {
    return objects_size_ == 0;
  }

  /// Capacity for objects.
  int capacity( ) const
  {
    return objects_capacity_;
  }

  /// Resize s to size() and store in s[i] the value of (*this)[i].size().
  void ElementSizes( vec<int> & s ) const
  {
    s.resize( size() );
    for ( int idx = 0; idx != size(); ++idx )
    {
      const X& element = (*this)[idx];
      s[idx] = element.size();
    }
  }

  /// Read-only pointer to the start of the raw (dynamic) data block.
  const A* rawdata( ) const
  {
    return raw_data_;
  }

  /// Exchange the entire state of this mastervec with that of other by
  /// swapping every data member; no objects or dynamic data are copied.
  void Swap( mastervec& other )
  {
    // Object array and its bookkeeping.
    std::swap( objects_,           other.objects_ );
    std::swap( objects_size_,      other.objects_size_ );
    std::swap( objects_capacity_,  other.objects_capacity_ );

    // Raw data block and its bookkeeping.
    std::swap( raw_data_,          other.raw_data_ );
    std::swap( raw_data_size_,     other.raw_data_size_ );
    std::swap( raw_data_capacity_, other.raw_data_capacity_ );

    std::swap( fd_,                other.fd_ );
  }

  void SwapElements( int i, int j ) 
  {
    if ( i == j )
      return;
    static X temp;
    memcpy( &temp, &objects_[i], sizeof(X) );
    memcpy( &objects_[i], &objects_[j], sizeof(X) );
    memcpy( &objects_[j], &temp, sizeof(X) );    
    temp.Blank(); // Otherwise, temp may hold onto a non-null pointer in the
                  // last thing that was swapped, which will cause a double
                  // delete (and possibly a segfault) on exit.
  }

  /// Access the i'th object.  The index is checked with AssertGe/AssertLt
  /// against the range [0, size()).
  X& operator[] (int i)
  {
    AssertGe( i, 0 );
    AssertLt( i, objects_size_ );
    X* const element = objects_ + i;
    return *element;
  }

  /// Read-only access to the i'th object.  The index is checked with
  /// AssertGe/AssertLt against the range [0, size()).
  const X& operator[] (int i) const
  {
    AssertGe( i, 0 );
    AssertLt( i, objects_size_ );
    const X* const element = objects_ + i;
    return *element;
  }

  /// Equality: two mastervecs compare equal when they hold the same number
  /// of objects and every pair of corresponding objects compares equal.
  friend
  bool operator == ( const mastervec & lhs, const mastervec & rhs )
  {
    if ( lhs.size() != rhs.size() )
      return false;

    bool all_equal = true;
    for ( int idx = 0; all_equal && idx != lhs.size(); ++idx )
    {
      if ( ! ( lhs[idx] == rhs[idx] ) )
        all_equal = false;
    }
    return all_equal;
  }

  /// Human-readable output of a mastervec: a header line giving the number
  /// of objects, followed by each object on its own line.
  /// @param os stream to write to
  /// @param m  mastervec to print; each element must support operator<<
  /// @return os, flushed
  friend
  std::ostream & operator << (std::ostream & os, const mastervec & m)
  {
    // Emit plain newlines and flush once at the end.  Flushing after every
    // element is needlessly slow for a mastervec with very many entries;
    // the bytes written are the same either way.
    os << "Mastervec size: " << m.size() << '\n';
    for (int i = 0; i != m.size(); ++i)
      {
	os << m[i] << '\n';
      }
    os.flush();
    return os;
  }

  // The Sort() method sorts a mastervec entirely internally.  The
  // default implementation of the STL sort() uses std::swap(), which
  // calls operator=() on objects of class X, which usually causes
  // those objects to become self-owned.  For large mastervecs, this
  // can cause unacceptable memory fragmentation.

  // The various sorting routines are defined inline here so that they
  // are only implemented if they are called.  This prevents
  // compilation errors resulting from instantiations of
  // mastervec<X,A> where class X does not define an operator<().  

  // An obvious improvement here would be to allow Sort() to accept
  // a binary functor to override the default X::operator<().

private:

  static const int qsort_cutoff = 20;

  void QuickSort( const int lower, const int upper );

  void QuickSort( const int lower, const int upper , vector<int> & perm );

  /// Insertion sort over all objects, ordered by operator< of X.  Objects
  /// are moved with SwapElements.
  void InsertionSort()
  {
    for ( int sorted_end = 1; sorted_end < objects_size_; ++sorted_end )
    {
      int pos = sorted_end;
      // Walk the new object leftwards until it is no longer out of order.
      while ( pos > 0 && objects_[pos] < objects_[pos-1] )
      {
        this->SwapElements( pos-1, pos );
        --pos;
      }
    }
  }

  /// Insertion sort over all objects, ordered by operator< of X, applying
  /// every swap to the matching entries of perm as well so that perm stays
  /// in step with the objects.
  void InsertionSort( vector<int> & perm )
  {
    for ( int sorted_end = 1; sorted_end < objects_size_; ++sorted_end )
    {
      int pos = sorted_end;
      while ( pos > 0 && objects_[pos] < objects_[pos-1] )
      {
        this->SwapElements( pos-1, pos );
        std::swap( perm[pos-1], perm[pos] );
        --pos;
      }
    }
  }

public:


  /// Sort the objects in place: a QuickSort pass over the full index range
  /// followed by an InsertionSort pass.
  void Sort()
  {
    const int last_index = objects_size_ - 1;
    QuickSort( 0, last_index );
    InsertionSort();
  }

  /// Sort the objects in place while applying the same rearrangement to
  /// perm.  Using a vector of int for perm is no limitation, because
  /// QuickSort itself assumes an index type of int.
  void SortSync( vector<int> & perm )
  {
    const int last_index = objects_size_ - 1;
    QuickSort( 0, last_index, perm );
    InsertionSort( perm );
  }

  /// Collapse each run of consecutive equal objects (per operator== of X)
  /// to its first member, then shrink the mastervec to the survivors.
  void Unique()
  {
    if ( empty() ) return;

    int kept = 1;   // objects_[0 .. kept) are the survivors so far
    for ( int idx = 1; idx < objects_size_; ++idx )
    {
      if ( objects_[idx] == objects_[kept - 1] )
        continue;
      this->SwapElements( idx, kept );
      ++kept;
    }
    resize( kept );
  }
    
  void UniqueSort()
  {
    Sort();
    Unique();
  }


  /** Removes entries by index. The indices of the entries to remove
      are specified in to_remove and must be sorted.
  */
  void RemoveByIndex(const vec<int>& to_remove)
  {
    if (to_remove.empty()) return;
    int last_removed = to_remove[0];
    int j = 1;
    for(int i = last_removed + 1; i < objects_size_; ++i) {
      if (j >= to_remove.isize() || i < to_remove[j])
	this->SwapElements(i, last_removed++);
      else
	j++;
    }
    if (last_removed < objects_size_)
      resize(last_removed);
  }

  /** Removes every entry whose corresponding flag in to_remove is true.
      to_remove must have exactly size() entries (checked with
      ForceAssertEq).  Surviving entries keep their relative order.
  */
  void EraseIf(const vec<Bool>& to_remove)
  {
    ForceAssertEq( this->size(), to_remove.isize() );

    int kept = 0;   // number of survivors placed so far
    for ( int idx = 0; idx < this->size(); ++idx )
    {
      if ( to_remove[idx] )
        continue;
      if ( kept != idx )
        this->SwapElements( idx, kept );
      ++kept;
    }
    this->resize( kept );
  }

  /** Append a range [from, to) of mastervec entries to mastervec datafiles.
   (filename, filename..offsets, filename..static), creating the files if need be.
  */
  void Write( const String& filename,
	      int from,
	      int to ) const;
  
  ///write out  and then merge (to a single file) an entire mastervec.
  void WriteAll( const String& filename ) const;

  /** Read either a sorted list of entries from a file (Read), a range
      [from, to) of entries from a file (ReadRange), or exactly one
      entry from a file (ReadOne), appending if pre_resized = False,
      starting at index 0 if pre_resized = True.  For example, given
      the range [10,20) and an empty mastervec, the first element of
      the mastervec will be the 10th entry from the file, the second
      element will be the 11th entry, and so on, ending with the tenth
      element being the 19th entry.  In addition, the specified amount
      of extra space will be added to the dynamic block for each entry.
      The second form behaves poorly if it has to resize the dynamic
      memory block.  If pre_reserved = True, it is assumed that space
      has been reserved in advance.  If pre_resized = True, it is
      assumed that the vector has been resized in advance.

      @param[in] filename the filename from which to read the entries
      @param[in] entries the sorted list of indices (relative to the file)
      of the entries to read
      @param[in] extra amount of extra space added to the dynamic block
      for each entry read
      @param[in] pre_reserved if True, it is assumed that space has been
      reserved in advance
      @param[in] pre_resized if True, it is assumed that the vector has
      been resized in advance, and reading starts at index 0 instead of
      appending
      @param[in] acceptCompFile: if False (the default), we Assert
      that this is not a compmastervec file. The parameter is set to true
      when calls are made from the compmastervec class.
  */
  void Read( const String& filename,
	     const vec<int>& entries,
	     int extra = 0,
	     Bool pre_reserved = False,
	     Bool pre_resized = False,
             Bool acceptCompFile = False );

  ///See documentation for Read.
  void ReadRange( const String& filename,
		  int from,
		  int to,
		  int extra = 0,
		  Bool pre_reserved = False,
		  Bool pre_resized = False,
		  Bool acceptCompFile = False  );

  /// Read exactly one entry, the one at index it, by delegating to
  /// ReadRange with the range [it, it + 1).  See documentation for Read.
  void ReadOne( const String& filename,
                int it,
                int extra = 0,
                Bool pre_reserved = False,
                Bool pre_resized = False,
                Bool acceptCompFile = False  )
  {
    const int past_it = it + 1;
    ReadRange( filename, it, past_it, extra,
               pre_reserved, pre_resized, acceptCompFile );
  }
     
  /** Read a range [from, to) of entries from a file (SparseReadRange), 
      or a sorted list of entries from a file (SparseRead), 
      maintaining their indices relative to the
      file.  For example, given the range [10,20), the first ten
      elements of the mastervec will be empty, the tenth element of the
      mastervec will be the 10th entry from the file, the eleventh
      element will be the 11th entry, and so on, ending with the
      nineteenth element being the 19th entry.  The size of the
      mastervec will be equal to the number of entries in the specified
      file, even if most of them are not read in.  (This accommodates
      reuse of a given mastervec over multiple subset reads.)  Behaves in
      all other respects as Read() does.
  */
  void SparseRead( const String& filename,
		   const vec<int>& entries,
		   int extra = 0,
		   Bool pre_reserved = False,
		   Bool acceptCompFile = False );

  ///See documentation for SparseRead.
  void SparseReadRange( const String& filename,
			int from,
			int to,
			int extra = 0,
			Bool pre_reserved = False,
			Bool acceptCompFile = False  );

  /// Method: ReadAll
  /// Reserve space for and read in an entire mastervec (from a single file).
  /// Use <MakeMappedMastervec()> instead if you're loading read-only data --
  /// then multiple processes can share one copy of the data.
  void ReadAll( const String& filename, 
                Bool append = False,
                Bool acceptCompFile = False  );

 protected:
  /// Construct objects as normal, but use a read-only mmap to read the dynamic data.
  void MapAll( const String& filename );

  /// Implementation of reserve.
  /// Returns a pair containing the number of objects to Blank() and a
  /// pointer to the old data that needs deletion; the pair should be
  /// given to reserveCleanup.
  pair<int, X *> reserveInternal( int n );

  /// Cleanup after reserveInternal.
  /// These two functions have been separated to allow push_back_reserve()
  /// and push_back() to accept existing members of the mastervec,
  /// as in push_back_reserve((*this)[0]);
  void reserveCleanup(pair<int, X *> cleanup);

  /// Implementation of Reserve, returns pointer to undeleted old data.
  A * ReserveInternal( longlong raw_mem_size, int n_objects );


 public:
  /// Return an auto_ptr to a const mastervec that has been mapped
  /// from disk.  You could use it like so:
  ///
  /// auto_ptr<const vecbasevector> pSeqs = vecbasevector::GetPtrToMapped( file );
  /// const vecbasevector& seqs = *pSeqs;
  ///
  /// The macro MakeMappedMastervec( FILE, T, DATA ) below does exactly this.
  ///
  /// The auto_ptr is const because when we use mmap, the data is read-only.
  ///
  /// @param[in] filename file handed to MapAll()
  /// @return owning pointer to the newly created mastervec

  static
  auto_ptr<const mastervec> GetPtrToMapped( const String& filename ) {
    // Take ownership before calling MapAll so that the new mastervec is
    // released, not leaked, if MapAll exits by throwing.
    auto_ptr<mastervec> owner( new mastervec );
    owner->MapAll( filename );
    return auto_ptr<const mastervec>( owner.release() );
  }

  /// Compatible binary read and write operations for entire mastervec structure,
  /// using file descriptors.  The current implementations assume that none of the
  /// mastervec entries are self-owned.

  void BinaryRead( int fd );
  void BinaryWrite( int fd ) const;

  longlong MastervecFileRawCount( const String& filename );

  mastervec& operator= ( const mastervec& original );

  mastervec( const mastervec& original );

protected:

  /// Size of the dynamic data, in units of sizeof(A) (returned by rawsize()).
  longlong raw_data_size_;
  /// Capacity of the dynamic data, in units of sizeof(A) (returned by
  /// rawcapacity()).
  longlong raw_data_capacity_;
  /// Start of the raw (dynamic) data block (returned by rawdata()).
  A*  raw_data_;
  /// Array of the stored objects (indexed by operator[]).
  X*  objects_;
  /// Number of objects contained (returned by size()).
  int objects_size_;
  /// Capacity for objects (returned by capacity()).
  int objects_capacity_;
  /// NOTE(review): presumably the file descriptor backing a mapping made
  /// by MapAll() -- confirm against the implementation.
  int fd_;
};

// Macro: MakeMappedMastervec
//
// Load read-only data so that one copy of the data can be
// shared among multiple processes on the same machine.
//
// A handy macro.  For example, you can replace the line
// >  vecbasevector reads( filename );
// with
// >  MakeMappedMastervec( filename, vecbasevector, reads );
// as long as reads is treated as read-only.
//
// Parameters:
//   FILE - name of the file, passed to T::GetPtrToMapped
//   T    - the mastervec type to create
//   DATA - name of the const T& reference that is declared
//
// The expansion consists of two declarations: an owning auto_ptr named
// DATA_qwer_ptr, and the reference DATA bound to the object it owns.
// Because it expands to more than one statement and introduces two names,
// the macro must be used where declarations are allowed, at most once per
// DATA name in a scope, and not as the unbraced body of an if or loop.
//
#define MakeMappedMastervec( FILE, T, DATA )   \
     auto_ptr<const T> DATA ## _qwer_ptr = T::GetPtrToMapped( FILE ); \
     const T& DATA = *DATA ## _qwer_ptr;


/// BinPosition.  Binary search in a sorted mastervec: return the position
/// of an element equivalent to x (neither compares less than the other),
/// or -1 if there is none.  If the element appears more than once, the
/// position of one of its instances is returned.

template<class X, class A> 
inline int BinPosition( const mastervec<X,A>& v, const X& x )
{
  if ( v.size( ) == 0 ) return -1;

  int lo = 0;
  int hi = v.size( ) - 1;
  while ( lo != hi )
  {
    const int mid = lo + (hi - lo) / 2;
    if ( x < v[mid] )
      hi = mid;
    else if ( v[mid] < x )
      lo = mid + 1;
    else
      return mid;
  }

  // One candidate remains; accept it only if it is equivalent to x.
  const bool equivalent = !(x < v[hi]) && !(v[hi] < x);
  return equivalent ? hi : -1;
}

//======================================================================

/**Join mastervec files (filename, filename..offsets, filename..static) into 
   one (with name filename).
   @param filename name of the main file; the two companion files are named
   by appending ..offsets and ..static to it.
*/
void MergeMastervecFiles( const String& filename );

///Inverse of MergeMastervecFiles: divide filename into three.
/// Names are filename, filename..offsets, filename..static
/// This operation does not consume much memory.
/// @param filename name of the single-file mastervec to divide.
void DivideMastervecFile( const String& filename );

///Get info from a mastervec file (in one or three file format).
/// NOTE(review): judging by the name, this returns the number of objects
/// stored in the file -- confirm against the definition.
/// @param filename name of the mastervec file
int MastervecFileObjectCount( const String& filename );

/**Get info from a mastervec file (in one or three file format).
   NOTE(review): judging by the name, this returns the total count of raw
   (type A) data elements in the file -- confirm against the definition.
   @param filename name of the mastervec file
   @param dataSize is the size of the A parameter in the mastervec. If this
   parameter is 0 (the default), we will try to read it from the file itself, 
   and fail if it is not there.
*/
longlong MastervecFileRawCount( const String& filename, int dataSize=0 );

/**In effect append mastervec datafiles (top, top..offsets, top..static)
   to the ends of mastervec datafiles (bottom, bottom..offsets, 
   bottom..static).
   @param bottom name of the file set that is appended to
   @param top name of the file set whose data is appended
*/
void ConcatenateMastervecData( const String& bottom, const String& top );

///Read one item from feudal file filename at position index into value
/// (declaration only; the definition is not in this part of the file).
/// \param filename name of the feudal file
/// \param index position of the item to read
/// \param value receives the item that was read
/// \param acceptCompFile NOTE(review): presumably controls whether a
///        compmastervec file is accepted as input -- confirm at the definition.
template<class X>
void GetOneFeudal(const String& filename,
		  int index, 
		  X & value,
		  bool acceptCompFile);

/// Free-function spelling of mastervec::BinaryWrite, so that a mastervec can
/// be written with the same call syntax as other types: forwards fd to
/// m.BinaryWrite.
template<class X, class A>
inline void BinaryWrite( int fd, const mastervec<X,A>& m )
{
     m.BinaryWrite( fd );
}
/// Free-function spelling of mastervec::BinaryRead: forwards fd to
/// m.BinaryRead, which fills m.
template<class X, class A>
inline void BinaryRead( int fd, mastervec<X,A>& m )
{
     m.BinaryRead( fd );
}


/**Permute input vector v in place according to permutation.
   Preconditions:
   - v.size() == permutation.size()
   - v must provide a SwapElements(int, int) method.
   If the permutation contains a -1, the position corresponding to that
   is essentially ignored and ends up in one of the available empty spaces

   This works for all mastervecs and compmastervecs.
*/
template<class SwappableVec>
void PermuteSwappableVec(SwappableVec & v,
			 const vec<int> & permutation)
{
  AssertEq((longlong) v.size(), (longlong) permutation.size());
  vec<int> o = permutation;
  for (int i = 0; i != (longlong) v.size(); ++i)
    {
      while (o[i] != i && o[i] != -1)
	{
	  v.SwapElements(i, o[i]);
	  std::swap(o[i], o[o[i]]);
	}
    }
}

/// Return true if both mastervecs have the same element sizes.
/// Useful inside Asserts, avoids calculations if not in debug mode.
template<class Mastervec1, class Mastervec2>
bool SameSizes(const Mastervec1 & v1, const Mastervec2 & v2) {
  vec<int> s1,s2;
  v1.ElementSizes(s1);
  v2.ElementSizes(s2);
  return s1 == s2;
}

/// Destroy: call destroy( ) on one to five mastervecs, in argument order.

template<class X, class A>
inline void Destroy( mastervec<X,A>& m )
{
     m.destroy( );
}

template<class X1, class A1, class X2, class A2>
inline void Destroy( mastervec<X1,A1>& m1, mastervec<X2,A2>& m2 )
{
     m1.destroy( );
     m2.destroy( );
}

template<class X1, class A1, class X2, class A2, class X3, class A3>
inline void Destroy( mastervec<X1,A1>& m1, mastervec<X2,A2>& m2,
     mastervec<X3,A3>& m3 )
{
     m1.destroy( );
     m2.destroy( );
     m3.destroy( );
}

template<class X1, class A1, class X2, class A2, class X3, class A3,
     class X4, class A4>
inline void Destroy( mastervec<X1,A1>& m1, mastervec<X2,A2>& m2,
     mastervec<X3,A3>& m3, mastervec<X4,A4>& m4 )
{
     m1.destroy( );
     m2.destroy( );
     m3.destroy( );
     m4.destroy( );
}

template<class X1, class A1, class X2, class A2, class X3, class A3,
     class X4, class A4, class X5, class A5>
inline void Destroy( mastervec<X1,A1>& m1, mastervec<X2,A2>& m2,
     mastervec<X3,A3>& m3, mastervec<X4,A4>& m4, mastervec<X5,A5>& m5 )
{
     m1.destroy( );
     m2.destroy( );
     m3.destroy( );
     m4.destroy( );
     m5.destroy( );
}


//======================================================================

template<class A>
class serfvec
{
private:

  // declared here so it can be inlined below
  void set_empty( )
  {
    data_ = 0;
    unused_ = 0;
    length_ = 0;
    capacity_and_self_owned_ = TopBit32;   
  }

  void range_check( int i ) const
  {
    AssertGe( i, 0 );
    AssertLt( i, length_ );
  }

public:

  typedef A                 value_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;

#if __GNUC__ > 2
  typedef serfvec<A>        vector_type;
  typedef __gnu_cxx::__normal_iterator<pointer, vector_type>
  iterator;
  typedef __gnu_cxx::__normal_iterator<const_pointer, vector_type>
  const_iterator;
#else
  typedef value_type*       iterator;
  typedef const value_type* const_iterator;
#endif

  typedef value_type&       reference;
  typedef const value_type& const_reference;
  typedef size_t            size_type;
  typedef ptrdiff_t         difference_type;

  typedef reverse_iterator<const_iterator>  const_reverse_iterator;
  typedef reverse_iterator<iterator>        reverse_iterator;



  serfvec( )
  {  set_empty( );  }

  explicit serfvec( int n )
  {
    Assert( n >= 0 && n <= Bits31 );
    unused_ = 0;
    data_ = new A[n];
    capacity_and_self_owned_ = TopBit32 ^ n;
    length_ = n;    
  }

  /// Construct a serfvec of length n filled with the value a;

  serfvec( int n, const A & a) {
    Assert( n >= 0 && n <= Bits31 );
    unused_ = 0;
    data_ = new A[n];
    uninitialized_fill_n(data_,n,a);
    capacity_and_self_owned_ = TopBit32 ^ n;
    length_ = n;   
  } 

  serfvec(const serfvec& v)
  {
    if ( v.data( ) == 0 )
      set_empty( );
    else 
      { 
	length_ = v.size( );
	capacity_and_self_owned_ = v.size( ) ^ TopBit32;
        unused_ = 0;
	data_ = new A[length_];
	memcpy( data_, v.data( ), length_ * sizeof(A) );    
      }
  }

  explicit 
  serfvec(const vec<A>& v)
  {
    length_ = v.size( );
    capacity_and_self_owned_ = v.size( ) ^ TopBit32;
    unused_ = 0;
    data_ = new A[length_];
    if ( v.nonempty( ) ) memcpy( data_, &v[0], length_ * sizeof(A) );    
  }

  ~serfvec( ) 
  {
    if ( SelfOwned( ) )
      delete [ ] data_;
    set_empty( );   
  }

  Bool SelfOwned( ) const
  {
    return ( capacity_and_self_owned_ & TopBit32 ) != 0;   
  }

  void Reinitialize( )
  {
    if ( SelfOwned( ) )
      delete [ ] data_;
    set_empty( );    
  }

  void Blank( )
  {
    set_empty( );   
  }

  int size( ) const
  {  return length_;  }

  void clear( )
  {  length_ = 0;  }

  bool empty( ) const
  {  return (length_ == 0);  }

  void destroy( ) 
  {
    if ( SelfOwned( ) ) 
      delete [ ] data_;
    set_empty( );   
  }

  int capacity( ) const
  {  return capacity_and_self_owned_ & Bits31;  }


  A& operator[ ]( int i )
  {
    range_check(i);
    return data_[i];    
  }

  const A& operator[ ]( int i ) const
  {
    range_check(i);
    return data_[i];    
  }


  iterator       begin()       { return iterator(data_); }
  const_iterator begin() const { return const_iterator(data_); }
  iterator       end()         { return iterator(data_+length_); }
  const_iterator end()   const { return const_iterator(data_+length_); }

  reverse_iterator       rbegin()       { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator       rend()         { return reverse_iterator(begin());  }
  const_reverse_iterator rend()   const { return const_reverse_iterator(begin());  }

  reference       front()       { Assert(length_>0); return reference(*begin()); }
  const_reference front() const { Assert(length_>0); return const_reference(*begin()); }
  reference       back()        { Assert(length_>0); return reference(*(end()-1)); }
  const_reference back()  const { Assert(length_>0); return const_reference(*(end()-1)); }


  void SetToSubOf( const serfvec& a, unsigned int j, int k )
  {
    AssertLe( (int) j + k, (int) a.size( ) );
    resize(k);
    for ( int i = 0; i < k; i++ )
      (*this)[i] = a[ j + i ];    
  }

  const A* StartOfDynamicData( ) const { return data_; }

  // Note that the following is not in class serf:

  A*       StartOfDynamicData( )       { return data_; }
  /// Measured in sizeof(A)
  longlong SizeOfDynamicData( )  const { return length_; }

  longlong ExtraSpaceForDynamicData( ) const { return capacity( ) - length_; }

  const char* StartOfStaticData( ) const { return 0; }
  const int   SizeOfStaticData( )  const { return 0; }

  void reserve( int n )
  {
    Assert( n >= 0 && n <= Bits31 );
    if ( n > capacity( ) )
      {
	A* new_data = new A[n];
	memcpy( new_data, data_, length_ * sizeof(A) );
	if ( SelfOwned( ) )
	  delete [ ] data_;
	data_ = new_data;
	capacity_and_self_owned_ = TopBit32 ^ n;   
      }
  }

  void resize( int n )
  {
    Assert( n >= 0 && n <= Bits31 );
    if ( n > capacity( ) )
      reserve(n);
    length_ = n;   
  }

  /// resize and fill new elements with value a.
  void resize( int n, const A & a) {
    int oldsize=size();
    resize(n);
    if (n > oldsize) fill(data_ + oldsize, data_ + n, a);
  }

  const A* data( ) const
  {  return data_;  }

  serfvec& operator = (const serfvec& v)
  {
    if ( v.data_ == 0 ) { 
      if ( SelfOwned( ) )
        delete [ ] data_;
      set_empty( );   
    }
    else {
      length_ = v.size( );
      if ( data_ == 0 || capacity( ) < v.size( ) ) {  
        if ( SelfOwned( ) )
          delete [ ] data_;
        data_ = new A[length_];
        capacity_and_self_owned_ = TopBit32 ^ length_;  
      }
      memcpy( data_, v.data( ), length_ * sizeof(A) );   
    }
    return *this;    
  }

  // This implementation of push_back automatically allocates an extra 20% in the 
  // event of overflow.  However, for optimal results, this condition should be 
  // avoided by carefully reserving in advance.

  void push_back( const A& a )
  {
    if ( length_ > capacity( ) - 1 )
      reserve(capacity( ) + capacity( )/5 + 1);
    data_[length_++] = a;   
  }

  ///Push back and enlarge if needed: multiply capacity by a factor "increase".
  void push_back_reserve( const A& a, double increase = 2.0 )
  {
    if ( capacity() == 0 )
      reserve( 1 );
    else if ( length_ > capacity( ) - 1 )
      reserve( static_cast<int>( ceil( static_cast<double>( capacity() ) * increase ) ) );
    data_[length_++] = a;   
  }

  void push_back( const A& a, const A & a2 ) {
    push_back(a); push_back(a2);
  }

  void push_back( const A& a, const A & a2 , const A & a3 ) {
    push_back(a); push_back(a2); push_back(a3);
  }

  void push_back( const A& a, const A & a2 , const A & a3, const A & a4 ) {
    push_back(a); push_back(a2); push_back(a3); push_back(a4);
  }

  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5 ) {
    push_back(a); push_back(a2); push_back(a3); push_back(a4);push_back(a5);
  }

  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6 ) {
    push_back(a,a2,a3); push_back(a4,a5,a6);
  }

  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6, const A & a7 ) {
    push_back(a,a2,a3); push_back(a4,a5,a6,a7);
  }
 void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6, const A & a7, const A & a8  ) {
    push_back(a,a2,a3,a4); push_back(a5,a6,a7,a8);
  }
  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6, const A & a7, const A & a8, 
		  const A & a9) {
    push_back(a,a2,a3,a4); push_back(a5,a6,a7,a8);push_back(a9);
  }
  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6, const A & a7, const A & a8,
		  const A & a9, const A & a10) {
    push_back(a,a2,a3,a4); push_back(a5,a6,a7,a8);push_back(a9,a10);
  }
  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6, const A & a7, const A & a8, 
		  const A & a9, const A & a10, const A & a11) {
    push_back(a,a2,a3,a4); push_back(a5,a6,a7,a8);push_back(a9,a10,a11);
  }
  void push_back( const A& a, const A & a2, const A & a3, const A & a4, 
		  const A & a5, const A & a6, const A & a7, const A & a8,
		  const A & a9, const A & a10, const A & a11, const A & a12) {
    push_back(a,a2,a3,a4); push_back(a5,a6,a7,a8);push_back(a9,a10,a11,a12);
  }

  void SetExternal( const char* start_of_static_data,
		    A* pointer_to_dynamic_data,
		    int size_of_dynamic_data,
		    int extra_space_for_dynamic_data )
  {
    data_ = pointer_to_dynamic_data;
    length_ = size_of_dynamic_data;
    capacity_and_self_owned_ = length_ + extra_space_for_dynamic_data;   
  }

  void ShiftStartOfDynamicData( A* a1, A* a2 )
  {   
    if ( data_ != 0 )
      data_ += a1 - a2;   
  }

  void SetToReverseOf( const serfvec<A>& v )
  {
    resize( v.size( ) );
    for ( int i = 0; i < v.size( ); i++ )
      (*this)[i] = v[ v.size( ) - i - 1 ];   
  }

  void ReverseMe( )
  {
    for ( int i = 0; i < size( )/2; i++ )
      swap( (*this)[i], (*this)[ size( ) - i - 1 ] );   
  }

  friend
  bool operator == (const serfvec& lhs, const serfvec& rhs )
  {
    if ( lhs.length_ != rhs.length_ )
      return false;
    for ( int i = 0; i < lhs.length_; ++i )
      if ( ! ( lhs.data_[i] == rhs.data_[i] ) )
	return false;
    return true;
  }

  friend
  bool operator != (const serfvec& lhs, const serfvec& rhs )
  {
    return !(lhs == rhs);
  }

  friend
  bool operator < (const serfvec& lhs, const serfvec& rhs )
  {
    int minLen = min( lhs.length_, rhs.length_ );
    for ( int i = 0; i < minLen; ++i ) {
      if ( lhs.data_[i] < rhs.data_[i] ) return true;
      if ( lhs.data_[i] > rhs.data_[i] ) return false;
    }
    return ( lhs.length_ < rhs.length_ );
  }

  void Swap( serfvec& v )
  {
    std::swap( data_, v.data_ );
    std::swap( length_, v.length_ );
    std::swap( capacity_and_self_owned_, v.capacity_and_self_owned_ );
  }

  friend serfvec Cat( const serfvec& left, const serfvec& right )
  {    static serfvec join;
       join.resize( left.size( ) + right.size( ) );
       for ( int i = 0; i < left.size( ); i++ )
            join[i] = left[i];
       for ( int i = 0; i < right.size( ); i++ )
            join[ i + left.size( ) ] = right[i];
       return join;    }

private:

  // For full compatibility across 32-bit and 64-bit architectures, we use a 
  // union to pad the the data_ pointer so that 8 bytes are always used.  Of course
  // this wastes space on 32-bit systems.

  union {
       A*  data_;
       longlong unused_;
  };

  int length_;

  // capacity_and_self_owned_: the high order bit is the self_owned flag,
  // the lower 31 bits are the capacity.
  int capacity_and_self_owned_;

};

/// Remove from v every element whose entry in erase is true, keeping the
/// remaining elements in their original order.  erase is indexed in parallel
/// with v.
template<class T> void EraseIf( serfvec<T>& v, const vec<Bool>& erase )
{
  int kept = 0;
  for ( int src = 0; src < v.size( ); ++src ) {
    if ( erase[src] )
      continue;
    // Compact survivors towards the front; skip the no-op self-assignment.
    if ( kept != src )
      v[kept] = v[src];
    ++kept;
  }
  v.resize(kept);
}

//======================================================================

// These functions are deprecated.  Use serfvec<A>::ReverseMe() and 
// serfvec<A>::SetToReverseOf() instead.

/// Deprecated free-function form of serfvec<A>::ReverseMe(): reverses v in
/// place.
template<class A>
inline void ReverseThis( serfvec<A>& v )
{
  v.ReverseMe( );
}

/// Deprecated free-function form of serfvec<A>::SetToReverseOf(): returns a
/// new serfvec holding the elements of v in reverse order.
template<class A>
inline serfvec<A> Reverse( const serfvec<A>& v )
{
  serfvec<A> reversed;
  reversed.SetToReverseOf( v );
  return reversed;
}


/// Free-function form of serfvec<A>::destroy(), matching the Destroy
/// overloads provided for mastervec.
template<class A>
inline void Destroy( serfvec<A>& v )
{
  v.destroy( );
}


///Print a header line with the size of v, then each element of v on its own
///line, then one empty line.  Every line is terminated with endl, so the
///stream is flushed after each one.
template<class T>
ostream & operator << (ostream & os, const serfvec<T> & v)
{
  os << " size of serfvec: " << v.size() << endl;
  int pos = 0;
  while (pos != v.size()) {
    os << v[pos] << endl;
    ++pos;
  }
  return os << endl;
}
    
///Make v an element-by-element copy of s: v is resized to s.size() and each
///element is assigned individually.
template<class T>
void copyToVec(const serfvec<T> & s, vec<T> & v)
{
  const int count = s.size();
  v.resize(count);
  // Assignment is done per element rather than with memcpy, because T may
  // define its own operator=.
  size_t pos = 0;
  while (pos != v.size()) {
    v[pos] = s[pos];
    ++pos;
  }
}


/// Mixed comparison between a serfvec and a vec: equal when both have the
/// same number of elements and no pair of corresponding elements compares
/// unequal (using operator!= of T).
template<class T>
bool operator == (const serfvec<T> &s, const vec<T> &v)
{
  const bool same_length = ( (size_t) s.size() == v.size() );
  if (!same_length)
    return false;
  size_t pos = 0;
  while (pos != v.size()) {
    if (v[pos] != s[pos])
      return false;
    ++pos;
  }
  return true;
}

/// Same comparison with the operands in the other order.
template<class T>
bool operator == (const vec<T> &v, const serfvec<T> &s)
{
  const bool equal = (s == v);
  return equal;
}

/// Add up all the bytes of v as integers and return the total.
inline
int Sum( const serfvec<unsigned char>& v )
{
  int total = 0;
  int pos = 0;
  while ( pos < v.size( ) ) {
    total += v[pos];
    pos++;
  }
  return total;
}


//======================================================================

/// vecvec<T>: a mastervec whose elements are serfvec<T>, i.e. a vector of
/// vectors of T.  It adds nothing to mastervec except constructors.
template<class T>
class vecvec : public mastervec< serfvec<T>, T> 
{ 
public:
  /// Default-construct the underlying mastervec.
  vecvec( )
    : mastervec< serfvec<T>, T>( )
  { }

  /// Forward n to the mastervec constructor taking an int.
  vecvec( int n )
    : mastervec< serfvec<T>, T>( n )
  { }

  /// Forward filename to the mastervec constructor taking a String.
  vecvec( const String &filename )
    : mastervec< serfvec<T>, T>( filename )
  { }

  /// Build from a vec of vecs: each inner vec is converted to a serfvec<T>
  /// and appended in order.
  vecvec( const vec< vec<T> >& v )
  {    for ( int i = 0; i < v.isize( ); i++ )
            // push_back_reserve is inherited from a base class that depends
            // on T, so it has to be reached through this-> for name lookup
            // to find it.
            this->push_back_reserve( serfvec<T>( v[i] ) );    }

};


//======================================================================


/** File header class for mastervec and compmastervec.
    @class mv_file_control_block
    
    This used to be a simple struct, and got promoted to a class when we 
    needed to do some data compression and store values in bits.

    Note that the internal data are carefully packed into a certain format so that
    it will be compatible with any existing old mastervec files. That is also
    why we leave n, offsets_start and static_start open for public access,
    because a lot of existing code depends on that, not always in obvious ways
    (for example, there is code that reads into a file and assumes that the data
    in a certain place is offsets_start, without ever reading in an actual
    mv_file_control_block).

    Do not reorder, resize or add data members: that would change the layout
    that existing files rely on.
*/
    

class mv_file_control_block
{ 
  //=== Data ===//
 public:
  /// NOTE(review): presumably the number of objects in the file -- confirm
  /// against the code that writes the header.
  int n;

 private:
  ///This char is cut up into bits for different variables (see methods below):
  ///  bits 0-1: nfiles, bit 2: isCompFile, bit 3: bigEndian, bits 4-7: version.
  unsigned char c1;
  unsigned char futureUse;
  unsigned char sizeX; ///<size of template class X
  unsigned char sizeA; ///<size of template class A

 public:
  longlong offsets_start;
  longlong static_start;
  
  //=== Methods ===//
 public:
  ///Default constructor: make sure all is set to 0 initially.
  mv_file_control_block()
  {
    memset(this, 0, sizeof(mv_file_control_block));
  }

  //=== Accessors ===//

  ///Number of files: 1 or 3 for mastervec, 1 for compmastervec
  unsigned int nfiles() const { return c1 & 0x3; }

  ///True if it's a compmastervec file
  bool isCompFile() const { return c1 & 0x4; }

  ///True if bigEndian format. Not currently in use.
  bool bigEndian() const { return c1 & 0x8; }

  ///Version number, four bits, not currently in use.
  ///If version() == 15 (max) then further info is at the end of the file.
  ///The four bits are the high nibble of c1, shifted down so that the result
  ///lies in 0..15.
  unsigned char version() const { return ( c1 & 0xF0 ) >> 4; } 

  ///Size of the X (vector) class in the mastervec.
  unsigned char vecSize() const { return sizeX; }

  ///Size of the A (underlying data) class in the mastervec.
  unsigned char dataSize() const { return sizeA; }

  ///True if neither size field has been set.
  bool IsOldFormat() const { return 0==vecSize() && 0 == dataSize(); }

  //=== Mutators ===//

  ///Set the number of files; only the low two bits of count are stored.
  void nfiles(int count) { c1 &= ~0x3; c1 |= (count & 0x3); }
  ///Set or clear the compmastervec flag.
  void setCompFile(bool b) { c1 &= ~0x4; c1 |= ( (b & 0x1) << 2 ); }
  ///Set or clear the big-endian flag.
  void setBigEndian(bool b) { c1 &= ~0x8; c1 |= ( (b & 0x1) << 3 ); }
  ///Record the size of the X class; stored in one unsigned char.
  void SetVecSize( int i) { sizeX=i; }
  ///Record the size of the A class; stored in one unsigned char.
  void SetDataSize( int i ) { sizeA = i; }
};


/// IsGoodFeudalFile: do a WEAK test to see if a file is in the feudal format.
/// Passing the test does not guarantee that the file is valid.
/// \param filename name of the file
/// \param verbose: if true, print out why the test failed.
/// \param ok3: if true, control.nfiles() == 3 is acceptable.
/// \return NOTE(review): presumably True when the file passes the test --
///         confirm at the definition.
Bool IsGoodFeudalFile( const String& filename, bool verbose=false,
		       bool ok3 = false);

/// Weak test to see if it could be a feudal file for a particular serfvec.
/// What it actually checks is that the sizeof(X) and sizeof(X::value_type) 
/// match the ones in the file header.
/// \param filename name of the feudal file
/// \param dummy used only to establish the template type.
/// \param verbose: if true, explain how the test failed.
/// \param ok3: if true, control.nfiles() == 3 is acceptable.
/// \return NOTE(review): presumably true when the sizes match -- confirm at
///         the definition.
template<class X>
bool IsGoodFeudalFile( const String & filename, const X * dummy,
		       bool verbose = false, bool ok3 = false);

// BuildIndex: given a vec<T> v, build a vecvec<I> which indexes it
// (where I is an integer type), using key.
//
// On return, index has key_count entries, and index[k] lists in increasing
// order the positions i in v for which key( v[i] ) == k.  key must therefore
// map every element of v to a value in [0, key_count).
//
// Example:
//
// vec<read_location> v = ...
// int ncontigs = ...
// vecvec<int> index;
// BuildIndex( v, index, mem_fun_ref(&read_location::Contig), ncontigs );
// ... or ...
// int C( const read_location& rl ) { return rl.Contig( ); }
// BuildIndex( v, index, C, ncontigs );

template<class T, class KEY, class I>
void BuildIndex( const vec<T>& v, vecvec<I>& index, const KEY& key, int key_count )
{
  // Start from scratch, with room for one entry per element of v spread over
  // key_count lists.
  index.clear( );
  index.Reserve( v.size( ), key_count );
  index.resize(key_count);

  // File each position of v under the key of its element.
  I pos = 0;
  while ( pos < (I) v.size( ) )
    {
      index[ key( v[pos] ) ].push_back(pos);
      pos++;
    }

  // Put every list in increasing order.
  for ( int k = 0; k < key_count; ++k )
    sort( index[k].begin( ), index[k].end( ) );
}

#endif
