/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef KMERRECORD
#define KMERRECORD

#include "Basevector.h"
#include "CoreTools.h"
#include "KmerShape.h"
#include "math/Functions.h"

/**
   File: KmerRecord.h

   Data structures for representing kmers and kmer occurrences,
   together with some associated information (for example the
   frequency of the kmer or the location (read and position) of the
   kmer occurrence).
*/

/// Class: byte_pac
///
/// A byte_pac holds A+B bytes of data, rounded up to a whole number of
/// 4-byte words, exposed through a union as ints, shorts or bytes.  The
/// first A bytes are the kmer data; the following B bytes are whatever the
/// enclosing record stores after the kmer.
///
/// The operator< overloads that follow this class order byte_pacs using only
/// the first A bytes.  operator== compares the first A+B bytes, while
/// EqualKmers compares only the first A bytes.
///
/// NOTE(review): the array extents assume a 4-byte unsigned int and a 2-byte
/// unsigned short -- confirm for any new target platform.

template< int A, int B >
class byte_pac {
public:
  /// Three overlapping views of the same storage.
  union
  {
    unsigned int ints[ (A+B+3)/4 ];
    unsigned short shorts[ (A+B+3)/4 * 2 ];
    unsigned char bytes[ (A+B+3)/4 * 4 ];
  };

  /// Number of bytes of storage, including the padding that rounds A+B up
  /// to a multiple of 4.
  unsigned int size() const { return sizeof( bytes ); }

  /// Byte-wise iteration over the whole (padded) storage.
  unsigned char * begin() { return bytes; }
  const unsigned char * begin() const { return bytes; }
  unsigned char * end() { return begin() + size(); }
  const unsigned char * end() const { return begin() + size(); }

  /// True if the first A bytes (the kmer data) of both objects match.
  bool EqualKmers(const byte_pac & b) const {
    const int difference = memcmp( bytes, b.bytes, A );
    return difference == 0;
  }

  /// True if the first A+B bytes of both objects match; the padding bytes
  /// beyond A+B are not examined.
  friend bool operator==(const byte_pac & l, const byte_pac & r) {
    const int difference = memcmp( l.bytes, r.bytes, A+B );
    return difference == 0;
  }
};

// K=8: the kmer occupies 2 bytes, compared as one 16-bit word.
template< int B >
bool operator< ( const byte_pac<2,B>& b1, const byte_pac<2,B>& b2 )
{
  return b1.shorts[0] < b2.shorts[0];
}

// K=12: the kmer occupies 3 bytes, compared as one 16-bit word followed by
// one byte.
template< int B >
bool operator< ( const byte_pac<3,B>& b1, const byte_pac<3,B>& b2 )
{
  if ( b1.shorts[0] != b2.shorts[0] ) return b1.shorts[0] < b2.shorts[0];
  return b1.bytes[2] < b2.bytes[2];
}

// K=16: the kmer occupies 4 bytes, compared as one 32-bit word.
template< int B >
bool operator< ( const byte_pac<4,B>& b1, const byte_pac<4,B>& b2 )
{
  return b1.ints[0] < b2.ints[0];
}

// K=20: the kmer occupies 5 bytes, compared as one 32-bit word followed by
// one byte.
template< int B >
bool operator< ( const byte_pac<5,B>& b1, const byte_pac<5,B>& b2 )
{
  if ( b1.ints[0] != b2.ints[0] ) return b1.ints[0] < b2.ints[0];
  return b1.bytes[4] < b2.bytes[4];
}

// K=24: the kmer occupies 6 bytes, compared as one 32-bit word followed by
// one 16-bit word.
template< int B >
bool operator< ( const byte_pac<6,B>& b1, const byte_pac<6,B>& b2 )
{
  if ( b1.ints[0] != b2.ints[0] ) return b1.ints[0] < b2.ints[0];
  return b1.shorts[2] < b2.shorts[2];
}

// K=28: the kmer occupies 7 bytes, compared as one 32-bit word, then one
// 16-bit word, then one byte.
template< int B >
bool operator< ( const byte_pac<7,B>& b1, const byte_pac<7,B>& b2 )
{
  if ( b1.ints[0] != b2.ints[0] ) return b1.ints[0] < b2.ints[0];
  if ( b1.shorts[2] != b2.shorts[2] ) return b1.shorts[2] < b2.shorts[2];
  return b1.bytes[6] < b2.bytes[6];
}

// K=32: the kmer occupies 8 bytes, compared as two 32-bit words.
template< int B >
bool operator< ( const byte_pac<8,B>& b1, const byte_pac<8,B>& b2 )
{
  if ( b1.ints[0] != b2.ints[0] ) return b1.ints[0] < b2.ints[0];
  return b1.ints[1] < b2.ints[1];
}

// K=36: the kmer occupies 9 bytes, compared as two 32-bit words followed by
// one byte.
template< int B >
bool operator< ( const byte_pac<9,B>& b1, const byte_pac<9,B>& b2 )
{
  if ( b1.ints[0] != b2.ints[0] ) return b1.ints[0] < b2.ints[0];
  if ( b1.ints[1] != b2.ints[1] ) return b1.ints[1] < b2.ints[1];
  return b1.bytes[8] < b2.bytes[8];
}

// K=40: the kmer occupies 10 bytes, compared as two 32-bit words followed by
// one 16-bit word.
template< int B >
bool operator< ( const byte_pac<10,B>& b1, const byte_pac<10,B>& b2 )
{
  if ( b1.ints[0] != b2.ints[0] ) return b1.ints[0] < b2.ints[0];
  if ( b1.ints[1] != b2.ints[1] ) return b1.ints[1] < b2.ints[1];
  return b1.shorts[4] < b2.shorts[4];
}

// K=48: the kmer occupies 12 bytes, compared as three 32-bit words.
template< int B >
bool operator< ( const byte_pac<12,B>& b1, const byte_pac<12,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 3; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=64: the kmer occupies 16 bytes, compared as four 32-bit words.
template< int B >
bool operator< ( const byte_pac<16,B>& b1, const byte_pac<16,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 4; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=96: the kmer occupies 24 bytes, compared as six 32-bit words.
template< int B >
bool operator< ( const byte_pac<24,B>& b1, const byte_pac<24,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 6; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=100: the kmer occupies 25 bytes, compared as six 32-bit words followed
// by one byte.
template< int B >
bool operator< ( const byte_pac<25,B>& b1, const byte_pac<25,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 6; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  // All full words equal: the trailing byte breaks the tie.
  return b1.bytes[24] < b2.bytes[24];
}

// K=192: the kmer occupies 48 bytes, compared as twelve 32-bit words.
template< int B >
bool operator< ( const byte_pac<48,B>& b1, const byte_pac<48,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 12; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=200: the kmer occupies 50 bytes, compared as twelve 32-bit words
// followed by one 16-bit word.
template< int B >
bool operator< ( const byte_pac<50,B>& b1, const byte_pac<50,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 12; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  // All full words equal: the trailing 16-bit word breaks the tie.
  return b1.shorts[24] < b2.shorts[24];
}

// K=500: the kmer occupies 125 bytes, compared as 31 32-bit words followed
// by one byte.
template< int B >
bool operator< ( const byte_pac<125,B>& b1, const byte_pac<125,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 31; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  // All full words equal: the trailing byte breaks the tie.
  return b1.bytes[124] < b2.bytes[124];
}

// K=1000: the kmer occupies 250 bytes, compared as 62 32-bit words followed
// by one 16-bit word.
template< int B >
bool operator< ( const byte_pac<250,B>& b1, const byte_pac<250,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 62; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  // All full words equal: the trailing 16-bit word breaks the tie.
  return b1.shorts[124] < b2.shorts[124];
}

// K=1200: the kmer occupies 300 bytes, compared as 75 32-bit words.
template< int B >
bool operator< ( const byte_pac<300,B>& b1, const byte_pac<300,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 75; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=1600: the kmer occupies 400 bytes, compared as 100 32-bit words.
template< int B >
bool operator< ( const byte_pac<400,B>& b1, const byte_pac<400,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 100; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=2000: the kmer occupies 500 bytes, compared as 125 32-bit words.
template< int B >
bool operator< ( const byte_pac<500,B>& b1, const byte_pac<500,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 125; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=4000: the kmer occupies 1000 bytes, compared as 250 32-bit words.
template< int B >
bool operator< ( const byte_pac<1000,B>& b1, const byte_pac<1000,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 250; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

// K=10000: the kmer occupies 2500 bytes, compared as 625 32-bit words.
template< int B >
bool operator< ( const byte_pac<2500,B>& b1, const byte_pac<2500,B>& b2 )
{
  // The first word that differs decides the ordering.
  for ( int w = 0; w != 625; ++w )
  {
    const unsigned int lhs = b1.ints[w], rhs = b2.ints[w];
    if ( lhs != rhs ) return lhs < rhs;
  }
  return false;
}

/// Class: kmer_record
///
/// Stores a kmer together with its origin and position.
///
/// Class kmer_record comes in small and big versions, depending on the value 
/// of the template parameter I (1 or 2).
///
/// A kmer_record consists of the following
///   - k bases, stored 4 per byte. k must be a multiple of 4.
///   - a 4-byte readID
///   - a position, negated in the case where the reverse complement of
///   the k-mer occurs at the given position.  The position occupies 2 bytes
///   when I = 1 and 4 bytes when I = 2.
///
/// Layout of the underlying storage:
///   - I = 1: the id and position are packed as raw bytes (in the machine's
///     native byte order) immediately after the (K+3)/4 kmer bytes.
///   - I = 2: the id and position are stored as whole ints in the two words
///     that follow the (K+15)/16 kmer words.
///
/// Models <SortKmersOutputRecord>.  See also <kmer>, which stores just
/// the kmer without the origin and position.
template<int K, int I = 1> class kmer_record {

public:

  /// Constructor is empty and does no work after compilation.
  /// Here only for the static assert: if we use kmers of size that
  /// is not a multiple of 4, our comparison and equality operators
  /// can mess up. So let's make sure we don't.
  kmer_record() { STATIC_ASSERT_M(0==K%4, K_not_multiple_4); }

  /// Size of the bases contained in this kmer_record. 
  /// For use when creating a basevector from our data.
  static const int BASES_SIZE=K;

  /// Pointer to the first byte of the record's storage (start of the kmer).
  const unsigned char* Bytes( ) const
  {    return data_.bytes;    }

  /// Pointer one past the last byte of the record's (padded) storage.
  const unsigned char * BytesEnd() const {
    return data_.end();
  }

  /// Number of bytes of (padded) storage in the record.
  const int BytesSize() const {
    return data_.size();
  }

  /// Mutable word view of the record's storage.
  unsigned int* Ints( )
  {    return data_.ints;    }

  /// Resize kmer to K bases and fill it from this record's kmer words.
  void GetBasevector( basevector& kmer ) const {
    kmer.resize(K);
    kmer.SetData(data_.ints);
  }

  /// Return the read id stored in this record.
  int GetId( ) const
  {    
    if ( I == 1 )
      {
        // The id was stored as the 4 native-order bytes of an int, so
        // copying those bytes back reconstructs it on any platform, with
        // no dependence on the Little_Endian / Big_Endian macros.
        int id = 0;
        unsigned char* raw = reinterpret_cast<unsigned char*>( &id );
        for ( int j = 0; j < ID_BYTES; j++ )
          raw[j] = data_.bytes[ KMER_BYTES + j ];
        return id;
      }
    else if ( I == 2 ) return data_.ints[ KMER_INTS ];
    else return 1; /* unreachable statement */    }

  /// Return the position stored in this record (negative for a reversed
  /// kmer).  For I = 1 the stored 2-byte value is sign-extended to int.
  int GetPos( ) const
  {    
    if ( I == 1 )
      {
        // Same native-order round trip as GetId, through a short.
        short pos = 0;
        unsigned char* raw = reinterpret_cast<unsigned char*>( &pos );
        for ( int j = 0; j < POS_BYTES; j++ )
          raw[j] = data_.bytes[ KMER_BYTES + ID_BYTES + j ];
        return (int) pos;
      }
    else if ( I == 2 ) return data_.ints[ KMER_INTS + 1 ];
    else return 1; /* unreachable statement */    }

  /// Set the kmer from the first K bases of b, together with its read id
  /// and position.  read_id must be non-negative.  For I = 1 the position
  /// is truncated to a short.
  void Set( const basevector& b, int read_id, int read_pos ) {    
    Assert( I == 1 || I == 2 );
    Assert( read_id >= 0 );
    // The kmer words must be written first: for I = 1 the id/pos bytes
    // may land inside the last kmer word and must overwrite its tail.
    for ( int j = 0; j < KMER_INTS; j++ ) {
      data_.ints[j] = b.DataAsInts( j );
    }
    StoreIdAndPos( read_id, read_pos );
  }

  /// Set a kmer_record from an array of uints: -dnave 2001/10/30
  /// Same contract as the basevector overload, reading (K+15)/16 words
  /// from b.
  void Set( const unsigned int* b, int read_id, int read_pos ) {
    Assert( I == 1 || I == 2 );
    Assert( read_id >= 0 );
    for ( int j = 0; j < KMER_INTS; j++ ) {
      data_.ints[j] = b[j];
    }
    StoreIdAndPos( read_id, read_pos );
  }

  ///Only works for I=2
  void SetId( int read_id ) {
    AssertEq(I, 2);
    data_.ints[ KMER_INTS ] = read_id;
  }

  ///Only works for I=2
  void SetPos( int read_pos ) {
    AssertEq(I, 2);
    data_.ints[ KMER_INTS + 1 ] = read_pos;
  }

  ///We mark bad by setting the id to -1 (so, like SetId, only for I=2).
  void MarkAsBad() { SetId(-1); }

  /// Check whether id is -1.
  bool IsBad() const { return -1 == GetId(); }

  ///Set all data from an unsigned char * buffer (used when reading from file).
  ///Reads BytesSize() bytes from rawdata.
  void Set( const unsigned char * rawdata ) {
    copy(rawdata, rawdata + BytesSize(), data_.begin());
  }

  ///True if kmers are equal, even if GetPos() and GetId() are different.
  bool EqualKmers( const kmer_record& k2 ) const {
      return data_.EqualKmers(k2.data_);
  }

  ///True if kmers are rc, even if GetPos() and GetId() are different.
  bool ReverseKmers( const kmer_record& k2 ) const {
      basevector b;
      b.resize(K);
      b.SetData((char*)k2.Bytes());
      b.ReverseComplement();
      kmer_record rck2;
      rck2.Set(b, 0, 0);
      return EqualKmers(rck2);
  }

  ///True if the kmers are equal or reverse complements of each other.
  bool EqualOrReverseKmers( const kmer_record& k2 ) const {
    return (EqualKmers(k2) || ReverseKmers(k2));
  }

  ///Pick the lower of myself and my reverse complement.
  ///If I reverse myself, set pos negative as a marker, and refer it to the
  /// back of the basevector which becomes the front as we reverse.
  void Canonicalize(int length) {
    basevector b;
    b.resize(K);
    b.SetData((char*)Bytes());
    b.ReverseComplement();
    kmer_record rc;
    // Set the position counting from the back, and add -1 to distinguish
    // 0 forward from 0 rc
    rc.Set(b, GetId(), -(length - (GetPos() + K))  - 1);
    if (rc < *this) *this = rc;
  }
      
  /// Looks at the position to see if it is < 0
  bool IsReversed() const { return GetPos() < 0; }

  /// Return a positive position, even if the kmer_record is reversed.
  /// For reversed kmer, position is from back of basevector!
  ///
  /// BEWARE!  This method is only valid for kmer_records produced by
  /// SimpleSortKmers.  The equivalent for kmer_records produced by
  /// SortKmers is abs(GetPos())-1, which *always* gives a position in
  /// the forward version of the basevector.
  int TruePos() const {
    int pos = GetPos();
    return pos >= 0 ? pos : -pos-1;
  }

  /// True if kmer, id and position all match.
  ///
  /// The fields are compared individually rather than by comparing a byte
  /// prefix of the storage: for I = 2 the id and position start at the
  /// next word boundary after the kmer, so when K is not a multiple of 16
  /// a prefix of (K+3)/4 + 8 bytes stops short of the end of the position.
  friend bool operator==( const kmer_record& k1, const kmer_record& k2 ) {    
    return k1.EqualKmers( k2 )
      && k1.GetId( ) == k2.GetId( )
      && k1.GetPos( ) == k2.GetPos( );    
  }  

  friend bool operator!=( const kmer_record& k1, const kmer_record& k2 ) {    
    return !(k1 == k2);    
  }  

  /// Ordering by kmer data only; id and position are not considered.
  friend bool operator<( const kmer_record& k1, const kmer_record& k2 )
  {    return k1.data_ < k2.data_;    }

  friend bool operator>( const kmer_record& k1, const kmer_record& k2 )
  {    return k2 < k1;    }

  /// Order by read id only.
  static Bool id_cmp( const kmer_record& k1, const kmer_record& k2 )
  {    return k1.GetId( ) < k2.GetId( );    }

  /// Order by read id, then by absolute value of position.
  static Bool id_cmp_pos( const kmer_record& k1, const kmer_record& k2 )
  {    
    int id1 = k1.GetId( ), id2 = k2.GetId( );
    if ( id1 < id2 ) return True;
    if ( id1 > id2 ) return False;
    return Abs( k1.GetPos( ) ) < Abs( k2.GetPos( ) );    
  }

  /// Order by (signed) position only.
  static Bool cmp_pos( const kmer_record& k1, const kmer_record& k2 )
  {    
    return k1.GetPos( ) < k2.GetPos( );
  }

  /// Write "<id> <pos> " followed by the kmer's bases to out.
  void ToString(ostream & out) const {
    basevector b;
    b.resize(K);
    b.SetData((char *)Bytes());
    out << GetId() << " " << GetPos() << " ";
    b.Print(out);
  }

  /// Stream a record using ToString.  Taken by const reference to avoid
  /// copying the whole record for each output.
  friend ostream & operator<<(ostream & out, const kmer_record & krec) {
    krec.ToString(out);
    return out;
  }


private:

  /// Number of bytes and of 32-bit words occupied by the kmer, and the
  /// widths of the packed id and position fields used when I = 1.
  static const int KMER_BYTES = (K+3)/4;
  static const int KMER_INTS = (K+15)/16;
  static const int ID_BYTES = 4;
  static const int POS_BYTES = 2;

  /// Store the id and position after the kmer, using the layout selected
  /// by I.  Shared by both Set overloads that take an id and a position.
  void StoreIdAndPos( int read_id, int read_pos ) {
    if ( I == 1 ) {
      // Copy the id's bytes in native order, starting at its first byte
      // on every platform, so that GetId can read them back unchanged.
      const unsigned char* id_raw =
        reinterpret_cast<const unsigned char*>( &read_id );
      for ( int j = 0; j < ID_BYTES; j++ )
        data_.bytes[ KMER_BYTES + j ] = id_raw[j];
      // The position is narrowed to a short; larger values are truncated.
      short rp = (short) read_pos;
      const unsigned char* pos_raw =
        reinterpret_cast<const unsigned char*>( &rp );
      for ( int j = 0; j < POS_BYTES; j++ )
        data_.bytes[ KMER_BYTES + ID_BYTES + j ] = pos_raw[j];
    }
    else if ( I == 2 ) {    
      data_.ints[ KMER_INTS ] = read_id;
      data_.ints[ KMER_INTS + 1 ] = read_pos;    
    }    
  }

  // byte_pac< (K+3)/4, 5 + 3*I > data_;
  byte_pac< (K+3)/4, 4 + 2*I > data_;
};

///For sorting by id and pos, keeping negative positions negative.
///Orders by GetId() first and, for equal ids, by the signed GetPos().
///The call operator is const so the comparator can be used through a
///const object or reference.
template<class KmerRecord>
struct LessByIdAndPos: 
  public std::binary_function<KmerRecord, KmerRecord, bool> {
  bool operator()(const KmerRecord & k1, const KmerRecord & k2) const {
    const int id1 = k1.GetId( ), id2 = k2.GetId( );
    if ( id1 != id2 ) return id1 < id2;
    return k1.GetPos( ) < k2.GetPos( );
  }
};

///Return true if kmers are equal, even if GetPos() and GetId() are different.
///Forwards to KmerRecord::EqualKmers.  The call operator is const so the
///predicate can be used through a const object or reference.
template<class KmerRecord>
struct CompareForwardKmers: 
  public std::binary_function<KmerRecord, KmerRecord, bool> {
  bool operator()(const KmerRecord & k1, const KmerRecord & k2) const {
    return k1.EqualKmers(k2);
  }
};

///Return true if kmers are equal in forward or rc directions.
///Forwards to KmerRecord::EqualOrReverseKmers.  The call operator is const
///so the predicate can be used through a const object or reference.
template<class KmerRecord>
struct CompareForwardReverseKmers: 
  public std::binary_function<KmerRecord, KmerRecord, bool> {
  bool operator()(const KmerRecord & k1, const KmerRecord & k2) const {
    return k1.EqualOrReverseKmers(k2);
  }
};

/**
   Class: kmer

   A kmer holds just a kmer: K bases packed into (K+3)/4 bytes of data,
   with no origin or position information.

   Note that there is also a logical type <kmer_t>, which is just a type
   for <basevectors> that happen to be used to hold a single kmer.

   Models <SortKmersOutputRecord>.  See also <kmer_record>, which stores,
   in addition to the kmer sequence, the origin (<read id>) and position
   of the kmer occurrence.
*/
template<int K> class kmer {

     public:

  /// Number of bases held by this kmer.
  /// For use when creating a basevector from our data.
  static const int BASES_SIZE=K;

     /// Default construction leaves the kmer data uninitialized.
     kmer( ) { }

     /// Build a kmer from the leading words of b (see Set).
     kmer( const basevector& b )
     {    Set( b, 0, 0 );    }

     /// Byte view of the packed kmer.
     const unsigned char* Bytes( ) const
     {    return data_.bytes;    }

     /// Word view of the packed kmer.
     const unsigned int* Ints( ) const
     {    return data_.ints;    }

     /// Resize kmer to K bases and fill it from our packed data.
     void GetBasevector( basevector& kmer ) const {
       kmer.resize(K);
       kmer.SetData(data_.ints);
     }

     /// Ordering on the kmer data.
     friend bool operator<( const kmer& k1, const kmer& k2 )
     {    return k1.data_ < k2.data_;    }

     // The following Set function is embarrassing.  The read_id and read_pos
     // arguments are not used.  They are only there to avoid a messy problem
     // in getting SortKmers to function correctly for two separate classes.

     /// Copy the (K+15)/16 words holding the first K bases of b.
     void Set( const basevector& b, int read_id, int read_pos )
     {    const int word_count = (K+15)/16;
          for ( int w = 0; w < word_count; ++w )
               data_.ints[w] = b.DataAsInts(w);    }

     private:

     byte_pac< (K+3)/4, 0 > data_;

};


/// Free-function form of basevector::Canonicalize: canonicalizes b in place
/// by forwarding to that member function.
inline void CanonicalizeKmer( basevector& b )
{
  b.Canonicalize();
}


// Class: kmer_with_count
//
// A kmer_with_count holds a kmer and a multiplicity.  It consists of:
// * k bases, stored 4 per byte (with the last byte zero-filled if need be);
// * an optional zero pad byte, so that the count starts on a 2-byte boundary;
// * a 2-byte count.
//
// The "count" associated with a kmer is sometimes used to represent not counts
// but other values we may want to associate with the kmer -- for example,
// a 0/1 value indicating whether the kmer is <strong>.
// <KmerShortMap> uses vectors of this class to represent
// vectors of (kmer, short int) pairs.
template<int K> class kmer_with_count {

     public:

     // Largest value representable by the 2-byte count.
     static const int max_count = 65535;

     // Default construction leaves the data uninitialized.
     kmer_with_count( ) { }

     // Build from the first K bases of b and the given count.
     kmer_with_count( const basevector& b, unsigned short count )
     {    // Copy the kmer a word at a time.  This may spill past the kmer
          // bytes, so the pad and the count are written afterwards.
          for ( int w = 0; w < (K+15)/16; ++w )
               data_.ints[w] = b.DataAsInts(w);
          // Clear any pad byte between the end of the kmer and the count.
          for ( int pad = (K+3)/4; pad < (K+7)/8*2; ++pad )
               data_.bytes[pad] = 0;
          data_.shorts[ (K+7)/8 ] = count;    }

     // Resize kmer to K bases and fill it from our packed data.
     void GetBasevector( basevector& kmer ) const {
       kmer.resize(K);
       kmer.SetData(data_.ints);
     }

     // The stored 2-byte count.
     unsigned short Count( ) const { return data_.shorts[ (K+7)/8 ]; }

     // Word view of the storage.
     const unsigned int* Ints( ) const
     {    return data_.ints;    }

     // 16-bit view of the storage.
     const unsigned short* Shorts( ) const
     {    return data_.shorts;    }

     // Order by kmer first, then by count.
     friend Bool operator<( const kmer_with_count& k1, const kmer_with_count& k2 )
     {    if ( k1.data_ < k2.data_ ) return True;
          if ( k2.data_ < k1.data_ ) return False;
          return ( k1.Count( ) < k2.Count( ) ) ? True : False;    }

     // Equal when kmer, pad and count all match.
     friend Bool operator==( const kmer_with_count& k1, const kmer_with_count& k2 )
     {    return ( k1.data_ == k2.data_ );    }

     // Equal when the kmers match, whatever the counts.
     friend Bool eq_kmer( const kmer_with_count& k1, const kmer_with_count& k2 )
     {    return k1.data_.EqualKmers( k2.data_ );    }

     // Order by kmer only, ignoring the counts.
     friend Bool lt_kmer( const kmer_with_count& k1, const kmer_with_count& k2 )
     {    return ( k1.data_ < k2.data_ );     }

     private:
  //           (kmer )  (  optional pad   )   (count)
     byte_pac< (K+3)/4, (K+7)/8*2 - (K+3)/4 +    2   > data_;

};

// KmerRecordType(KSHAPE,I): token-pastes the identifier
// kmer_record_<KSHAPE>_<I>, the typedef name for the kmer_record of a given
// kmer shape and record size I.
#define KmerRecordType(KSHAPE,I) kmer_record_ ## KSHAPE ## _ ## I
// CreateKmerRecordType(KSHAPE,dummy): declares the two typedefs named by
// KmerRecordType for KSHAPE, one for kmer_record<KSHAPE::KSIZE,1> and one
// for kmer_record<KSHAPE::KSIZE,2>.  The second argument is not used.
#define CreateKmerRecordType(KSHAPE,dummy) \
    typedef kmer_record<KSHAPE::KSIZE,1> KmerRecordType(KSHAPE,1) ;  \
    typedef kmer_record<KSHAPE::KSIZE,2> KmerRecordType(KSHAPE,2)

// NOTE(review): FOR_ALL_KSHAPES is not defined in this file; presumably it
// applies CreateKmerRecordType once per supported kmer shape -- confirm
// against its definition.
FOR_ALL_KSHAPES(CreateKmerRecordType,);

// INSTANTIATE_KMER_RECORD_FOR_K(K, dummy): expands to BINARY2_DEF and
// BINARY3_DEF applied to kmer_with_count<K>.  Note that, despite its name,
// it refers to kmer_with_count rather than kmer_record.  The second argument
// is not used.
// NOTE(review): BINARY2_DEF / BINARY3_DEF are defined elsewhere; their
// effect cannot be determined from this file.
#define INSTANTIATE_KMER_RECORD_FOR_K(K, dummy) \
  BINARY2_DEF( kmer_with_count<K> ); \
  BINARY3_DEF( kmer_with_count<K> )


#endif
