#ifndef LOOKUP_KMER_INDEX_H
#define LOOKUP_KMER_INDEX_H
/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////
#include "CoreTools.h"
#include "Basevector.h"

/// \file KmerIndex.h: Utilities for working with kmers using
/// straightforward index encoding.  Currently no .cc file.

inline unsigned int Index(const vec<char>& bases, unsigned int pos, unsigned int K)
{    unsigned int index = 0;
     for ( unsigned int l = 0; l < K; l++ )
     {    index = index << 2;
          char base = bases[ pos + l ];
          if ( base == 'A' || base == 'a' );
          else if ( base == 'C' || base == 'c' ) index ^= 1;
          else if ( base == 'G' || base == 'g' ) index ^= 2;
          else if ( base == 'T' || base == 't' ) index ^= 3;    }
     return index;    }


/// Decodes a Kmer index back into the base sequence it stands for.
/// @param[out] bases resized to \c K and overwritten with the decoded bases
/// @param index Kmer index (numerical representation) to be decoded
/// @param K size of the Kmer
/// @return reference to the same \c bases vector passed as the first argument
inline basevector &  KmerIndexToBases(basevector &bases, unsigned int index, unsigned int K) {
  bases.Setsize(K);

  // Walk from the last base to the first: the final base of the Kmer lives in
  // the two lowest bits of the index, so each step peels off one base.
  int slot = K-1;
  while ( slot >= 0 ) {
    bases.Set(slot, static_cast<unsigned char>(index) & 0x3);
    index >>= 2;
    --slot;
  }
  return bases;
}


/// Returns the numeric representation (index) of the Kmer of length \c K that
/// begins at position \c pos of \c bases.  The base at \c pos is read
/// unconditionally and seeds the index; each following base shifts the index
/// left by two bits and is merged into the freed low bits.
inline unsigned int Index(const basevector& bases, const unsigned int pos, const unsigned int K)
{
  unsigned int code = bases[pos];
  const unsigned int stop = pos + K;   // one past the last base of the Kmer
  unsigned int cursor = pos + 1;
  while ( cursor < stop ) {
    code = ( code << 2 ) ^ bases[ cursor ];
    ++cursor;
  }
  return code;
}


/// Calculates the new index value from the one for the previous kmer in the
/// basevector: the index is shifted left by one base (two bits), the base at
/// \c pos + \c K - 1 is merged into the low bits, and the result is reduced
/// to its low 2*K bits.
/// @param[in,out] index index of the previous kmer on entry, updated in place
/// @param bases sequence the kmers are taken from
/// @param pos the base read is bases[pos + K - 1]
/// @param K size of the Kmer
inline void NextIndex(unsigned int& index, const basevector& bases,
		      const unsigned int pos, const unsigned int K)
{
  const unsigned int indexBits = sizeof(unsigned int) * 8;
  index <<= 2;
  index ^= bases[pos + K - 1];
  // Only mask when the kmer is narrower than the word.  Once 2*K reaches the
  // width of unsigned int the shift below would be undefined behaviour (and
  // typically produced a mask of 0); in that case the left shift above has
  // already discarded the oldest base, so every remaining bit is kept.
  if ( K < indexBits / 2 )
    index &= ( 1u << ( K * 2 ) ) - 1u;
}

/// Somewhat faster variant of the rolling index update: the caller supplies
/// the precomputed bit mask instead of K.  The index is shifted left by two
/// bits, bases[nextpos] is merged into the low bits, and the result is
/// masked with \c Kmask.  If index is initialized with Index(bases, 0, K)
/// then the first value of nextpos for NextIndex2() should be K.  Kmask
/// should be KmerBitmask(K).
inline void NextIndex2(unsigned int& index, const basevector& bases,
		       const unsigned int nextpos, const unsigned int Kmask)
{
  index = ( ( index << 2 ) ^ bases[nextpos] ) & Kmask;
}

/// The unsigned int that has 1s in the positions used for Kmer
/// numbers (the low 2*K bits), and 0s elsewhere.
/// @param K size of the Kmer
/// @return mask with the low 2*K bits set; all bits set when 2*K is at least
/// the width of unsigned int
inline unsigned int KmerBitmask(const unsigned int K)
{
  const unsigned int indexBits = sizeof(unsigned int) * 8;
  // Shifting by the full width of the type is undefined behaviour (and in
  // practice gave a mask of 0 for the largest K that fits), so that case is
  // answered directly with an all-ones mask.
  if ( K >= indexBits / 2 )
    return ~0u;
  return ( 1u << ( K * 2 ) ) - 1u;
}

/// Wraps NextIndex2 to be more convenient to use: holds the mask, the current
/// kmer index and the position of the next base to consume, so a caller can
/// Reset() on a basevector and then call operator() repeatedly to step
/// through its kmer indices.
struct KmerIndexSeq {
  unsigned int Kmask_, K_;
  unsigned int index_, nextpos_, tmp_;
  /// @param K size of the Kmer.  The running state is zero-initialized so
  /// that it never holds indeterminate values; it only becomes meaningful
  /// after Reset() has been called.
  KmerIndexSeq(unsigned int K)
    : Kmask_(KmerBitmask(K)), K_(K), index_(0), nextpos_(0), tmp_(0) { }
  /// Initialize for new basevector: the current index becomes that of the
  /// kmer at position 0 and the next base to consume is the one at K_.
  void Reset(const basevector &bases)
  { index_ = Index(bases, 0, K_); nextpos_=K_; }
  /// The current kmer index (moves on to next).  Once nextpos_ has reached
  /// bases.size() the state no longer advances, so further calls keep
  /// returning the index of the last kmer.
  unsigned int operator()(const basevector &bases)
  {
    tmp_=index_;
    if (nextpos_<bases.size()) {
      NextIndex2(index_, bases, nextpos_, Kmask_);
      ++nextpos_;
    }
    return tmp_;
  }
};



#endif
