#ifndef __INCLUDE_SortKmersImpl_H
#define __INCLUDE_SortKmersImpl_H

// Copyright (c) 2005 Broad Institute/Massachusetts Institute of Technology

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "math/Functions.h"
#include "system/StaticAssert.h"
#include "SortKmers.h"
#include "KmerRecord.h"

/**
   File: SortKmersImpl

   Extraction of kmers from reads, and sorting of the result; see SortKmers() .

   Upon entry, reads and read_ids are given (as is pass).  There are (at 
   present) passes 0 (only), passes 0 through 9, or 0 through 99, 
   depending on whether Passes is 1 or 10 or 100.  In each pass, we 
   extract some of the k-mers b from each read.

   Compute the reverse complement c of b, and pick the "minimum" of b, c.  
   The exact definition of minimum doesn't matter, except that it is symmetric: 
   min(b,c) = min(c,b).

   We then form the triple (minimum (b or c as above), read_id, k-mer position),
   as a kmer_record.  These are placed in R (in total S of them), and sorted, 
   using only the k-mer part.  Thus the output values are R and S.

   We store the position as pos + 1 (if we don't use the reverse complement), 
   and as -(pos + 1) if we do.

   In the 10 pass case, the passes are defined by the following table:

   0: A..A or T..T      5: C..C or G..G
   1: C..A or T..G      6: G..C or G..C
   2: G..A or T..C      7: A..T or A..T
   3: T..A or T..A      8: A..G or C..T
   4: A..C or G..T      9: C..G or C..G

   For example, pass 0 includes all k-mers of the form A..A or T..T.

   In the 100 pass case, pass 10 would include all k-mers of the form
   AC..AA or TC..AT or AT..GA or TT..GT.  Etc.

   Note that much of this file will only work when 4 divides k.

   Memory utilization has been reduced for the case Passes=100, for huge 
   "reads"; the same improvement has not been implemented for the other cases.

   \ingroup grp_kmerGathering
*/

/* SORT_CORE: extract the k-mer starting at offset q of reads[l] into b,
   canonicalize it, and append the resulting record(s) to R, advancing S.

   The macro takes no arguments; it uses these names from the scope in which
   it is expanded: KSHAPE, b, reads, read_ids, l, q, R and S.

   The position is recorded 1-based and signed: -(q+1) when the canonical form
   is <= 0 and +(q+1) when it is >= 0, so a canonical form of exactly 0 yields
   two records (presumably the palindromic case -- confirm against
   basevector::Canonicalize).

   The position is converted to int before it is negated.  q is unsigned at
   every use in this file, so negating q+1 directly only produced a negative
   number through unsigned wrap-around followed by a narrowing conversion; a
   RECORD::Set whose position parameter is wider than int would have received
   a large positive value instead.

   The expansion is a braced compound statement, so it can be used both as
   "{ SORT_CORE }" and as "if ( cond ) SORT_CORE;". */
#define SORT_CORE \
{    KSHAPE::extractKmer(b, reads[l], q); \
     const basevector::CanonicalForm canon_form = b.Canonicalize(); \
     const int signed_pos = static_cast<int>(q) + 1; \
     if ( canon_form <= 0 ) R[S++].Set( b, read_ids[l], -signed_pos ); \
     if ( canon_form >= 0 ) R[S++].Set( b, read_ids[l], signed_pos );    }


/* BASE_EQ(BYTE, SHIFT, BASE): true when the 2-bit field of BYTE that starts at
   bit SHIFT (0, 2, 4 or 6 at every use in this file) equals BASE.
   Every argument is parenthesized so that expression arguments such as
   "a | b" or "3 - bb" bind as the caller intends. */
#define BASE_EQ(BYTE,SHIFT,BASE) ((((BYTE)) & (3 << (SHIFT))) == ((BASE) << (SHIFT)))

// 1 PASS VERSION

/**
   \copydoc SortKmers(dummy<100>,const vecbasevector&,const vec<int>&,int,vec<RECORD>&,unsigned int&,bool)

   Single-pass version: every k-mer of every read that is at least KSHAPE::KSPAN
   bases long is extracted in one call, so \p pass is not consulted.

   \p R is grown as needed and is never shrunk; only the first \p S entries are
   written and sorted by this call.

   This routine may not scale well; for a better-scaling version, see the 100-pass version:
   SortKmers(dummy<100>,const vecbasevector&,const vec<int>&,int,vec<RECORD>&,unsigned int&,bool) .

   \callergraph
*/
template< class KSHAPE, class RECORD >
void SortKmers( dummy<1>, const vecbasevector& reads, 
  const vec<int>& read_ids, int pass, vec<RECORD>& R, unsigned int& S, bool use_stable_sort )
{
  const int K = KSHAPE::KSIZE;
  const unsigned int KSPAN = (unsigned int)KSHAPE::KSPAN;

  // Scratch k-mer; SORT_CORE refers to it (and to reads, read_ids, l, q, R, S)
  // by name, so these identifiers must not be renamed.
  basevector b(K);
  S = 0;

  for ( int l = 0; l < reads.size( ); l++ )
  {
    // Reads shorter than one k-mer span contribute nothing.
    if ( reads[l].size( ) < KSPAN ) continue;
    const unsigned int N = reads[l].size( ) - KSPAN + 1;

    // Each of the N start positions yields at most two records, so make room
    // for 2*N more, growing by at least 20% to amortize reallocation.
    if ( S + 2*N >= R.size( ) )
      R.resize( MAX( 6*R.size( )/5, R.size( ) + 2*N ) );

    for ( unsigned int q = 0; q < N; q++ )
    {    SORT_CORE    }
  }

  if (use_stable_sort)
    stable_sort( R.begin( ), R.begin( ) + S );
  else
    sort( R.begin( ), R.begin( ) + S );
}

// *********************** SIMPLE 10 PASS VERSION ************************
// Intended only for quality control, but not maintained.

/*
  template<int K, int I> void SortKmers( dummy<10>, const vecbasevector& reads, 
  const vec<int>& read_ids, int pass, vec< kmer_record<K, I> >& R, 
  unsigned int& S )
  {    basevector b(K), c(K);
  S = 0;
  for ( unsigned int l = 0; l < reads.size( ); l++ )
  {    if ( reads[l].size( ) < K ) continue;
  unsigned int N = reads[l].size( ) - K + 1;
  if ( S + 2*N >= R.size( ) )
  R.resize( MAX( 6*R.size( )/5, R.size( ) + 2*N ) );
  Bool use_b, use_c;

  if ( pass == 7 ) pass = 12;
  unsigned char bases[2];
  bases[0] = pass % 4;
  bases[1] = (pass >> 2);

  for ( unsigned int q = 0; q < N; q++ )
  {    if ( (reads[l](q) == bases[0] && reads[l](q+K-1) == bases[1]) ||
  (reads[l](q) == 3-bases[1] && reads[l](q+K-1) == 3-bases[0]) )
  {    SORT_CORE    }    }    }
  sort( R.begin( ), R.begin( ) + S );    }
*/

// *********************** 10 PASS VERSION ************************

/**
   \copydoc SortKmers(dummy<100>,const vecbasevector&,const vec<int>&,int,vec<RECORD>&,unsigned int&,bool)

   \note This simple 10-pass version is intended only for quality control, but not maintained.

   A k-mer occurrence is extracted in pass p when the first and last bases
   examined are (ba, bb) or, for the reverse complement, (3-bb, 3-ba), where
   ba = code % 4, bb = (code >> 2) % 4, and code is p with 7 remapped to 12.

   \p R is grown as needed and is never shrunk; only the first \p S entries are
   written and sorted by this call.
*/
template<class KSHAPE, class RECORD>
void SortKmers( dummy<10>, const vecbasevector& reads, const vec<int>& read_ids,
  int pass, vec<RECORD>& R, unsigned int& S, bool use_stable_sort )
{
  const int K = KSHAPE::KSIZE;
  const unsigned int KSPAN = (unsigned int)KSHAPE::KSPAN;

  // Scratch k-mer; SORT_CORE refers to it (and to reads, read_ids, l, q, R, S)
  // by name, so these identifiers must not be renamed.
  basevector b(K);
  S = 0;

  // Decode the pass into the pair of end bases it selects.  This does not
  // depend on the read, so it is done once rather than once per read.
  // Pass 7 is remapped to code 12 so that it decodes to (ba, bb) = (0, 3).
  if ( pass == 7 ) pass = 12;
  const unsigned char ba = pass % 4, bb = (pass >> 2) % 4;

  // NOTE(review): the number of start positions is computed from KSPAN, but
  // the far-end base below is located with K (q+K-1), whereas the 100-pass
  // version uses KSPAN.  The two agree only when KSPAN == K -- confirm whether
  // gapped shapes are meant to be supported here.

  for ( int l = 0; l < reads.size( ); l++ ) {
    if ( reads[l].size( ) < KSPAN ) continue;
    const unsigned int N = reads[l].size( ) - KSPAN + 1;
    const basevector& r = reads[l];

    // The following piece of code is here because I've only optimized the
    // case where 4 divides k.  However, it also serves as a reminder of
    // what the optimized case is supposed to accomplish.

    if ( K % 4 != 0 ) {
      for ( unsigned int q = 0; q < N; ++q ) {
        if ( !( ( r[q] == ba && r[q+K-1] == bb ) ||
                ( r[q] == 3 - bb && r[q+K-1] == 3 - ba ) ) )
          continue;
        if ( S + 2*N >= R.size( ) )
          R.resize( MAX( 6*R.size( )/5, R.size( ) + 2*N ) );
        SORT_CORE;
      }
      continue;
    }

    // Optimized case (4 divides K): walk the packed bytes directly.  x points
    // at the byte holding base q and xk at the byte holding base qk = q+K-1;
    // y and yk cache those bytes.

    unsigned int q = 0;

    // The following calls to DataAsBytes should be eliminated.
    unsigned char* x = r.DataAsBytes( );
    unsigned char* xk = r.DataAsBytes( ) + (K-1)/4;

    unsigned char y, yk;
    yk = *xk;

    // Reserve room for the whole read up front.  The scan below always runs
    // to q == N, so one capacity check per read is sufficient; the former
    // 10000-position chunk loop only repeated this check after the scan had
    // already finished and could grow R needlessly.
    if ( S + 20000 >= R.size( ) || S + 2*N >= R.size( ) )
      R.resize( MAX( 6*R.size( )/5, R.size( ) + 2*N ) );

    /* SORT_TEST(A,B): does the base at bit offset A of y, together with the
       base at bit offset B of yk, match this pass either directly (ba, bb)
       or as a reverse complement (3-bb, 3-ba)? */
#define SORT_TEST(A,B) \
    ( ( BASE_EQ(y,A,ba) && BASE_EQ(yk,B,bb) ) || \
      ( BASE_EQ(y,A,3-bb) && BASE_EQ(yk,B,3-ba) ) )

    while(1) {

      // Case 1: q = 0 (mod 4), qk = 3 (mod 4).

      if ( q == N ) break;
      y = *x;
      if ( SORT_TEST(0,6) ) SORT_CORE;
      ++q;

      // Case 2: q = 1 (mod 4), qk = 0 (mod 4).  qk has moved into the next byte.

      if ( q == N ) break;
      ++xk;
      yk = *xk;
      if ( SORT_TEST(2,0) ) SORT_CORE;
      ++q;

      // Case 3: q = 2 (mod 4), qk = 1 (mod 4).

      if ( q == N ) break;
      if ( SORT_TEST(4,2) ) SORT_CORE;
      ++q;

      // Case 4: q = 3 (mod 4), qk = 2 (mod 4).  q moves into the next byte afterwards.

      if ( q == N ) break;
      if ( SORT_TEST(6,4) ) SORT_CORE;
      ++x;
      ++q;
    }
  }

  if ( use_stable_sort )
    stable_sort( R.begin( ), R.begin( ) + S );
  else
    sort( R.begin( ), R.begin( ) + S );
}

// *********************** 100 PASS VERSION ************************

/* SORT_TEST100: 100-pass selection test for one k-mer occurrence.

   True when BOTH of the following hold:
     - the outer pair (bit offset A of Y, bit offset B of Yk) equals (ba, bb)
       or the reverse-complement pair (3-bb, 3-ba);
     - the inner pair (bit offset Ai of Yi, bit offset Bi of Yki) equals
       (bai, bbi) or the reverse-complement pair (3-bbi, 3-bai).

   bai and bbi are NOT macro parameters: they are taken from the scope in
   which the macro is expanded.

   The whole expansion is parenthesized so the macro can be negated or
   combined with other operators safely. */
#define SORT_TEST100(A,B,Ai,Bi,Y,Yk,Yi,Yki,ba,bb) \
     ( ( ( BASE_EQ(Y,A,ba) && BASE_EQ(Yk,B,bb) ) || \
         ( BASE_EQ(Y,A,3-(bb)) && BASE_EQ(Yk,B,3-(ba)) ) ) && \
       ( ( BASE_EQ(Yi,Ai,bai) && BASE_EQ(Yki,Bi,bbi) ) || \
         ( BASE_EQ(Yi,Ai,3-(bbi)) && BASE_EQ(Yki,Bi,3-(bai)) ) ) )

/* ONE_PASS_OF_SORT(BA, BB) defines the function template

      one_pass_of_sort<BA><BB>( reads, read_ids, pass2, R, S )

   (the name is formed by token pasting, e.g. one_pass_of_sort03), which
   performs the extraction step of one pass of the 100-pass algorithm:

     - BA, BB (compile-time) are the outer first/last bases selected by the
       pass; pass2 (run-time) encodes the inner pair as bai = pass2 % 4,
       bbi = (pass2 >> 2) % 4.
     - For every read at least KSHAPE::KSPAN bases long, each start position
       q whose outer and inner base pairs satisfy SORT_TEST100 is handed to
       SORT_CORE, which appends record(s) to R and advances S.
     - S is reset to 0 on entry; R is grown as needed and never shrunk.  The
       records are NOT sorted here.

   Each read is processed in chunks of at most 10000 start positions so that
   R only needs room for one chunk at a time.  The chunk size must stay a
   multiple of 4: the scan is a four-state cycle keyed on q mod 4 that is
   always re-entered at "Case 1", so a chunk boundary may only fall where
   q = 0 (mod 4).

   The capacity arithmetic is done in the vector's size_type.  It was
   previously narrowed to int, which turns negative (and then into an enormous
   resize request) once more than 2^31 - 1 records are required.

   NOTE(review): the static assertion checks K, but the byte/bit offsets of
   the far end are derived from KSPAN -- presumably KSPAN % 4 == 0 is required
   as well; confirm for gapped shapes. */
#define ONE_PASS_OF_SORT(BA, BB)                                          \
                                                                          \
template<class KSHAPE, class RECORD> void one_pass_of_sort ## BA ## BB    \
     ( const vecbasevector& reads, const vector<int>& read_ids,           \
       int pass2, vector<RECORD>& R, unsigned int& S )                    \
                                                                          \
{                                                                         \
     typedef typename vector<RECORD>::size_type rec_count_t;              \
                                                                          \
     const int K = KSHAPE::KSIZE;                                         \
     const unsigned int KSPAN = (unsigned int)KSHAPE::KSPAN;              \
     STATIC_ASSERT_M( K % 4 == 0, KmerSizeMustDivideByFour );             \
                                                                          \
     /* Inner base pair selected by pass2; the same for every read. */    \
     const unsigned char bai = pass2 % 4;                                 \
     const unsigned char bbi = (pass2 >> 2) % 4;                          \
                                                                          \
     /* b, reads, read_ids, l, q, R, S are used by name in SORT_CORE. */  \
     basevector b(K);                                                     \
     S = 0;                                                               \
                                                                          \
     for ( int l = 0; l < reads.size( ); l++ )                            \
     {    if ( reads[l].size( ) < KSPAN ) continue;                       \
          const unsigned int N = reads[l].size( ) - KSPAN + 1;            \
                                                                          \
          const basevector& r = reads[l];                                 \
                                                                          \
          unsigned int q = 0;                                             \
          /* unsigned int qk = q + KSPAN - 1;  Just to help us think. */  \
                                                                          \
          /* The following calls to DataAsBytes should be eliminated. */  \
          /* x walks the byte holding base q, xk the byte holding qk. */  \
          unsigned char* x = r.DataAsBytes( );                            \
          unsigned char* xk = r.DataAsBytes( ) + (KSPAN-1)/4;             \
                                                                          \
          unsigned char y, yk;                                            \
          yk = *xk;                                                       \
                                                                          \
          /* yi is the byte holding base q+1 (the inner first base). */   \
          unsigned char yi = *x;                                          \
                                                                          \
          unsigned int top = 0;                                           \
          while( top < N )                                                \
          {    top += Min( N - top, (unsigned int) 10000 );               \
                                                                          \
               /* Room for two records per start position in the chunk. */ \
               const rec_count_t needed =                                 \
                    (rec_count_t) S + 2 * (rec_count_t) (top - q);        \
               if ( needed >= R.size( ) )                                 \
               {    const rec_count_t grown = 6*R.size( )/5;              \
                    R.resize( grown > needed ? grown : needed );    }     \
                                                                          \
               while(1)                                                   \
               {                                                          \
                    /* Case 1: q = 0 (mod 4), qk = 3 (mod 4).  [qi: 1; qki: 2] */  \
                                                                          \
                    if ( q == top ) break;                                \
                    y = yi;                                               \
                    if ( SORT_TEST100(0,6,2,4,y,yk,yi,yk,BA,BB) ) SORT_CORE;    \
                    ++q;                                                  \
                                                                          \
                    /* Case 2: q = 1 (mod 4), qk = 0 (mod 4).  [qi: 2; qki: 3] */  \
                    /* qk is in the new byte *xk; qki is still in the old yk. */   \
                                                                          \
                    if ( q == top ) break;                                \
                    ++xk;                                                 \
                    if ( SORT_TEST100(2,0,4,6,y,*xk,yi,yk,BA,BB) ) SORT_CORE;   \
                    ++q;                                                  \
                                                                          \
                    /* Case 3: q = 2 (mod 4), qk = 1 (mod 4).  [qi: 3; qki: 0] */  \
                                                                          \
                    if ( q == top ) break;                                \
                    yk = *xk;                                             \
                    if ( SORT_TEST100(4,2,6,0,y,yk,yi,yk,BA,BB) ) SORT_CORE;    \
                    ++q;                                                  \
                                                                          \
                    /* Case 4: q = 3 (mod 4), qk = 2 (mod 4).  [qi: 0; qki: 1] */  \
                    /* qi is in the next byte, so advance x and reload yi. */      \
                                                                          \
                    if ( q == top ) break;                                \
                    ++x;                                                  \
                    yi = *x;                                              \
                    if ( SORT_TEST100(6,4,0,2,y,yk,yi,yk,BA,BB) ) SORT_CORE;    \
                    ++q;    }    }    }    }

  // Define one worker function template per outer base pair (BA, BB), i.e. one
  // per value of pass1 = pass % 10 (see the dispatch in SortKmers(dummy<100>,...)).
  // BA = code % 4 and BB = code >> 2, where code is pass1 except that pass1 = 7
  // uses code 12.  Bases are numbered A=0, C=1, G=2, T=3.
  ONE_PASS_OF_SORT(0, 0)  // pass1 = 0: A..A or T..T
  ONE_PASS_OF_SORT(1, 0)  // pass1 = 1: C..A or T..G
  ONE_PASS_OF_SORT(2, 0)  // pass1 = 2: G..A or T..C
  ONE_PASS_OF_SORT(3, 0)  // pass1 = 3: T..A or T..A
  ONE_PASS_OF_SORT(0, 1)  // pass1 = 4: A..C or G..T
  ONE_PASS_OF_SORT(1, 1)  // pass1 = 5: C..C or G..G
  ONE_PASS_OF_SORT(2, 1)  // pass1 = 6: G..C or G..C
  ONE_PASS_OF_SORT(0, 3)  // pass1 = 7 (code 12): A..T or A..T
  ONE_PASS_OF_SORT(0, 2)  // pass1 = 8: A..G or C..T
  ONE_PASS_OF_SORT(1, 2)  // pass1 = 9: C..G or C..G

/* CALL_ONE_PASS_OF_SORT(BA, BB): call the worker one_pass_of_sort<BA><BB>
   generated by ONE_PASS_OF_SORT(BA, BB).  The macro uses KSHAPE, RECORD,
   reads, read_ids, pass2, R and S from the scope in which it is expanded.
   The expansion already ends with a semicolon, so it forms a complete
   statement without one being added at the point of use. */
#define CALL_ONE_PASS_OF_SORT(BA, BB)                                  \
     one_pass_of_sort ## BA ## BB<KSHAPE,RECORD>( reads, read_ids, pass2, R, S );

/**
   Extracts and sorts all kmers from a read set.

   \copydoc SortKmersImpl.h

   Template arguments:
      \li \c KSHAPE - the \link KmerShape.h shape\endlink of the kmers to extract; it supplies the kmer size
              (\c KSHAPE::KSIZE), the span of read bases a kmer covers (\c KSHAPE::KSPAN) and \c KSHAPE::extractKmer.
      \li \c RECORD - the type of kmer occurrence records to generate, to put into the output array \p R .  The type must have
              a Set method to record the kmer sequence, read id and position; see kmer_record::Set.  Two example types are
             \link kmer\endlink and kmer_record.

   \param[in] reads the reads from which to extract the kmers
   \param[in] read_ids an array parallel to \p reads, giving an integer id to each corresponding read in \p reads.
   \param[in] pass the pass; this function must be called once for each pass, where the number of passes is indicated
       by the first argument to this function (and chooses the right function version for the given number of passes).
       For this 100-pass version the expected range is 0 through 99: \c pass%10 selects the outer base pair and
       \c pass/10 the inner base pair.
   \param[out] R the kmer records, one for each occurrence of a kmer in a read, referencing
       back to the reads.  \p R is grown as needed and never shrunk; only its first \p S entries are meaningful.
   \param[out] S the number of records added to \p R by this call
   \param[in]  use_stable_sort if \c true, records for a given kmer that \link kmer_record::GetId refer\endlink to the same read will be
               kept adjacent in the output array \p R .

   \callergraph

   Implementation notes:
      \li on any invocation of this routine, at most one call to CALL_ONE_PASS_OF_SORT() happens (exactly one for a
          non-negative \p pass).  Remember that this routine must be called multiple times, once for each \p pass value.
      \li if \p pass selects no worker (a negative value), \p S is set to 0 and \p R is left untouched, instead of
          sorting a range bounded by an unset \p S .
*/
template<class KSHAPE, class RECORD>
void SortKmers( dummy<100>, const vecbasevector& reads, const vec<int>& read_ids,
       int pass, vec<RECORD>& R, unsigned int& S, bool use_stable_sort )
{
  // pass1 selects the outer base pair (compile-time worker), pass2 the inner
  // pair (decoded inside the worker).  Pass 7 is remapped to code 12 so that
  // it decodes to the base pair (0, 3).
  const int pass1 = pass % 10;
  int pass2 = pass / 10;
  if ( pass2 == 7 ) pass2 = 12;

  // Every worker resets S itself; this covers the case where none is selected.
  S = 0;

  // The macro expansion carries its own semicolon; the extra one written here
  // is just an empty statement.
  switch ( pass1 ) {
    case 0: CALL_ONE_PASS_OF_SORT(0, 0); break;  // A..A or T..T
    case 1: CALL_ONE_PASS_OF_SORT(1, 0); break;  // C..A or T..G
    case 2: CALL_ONE_PASS_OF_SORT(2, 0); break;  // G..A or T..C
    case 3: CALL_ONE_PASS_OF_SORT(3, 0); break;  // T..A or T..A
    case 4: CALL_ONE_PASS_OF_SORT(0, 1); break;  // A..C or G..T
    case 5: CALL_ONE_PASS_OF_SORT(1, 1); break;  // C..C or G..G
    case 6: CALL_ONE_PASS_OF_SORT(2, 1); break;  // G..C or G..C
    case 7: CALL_ONE_PASS_OF_SORT(0, 3); break;  // code 12: A..T or A..T
    case 8: CALL_ONE_PASS_OF_SORT(0, 2); break;  // A..G or C..T
    case 9: CALL_ONE_PASS_OF_SORT(1, 2); break;  // C..G or C..G
    default: break;                              // negative pass: nothing extracted
  }

  if (use_stable_sort)
    stable_sort( R.begin( ), R.begin( ) + S );
  else
    sort( R.begin( ), R.begin( ) + S );
}

   
#endif
// #ifndef __INCLUDE_SortKmersImpl_H
