/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

// Unless the build defines FORCE_DEBUG, define NDEBUG before any header is
// included.  In this file NDEBUG compiles out the per-base mutmer validation
// guarded by "#ifndef NDEBUG" in ReadsToPathsMainPartX.
// NOTE(review): it presumably also affects assertion macros in the headers
// included below -- confirm against those headers.
#ifndef FORCE_DEBUG
    #define NDEBUG
#endif

#include <map>

#include "Basevector.h"
#include "Bitvector.h"
#include "CoreTools.h"
#include "Feudal.h"
#include "FeudalMimic.h"
#include "KmerRecord.h"
#include "SortKmers.h"
#include "math/Functions.h"
#include "pairwise_aligners/BalancedMutmerGraph.h"
#include "pairwise_aligners/Mutmer.h"
#include "paths/KmerPath.h"
#include "paths/ReadsToPathsCoreX.h"

// One match between two reads, as filled in by MakeAlignsPathsX: Len( ) bases 
// starting at pos1( ) on read Id1( ) and at pos2( ) on read Id2( ).  When 
// Rc2( ) is set, the match is against the reverse complement of read Id2( )
// (see the validation code in ReadsToPathsMainPartX).  POS is the integer type
// used to store the two positions and the length.

template<class POS> class mhit {

     public:

     // Default construction leaves every field unset; call Set before reading.

     mhit( ) { }

     mhit( int id1, int id2, Bool rc, POS pos1, POS pos2, POS len )
          : id1_(id1),
            id2_(id2),
            rc_(rc),
            pos1_(pos1),
            pos2_(pos2),
            len_(len)
     { }

     // Overwrite all six fields at once.

     void Set( int id1, int id2, Bool rc, POS pos1, POS pos2, POS len )
     {    *this = mhit( id1, id2, rc, pos1, pos2, len );    }

     // Read access.

     int Id1( ) const { return id1_; }
     int Id2( ) const { return id2_; }
     POS pos1( ) const { return pos1_; }
     POS pos2( ) const { return pos2_; }
     POS Len( ) const { return len_; }
     Bool Rc2( ) const { return rc_; }

     // Individual setters.  The position setters take an int and store it
     // as a POS.

     void SetId1( int id1 ) { id1_ = id1; }
     void SetId2( int id2 ) { id2_ = id2; }
     void SetPos1( int pos1 ) { pos1_ = pos1; }
     void SetPos2( int pos2 ) { pos2_ = pos2; }

     // Ordering: ascending Id2, and for equal Id2 the longer hit comes first.
     // Id1, the positions and the orientation do not take part.

     friend Bool operator<( const mhit& h1, const mhit& h2 )
     {    if ( h1.Id2( ) != h2.Id2( ) ) return h1.Id2( ) < h2.Id2( );
          return h1.Len( ) > h2.Len( );    }

     private:

     int id1_, id2_;
     Bool rc_;
     POS pos1_, pos2_;
     POS len_;

};

// GetNextKmerPairPathsX: generator-style iteration over pairs of k-mer records
// in R[0..S) that carry the same k-mer.
//
// Protocol: the caller starts with read_id1 < 0 and then calls repeatedly with
// the same variables.  Each call that returns with read_id1 >= 0 has produced
// one pair (read_id1, pos1), (read_id2, pos2).  When no pairs are left, the
// function returns with read_id1 = -1.
//
// The loop state (i, j, r2, xpos1) lives in static locals and a call with
// read_id1 >= 0 jumps straight back into the innermost loop.  Consequently:
//   - the caller must not modify read_id1 between calls, and must pass the
//     same R and S until the iteration is finished;
//   - only one iteration per instantiation <K,I> can be in progress at a time.
//
// Side effects: each batch of records R[i..j) is sorted in place, and one bit
// of chosen is set per batch (see below).

template<int K, int I> 
     inline void GetNextKmerPairPathsX( vector< kmer_record<K,I> >& R, int S, 
     int& read_id1, int& read_id2, int& pos1, int& pos2, vecbitvector& chosen )
{    static int i, j, r2, xpos1;

     // A nonnegative read_id1 means "resume after the last return".

     if ( read_id1 >= 0 ) goto after_return;
     for ( i = 0; i < S; i++ )
     {    
          // Get batch of k-mer records corresponding to one k-mer: extend j
          // until record j differs from record i in any of the first (K+3)/4
          // bytes of Bytes( ).

          int l;
          for ( j = i+1; j < S; j++ )
          {    for ( l = (K+3)/4 - 1; l >= 0; l-- )
                    if ( R[j].Bytes( )[l] != R[i].Bytes( )[l] ) break;
               if ( l >= 0 ) break;    }

          // Order the batch with id_cmp_pos; the record that ends up first
          // supplies (read_id1, pos1) for every pair of this batch.

          sort( R.begin( ) + i, R.begin( ) + j, kmer_record<K,I>::id_cmp_pos );
          read_id1 = R[i].GetId( );
          pos1 = R[i].GetPos( );

          // Mark canonical placement of k-mer: bit |pos1| - 1 of
          // chosen[read_id1].  This happens even for a batch of one record,
          // which yields no pair.
          // NOTE(review): presumably GetPos( ) is 1-based with the sign
          // encoding orientation -- confirm against kmer_record.

          xpos1 = pos1;
          if ( pos1 < 0 ) xpos1 = -pos1;
          --xpos1;
          chosen[read_id1].Set( xpos1, True );

          // Go through pairs: the first record of the batch against each of
          // the others.  The return hands one pair to the caller; the next
          // call re-enters at after_return and continues the loop.

          for ( r2 = i+1; r2 < j; r2++ )
          {    read_id2 = R[r2].GetId( );
               pos2 = R[r2].GetPos( );
               return;
               after_return: continue;    }

          // Continue with the batch that starts at j (the loop adds one).

          i = j - 1;    }    

     // All batches done: signal the end of the iteration.

     read_id1 = -1;    }

template<int I, int k, int BLOCKS_PER_NODE, class POS> void MakeAlignsPathsX(
     const vecbasevector& EE, longlong& COUNT, vecbitvector& chosen,
     vec< mhit<POS> >& mhits )
{
     mhits.clear( );
     int N = EE.size( );
     mutmer_graph<I, BLOCKS_PER_NODE> M(N);

     vec<int> rid(N);
     for ( int i = 0; i < N; i++ )
          rid[i] = i;
     longlong S_init = 0;
     for ( int l = 0; l < EE.size( ); l++ )
          if ( EE[l].size( ) >= k ) S_init += EE[l].size( ) - k;
     S_init += S_init/4;
     S_init /= 33;

     if ( S_init > 3000000000u )
          FatalErr( "MakeAlignsPathsX: the value of S is " << S_init
               << ".  This is dangerously large, because it represents\nthe "
               << "size of a vector which is to be stored in an unsigned int,\n"
               << "and this vector could grow. "
               << "You may need to rewrite some code to get it to work." );

     unsigned int S = S_init, total_S = 0;

     vec< kmer_record<k,I> > R(S);
     for ( int pass = 0; pass < 100; pass++ )
     {    dummy<100> d100;
          SortKmers( d100, EE, rid, pass, R, S );
          ForceAssertGe( R.size( ), S );
          total_S += S;
          int read_id1(-1), read_id2, pos1(-1), pos2;
          while(1)
          {    
               // Get next k-mer pair.

               GetNextKmerPairPathsX<k,I>( R, S, read_id1, read_id2, pos1, pos2,
                    chosen );
               if ( read_id1 < 0 ) break;

               // Merge into mutmer graph.

               M.MergeKmer( pos1, pos2, k, read_id1, read_id2, 
                    EE[read_id1], EE[read_id2], True );    }    }

     if ( total_S == 0 ) return;

     // Generate mutmers.

     vec< mutmer_read_id<I> > mid( Max(M.Counts( )) );
     int j, id2_last = -1;
     for ( int l = 0; l < N; l++ ) 
     {    int count = M.All(l, mid);
          sort( mid.begin( ), mid.begin( ) + count );
          for ( int i = 0; i < count; i++ ) 
          {    for ( j = i+1; j < count; j++ ) 
               {    if ( mid[i].ReadIdRc( ) != mid[j].ReadIdRc( ) ) break;    }
               Bool RC = mid[i].Rc( );
               int id1 = l, id2 = mid[i].ReadId( );
               COUNT += j - i;
               for ( int r = 0; r < j - i; r++ ) 
               {    int pos1, pos2, len, e;
	            mid[i+r].Unpack( pos1, pos2, len, e );
                    static mhit<POS> h;
                    if ( id1 < id2 ) h.Set( id1, id2, RC, pos1, pos2, len );
                    else if ( !RC ) h.Set( id2, id1, RC, pos2, pos1, len );
                    else
                    {    int n1 = EE[id1].size( ), n2 = EE[id2].size( );
                         h.Set( id2, id1, RC, n2-pos2-len, n1-pos1-len, len );    }
                    mhits.push_back(h);    }
               i = j - 1;    }    }    }

// Forward declaration: ReadsToPathsCoreX below calls ReadsToPathsMainPartX
// (instantiated with POS = short or POS = int), and the definition follows 
// later in this file.

template<class POS> void ReadsToPathsMainPartX( longlong COUNT, const int N, 
     const vec< pair<int,int> >& q40id, const unsigned int& K, 
     const vecbasevector& EE, vecbasevector& EEX, vecbitvector& chosen, 
     vecbitvector& chosen_rc, map<longlong,longlong>& palindromic_id, 
     longlong npalindromes, const Bool BIGREADS, const int blocks_per_node,
     vecKmerPath& paths );

void ReadsToPathsCoreX( const vecbasevector& EE, const unsigned int& K, 
     const longlong genome_size, vecKmerPath& paths )
{
     // Check arguments, set up paths.

     ForceAssertSupportedK( K );
     ForceAssert( genome_size > 0 );

     // Set up tracking of palindromes.

     longlong npalindromes = 0;
     map<longlong,longlong> palindromic_id;

     // Bring in the reads.  
  
     int N = EE.size( );
     Bool short_too_small = False;
     for ( int i = 0; i < N; i++ )
          if ( EE[i].size( ) > SHRT_MAX ) short_too_small = True;
     Bool BIGREADS = False;
     for ( int i = 0; i < N; i++ )
          if ( (int) EE[i].size( ) >= 1024 ) BIGREADS = True;

     // Place reads in descending order by size.  The vector q40id remembers the 
     // new order, which is reversed later.

     vec< pair<int,int> > q40id(N);
     for ( int i = 0; i < N; i++ )
          q40id[i] = make_pair( EE[i].size( ), i );
     ReverseSort(q40id);
     vecbasevector EEX(EE);
     {    vec<int> new_order(N), new_map(N);
          for ( int i = 0; i < N; i++ )
               new_order[i] = q40id[i].second;
          for ( int i = 0; i < N; i++ )
               new_map[ new_order[i] ] = i;
          for ( int i = 0; i < N; i++ )
          {    if ( i == new_order[i] ) continue;
               swap( EEX[i], EEX[ new_order[i] ] );
               int saved_new_order_i = new_order[i];
               swap( new_order[i], new_order[ new_map[i] ] );
               swap( new_map[i], new_map[saved_new_order_i] );    }    }

     // Go through one pass.

     int blocks_per_node = 50;
     longlong kmercount = 0;
     for ( int i = 0; i < EEX.size( ); i++ )
       if ( EEX[i].size() >= K )
         kmercount += EEX[i].size() - K + 1;

     float coverage = float(kmercount) / float(genome_size);
     if ( coverage < 10.0 ) blocks_per_node = int(floor(2.5 * coverage));

     longlong COUNT = 0;
     vecbitvector chosen, chosen_rc;
     Mimic( EEX, chosen);

     if ( !short_too_small )
     {    ReadsToPathsMainPartX<short>( COUNT, N, q40id, K, 
               EE, EEX, chosen, chosen_rc, palindromic_id, npalindromes, 
               BIGREADS, blocks_per_node, paths );    }
     else
     {    ReadsToPathsMainPartX<int>( COUNT, N, q40id, K, 
               EE, EEX, chosen, chosen_rc, palindromic_id, npalindromes, 
               BIGREADS, blocks_per_node, paths );    }    }

// ReadsToPathsMainPartX: compute the mutmer hits between the reads and turn
// them into one KmerPath per read.
//
//   COUNT            hit counter handed to MakeAlignsPathsX (by value here);
//   N                number of reads;
//   q40id            q40id[i].second is the index in EE of the read EEX[i];
//   K                k-mer size;
//   EE               the reads in their original order;
//   EEX              the same reads in the order given by q40id;
//   chosen           in: sized for EEX; out: canonical k-mer placements, 
//                    re-indexed to the order of EE;
//   chosen_rc        out: chosen, minus the palindromic k-mers;
//   palindromic_id   map from k-mer number to palindrome number, extended here;
//   npalindromes     number of palindromes assigned so far (by value);
//   BIGREADS         selects I = 2 instead of I = 1 for MakeAlignsPathsX;
//   blocks_per_node  selects BLOCKS_PER_NODE = 6, 12 or 50;
//   pathsv           out: the paths, one per read, in the order of EE.
//
// POS is the integer type used to store positions in the hits.
// halfway_kmer, first_palindrome and max_palindromes are defined outside this
// file.

template<class POS> void ReadsToPathsMainPartX( longlong COUNT, const int N, 
     const vec< pair<int,int> >& q40id, const unsigned int& K, 
     const vecbasevector& EE, vecbasevector& EEX, vecbitvector& chosen, 
     vecbitvector& chosen_rc, map<longlong,longlong>& palindromic_id, 
     longlong npalindromes, const Bool BIGREADS, 
     const int blocks_per_node, vecKmerPath& pathsv )
{
     vec< mhit<POS> > mhits;

     // Call the instantiation of MakeAlignsPathsX that matches K, BIGREADS 
     // and blocks_per_node.

     #define CALL_MAKE_ALIGNS( A, B, C )                                      \
     {    MakeAlignsPathsX<A, B, C, POS>( EEX, COUNT, chosen, mhits );    }

     #define CASE(_K)                                                            \
      {    if ( !BIGREADS )                                                      \
           {    if ( blocks_per_node <= 12 ) CALL_MAKE_ALIGNS( 1, _K, 6 )        \
                else if ( blocks_per_node <= 24 ) CALL_MAKE_ALIGNS( 1, _K, 12 )  \
                else CALL_MAKE_ALIGNS( 1, _K, 50 );    }                         \
           else                                                                  \
           {    if ( blocks_per_node <= 12 ) CALL_MAKE_ALIGNS( 2, _K, 6 )        \
                else if ( blocks_per_node <= 24 ) CALL_MAKE_ALIGNS( 2, _K, 12 )  \
                else CALL_MAKE_ALIGNS( 2, _K, 50 );    }    }

     DISPATCH_ON_K(K, CASE);

     // Undo the sorting of the reads: translate the read ids in the hits and
     // the rows of chosen from the order of EEX back to the order of EE.  
     // Then sort the hits (by Id2, longer hits first).

     {    vec<int> new_order(N);
          for ( int i = 0; i < N; i++ )
               new_order[i] = q40id[i].second;
          for ( int i = 0; i < mhits.isize( ); i++ )
          {    if ( mhits[i].Id1( ) < N )
                    mhits[i].SetId1( new_order[ mhits[i].Id1( ) ] );
               if ( mhits[i].Id2( ) < N )
                    mhits[i].SetId2( new_order[ mhits[i].Id2( ) ] );    }
          vecbitvector chosen2;
          Mimic( EE, chosen2 );
          for ( int i = 0; i < N; i++ )
               chosen2[ new_order[i] ] = chosen[i];
          chosen = chosen2;    }
     Sort(mhits);

     // If chosen[i](j) is set, set chosen_rc[i](j), unless the k-mer is
     // palindromic.  A k-mer counts as palindromic when each of its first K/2 
     // bases and the mirrored base add up to 3.

     Mimic( EE, chosen_rc );
     int iK = K;
     for ( int i = 0; i < chosen.size( ); i++ )
     {    for ( int j = 0; j < (int) chosen[i].size( ); j++ )
          {    if ( !chosen[i][j] ) continue;
               const basevector& b = EE[i];
               int x;
               for ( x = 0; x < (int) K/2; x++ )
                    if ( b[j+x] + b[j+iK-x-1] != 3 ) break;
               if ( x == (int) K/2 ) continue;
               chosen_rc[i].Set( j, True );    }    }

     // Number positions on the reads: start[i] is the number of bases in the
     // reads before read i.

     longlong kx = 0;
     vec<longlong> start( EE.size( ) );
     for ( int i = 0; i < EE.size( ); i++ )
     {    start[i] = kx;
          kx += EE[i].size( );    }
     if ( kx > halfway_kmer )
     {    FatalErr( "Five byte limit on k-mer indices exceeded." );    }

     // Build read paths for the reads themselves.  The hits are sorted by 
     // Id2, so the hits for read id2 form the contiguous range 
     // [hit_begin, hit_end).

     vec<KmerPath> paths;
     paths.reserve(N);
     longlong nsegments = 0;
     int hit_begin = 0;
     for ( int id2 = 0; id2 < N; id2++ )
     {    int hit_end;
          for ( hit_end = hit_begin; hit_end < mhits.isize( ); hit_end++ )
               if ( mhits[hit_end].Id2( ) != id2 ) break;

          // index[u] will be the k-mer number of the k-mer at position u of 
          // read id2; -1 means "not yet known".  K-mers whose canonical 
          // placement is on this read get their number directly.

          static vec<longlong> index;
          int n2 = EE[id2].size( );
          index.resize_and_set( Max( 0, n2 - (int) K + 1 ), -1 );
          for ( int u = 0; u <= n2 - (int) K; u++ )
               if ( chosen[id2][u] ) index[u] = halfway_kmer + start[id2] + u + 1;

          // The other k-mers inherit their number from a canonical placement
          // on another read, through the hits.  An entry that is already set 
          // is never overwritten, so earlier (longer) hits take precedence.

          for ( int k = hit_begin; k < hit_end; k++ )
          {    int id1 = mhits[k].Id1( );
               int pos1 = mhits[k].pos1( ), pos2 = mhits[k].pos2( );
               int len = mhits[k].Len( );
               Bool rc2 = mhits[k].Rc2( );

               // Validate mutmer.

               #ifndef NDEBUG
               {    for ( int u = 0; u < len; u++ )
                    {    if ( !rc2 )
                         {    ForceAssertEq( EE[id1][pos1+u], EE[id2][pos2+u] );    }
                         else
                         {    ForceAssertEq( 3-EE[id1][pos1+u], 
                                   EE[id2][n2-pos2-u-1] );    }    }    }
               #endif

               // Find correspondence.

               if ( !rc2 )
               {    for ( int u = 0; u <= len - (int) K; u++ )
                    {    if ( !chosen[id1][pos1+u] || index[pos2+u] >= 0 ) continue;
                         index[pos2+u] 
                              = halfway_kmer + start[id1] + pos1 + u + 1;    }    }
               else
               {    
                    // u is the last base of the k-mer within the hit and up 
                    // its first base; on read id2 the k-mer starts at 
                    // n2-pos2-u-1.  A k-mer set in chosen_rc gets a number 
                    // below halfway_kmer; otherwise (chosen but palindromic) 
                    // it gets the same number as in the forward case.

                    for ( int u = (int) K - 1; u < len; u++ )
                    {    int up = u - (int) K + 1;
                         if ( !chosen[id1][ pos1 + up ] ) continue;
                         if ( index[n2-pos2-u-1] >= 0 ) continue;
                         if ( chosen_rc[id1][ pos1 + up ] )
                              index[n2-pos2-u-1] 
                                   = halfway_kmer - start[id1] - pos1 - up;
                         else
                              index[n2-pos2-u-1]
                                   = halfway_kmer + start[id1] 
                                        + pos1 + up + 1;    }    }    }

          // Check for palindromes: every palindromic k-mer is renumbered to 
          // first_palindrome + its palindrome number, which is looked up in 
          // (or added to) palindromic_id.

          for ( int u = 0; u < index.isize( ); u++ )
          {    const basevector& rd = EE[id2];
               int x;
               for ( x = 0; x < (int) K/2; x++ )
                    if ( rd[u+x] + rd[u+iK-x-1] != 3 ) break;
               if ( x == (int) K/2 )
               {    longlong pid = palindromic_id[ index[u] ];
                    if ( pid == 0 )
                    {    ForceAssertLt( npalindromes, max_palindromes );
                         pid = first_palindrome + npalindromes;
                         palindromic_id[ index[u] ] = pid;
                         ++npalindromes;    }
                    index[u] = pid;     }     }

          // Turn the k-mer numbers into a path: each maximal run of 
          // consecutive numbers becomes one segment.

          KmerPath p;
          for ( int u = 0; u <= n2 - (int) K; u++ )
          {    int v;
               for ( v = u + 1; v <= n2 - (int) K; v++ )
                    if ( index[v] != index[v-1] + 1 ) break;
               ++nsegments;
               p.AddSegment( index[u], index[u] + v - u - 1 );
               u = v - 1;    }
          paths.push_back(p);

          // Every k-mer of the read must have received a number.

          for ( int u = 0; u <= n2 - (int) K; u++ )
          {    if ( index[u] < 0 ) PRINT3_TO( cout, id2, u, n2 );
               ForceAssert( index[u] >= 0 );    }

          hit_begin = hit_end;    }

     // Create reads paths for reads.

     pathsv.clear( );
     pathsv.Reserve( nsegments, N );
     for ( int i = 0; i < N; i++ )
          pathsv.push_back( paths[i] );    }


// Break long kmer paths, call ReadsToPathsCoreX, 
// and put broken things back together.

void ReadsToPathsCoreY( const vecbasevector& EE, const unsigned int& K, 
			const longlong genome_size, vecKmerPath& paths, 
			int max_interval ) {

  // Are there any long reads?
  bool long_read = false;
  for(int i=0; i < EE.size() && !long_read; i++)
    long_read = ( EE[i].isize() > max_interval );
 
  // If not, Y=X.
  if( ! long_read ) {
    ReadsToPathsCoreX( EE, K, genome_size, paths );
    return;
  }

  // If so, we need to break up the reads.
  // Copied from GenomeToPaths.
  int nreads = 0;
  longlong reads_rawsize = 0;
  vec<int> contig;
  int ncontigs = EE.size();
  vecbasevector sheared_reads;
  basevector b;

  for ( int pass = 1; pass <= 2; pass++ ) {
    if ( pass == 2 ) {
      sheared_reads.Reserve( reads_rawsize, nreads );
      contig.reserve(nreads);
    }
    for ( int i = 0; i < EE.size( ); i++ ) {
      const basevector& g = EE[i];
      for ( int start = 0; start < g.isize( ); start++ ) {
	int len = Min( max_interval, g.isize( ) - start );
	if ( pass == 1 ) {
	  nreads++;
	  reads_rawsize += (len+15)/16;
	}
	else {
	  b.SetToSubOf( g, start, len );
	  sheared_reads.push_back(b);
	  contig.push_back(i);
	}
	if ( start + len == g.isize( ) ) break;
	start += len - K;
      }
    }
  }

  // Now the vecbasevector sheared_reads holds the sheared version of EE.
  vecKmerPath sheared_paths;
  ReadsToPathsCoreX( sheared_reads, K, genome_size, sheared_paths );

  // Now release the memory that held the sheared reads...
  Destroy(sheared_reads);

  // Finally, reassemble.  We have to do this gently,
  //  so that nothing ends up self-owned.
  paths.clear();
  paths.Reserve( sheared_paths.rawsize(), EE.size() );

  KmerPath one_path;
  int which_shear = 0;

  for( int which_path = 0; which_path < EE.size(); which_path++ ) {
    one_path.Clear();
    while( which_shear < contig.isize() && 
	   contig[which_shear] == which_path ) {
      one_path.Append( sheared_paths[which_shear] );
      which_shear++;
    }
    paths.push_back( one_path );
  }

}
  
// Overload that also produces the reversed paths and a tagged_rpint database:
// paths is computed by the five-argument ReadsToPathsCoreY, paths_rc is a copy
// of paths with every entry reversed, and pathsdb is built from both by 
// CreateDatabase.

void ReadsToPathsCoreY( const vecbasevector& EE, const unsigned int& K,
     const longlong genome_size, vecKmerPath& paths, vecKmerPath& paths_rc,     
     vec<tagged_rpint>& pathsdb, int max_interval )
{
     // Forward paths.

     ReadsToPathsCoreY( EE, K, genome_size, paths, max_interval );

     // Reversed paths: copy, then reverse each entry in place.

     paths_rc = paths;
     for ( int id = 0; id < paths_rc.size( ); ++id )
     {    paths_rc[id].Reverse( );    }

     // Database over both sets of paths.

     CreateDatabase( paths, paths_rc, pathsdb );
}
  
// Overload that also produces the reversed paths and a big_tagged_rpint 
// database: paths is computed by the five-argument ReadsToPathsCoreY, paths_rc
// is a copy of paths with every entry reversed, and pathsdb is built from both
// by CreateDatabase.

void ReadsToPathsCoreY( const vecbasevector& EE, const unsigned int& K,
     const longlong genome_size, vecKmerPath& paths, vecKmerPath& paths_rc,     
     vec<big_tagged_rpint>& pathsdb, int max_interval )
{
     // Forward paths.

     ReadsToPathsCoreY( EE, K, genome_size, paths, max_interval );

     // Reversed paths: copy, then reverse each entry in place.

     paths_rc = paths;
     for ( int id = 0; id < paths_rc.size( ); ++id )
     {    paths_rc[id].Reverse( );    }

     // Database over both sets of paths.

     CreateDatabase( paths, paths_rc, pathsdb );
}
