/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
     #define NDEBUG
#endif

#include "Basevector.h"
#include "CoreTools.h"
#include "Feudal.h"
#include "lookup/LookAlign.h"
#include "lookup/LookupTable.h"
#include "lookup/PerfectLookup.h"

// Convenience overload: looks up every sequence in query by forwarding to the
// ranged PerfectLookup with the inclusive query-id range [0, query.size() - 1].
// All other arguments are passed through unchanged.
void PerfectLookup( const unsigned int K, const vecbasevector& query, 
		    const String& lookup_file, vec<look_align>& aligns, const AlignDir direction,
		    const Bool subsumed_only, const unsigned int target_seq_overlap)
{
  // Inclusive bounds of the id range handed to the ranged overload.
  const int first_query_id = 0;
  const int last_query_id = query.size() - 1;

  PerfectLookup( K, query, lookup_file, aligns, direction,
		 first_query_id, last_query_id,
		 subsumed_only, target_seq_overlap );
}

void PerfectLookup( const unsigned int K, const vecbasevector& query, 
		    const String& lookup_file, vec<look_align>& aligns,
		    const AlignDir direction, int range_start, int range_end, 
		    const Bool subsumed_only, const unsigned int target_seq_overlap)
{    
     aligns.clear( );
     
     // Check we have something to align
     if (query.empty())
       return;

     // Read header information from lookup table.
     lookup_table look(lookup_file);

     ForceAssertEq( look.K( ), K );
     ForceAssertLt( range_end, query.size());
     ForceAssertLe( range_start, range_end);

     // For each query (or its rc), find a kmer in it that appears a minimal number 
     // of times in the target.

     const int npasses = ( direction == FW ? 1 : 2 );
     const int nqueries = range_end - range_start + 1;

     vec<int> best_pos( nqueries * 2 );
     vec<unsigned int> best_index( nqueries * 2 );
     for ( int id = 0; id < nqueries; id++ )
     {    int query_id = range_start + id;
          const basevector& s = query[query_id];
          for ( int pass = 0; pass < npasses; pass++ )
          {    static basevector src;
	       if ( pass == 1 ) src.ReverseComplement(s);
	       const basevector& S = ( pass == 0 ? s : src );
               int pos = -1;
               unsigned int min_freq = 0, mindex = 0;
	       int length = S.isize() - (int) K;
	       unsigned int index = Index( S, 0, K );
	       unsigned int freq = look.Freq(index);
	       if ( freq != 0 ) 
	       {    mindex = index;
		    pos = 0;
		    if (freq != 1)
		    {    min_freq = freq;
		         for ( int j = 1; j <= length; j++ )
			 {   NextIndex( index, S, j, K );
			     freq = look.Freq(index);
			     if ( freq == 1 )
			     {    mindex = index;
		                  pos = j; 
				  break;    }
			     if ( freq == 0 )
			     {    pos = -1;
                                  break;    }
			     if ( pos < 0 || freq < min_freq )
			     {    min_freq = freq;
			          mindex = index;
				  pos = j;    }    }    }    }
               best_pos[ 2*id + pass ] = pos;
               best_index[ 2*id + pass ] = mindex;    }    }

     // Set up for alignment generation.

     static look_align la;
     la.nhits = la.mutations = la.indels = 0;
     la.a.SetNblocks(1);
     la.a.SetGap( 0, 0 );

     // Go through the lookup table chunks.

     const unsigned int max_query_size = 200 * 1000 * 1000;
     for ( unsigned int i = 0; i < look.NChunks( ); i++ )
     {    look.ReadChunk(i);
          // Go through the query sequences.

          for ( int id = 0; id < nqueries; id++ )
          {    int query_id = range_start + id;
               const basevector& s = query[query_id];
   	       unsigned int query_length = s.size();

               // Go through the orientations.

               for ( int pass = 0; pass < npasses; pass++ )
		 {  int r = best_pos[ 2*id + pass ];  // r is low-freq kmer position in query
                    if ( r < 0 ) continue;
                    static basevector src;
                    if ( pass == 1 ) src.ReverseComplement(s);
                    const basevector& S = ( pass == 0 ? s : src );
                    unsigned int index = best_index[ 2*id + pass ];
                    unsigned int start = look.StartLocs(index);
                    unsigned int stop = look.StopLocs(index);
                    for ( unsigned int l = start; l < stop; l++ )
		    {    unsigned int offset = look.Locs(l) + ( max_query_size - r );

                         unsigned int tig, rpos; // rpos is low-freq kmer position in target
                         look.GetContigPos( look.Locs(l), tig, rpos );
			 
			 unsigned int startx;
			 unsigned int q_start;
			 unsigned int t_start;
			 unsigned int length = query_length;

			 // Determine starting position in target and query
			 if ( offset < look.ContigStart(tig) + max_query_size) {
			   q_start = r - rpos;
			   t_start = 0;
			   length -= q_start;
			   startx = look.ContigStart(tig);
			 } else {
			   q_start = 0;
			   t_start = rpos - r;
			   startx = offset - max_query_size;
			 }

			 if (startx + length > look.ContigStop(tig) ) {
			   length = look.ContigSize(tig) - t_start;
			 }

			 // Do we want subsumed alignments only?
			 if (length != query_length && subsumed_only)
			   continue;

                         // Validate alignment, skipping portion covered by kmer.
                         if ( startx < look.BasesStart( ) ) continue;
                         Bool mismatch = False;
			 unsigned int start_of_kmer = r - q_start;
                         for ( unsigned int y = 0; y < start_of_kmer; y++ )
			 {   if ( S[q_start + y] != look.Base( startx + y ) )
			     {    mismatch = True;
                                   break;    }    }
                         if (mismatch) continue;  // Alignment failed
                         for ( unsigned int y = start_of_kmer + K; y < length; y++ )
			 {   if ( S[q_start + y] != look.Base( startx + y ) )
			     {    mismatch = True;
                                   break;    }    }
                         if (mismatch) continue;  // Alignment failed

			 // For partial alignments and overlapping target sequences we try to
			 // filter out all but one of the overlapping alignments.
			 if (length != query_length && target_seq_overlap != 0) {
			   if (length <= target_seq_overlap ) // A longer alignment must also 
			     continue;                        // exist so ignore this one.
			   if (q_start != 0 && rpos - t_start <= target_seq_overlap - K) 
			     continue;  // Another overlapping alignment must exist.
			 }

                         // Record alignment.
                         la.query_id = query_id;
                         la.target_id = tig;
                         la.a.Setpos1(q_start);
                         la.a.Setpos2(t_start);
                         la.query_length = query_length;
                         la.target_length = look.ContigSize(tig);
                         la.rc1 = ( pass == 1 );
                         la.a.SetLength( 0, length );
                         aligns.push_back(la);    }    }    }    }   }
