/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef FORCE_DEBUG
  #define NDEBUG
#endif

#include <map>
#include "kmer_freq/MakeProvisionalChanges.h"
#include "kmer_freq/WriteKmerFrequencies.h"
#include "math/Functions.h"
#include "paths/BaseErrorProb.h"

/**
   \file

   Actual implementation of ProvisionalEdits.cc.

   \copydoc ProvisionalEdits.cc
   \ingroup grp_edits
*/

/**
   \copydoc ProvisionalEdits.cc

   This routine corrects the reads according to the procedure described above.
   Note that it is not templatized on the kmer size K; rather, it deals with all
   kmer sizes in turn.

   For every read that is not strong in all of the tables, the candidate
   mutations listed in a BaseErrorTable are tried in table order.  For each
   candidate set of positions, every combination of substituted bases is tested
   against all tables.  A change is recorded only when it makes the read strong
   in strictly more tables than the original read and than every change tried
   so far; a later change that merely equals the best count cancels the edit,
   because the correct one cannot be determined.  The surviving change, if any,
   is applied in-place.

   Empty inputs are tolerated: an empty \p reads, an empty \p tablePtrs or an
   empty error table simply result in no edits.

   \param[in,out] reads              the reads; any edits (fixes) will be applied in-place to the reads in this array.
   \param[in]     quals              the \link grp_quals quality scores\endlink of the reads; if non-empty, a
       per-read error table is built from quals[i] and \p probProfile is not used for selecting mutations
   \param[in]     tablePtrs          for each kmer size, the strong KmerShortMap
   \param[in]     verbose            whether to print more debug info
   \param[in]     probProfile        if quality scores not given in \p quals, then the probability
      of error at each read \e position; if its size is zero a default profile is constructed
      for the longest read
   \param[in]     max_errors         consider mutations with up to this many errors
   \param[in]     max_entries        cap on the number of possible mutations to consider
   \param[in]     error_prob_cutoff  if we have already found a successful change more likely than a given one
       by a factor of error_prob_cutoff then stop.
   \param[in]     peek_ahead         if true then calculate probability of most likely max_errors+1
          combination and cap number of possible mutations to consider at this point
*/
void
MakeProvisionalChanges( vecbasevector& reads, vecqualvector& quals,
                        vec<KmerShortMap*>& tablePtrs,
                        const bool verbose,
                        BaseErrorProbProfile probProfile,
                        const int max_errors, const int max_entries,
                        const int error_prob_cutoff,
                        const bool peek_ahead)
{
  // Counters reported in the periodic progress line and in the final summary.
  longlong numchanged = 0;
  longlong numweak = 0;
  longlong singlemut = 0;
  longlong multiplemut = 0;

  const int nreads = reads.size( );
  const int tablePtrsSize = tablePtrs.isize();

  // When quality scores are supplied they are used instead of the error profile.
  const bool useQuals = !quals.empty();

  // Map of BaseErrorTables for each read size used
  map <int, BaseErrorTable> errorTableMap;

  // If error profile not passed then create default flat profile for largest read size
  if (probProfile.getProbProfileSize() == 0) {
    unsigned int max_length = 0;
    for (int i = 0; i < nreads; i++)
      max_length = Max(max_length, reads[i].size());
    probProfile = BaseErrorProbProfile(max_length);
  }

  // Obtain BaseErrorTable for the first read size encountered.
  // Guarded so that an empty read set does not index reads[0].
  if ( nreads > 0 ) {
    errorTableMap[reads[0].size()] = probProfile.getErrorTable(reads[0].size(), max_errors,
                                                               max_entries, peek_ahead);
  }

  // BaseErrorTable based on each read's quality scores (optional)
  BaseErrorTable qualErrorTable;

  // Find minimum value of K.  With no tables minK stays 0; in that case every
  // read counts as strong in all (zero) tables and is skipped below.
  int minK = 0;
  for ( int j = 0; j < tablePtrsSize; ++j ) {
    const int k = tablePtrs[j]->GetKmerSize();
    if ( j == 0 || k < minK )
      minK = k;
  }

  double clock = -1.0;
  for ( int i = 0; i < nreads; i++ ) {
    // Progress report every 100000 reads, timing the batch just finished.
    if ( i % 100000 == 0 ) {
      if ( i > 0 ) {
        double time_used = WallClockTime( ) - clock;
        cout << "i = " << i << " of " << nreads
             << ", num weak = " << numweak
             << ", num changed = " << numchanged
             << ", time used = " << setprecision(3)
             << time_used << " seconds" << endl;
      }
      clock = WallClockTime( );
    }

    basevector& theRead = reads[i];

    // Count the tables in which the unmodified read is already strong.
    int origNumStrong = 0;
    for ( int j = 0; j < tablePtrsSize; ++j ) {
      if (tablePtrs[j]->IsStrong(theRead))
        ++origNumStrong;
    }

    // Strong everywhere: nothing to correct.
    if ( origNumStrong == tablePtrsSize )
      continue;

    ++numweak;

    if ( verbose )
      PRINT( i );

    const int readSize = theRead.size();

    // Cannot correct reads shorter than the smallest value of K in tables
    if (readSize < minK) {
      continue;
    }

    // Scratch copy that receives the trial substitutions.
    basevector mutatedRead = theRead;
    int bestNumStrong = origNumStrong;

    // BaseErrorTable iterator (will point to table suitable for current read size)
    map <int, BaseErrorTable>::iterator etmItr;

    if ( useQuals ) {
      // Use quality scores to produce error table for each read
      probProfile = BaseErrorProbProfile(quals[i]);
      qualErrorTable = probProfile.getErrorTable(quals[i].size(), max_errors,
                                                 max_entries, peek_ahead);
    } else {
      // Get appropriate BaseErrorTable for this read size, or make new one if required
      etmItr = errorTableMap.find(readSize);
      if (etmItr == errorTableMap.end()) {
        errorTableMap[readSize] = probProfile.getErrorTable(readSize, max_errors,
                                                            max_entries, peek_ahead);
        etmItr = errorTableMap.find(readSize);
      }
    }

    // Select BaseErrorTable to use (from error profile or quality scores)
    BaseErrorTable& errorTable = (useQuals ? qualErrorTable : etmItr->second);

    // No candidate mutations: nothing to try, and back() below would be invalid.
    const int mutTableSize = errorTable.isize();
    if ( mutTableSize == 0 )
      continue;

    // To hold record of best successful change
    vec<int> mutpos_hit;
    vec<int> mutbase_hit;

    // Current change information (kept outside the loop so storage is reused)
    vec<int> mutpos;
    vec<int> mutbase;

    // Candidates less probable than cutoff_prob are not tried.  The break below
    // assumes the table is ordered by decreasing error_prob -- TODO confirm.
    // NOTE(review): the initial threshold multiplies the last entry's
    // probability by error_prob_cutoff, whereas the update after a hit divides
    // by it; presumably this requires a change to be more likely than the
    // untested mutations by the same factor -- confirm this is intended.
    double cutoff_prob = errorTable.back().error_prob * error_prob_cutoff;
    bool found = false;

    // Declared outside the loop because it is reported after the loop ends.
    int mutationno;
    for(mutationno = 0; mutationno < mutTableSize; ++mutationno) {

      // Get potential change information
      mutpos = errorTable[mutationno].base_pos;
      const int mutcount = mutpos.isize();
      mutbase.resize(mutcount);
      const double mutprob = errorTable[mutationno].error_prob;

      // If we have already found a successful change more likely than this one
      // by a factor of error_prob_cutoff then stop.
      if (mutprob < cutoff_prob)
        break;

      // Try out all possible base/position combinations: 4^mutcount of them
      // (4, 16, 64, ...).  Each combination is read as a base-4 number with one
      // digit per mutated position.
      const int nocombs = 1 << (2 * mutcount);
      for (int comb = 0; comb < nocombs; ++comb) {
        bool skip = false;
        int basesum = comb;
        for (int posno = 0; posno < mutcount; ++posno) {
          const int base = basesum % 4;
          const int pos = mutpos[posno];
          if (theRead[pos] == base) {
            // This position would keep its original base, so the combination
            // is not a change at all mutcount positions.
            skip = true;
            break;
          }
          mutbase[posno] = base;
          mutatedRead.Set( pos, base );
          basesum /= 4;
        }

        // Current base/position combination same as original - skip.
        if (skip)
          continue;

        // Measure effect of base changes.  Counting stops early once the
        // candidate can no longer match the best count seen so far.
        int numStrong = tablePtrsSize;
        for ( int j = 0; j < tablePtrsSize && numStrong >= bestNumStrong; ++j ) {
          if (!tablePtrs[j]->IsStrong(mutatedRead))
            --numStrong;
        }

        if ( numStrong > origNumStrong ) {
          if ( numStrong == bestNumStrong ) {
            // Found two equally successful changes - cannot determine correct one.
            found = false;
          } else if ( numStrong > bestNumStrong ) {
            // Found a successful change - record it and try another combination.
            mutpos_hit = mutpos;
            mutbase_hit = mutbase;
            cutoff_prob = mutprob/error_prob_cutoff;
            bestNumStrong = numStrong;
            found = true;
          }
        }
      }

      // reset to the original base before moving to the next position
      for (int posno = 0; posno < mutcount; ++posno) {
        mutatedRead.Set( mutpos[posno], theRead[mutpos[posno]] );
      }

    }

    // Modify read if a single successful change was found
    if ( found ) {
      if ( verbose )
        PRINT2( errorTable.size(), mutationno );  // size of the table actually used
      for (int posno = 0; posno < mutpos_hit.isize(); ++posno) {
        theRead.Set( mutpos_hit[posno], mutbase_hit[posno] );
      }
      ++numchanged;
      if (mutpos_hit.size() == 1)
        ++singlemut;
      else
        ++multiplemut;
    }
  }
  PRINT2( numweak, numchanged );
  PRINT2( singlemut, multiplemut );
}

/**
   \copydoc ProvisionalEdits.cc

   Convenience overload for a single kmer shape, templatized on KSHAPE.
   It constructs one KmerShortMap from KSHAPE::getId() and \p filename, wraps
   its address in a one-element table list, and forwards to the non-templatized
   MakeProvisionalChanges(vecbasevector&, vecqualvector&, vec<KmerShortMap*>&, ...)
   overload, which makes the actual edits.

   No peek_ahead argument is forwarded; NOTE(review): this presumably relies on
   a default declared for that parameter in the header -- confirm.

   \param[in,out] reads              the reads, edited in-place by the forwarded call
   \param[in]     quals              quality scores of the reads, passed through unchanged
   \param[in]     filename           passed to the KmerShortMap constructor
   \param[in]     verbose            passed through unchanged
   \param[in]     probProfile        passed through unchanged
   \param[in]     max_errors         passed through unchanged
   \param[in]     max_entries        passed through unchanged
   \param[in]     error_prob_cutoff  passed through unchanged
*/
template <class KSHAPE>
void
MakeProvisionalChanges( vecbasevector& reads,  vecqualvector& quals,
                        const String& filename,
                        const bool verbose,
                        const BaseErrorProbProfile& probProfile,
                        const int max_errors, const int max_entries,
                        const int error_prob_cutoff)
{
  // The single table for this kmer shape; it must outlive the forwarded call
  // because only its address is handed over.
  KmerShortMap shapeTable( KSHAPE::getId(), filename );

  vec<KmerShortMap*> shapeTablePtrs;
  shapeTablePtrs.push_back( &shapeTable );

  MakeProvisionalChanges( reads, quals, shapeTablePtrs, verbose,
                          probProfile, max_errors, max_entries,
                          error_prob_cutoff );
}

// Explicit instantiation of the templatized MakeProvisionalChanges<KSHAPE>.
// INSTANTIATE emits one explicit instantiation for the given KSHAPE; its second
// parameter is unused.  FOR_ALL_KSHAPES is defined elsewhere and presumably
// invokes INSTANTIATE once per supported kmer shape -- verify against its definition.
#define INSTANTIATE(KSHAPE, dummy) \
  template void MakeProvisionalChanges<KSHAPE>( vecbasevector&,  vecqualvector&,  const String&, bool, const BaseErrorProbProfile&, int, int, int )
FOR_ALL_KSHAPES(INSTANTIATE,);


