/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2006) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////


#include "Basevector.h"
#include "CoreTools.h"
#include "KmerRecord.h"

/**
   Function: TransformKmerShortMap

   Takes a <KmerShortMap> file and performs a transform on each kmer-value pair,
   writing the result to a new KmerShortMap file. The kmers remain unchanged,
   although some may be missing in the new file - see below.

   Parameters:
   
     filename_in  - the original KmerShortMap file
     filename_out - the new KmerShortMap file
     transformObj - function object that performs each individual transform
   
   The transform object must provide an operator() function that takes a
   basevector and an integer as arguments. It should return a value between
   0 and USHRT_MAX (65535), or -1 to cause the removal of the kmer-value pair.
   Although the kmer basevector is passed to the operator function, it is not
   required that the function use it.

   For an example of a suitable function object class see tksmThreshold.

   *Warning:* The function object cannot be defined as a local class (one which
   is declared within a function definition)
*/
template <class KSHAPE, typename transformObj>
void
TransformKmerShortMap(const String& filename_in,
		      const String& filename_out,
		      transformObj transform
		      )
{
  const int K = KSHAPE::KSIZE;
  vec< kmer_with_count<K> > kmers;
  BinaryRead3( filename_in, kmers );  
  Binary3Writer< kmer_with_count<K> > writer( filename_out );
  int N = kmers.size( );
  basevector theKmer(K);
  for ( int i = 0; i < N; i++ ) {
    kmers[i].GetBasevector(theKmer);
    int new_value =  Min( transform(theKmer, kmers[i].Count()), USHRT_MAX );
    if (new_value >= 0)
      writer.Write( kmer_with_count<K>( theKmer, new_value ) );
  }
  writer.Close();
}

/**
   Class: tksmThreshold

   Function Object class for use with <TransformKmerShortMap()>.

   Constructor takes a threshold value (default 0) that will be used to
   transform the <KmerShortMap> in the following way:
     All kmers in the original KmerShortMap with a value less than the
     threshold are removed (return -1) and those equal or above are given a
     value of 1. The kmer itself is not examined.
   For example, for a threshold value of 10:
   >  TransformKmerShortMap<KSHAPE>(original, new, tksmThreshold(10));

*/
class tksmThreshold {
public:
  tksmThreshold(int value = 0) : threshold(value) {}

  // Returns 1 if value >= threshold, otherwise -1.
  // The kmer is taken by const reference so that it is not copied on every
  // call; the call does not modify the function object.
  int operator()(const basevector& kmer, int value) const {
    return (value >= threshold) ? 1 : -1;
  }
private:
  int threshold;  // minimum value a kmer needs in order to be kept
};

/**
   Class: ksmt_thresholdByGc

   Function Object class for use with <TransformKmerShortMap()>.

   Constructor takes a vec<int> of kmer GC content specific threshold values
   for GC content = 0 to K. All kmers in the orginal KmerShortMap with a value 
   less than the threshold are removed (return -1) and those equal or above are
   given a value of 1.
   
   This can be used, for example, to remove all kmers which have a frequency below
   a given threshold (as done in <FindStrongKmers>).
*/
class tksmThresholdByGc {
public:
  tksmThresholdByGc(vec<int> valueByGc) : thresholdByGc(valueByGc) {}
  int operator()(const basevector kmer, int value) {
    return (value >= thresholdByGc[GcBases(kmer)] ) ? 1 : -1;
  }
private:
  vec<int> thresholdByGc;
};

/**
   Class: ksmtTrustedThresholdByGc

   Function Object class for use with <TransformKmerShortMap()>.

   Constructor takes a vec<int> of kmer GC content specific threshold values
   for GC content = 0 to K, and a KmerShortMap of Trusted Kmer Frequencies.
   All kmers in the orginal KmerShortMap with a value less than the threshold
   are removed (return -1) and those 2 above or more are given a value of 1.
   At the threshold and threshold+1 only kmers which are trusted are kept.
*/
class tksmTrustedThresholdByGc {
public:
  tksmTrustedThresholdByGc(vec<int> valueByGc, KmerShortMap& trustedKmers) 
    : thresholdByGc(valueByGc), trusted(trustedKmers)  {}
  int operator()(const basevector kmer, int value) {
    int threshold = thresholdByGc[GcBases(kmer)];
    int range = threshold/4 + 1;
    if (value > threshold + range) return 1;
    if (value < threshold - range) return -1;
    return (trusted.IsStrong(kmer) ? 1 : -1);
  }
private:
  vec<int> thresholdByGc;
  KmerShortMap& trusted;
};

