/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2005) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef KMER_BASE_BROKER
#define KMER_BASE_BROKER

#include "CoreTools.h"
#include "Feudal.h"
#include "Basevector.h"
#include "Bitvector.h"
#include "paths/KmerPath.h"
#include "paths/SuperBaseVector.h"

#include <algorithm>  // for set_union
#include <map>

/**
   Class: KmerBaseBroker

   A class to answer questions about kmers and paths that require 
   knowing the underlying sequence (as opposed to just the <kmer ids>
   that make up a <kmer path>).  For example:
   
   - what is the base sequence of a given kmer, given its <kmer number>?
   - what is the base sequence of a <KmerPathInterval>?
   - what is the base sequence of a <KmerPath>?
   
   Quick-and-dirty -- by default it keeps its own copy of all the big files
   (reads.{fastb,paths{_rc},pathsdb}) in memory.
   This should be made more efficient in both time and space later.
   NOTE: there is also a constructor which takes references to preloaded
   paths{_rc} and pathsdb, thereby avoiding duplication of these structures.
   
   Note: regarding references to "high-quality k-mers", for ALLPATHS purposes
   all kmers are high-quality.
*/
class KmerBaseBroker {
public:

  KmerBaseBroker( ) 
    : self_owned(False) 
  { }

  KmerBaseBroker(String RunDir, int k, const String readsBase = "reads", bool Verbose = false) 
    : K( k ),
      bases( RunDir + "/" + readsBase + ".fastb" ),
      hqkmersfile( RunDir + "/" + readsBase + ".hqkmers.k" + ToString(K) ),
      verbose( Verbose )
  { 
    pathsp = new vecKmerPath( RunDir + "/" + readsBase + ".paths.k" + ToString(K) );
    paths_rcp = new vecKmerPath( RunDir + "/" + readsBase + ".paths_rc.k" + ToString(K) );
    vec<tagged_rpint>* nonconst_pathsdbp = new vec<tagged_rpint>;
    BREADX2( RunDir + "/" + readsBase + ".pathsdb.k" + ToString(K), *nonconst_pathsdbp ); 
    pathsdbp = nonconst_pathsdbp;
    self_owned = True; 
  }
  
  void Initialize(String RunDir, int k, const String readsBase = "reads", bool Verbose = false)
  { 
    if ( self_owned ) {
      delete pathsp;
      delete paths_rcp;
      delete pathsdbp;
    }
    K = k;
    bases.ReadAll( RunDir + "/" + readsBase + ".fastb" );
    hqkmersfile = RunDir + "/" + readsBase + ".hqkmers.k" + ToString(K);
    verbose = Verbose;
    pathsp = new vecKmerPath( RunDir + "/" + readsBase + ".paths.k" + ToString(K) );
    paths_rcp = new vecKmerPath( RunDir + "/" + readsBase + ".paths_rc.k" + ToString(K) );
    vec<tagged_rpint>* nonconst_pathsdbp = new vec<tagged_rpint>;
    BREADX2( RunDir + "/" + readsBase + ".pathsdb.k" + ToString(K), *nonconst_pathsdbp ); 
    pathsdbp = nonconst_pathsdbp;
    self_owned = True; 
  }

  /// Constructor for a sparse KmerBaseBroker, which SparseLoads a
  /// small fraction of the data.  The vec<read_id_t>s should be unique sorted.
  KmerBaseBroker( String RunDir, int k,
		  vec<read_id_t> fw_read_ids, vec<read_id_t> rc_read_ids,
                  const String readsBase = "reads" );

  KmerBaseBroker( String RunDir, int k, 
                  const vecKmerPath& paths, const vecKmerPath& paths_rc,
                  const vec<tagged_rpint>& pathsdb, 
                  const String readsBase = "reads", bool Verbose = false )
    : K( k ),
      bases( RunDir + "/" + readsBase + ".fastb" ),
      hqkmersfile( RunDir + "/"+ readsBase + ".hqkmers.k" + ToString(K) ),
      verbose( Verbose )
  { 
    pathsp = &paths;
    paths_rcp = &paths_rc;
    pathsdbp = &pathsdb;
    self_owned = False;
  }

  // The following does not correctly set hqkmersfile.  This might cause problems.

  void Initialize( int k, 
                  const vecbasevector& Bases,
                  const vecKmerPath& paths, const vecKmerPath& paths_rc,
                  const vec<tagged_rpint>& pathsdb, 
                  bool Verbose = false )
  { K = k;
    bases = Bases;
    hqkmersfile = "";
    verbose = Verbose;
    pathsp = &paths;
    paths_rcp = &paths_rc;
    pathsdbp = &pathsdb;
    self_owned = False;
  }
  
  ~KmerBaseBroker( )
  {
    if (self_owned) {    
      delete pathsp;
      delete paths_rcp;
      delete pathsdbp;
    }
  }

  int GetK() const { return K; }

  /// Method: Bases(kmer_id_t)
  /// Convert a <kmer number> to its sequence.
  /// See also: <ClearBasesCache()>.
  const basevector& Bases(kmer_id_t k) const;

  /// Method: Bases(KmerPathInterval)
  /// Convert an interval of kmers to its sequence
  basevector Bases(KmerPathInterval rpi) const;

  // Method: ClearBasesCache
  // <Bases(kmer_id_t)> caches and returns references to the cache.
  // This lets you clear the cache, which also invalidates those refs.
  void ClearBasesCache() { bases_cache.clear(); }

  /// Method: ToSequence
  /// Convert a KmerPath to its sequence-with-gaps.
  ///
  /// This will assert if passed a corrupted KmerPath, ie one
  /// not representable in base space because of bad kmer proximities.
  ///
  /// In the returned object, gap lengths are in bases, not kmers,
  /// so their lengths are possibly negative (as low as -K+1).
  /// Negative gaps whose length is determined by the bases are just
  /// returned as continuous sequence.
  SuperBaseVector ToSequence( const KmerPath& path, String name="" ) const;

  /// Method: Seq
  /// Return the sequence of a gap-free <KmerPath>.
  basevector Seq( const KmerPath& path ) const;

  /// Method: ToSequenceLoc
  /// Convert a <KmerPathLoc> to a location in the corresponding
  /// SuperBaseVector (presumably already created by ToSequence).
  ///  This actually does not need to know anything about underlying
  ///  sequence, but it lives inside KmerBaseBroker because it and
  ///  ToSequence must behave identically wrt walking through neg gaps.
  SuperBaseVector::Loc ToSequenceLoc( const KmerPathLoc& rpl, int offset ) const;
  
  SuperBaseVector::Loc ToSequenceLocFirst( const KmerPathLoc& rpl ) const
    { return( ToSequenceLoc( rpl, 0 ) ); }
  SuperBaseVector::Loc ToSequenceLocLast( const KmerPathLoc& rpl ) const
    { return( ToSequenceLoc( rpl, K-1 ) ); }

  /// Given a KmerPathLocation on a particular read -- that is, on a
  /// read-with-gaps -- identify the corresponding base on the un-gapped read.
  //  Usually possible by embedding the gapped read in the ungapped, but if
  //  that's ambiguous, it will load the reads.hqkmers bitvector.
  //  (The read_id argument is i for paths[i] and -i-1 for paths_rc[i])

  int ToReadBaseFirst( const KmerPathLoc& rpl, int read_id ) const
    { return( ToReadBase( rpl, read_id, 0 ) ); }
  int ToReadBaseLast( const KmerPathLoc& rpl, int read_id ) const
    { return( ToReadBase( rpl, read_id, K-1 ) ); }
  int ToReadBase( const KmerPathLoc& rpl, int read_id, int offset ) const;


  // These utilities are primarily useful for negative gap validation:
  // Return the minimum d>=0 such that k2 can start d bases past k1 start.
  // Returns 0 only if k1=k2; returns 1 if k2 can follow k1; etc.
  // Maximum possible return value is K.
  // Begins searching at d_min, if given.
  int MinOffset(longlong k1, longlong k2, int d_min=0) const;

  // Same for maximum offset.  Of course this only makes sense if
  // d_max < K.  The goal is to completely fix the size of small gaps.
  int MaxOffset(longlong k1, longlong k2, int d_max) const;

  // Is the given offset possible?
  bool PossibleOffset( longlong k1, longlong k2, int d ) const;

  /// If you have two kmers separated by a gap of fixed size <K,
  /// the intervening kmers are determined (but might not have numbers!).
  /// This returns the kmers that go in the gap, if it is easy to figure
  /// them out from (=those two kmers at that separation appear in) the reads.
  //  Philosophically doesn't belong here because it doesn't involve actual
  //  bases, but a KBB owns the ungapped paths and pathsdb.
  bool KmersBetween( longlong k1, longlong k2, int gapsize, KmerPath& ans ) const;

private:

  // Certainly one should never copy one of these!
  KmerBaseBroker(const KmerBaseBroker&);
  KmerBaseBroker& operator=(const KmerBaseBroker&);

  int K;

  const vecKmerPath *pathsp, *paths_rcp;
  const vec<tagged_rpint>* pathsdbp;
  Bool self_owned;

  vecbasevector bases;
  mutable map<longlong, basevector> bases_cache;

  String hqkmersfile;  // remember this if needed for later kmerHQ loading
  mutable vecbitvector kmerHQ;  // ToReadBase SparseReads into this as needed

  bool verbose;


  struct cache_key {
    longlong m_k1, m_k2;
    int m_gapsize;
    cache_key(longlong k1, longlong k2, int g)
      : m_k1(k1), m_k2(k2), m_gapsize(g) { };
    friend 
    bool operator<(const cache_key& lhs, const cache_key& rhs) {
      return(lhs.m_k1 < rhs.m_k1 ||
	     (lhs.m_k1 == rhs.m_k1 &&
	      (lhs.m_gapsize < rhs.m_gapsize ||
	       (lhs.m_gapsize == rhs.m_gapsize &&
		(lhs.m_k2 < rhs.m_k2)))));
    }
  };

  mutable map<cache_key,KmerPath> between_cache;

  mutable map<cache_key,int> min_offset_cache;
  mutable map<cache_key,int> max_offset_cache;

};


#endif
