/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

#ifndef SERIAL_FEUDAL_H
#define SERIAL_FEUDAL_H

/// Pretend we are a mastervec, read only chunks from file to save memory.
///
/// \file SerialFeudal.h 
///
/// Adapter for mastervec files that are being
/// processed serially and need to limit the number of objects in
/// memory at once.  
///
/// The definition of "serial" is strict, i.e. once
/// v[i] is accessed v[j] is never needed again, for all j<i.  However,
/// there is no need to start at position 0 and no loss of efficiency
/// for starting later in the vector.  The intended audience is simple
/// code that uses few of the Feudal features, though more could be
/// added to this simulation of the mastervec interface.
///
/// The selected value of bufsize is important for performance, and it
/// depends on the machine you are running on and the size of the
/// serfvecs in the mastervec.  Too small will waste I/O time while too
/// large will take up excess memory. 
///
/// Here are some tests on solexa01. The file was about 3G overall,
/// and each serfvec contained 26 four_base elements. These suggest
/// that for these sizes and 4G of RAM, bufsizes between 1K and 1M give
/// very similar results.
///
/// - BUFSIZE    :   100    1000   10000   100000   1000000
/// - mean       : 124.9    45.1    45.4     46.9      48.3
/// - stdev      :  10.9     4.8     6.0      4.7       7.1
/// .
///
///
/// Header-only module--no .cc file.



#include "CoreTools.h"
#include "Feudal.h"

/// Windowed view of a mastervec file for strictly serial processing.
///
/// At most bufsize objects of the file are held in memory at a time (the
/// whole file when bufsize is 0).  operator[] reloads a window starting at
/// the requested index whenever that index lies outside the current one.
/// Nothing in this class writes back to the file, so changes made through
/// the non-const operator[] exist only in the in-memory window.
template<class MasterVecType>
class SerialFeudal {
  mutable MasterVecType v_;                 // objects [bufstart_, bufend_)
  String filename_;                         // file attached by ReadAll()
  const unsigned int bufsize_;              // window length; 0 = load all
  mutable unsigned int bufstart_, bufend_;  // half-open window, file indices
  unsigned int N_;                          // number of objects in the file

  /// Start with no file attached and an empty window.
  void Init() {
    bufstart_= bufend_ = N_ = 0;
  }

  /// Replace the in-memory window with the one that begins at object i and
  /// holds up to bufsize_ objects (fewer at the end of the file).
  /// Aborts via ForceAssertGt if i is not below N_ or the window is empty.
  void GetRangeStarting(unsigned int i) const
  {
    // Check i first so that N_ - i below cannot wrap around.
    ForceAssertGt(N_, i);
    bufstart_ = i;
    // Clamp the length rather than the end position: bufstart_ + bufsize_
    // could overflow unsigned int when bufsize_ is very large.
    const unsigned int remaining = N_ - bufstart_;
    bufend_ = bufstart_ + min(bufsize_, remaining);
    ForceAssertGt(bufend_, bufstart_);
    // Read this block in, overwriting the old if present, and
    // recalculating data size.
    v_.clear();
    v_.resize(bufend_ - bufstart_);
    v_.ReadRange(filename_, bufstart_, bufend_, 0, false, true);
  }

  /// Make sure object i is in the window and return its position within v_.
  /// Shared by both operator[] overloads.
  unsigned int Locate(int i) const
  {
    AssertGe( longlong(i), longlong(0) );
    AssertLt( longlong(i), longlong(N_) );
    if ( longlong(i) >= longlong(bufend_)
         || longlong(i) < longlong(bufstart_))
      GetRangeStarting(i);
    AssertGe( longlong(i), longlong(bufstart_) );
    return i - bufstart_;
  }

public:

  /// Create an adapter with the given window size and no file attached;
  /// call ReadAll() before indexing.
  SerialFeudal(int bufsize) : bufsize_(bufsize) { Init(); }

  /// Create an adapter with the given window size attached to filename.
  SerialFeudal(const String &filename, unsigned int bufsize): bufsize_(bufsize)
  { Init(); ReadAll(filename); }

  /// Attach to filename.  Despite the name, chosen to mimic the mastervec
  /// interface, this reads nothing but the object count unless bufsize==0,
  /// in which case the entire file is loaded.
  void ReadAll(const String &filename)
  {
    filename_ = filename;
    if (0==bufsize_) {
      v_.ReadAll(filename_);
      N_ = v_.size();
      bufstart_ = 0;
      bufend_ = N_;
    } else {
      // Forget any window cached from a previously attached file so that
      // later lookups cannot be served from stale data.
      v_.clear();
      bufstart_ = bufend_ = 0;
      N_ = MastervecFileObjectCount( filename );
    }
  }

  /// Object i of the file, loading a new window starting at i if needed.
  /// A returned reference refers into the current window, which is
  /// cleared and refilled whenever a later access loads a new window.
  typename MasterVecType::value_type & operator[] (int i)
  {
    return v_[Locate(i)];
  }

  /// Const access; same windowing behavior as the non-const overload
  /// (the window members are mutable).
  const typename MasterVecType::value_type & operator[] (int i) const
  {
    return v_[Locate(i)];
  }

  /// Problem: this will return a negative size if N_ > 2^31.
  /// But we want the return type to be int to match mastervec.
  int size() const { return N_; }

  /// True when the attached file holds no objects (or no file is attached).
  bool empty() const { return 0 == size(); }
};
 





/// Write mastervec data a chunk at a time.
///
///Class to collect up the logic of writing blocks of reads to disk in
///a buffered fashion.  Tracks the number of reads and the size of the
///first read.  To use, alternate adding reads to the data() vector and
///calling write(), which will clear the data vector when it decides to
///write the data to disk.  The destructor writes any remaining data. 
template <class Mastervec>
class WriteBuffered {
  Mastervec v;
  String filename; 
  const int bufsize; // bufsize==0 means write once at the end
  unsigned int nreads;
  unsigned int readsize;
  unsigned int oldsize;
  unsigned int maxchunk;
  void forceWrite() {
    //PRINT2(filename, v.size());
    v.Write(filename, 0, v.size());
  }
  void finalWrite() {
    if (filename.empty()) {
      // Check programming error of putting data in buffer before
      // setting filename
      ForceAssert(v.empty()); 
    } else {
      forceWrite();
      MergeMastervecFiles(filename);
    }
  }
  void ClearInternals()
  { nreads = 0; oldsize = 0; v.clear(); }

public:
  /// Create a buffering wrapper for the mastervec.  If maxchunk and
  /// readsize are given, reserve memory accordingly for efficiency.
  WriteBuffered(int bufsize, unsigned int maxchunk=0, 
		unsigned int readsize=0) :
    v(), filename(), bufsize(bufsize), nreads(0), readsize(readsize),
    oldsize(0), maxchunk(maxchunk)
  {
    if (maxchunk>0 && readsize>0) 
      v.Reserve((bufsize+maxchunk)*readsize, (bufsize+maxchunk));
  }

  /// Reset filename, flushing old buffer, and start over.  No harm if
  /// the old buffer was unused.
  void SetFilename(const String &file)
  { finalWrite(); filename = file; Remove(file); ClearInternals(); }

  /// Add reads to data() however you want, but don't remove any. 
  Mastervec & data() { return v; }

  /// Write to disk if it's time
  void write() {
    // Update max chunk size
    unsigned int currentReads = NumberOfReads(); 
    unsigned int chunksize = currentReads - oldsize;
    if (chunksize>maxchunk) maxchunk = chunksize;
    oldsize = currentReads;
    // Now consider writing
    if (bufsize>0 && v.size()>=bufsize) {
      nreads += v.size();
      if (0==readsize) readsize = v[0].size();
      forceWrite();
      v.clear();
    }
  }
  // Finish writing if there's still data in the buffer
  ~WriteBuffered() {
    finalWrite();
  }
 
  // number of reads read so far
  unsigned int NumberOfReads() const { return nreads + v.size(); }
  // size of reads read (assumes all are the same size)
  unsigned int ReadSize() 
  {
    if (0==readsize && v.size()>0) readsize = v[0].size();
    return readsize;
  }
  unsigned int BiggestChunk() const { return maxchunk; }
};

#endif //SERIAL_FEUDAL_H
