// Copyright (c) 2000-2003 Whitehead Institute for Biomedical Research
// 


#ifndef COMPRESSEDSEQUENCE_H
#define COMPRESSEDSEQUENCE_H

#include "Vec.h"
#include "Basevector.h"
#include "Bitvector.h"

// This class stores constant sequences of [ACTGN] in some fraction of the space 
// required for full text, with reasonably speedy expansion and compaction.  
// For simplicity's sake, they are more or less required to be const.

class CompressedSequence {

 public:
  // Same underlying type as the elements data_ points at.
  typedef unsigned short          value_type;

  // --- construction, copying, destruction (bodies are not in this header) ---

  CompressedSequence();
  CompressedSequence(const char* char_ptr);
  CompressedSequence(const vec<char>& vec_char);
  CompressedSequence(const basevector& bv);

  CompressedSequence(const CompressedSequence& original);
  CompressedSequence& operator= (const CompressedSequence& original);

  ~CompressedSequence();

  // --- comparison ---

  friend const bool operator== (const CompressedSequence& lhs,
                                const CompressedSequence& rhs);

  friend const bool operator!= (const CompressedSequence& lhs,
                                const CompressedSequence& rhs);

  // --- modification ---

  // Non-const member, so it may alter this object.
  // NOTE(review): presumably reverse-complements the stored sequence in
  // place; the body is not visible here -- confirm in the implementation.
  void ReverseComplement();

  // --- expansion: "as" methods that return their result by value ---

  vec<char> asVecChar() const;
  vec<char> SubAsVecChar( int begin, int end ) const;
  basevector asBasevector() const;
  String asString() const;

  // --- expansion: "as" methods that fill a caller-supplied object ---

  void asVecChar( vec<char> &vec_char ) const;
  void SubAsVecChar( vec<char> &vec_char, int begin, int end ) const;
  void asBasevector( basevector &bases ) const;
  void getAmbBases( bitvector &ambBases ) const;

  // --- size queries ---
  // NOTE(review): how size() and real_size() differ cannot be determined
  // from this header; see their definitions.

  const int size() const;

  const int real_size() const;

  // --- serf functions (c.f. Feudal.h) ---

  // The static data is the size_ member: this returns its address as bytes.
  const char* StartOfStaticData() const {
    return reinterpret_cast<const char*>( &size_ );
  }

  // Byte count of the static data, i.e. of the size_ member.
  const int SizeOfStaticData() const {
    return sizeof( size_ );
  }

  // The dynamic data is whatever data_ currently points at.
  const unsigned short* StartOfDynamicData() const {
    return data_;
  }

  // size_ divided by five, rounded up.
  longlong SizeOfDynamicData() const {
    const unsigned int padded = size_ + 4;
    return padded / 5;
  }

  // The same rounded-up division applied to extra_ + size_, minus the amount
  // SizeOfDynamicData() already reports.
  longlong ExtraSpaceForDynamicData() const {
    const unsigned int padded_with_extra = extra_ + size_ + 4;
    const longlong with_extra = padded_with_extra / 5;
    return with_extra - SizeOfDynamicData();
  }

  // Reports the stored self_owned_ flag.
  Bool SelfOwned() const {
    return self_owned_;
  }

  void SetExternal( const char* start_of_static_data,
                    unsigned short* pointer_to_dynamic_data,
                    int size_of_dynamic_data,
                    int extra_space_for_dynamic_data );

  void ShiftStartOfDynamicData( unsigned short* new_start,
                                unsigned short* old_start );

  void Reinitialize();

  void Blank();

  // Exchanges the four named data members with those of other, member by
  // member.  The union is exchanged through data_ only; unused_ is not
  // swapped separately.
  void Swap( CompressedSequence& other ) {
    swap( self_owned_, other.self_owned_ );
    swap( extra_, other.extra_ );
    swap( size_, other.size_ );
    swap( data_, other.data_ );
  }

 private:
  // Layout note: data_ shares an anonymous union with a longlong so that the
  // pointer slot is padded to 8 bytes, giving compatibility between 32-bit
  // and 64-bit architectures at the cost of wasted space on 32-bit systems.
  // Keep the members below in this order.

  union {
    /// Pointer to the packed bases.
    /// NOTE(review): an earlier comment here said "4 bases per character",
    /// but the elements are unsigned short and SizeOfDynamicData() allots one
    /// element per five units of size_, so the packing appears to be five
    /// bases per element -- confirm against the implementation.
    unsigned short * data_;
    longlong unused_;
  };
  unsigned int size_;          // exposed as the static data (see above)
  unsigned short extra_;       // feeds ExtraSpaceForDynamicData()
  unsigned short self_owned_;  // flag reported by SelfOwned()
};

// Shorthand for a mastervec of CompressedSequence objects.  The second
// template argument is unsigned short, the same type that
// CompressedSequence::StartOfDynamicData() points at.
// NOTE(review): presumably that argument names the dynamic-data element type
// expected by mastervec -- confirm against Feudal.h.
typedef mastervec<CompressedSequence,unsigned short> veccompseq;

/// Declaration of the IsGoodFeudalFile specialization for CompressedSequence;
/// the definition is not in this header.
/// NOTE(review): the previous comment gave the reason as "sizeof(bitvector)
/// differs between archs", which names a different class and looks like it
/// was copied from elsewhere -- confirm the actual rationale for
/// CompressedSequence before relying on it.
template<> bool IsGoodFeudalFile<CompressedSequence>( const String & filename,
    const CompressedSequence * dummy, bool verbose, bool ok3);

#endif
