/////////////////////////////////////////////////////////////////////////////
//                   SOFTWARE COPYRIGHT NOTICE AGREEMENT                   //
//       This software and its documentation are copyright (2007) by the   //
//   Broad Institute/Massachusetts Institute of Technology.  All rights    //
//   are reserved.  This software is supplied without any warranty or      //
//   guaranteed support whatsoever. Neither the Broad Institute nor MIT    //
//   can be responsible for its use, misuse, or functionality.             //
/////////////////////////////////////////////////////////////////////////////

/// Class String can replace class std::string to reduce compilation time.
/// \class String
/// This file defines the class String.
///
/// Class String implements a small subset of the stl class string.  
/// It implements it in such a way that compilation time is minimized.  
/// Hence class String is appropriate for situations where execution-time 
/// performance of strings is not an issue, but compilation-time performance is.
/// The use of class String in place of class string can (in some cases) 
/// reduce compilation time by a factor of three or more.
///
/// You can mix the use of strings and Strings.  Within reason, you
/// should be able to go back and forth seamlessly.
///
/// To get faster execution (but slower compilation), define STRING_FAST_EXECUTE.
///
///
/// SOME NOTES ABOUT THE CONVERSION OF STRING TO A FEUDAL-FRIENDLY CLASS.
///
/// The top bit of length_ is used to indicate whether the given object
/// manages its own memory ( (length_ & TopBit32) == 0, i.e. SelfOwned() )
/// or whether it references memory out of its control (! SelfOwned()).
/// Note that this use of the top bit is the reverse of other feudal
/// vectors, i.e. the top bit of length_ is a "vassal" flag: if it is
/// set, the String does not own what it points to.  This results in
/// less bit-setting in constructors and generally makes for cleaner,
/// more easily-understood code.
///
/// Any time that a String is resized, it becomes self-owned if it is
/// not already UNLESS the new size is the same as the old size, in
/// which case the ownership state is preserved.
///
/// This implies that size() should be used in lieu of length_ unless
/// it is known that the String in question is self-owned, either by
/// knowing that all new Strings are self-owned [see the constructors],
/// by an explicit call to SelfOwned() [see the destructor], or
/// because we can guarantee that the String's size has changed [see
/// String::operator+=( const char c)].

#ifndef STRING
#define STRING

#include <string>
#include <vector>
#include <algorithm>

#include "system/Assert.h"
#include "system/Types.h"

/// A compact string class exposing a small, std::string-like interface.
///
/// Representation, as far as this header shows:
///  - length_ keeps the character count in its low 31 bits (see size()) and
///    an ownership flag in its top bit (see SelfOwned()).
///  - data() yields the characters.  In STRING_FAST_EXECUTE builds it picks
///    the in-object buffer when the String is self-owned and length_ is
///    below cutoff_length, and data_ptr_ otherwise.  In other builds the
///    choice is made by data_not_inlined(), which is defined elsewhere.
///  - c_str() returns data() unchanged, so the characters are expected to be
///    NUL-terminated at all times.
///
/// Most member functions are only declared here; their definitions live
/// outside this header.
class String {

 private:

  /// Length threshold: a self-owned String with length_ below this value
  /// uses the in-object buffer (see data() and Reinitialize()).
  enum { cutoff_length = 12 };

  /// Low 31 bits: number of characters in the string.
  /// Top bit: set when the String does NOT own the memory it points to.
  unsigned int length_;

  /// In-object storage for short self-owned strings.
  /// NOTE(review): this array has cutoff_length - sizeof(char*) bytes while
  /// the short-string test compares against cutoff_length; presumably short
  /// strings are allowed to run on into the storage of data_ptr_ declared
  /// right after it.  Confirm against the implementation before reordering
  /// or resizing these members.
  char start_of_buffer_[cutoff_length - sizeof(char*)];

  /// Character storage used whenever the in-object buffer is not.
  char* data_ptr_;

 public:

  typedef char         value_type;

  /// Number of characters, with the ownership bit masked off.
  unsigned int size( ) const
    { return length_ & Bits31; }

  /// True when the top bit of length_ is clear, i.e. the String manages its
  /// own memory.
  Bool SelfOwned() const { return ( length_ & TopBit32 ) == 0; }

 private:
  /// Pointer to the first character.  Inlined only under STRING_FAST_EXECUTE;
  /// otherwise forwarded to the out-of-line data_not_inlined().
#ifdef STRING_FAST_EXECUTE
  char* data( )
  { return ( ( SelfOwned() && ( length_ < cutoff_length ) ) ? start_of_buffer_ : data_ptr_ ); }
  const char* data( ) const
  { return ( ( SelfOwned() && ( length_ < cutoff_length ) ) ? start_of_buffer_ : data_ptr_ ); }
#else
  char* data( ) { return data_not_inlined( ); }
  const char* data( ) const { return data_not_inlined( ); }
#endif

 public:

  /// Change the number of characters to n.  Defined elsewhere.
  void resize(unsigned int n);

  /// size() converted to a signed int.
  int isize( ) const { return size( ); }

  /// Read-only pointer to the characters (the same pointer data() yields).
  const char* c_str( ) const
    { return data(); }

  /// Note that indiscriminate use of this method can have very bad,
  /// hard-to-trace effects.  Use with extreme caution.
  char* c_str_mutable( )
    { return data(); }

  /// NOTE(review): presumably assigns the first l characters of x to this
  /// String; the definition is not in this header -- confirm.
  void Set( const char* x, int l );

  // Constructors.

  String( );
  String(const String& s);
  String(const string& s);
  String(const char* x);
  explicit String( const vector<char>& v );
  /// NOTE(review): the meaning of n is not visible here -- confirm against
  /// the definition before use.
  explicit String (int n);
  explicit String(char c);
  String( const String& s, const int n ); // Concatenation of n copies of s.

  // operator[ ] to return a character.

  /// Character at position i.  The check is AssertLe( i, size() ), so the
  /// index size() itself is accepted by the check.
  char& operator[] (unsigned int i)
    {
      AssertLe( i, size() );
      return data()[i];
    }

  /// Read-only character at position i; same bounds check as above.
  const char& operator[] (unsigned int i) const
    {
      AssertLe( i, size() );
      return data()[i];
    }

  String& operator= (const String& s);
  String& operator= (const char* x);
  String& operator= (const char c);

  String& operator+= ( const String& s );
  String& operator+= ( const char* x );
  String& operator+= ( const char c );

  ~String( );

  friend String operator+ ( const String& first, const String& second );

  friend ostream& operator<< ( ostream& out, const String& s );

  /// This will consume, but not store, the first whitespace after the String.
  /// To be precise, if you are reading in a String that is terminated by
  /// any whitespace, the String read in will not contain the space character,
  /// and the next character in the istream will be the one after the
  /// whitespace character. This behavior is different from that of
  /// std::string, which does not consume the whitespace separator.
  friend istream& operator>> ( istream& in, String& s );

  friend bool operator== ( const String& left, const String& right );
  friend bool operator!= ( const String& left, const String& right );
  friend bool operator< ( const String& left, const String& right );
  friend bool operator> ( const String& left, const String& right );

  friend bool operator_eq ( const String& left, const String& right );
  friend bool operator_ne ( const String& left, const String& right );
  friend bool operator_lt ( const String& left, const String& right );

  /// True when size() is zero.
  bool empty( ) const { return size() == 0; }
  /// True when size() is not zero.
  bool nonempty( ) const { return size() != 0; }

  /// Conversion to std::string.  Defined elsewhere.
  operator string( ) const;

  /// Shorthand for resize(0).
  inline void clear( ) { resize(0); }

  /// Replace the part of a string which starts at a given position and has
  /// a given length.
  /// (The length is reduced if need be to avoid going past the end of the string.)

  void replace(unsigned int start, unsigned int length, const char* replacement);

  /// Same as above, taking the replacement text from a String.
  void replace(unsigned int start, unsigned int length, const String& replacement)
  {    replace( start, length, replacement.c_str( ) );    }

  /// substr: Return the part of a string which starts at a given position and has a
  /// given length.  If length is not provided, go from start to end of string.

  String substr( unsigned int start, int length = -1 ) const;

  /// Delete the part of a string which starts at a given position and has a
  /// given length.  The no-argument version erases the whole string.
  /// (The length is reduced if need be to avoid going past the end of the string.)

  void erase( int start, int length );
  void erase();

  /// Append the part of a string which starts at a given position and has a
  /// given length.
  /// (The length is reduced if need be to avoid going past the end of the string.)

  void append( const String& s, int start, int length );

  friend void getline( istream& in_strm, String& out_str );

  /// Read in all characters from the input stream.
  void Slurp(istream & is);

 private:

  /// Out-of-line counterparts of data(), used when STRING_FAST_EXECUTE is
  /// not defined.
  char* data_not_inlined( );
  const char* data_not_inlined( ) const;

  /// Storage management helpers; defined elsewhere.
  void realloc_( const unsigned int new_size );
  void realloc_and_copy_( const unsigned int new_size );

 public:

  /// Old String methods.

  /// Test if a string contains a given substring.

  bool Contains( const char* x ) const;
  bool Contains( const String& s ) const;

  /// Test if a string contains a given substring at a given position.
  /// If the position is -1, it means at the end of the string.

  bool Contains( const char* x, int pos ) const;
  bool Contains( const String& s, int pos ) const;

  /// a.Before(b): find the first occurrence s of b in a; return the part of
  ///              a before s.

  String Before( const char * x ) const;
  String Before( const String& s ) const;

  ///Return the part of this before s, or all of it if s not found.
  String SafeBefore( const String& s ) const;

  /// NOTE(review): presumably the counterpart of Before -- the part of this
  /// after the first occurrence of the argument; confirm against definition.
  String After( const char * x ) const;
  String After( const String& s ) const;

  // RevBefore, RevAfter: same as Before and After, but use last occurrence rather
  // than first.

  /// Part of this before the last occurrence of s.  The position comes from
  /// PosRev(s) and is passed to substr as the length; a value of -1 is the
  /// same as substr's default length.
  inline String RevBefore( const String& s ) const
  {    const int last = PosRev(s);
       return substr( 0, last );    }

  /// Part of this after the last occurrence of s.
  /// NOTE(review): if PosRev(s) yields -1 the start passed to substr is
  /// s.isize() - 1; what happens then depends on substr -- confirm.
  inline String RevAfter( const String& s ) const
  {    const int last = PosRev(s);
       return substr( last + s.isize( ), -1 );    }

  ///Return the part of this after s, or all of it if s not found.
  String SafeAfter( const String& s ) const;

  /// Equivalent to After(s1).Before(s2).
  String Between( const String& s1, const String& s2 ) const
  {    return After( s1 ).Before( s2 );    }

  /// ReplaceBy( from, to ): Find the first occurrence of "from" in *this
  /// (which must exist).  Replace it by "to".

  void ReplaceBy( const String& from, const String& to );

  /// GlobalReplaceBy( from, to ): Replace each instance (zero or more) of "from"
  /// in *this by "to".  If "from" is a subset of "to", don't do more than once,
  /// to avoid infinite loop.

  void GlobalReplaceBy( const String& from, const String& to );

  /// Return the first position of a given string in *this, or else -1 if
  /// it doesn't appear.

  int Position( const char* x ) const;
  int Position( const String& s ) const;

  /// Return last position of a given string in *this, or else -1 if it
  /// doesn't appear.

  int PosRev( const String& s ) const;

  /// only look for "x" up to position "endSearchAt".
  int Position( const String& x, const int &endSearchAt ) const;

  /// Freq: count occurrences of a given string in *this.

  int Freq( const String& s );

  ///  start search after a given position
  int PositionAfter( const String& x, const int &startSearchAt ) const;

  /// IsInt.  Check if Int( ) will work.  Empty strings will pass.

  Bool IsInt( ) const;

  /// Int.  Convert to a longlong, or exit with error if it can't be done.
  /// Empty strings will be converted to zero.  Terminal K, M, or G will be
  /// interpreted as multiplier (1,000 or 1,000,000 or 1,000,000,000).
  longlong Int( ) const;

  /// Check if Double( ) will work.  Empty strings will pass.
  Bool IsDouble( ) const;

  /// Convert to a double, or exit with error if it cannot be done.
  /// Empty strings will be converted to zero.
  double Double() const;

  /// Check if Bool( ) will work.  Empty strings will pass.
  Bool IsBool( ) const;

  /// Convert to a bool, or exit with error if it cannot be done.
  /// Empty strings will be converted to False
  /// We cannot call this function Bool because it clashes with the typedef.
  Bool ToBool() const;

  ///Convert to lowercase.
  void ToLower();
  ///Convert to uppercase.
  void ToUpper();


  /// Evaluate as an integer arithmetic expression containing "()+-/*[0-9]"
  /// Return a string containing the integer value of the expression, or
  /// an empty string on error.

  String Evaluate(const bool verbose = false) const;

 private:

  /// Helper declared alongside Evaluate(); reports the start and length of
  /// two operands through its reference parameters.
  /// NOTE(review): exact contract is in the definition, not visible here.
  bool FindOperands_( const int operator_pos,
                      int& first_start, int& first_length,
                      int& second_start, int& second_length,
                      const bool verbose = false ) const;

 public:
  /// Methods needed for Feudal vectors of Strings.

  /// Pointer to the characters (same as c_str()).
  const char* StartOfDynamicData( ) const { return data(); }
  /// size() + 1; presumably the extra byte is the terminating NUL.
  longlong    SizeOfDynamicData( )  const { return size() + 1; }
  ///Strings never have any extra capacity, so this returns 0.
  longlong    ExtraSpaceForDynamicData() const { return 0; }

  /// Strings report no static data: null pointer, zero size.
  const char* StartOfStaticData( ) const { return 0; }
  const int   SizeOfStaticData( )  const { return 0; }

  /// Move data_ptr_ by the distance between old_start and new_start.
  /// NOTE(review): presumably called when the block holding externally
  /// managed characters is relocated -- confirm with the feudal code.
  void ShiftStartOfDynamicData( char * const new_start, char * const old_start )
  {
    data_ptr_ += new_start - old_start;
  }

  /// Point this String at characters it does not own.
  /// The stored length is size_of_dynamic_data - 1 (mirroring
  /// SizeOfDynamicData(), which reports size() + 1) with the top bit set to
  /// mark the String as not self-owned.  start_of_static_data and
  /// extra_space_for_dynamic_data are ignored.  Nothing is freed here.
  void SetExternal( const char * const start_of_static_data,
                    char * const start_of_dynamic_data,
                    int size_of_dynamic_data,
                    int extra_space_for_dynamic_data )
  {
    length_ = (size_of_dynamic_data - 1) | TopBit32;
    data_ptr_ = start_of_dynamic_data;
  }

  /// Release heap storage if this String owns any, then become empty.
  void Reinitialize()
  {
    if ( SelfOwned() && length_ >= cutoff_length )
      delete [] data_ptr_;
    length_ = 0;
    // With length_ == 0 the String is self-owned and short, so data() now
    // refers to the in-object buffer.  Terminate it so that c_str() yields
    // "" instead of whatever characters the buffer held before.
    start_of_buffer_[0] = 0;
  }

  /// Become empty WITHOUT releasing any memory.
  void Blank()
  {
    length_ = 0;
    // Same reasoning as in Reinitialize(): keep c_str() consistent with
    // size() == 0.
    start_of_buffer_[0] = 0;
  }

  /// Exchange the complete state (length/ownership word, in-object buffer,
  /// data pointer) with other.  Safe when other is *this.
  void Swap( String &other )
  {
    std::swap( length_, other.length_ );
    // Element-wise swap of the in-object buffers; unlike memcpy this is
    // well-defined even when both operands are the same object.
    std::swap_ranges( start_of_buffer_,
                      start_of_buffer_ + sizeof( start_of_buffer_ ),
                      other.start_of_buffer_ );
    std::swap( data_ptr_, other.data_ptr_ );
  }
};

/// ToString: build a String from a numeric value.  Defined elsewhere.
/// The floating-point overloads take a precision argument that defaults
/// to 2.
/// NOTE(review): precision is presumably the number of digits after the
/// decimal point -- confirm against the definitions.
String ToString( int x ); 
String ToString( unsigned int x ); 
String ToString( longlong x ); 
String ToString( ulonglong x ); 
String ToString( float x, int precision = 2 ); 
String ToString( double x, int precision = 2 ); 


/// Format a possibly large integer for human consumption by putting it
/// in terms of thousands, millions, or billions, with appropriate suffix.
/// NOTE(review): the exact suffixes and rounding are determined by the
/// definition, which is not part of this header.
String ToStringAbbrev( const longlong x );

/// Transform a string to lowercase using isupper and tolower.
/// The argument is taken by const reference and the result is returned as
/// a new String.
/// Not safe for international strings, which is probably irrelevant.
String ToLower(const String & s);
/// Transform a string to uppercase using islower and toupper.
/// The argument is taken by const reference and the result is returned as
/// a new String.
/// Not safe for international strings, which is probably irrelevant.
String ToUpper(const String & s);

/// Whitespace helpers.  Defined elsewhere.
/// The two Delete* functions take their argument by non-const reference and
/// return nothing, so they presumably edit s in place -- confirm.
/// WhiteSpaceFree takes its argument by value and returns a String,
/// presumably the input with whitespace removed -- confirm.
void DeleteLeadingWhiteSpace( String& s );
void DeleteTrailingWhiteSpace( String& s );
String WhiteSpaceFree( String s );

/// Concatenation of two Strings.
/// The operator is inline to try to avoid the cost of constructing and
/// destroying temporaries (the local result and the return value).  With
/// STRING_FAST_EXECUTE the work is done right here (copy first, append
/// second); otherwise it is handed to the out-of-line operator_plus().

String operator_plus( const String& first, const String& second );

inline String operator+ ( const String& first, const String& second )
{
#ifdef STRING_FAST_EXECUTE
     String joined( first );
     joined += second;
     return joined;
#else
     return operator_plus( first, second );
#endif
}


/// Equality of two Strings; inline because it is short and used everywhere.
/// With STRING_FAST_EXECUTE the sizes are compared first and the characters
/// are then compared with strcmp; otherwise the out-of-line operator_eq()
/// does the work.

bool operator_eq ( const String& left, const String& right );

inline bool operator== ( const String& left, const String& right )
{
#ifdef STRING_FAST_EXECUTE
     if ( left.size() != right.size() )
          return false;
     return strcmp( left.data(), right.data() ) == 0;
#else
     return operator_eq( left, right );
#endif
}

// Comparisons against plain C strings (== and !=) are provided directly so
// that no String temporary has to be created for the const char * operand.

/// True when the characters of left match the C string right (strcmp).
inline bool operator== ( const String& left, const char * right )
{
     const int diff = strcmp( left.c_str(), right );
     return diff == 0;
}

/// True when the C string left matches the characters of right (strcmp).
inline bool operator== ( const char * left, const String & right )
{
     const int diff = strcmp( left, right.c_str() );
     return diff == 0;
}
/// True when the characters of left differ from the C string right.
inline bool operator!= ( const String& left, const char * right )
{
     return strcmp( left.c_str(), right ) != 0;
}

/// True when the C string left differs from the characters of right.
inline bool operator!= ( const char * left, const String & right )
{
     return strcmp( left, right.c_str() ) != 0;
}


bool operator_ne ( const String& left, const String& right );

/// Inequality of two Strings; inline because it is short and used all the
/// time.  With STRING_FAST_EXECUTE it is the negation of operator==;
/// otherwise the out-of-line operator_ne() does the work.
inline bool operator!= ( const String& left, const String& right )
{
#ifdef STRING_FAST_EXECUTE
     const bool same = ( left == right );
     return !same;
#else
     return operator_ne( left, right );
#endif
}

/// Ordering of two Strings; inline because it is short and used all the
/// time.  With STRING_FAST_EXECUTE the characters are compared with strcmp;
/// otherwise the out-of-line operator_lt() does the work.

bool operator_lt ( const String& left, const String& right );

inline bool operator< ( const String& left, const String& right )
{
#ifdef STRING_FAST_EXECUTE
     const int order = strcmp( left.data(), right.data() );
     return order < 0;
#else
     return operator_lt( left, right );
#endif
}
/// left > right is evaluated as right < left, so it follows whatever
/// ordering operator< implements.
inline bool operator> ( const String& left, const String& right )
{
     return operator<( right, left );
}

/// cmp_numeric: compare two strings, in such a way that they will be sorted
/// numerically if they contain integers.  For example, chr1 < chr2 < chr10.
/// NOTE(review): presumably returns True when s1 orders before s2, making
/// it usable as a sort predicate -- confirm against the definition.

Bool cmp_numeric( const String& s1, const String& s2 );

// BaseAlpha: convert a nonnegative integer into an alphabetic string, according to
// the order A, B, ..., Z, AA, AB, ..., etc.
// NOTE(review): which integer maps to "A" (0 or 1) is decided by the
// definition, which is not part of this header -- confirm before relying on it.

String BaseAlpha( int n );

   
#endif
