zorbastring.h

Go to the documentation of this file.
00001 /*
00002  * Copyright 2006-2008 The FLWOR Foundation.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  * 
00008  * http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 #ifndef ZORBA_STRING_API_H
00017 #define ZORBA_STRING_API_H
00018 
00019 #include <string>
00020 
00021 #include <zorba/config.h>
00022 
00023 namespace zorba {
00024 
00025 class xqpStringStore;
00026 
00027 /** \brief The Zorba String class.
00028  *
00029  * The interface of this class is similar to that of std::string. However, it contains Unicode 
00030  * characters encoded as UTF-8.
00031  * @see UTF-8 FAQ: http://unicode.org/faq/utf_bom.html#2
00032  */
00033 class ZORBA_EXTERN_DECL String 
00034 {
00035 public:
00036   /**
00037    * \brief Empty default constructor
00038    */
00039   String();
00040 
00041   /** \brief Copy constructor
00042    */
00043   String(const String& other);
00044 
00045   /** \brief Constructor that is used to construct Items in the Zorba engine itself.
00046    *
00047    * This constructor is for internal use only.
00048    */
00049   String(xqpStringStore* aString);
00050 
00051   /** \brief Constructor to construct a String from a const char*.
00052    *
00053    * @param aString the const char* to construct the String from.
00054    */
00055   String(const char* aString);
00056 
00057   /** \brief Constructor to construct a String from a std::string.
00058    *
00059    * @param aString the std::string to construct the String from.
00060    */
00061   String(const std::string& aString);
00062 
00063   /** \brief Destructor
00064    */
00065   ~String();
00066 
00067   /** \brief Assignment operator
00068    */
00069   const String& 
00070   operator =(const String& other);
00071 
00072   /** \brief Assignment operator that is used in the Zorba engine itself.
00073    *
00074    * This operator is for internal use only.
00075    */
00076   const String&
00077   operator =(xqpStringStore *other);
00078 
00079   /** \brief Returns a non-modifiable standard C character array version of the string.
00080    *
00081    * @return const char*  a const pointer to a UTF-8 encoded C string, identical to the current string. 
00082    *         The returned string is null-terminated.
00083    */
00084   const char*
00085   c_str() const;
00086 
00087   /** \brief Compares two strings.
00088    *
00089    * @return -1 if this < aString, 0 if this == aString, 1 if this > aString.
00090    */
00091   int
00092   compare(const String& aString) const;
00093 
00094   /** \brief Compares two strings for equality.
00095    *
00096    * @return true if this == aString.
00097    * @see byteEqual().
00098    */
00099   bool
00100   equals(const String& aString) const;
00101 
00102   /** \brief Returns the number of characters in the string, not including any null-termination.
00103    *
00104    * @return The number of UTF-8 characters.
00105    * @see bytes(), empty().
00106    */
00107   size_t
00108   length() const;
00109 
00110   /** \brief Returns the number of bytes in the string, not including any null-termination.
00111    *
00112    * @return The number of bytes.
00113    * @see length(), empty().
00114    */
00115   size_t
00116   bytes() const;
00117 
00118   /** \brief True if the string has no characters.
00119    *
00120    * @return True if the string has no elements, false otherwise.
00121    * @see length(), bytes().
00122    */
00123   bool
00124   empty() const;
00125   /** \brief Equality operator. NOTE(review): presumably agrees with equals() -- confirm against the implementation. */
00126   bool
00127   operator==(const String& str) const;
00128   /** \brief Inequality operator. NOTE(review): presumably the negation of operator== -- confirm. */
00129   bool
00130   operator!=(const String& str) const;
00131   /** \brief Less-than operator. NOTE(review): presumably ordered as by compare() -- confirm. */
00132   bool
00133   operator<(const String& str) const;
00134   /** \brief Less-than-or-equal operator. NOTE(review): presumably ordered as by compare() -- confirm. */
00135   bool
00136   operator<=(const String& str) const;
00137   /** \brief Greater-than operator. NOTE(review): presumably ordered as by compare() -- confirm. */
00138   bool
00139   operator>(const String& str) const;
00140   /** \brief Greater-than-or-equal operator. NOTE(review): presumably ordered as by compare() -- confirm. */
00141   bool
00142   operator>=(const String& str) const;
00143 
00144   /** \brief Byte by byte comparison of two strings.
00145    * NOTE(review): aBytes is presumably the number of bytes of aString to compare -- confirm.
00146    */  
00147   bool
00148   byteEqual(const char* aString, unsigned int aBytes) const;
00149 
00150   /** \brief Locate in "this" the first occurrence of the "pattern" substring.
00151    *
00152    * @return The offset into this of the start of "pattern", or -1 if not found.
00153    */  
00154   int
00155   indexOf(const char* pattern) const;
00156 
00157   /** \brief Locate in "this" the last occurrence of the "pattern" substring.
00158    *
00159    * @return The offset into this of the start of "pattern", or -1 if not found.
00160    */  
00161   int
00162   lastIndexOf(const char* pattern) const;
00163 
00164   /** \brief Determine if "pattern" is a suffix of "this".
00165    *
00166    * @return True if "this" ends with "pattern".
00167    */  
00168   bool
00169   endsWith(const char* pattern) const;
00170 
00171   /** \brief Append characters onto "this".
00172    * @param suffix the characters to append.
00173    */  
00174   const String&
00175   append(const char* suffix);
00176 
00177   /** \brief Convert to uppercase.
00178    *
00179    * @return String the String converted to uppercase.
00180    */  
00181   const String&
00182   uppercase();
00183 
00184   /** \brief Convert to lowercase.
00185    *
00186    * @return String the String converted to lowercase.
00187    */  
00188   const String&
00189   lowercase();
00190 
00191   /** \brief Returns the value of "this" with whitespace normalized by stripping leading 
00192    *         and trailing whitespace and replacing sequences of one or more than one 
00193    *         whitespace character with a single space, \#x20.
00194    *
00195    * @note whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA).
00196    * @return String the whitespace normalized String.
00197    */  
00198   const String&
00199   normalizeSpace();
00200 
00201   /** \brief Removes leading and trailing whitespace.
00202    * NOTE(review): going by the example below, start looks like the characters to strip and len their count -- confirm.
00203    * @note If called with trim(" \t\r\n", 4) it will strip leading and trailing whitespaces.
00204    * Whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA).
00205    */  
00206   const String&
00207   trim(const char* start, int len);
00208 
00209   /** \brief Removes leading and trailing whitespace.
00210    *
00211    * @note Space = " " meaning (\#x20).
00212    */  
00213   const String&
00214   trim();
00215 
00216   /** \brief Escape all characters except those of the US-ASCII coded character set.
00217    *
00218    */
00219   const String&
00220   formatAsXML();
00221 
00222   /** \brief Escape all characters except printable characters of the US-ASCII coded character set, 
00223    *         specifically the octets ranging from 32 to 126 (decimal).
00224    *
00225    */
00226   const String&
00227   escapeHtmlUri();
00228 
00229   /** \brief Converts a String containing an IRI into a URI.
00230    *         see Section 3.1 [http://www.ietf.org/rfc/rfc3987.txt]
00231    *
00232    */
00233   const String&
00234   iriToUri();
00235 
00236   /** \brief Encodes reserved characters in a String that is intended to be used in the 
00237    *         path segment of a URI.
00238    *         see Section 2 [http://www.ietf.org/rfc/rfc3986.txt]
00239    *
00240    */
00241   const String&
00242   encodeForUri();
00243 
00244 private:
00245   friend class Unmarshaller; // grants Unmarshaller access to the private m_string member
00246   xqpStringStore *m_string; // internal string store; ownership/lifetime is not visible in this header
00247 };
00248 /** \brief Stream insertion operator for String. NOTE(review): presumably writes the string's contents to os and returns os -- confirm against the implementation. */
00249 ZORBA_EXTERN_DECL
00250 std::ostream& operator <<(std::ostream& os, const String& str);
00251 
00252 } // namespace zorba
00253 
00254 #endif