> The XQuery Processor
00001 /* 00002 * Copyright 2006-2008 The FLWOR Foundation. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http://www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 #ifndef ZORBA_STRING_API_H 00017 #define ZORBA_STRING_API_H 00018 00019 #include <string> 00020 00021 #include <zorba/config.h> 00022 00023 namespace zorba { 00024 00025 class xqpStringStore; 00026 00027 /** \brief The Zorba String class. 00028 * 00029 * The interface of this class is similar to that of std::string. However, it contains Unicode 00030 * characters encoded as UTF-8. 00031 * UTF-8 FAQ @see [http://unicode.org/faq/utf_bom.html#2]. 00032 */ 00033 class ZORBA_EXTERN_DECL String 00034 { 00035 public: 00036 /** 00037 * \brief Empty default constructor 00038 */ 00039 String(); 00040 00041 /** \brief Copy constructor 00042 */ 00043 String(const String& other); 00044 00045 /** \brief Constructor that is used to construct Items in the Zorba engine itself. 00046 * 00047 * This constructor is for internal use only. 00048 */ 00049 String(xqpStringStore* aString); 00050 00051 /** \brief Constructor to construct a String from a const char*. 00052 * 00053 * @param aString the const char* to construct the String from. 00054 */ 00055 String(const char* aString); 00056 00057 /** \brief Constructor to construct a String from a std::string. 00058 * 00059 * @param aString the std::string to construct the String from. 00060 */ 00061 String(const std::string& aString); 00062 00063 /** \brief Destructor 00064 */ 00065 ~String(); 00066 00067 /** \brief Assignment operator 00068 */ 00069 const String& 00070 operator =(const String& other); 00071 00072 /** \brief Assingment operator that is used in the Zorba engine itself. 00073 * 00074 * This operator is for internal use only. 00075 */ 00076 const String& 00077 operator =(xqpStringStore *other); 00078 00079 /** \brief Returns a non-modifiable standard C character array version of the string. 00080 * 00081 * @return const char* a const pointer to a UTF-8 encoded C string, identical to the current string. 00082 * The returned string is null-terminated. 00083 */ 00084 const char* 00085 c_str() const; 00086 00087 /** \brief Compares two strings. 00088 * 00089 * @return -1 if this < aString, 0 if this == aString, 1 if this > aString. 00090 */ 00091 int 00092 compare(const String& aString) const; 00093 00094 /** \brief Compares two strings. 00095 * 00096 * @return true if this == aString. 00097 * @see byteEqual(). 00098 */ 00099 bool 00100 equals(const String& aString) const; 00101 00102 /** \brief Returns the number of characters in the string, not including any null-termination. 00103 * 00104 * @return The number of UTF-8 characters. 00105 * @see bytes(), empty(). 00106 */ 00107 size_t 00108 length() const; 00109 00110 /** \brief Returns the number of bytes in the string, not including any null-termination. 00111 * 00112 * @return The number of bytes. 00113 * @see length(), empty(). 00114 */ 00115 size_t 00116 bytes() const; 00117 00118 /** \brief True if the string has no characters. 00119 * 00120 * @return True if the string has no elements, false otherwise. 00121 * @see length(), bytes(). 00122 */ 00123 bool 00124 empty() const; 00125 00126 bool 00127 operator==(const String& str) const; 00128 00129 bool 00130 operator!=(const String& str) const; 00131 00132 bool 00133 operator<(const String& str) const; 00134 00135 bool 00136 operator<=(const String& str) const; 00137 00138 bool 00139 operator>(const String& str) const; 00140 00141 bool 00142 operator>=(const String& str) const; 00143 00144 /** \brief Byte by byte comparison of two strings. 00145 * 00146 */ 00147 bool 00148 byteEqual(const char* aString, unsigned int aBytes) const; 00149 00150 /** \brief Locate in "this" the first occurrence of the "pattern" substring. 00151 * 00152 * @return The offset into this of the start of "pattern", or -1 if not found. 00153 */ 00154 int 00155 indexOf(const char* pattern) const; 00156 00157 /** \brief Locate in "this" the last occurrence of the "pattern" substring. 00158 * 00159 * @return The offset into this of the start of "pattern", or -1 if not found. 00160 */ 00161 int 00162 lastIndexOf(const char* pattern) const; 00163 00164 /** \brief Determine if "pattern" is a suffix of "this". 00165 * 00166 * @return True if "this" ends with "pattern". 00167 */ 00168 bool 00169 endsWith(const char* pattern) const; 00170 00171 /** \brief Append characters onto "this". 00172 * 00173 */ 00174 const String& 00175 append(const char* suffix); 00176 00177 /** \brief Convert to uppercase. 00178 * 00179 * @return String the String convert to uppercase. 00180 */ 00181 const String& 00182 uppercase(); 00183 00184 /** \brief Convert to lowercase. 00185 * 00186 * @return String the String convert to lowercase. 00187 */ 00188 const String& 00189 lowercase(); 00190 00191 /** \brief Returns the value of "this" with whitespace normalized by stripping leading 00192 * and trailing whitespace and replacing sequences of one or more than one 00193 * whitespace character with a single space, \#x20. 00194 * 00195 * @note whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA). 00196 * @return String the whitespace normalized String. 00197 */ 00198 const String& 00199 normalizeSpace(); 00200 00201 /** \brief Removes leading and trailing whitespace. 00202 * 00203 * @note If called with trim(" \t\r\n", 4) it will strip leading and trailing whitespaces. 00204 * Whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA). 00205 */ 00206 const String& 00207 trim(const char* start, int len); 00208 00209 /** \brief Removes leading and trailing whitespace. 00210 * 00211 * @note Space = " " meaning (\#x20). 00212 */ 00213 const String& 00214 trim(); 00215 00216 /** \brief Escape all characters except US-ASCII coded character set. 00217 * 00218 */ 00219 const String& 00220 formatAsXML(); 00221 00222 /** \brief Escape all characters except printable characters of the US-ASCII coded character set, 00223 * specifically the octets ranging from 32 to 126 (decimal). 00224 * 00225 */ 00226 const String& 00227 escapeHtmlUri(); 00228 00229 /** \brief Converts an String containing an IRI into a URI. 00230 * see Section 3.1 [http://www.ietf.org/rfc/rfc3987.txt] 00231 * 00232 */ 00233 const String& 00234 iriToUri(); 00235 00236 /** \brief Encodes reserved characters in an String that is intended to be used in the 00237 * path segment of a URI. 00238 * see Section 2 [http://www.ietf.org/rfc/rfc3986.txt] 00239 * 00240 */ 00241 const String& 00242 encodeForUri(); 00243 00244 private: 00245 friend class Unmarshaller; 00246 xqpStringStore *m_string; 00247 }; 00248 00249 ZORBA_EXTERN_DECL 00250 std::ostream& operator <<(std::ostream& os, const String& str); 00251 00252 } // namespace zorba 00253 00254 #endif