> The XQuery Processor
00001 /* 00002 * Copyright 2006-2008 The FLWOR Foundation. 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http://www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 */ 00016 #ifndef ZORBA_STRING_API_H 00017 #define ZORBA_STRING_API_H 00018 00019 #include <string> 00020 00021 #include <zorba/config.h> 00022 00023 namespace zorba { 00024 00025 class xqpStringStore; 00026 00027 /** \brief The Zorba String class. 00028 * 00029 * The interface of this class is similar to that of std::string. However, it contains Unicode 00030 * characters encoded as UTF-8. 00031 * UTF-8 FAQ @see [http://unicode.org/faq/utf_bom.html#2]. 00032 */ 00033 class ZORBA_EXTERN_DECL String 00034 { 00035 public: 00036 /** \brief Copy constructor 00037 */ 00038 String(const String& other); 00039 00040 /** \brief Constructor that is used to construct Items in the Zorba engine itself. 00041 * 00042 * This constructor is for internal use only. 00043 */ 00044 String(xqpStringStore* aString); 00045 00046 /** \brief Constructor to construct a String from a const char*. 00047 * 00048 * @param aString the const char* to construct the String from. 00049 */ 00050 String(const char* aString); 00051 00052 /** \brief Constructor to construct a String from a std::string. 00053 * 00054 * @param aString the std::string to construct the String from. 00055 */ 00056 String(const std::string& aString); 00057 00058 /** \brief Destructor 00059 */ 00060 ~String(); 00061 00062 /** \brief Assignment operator 00063 */ 00064 const String& 00065 operator =(const String& other); 00066 00067 /** \brief Assingment operator that is used in the Zorba engine itself. 00068 * 00069 * This operator is for internal use only. 00070 */ 00071 const String& 00072 operator =(xqpStringStore *other); 00073 00074 /** \brief Returns a non-modifiable standard C character array version of the string. 00075 * 00076 * @return const char* a const pointer to a UTF-8 encoded C string, identical to the current string. 00077 * The returned string is null-terminated. 00078 */ 00079 const char* 00080 c_str() const; 00081 00082 /** \brief Compares two strings. 00083 * 00084 * @return -1 if this < aString, 0 if this == aString, 1 if this > aString. 00085 */ 00086 int 00087 compare(const String& aString) const; 00088 00089 /** \brief Compares two strings. 00090 * 00091 * @return true if this == aString. 00092 * @see byteEqual(). 00093 */ 00094 bool 00095 equals(const String& aString) const; 00096 00097 /** \brief Returns the number of characters in the string, not including any null-termination. 00098 * 00099 * @return The number of UTF-8 characters. 00100 * @see bytes(), empty(). 00101 */ 00102 size_t 00103 length() const; 00104 00105 /** \brief Returns the number of bytes in the string, not including any null-termination. 00106 * 00107 * @return The number of bytes. 00108 * @see length(), empty(). 00109 */ 00110 size_t 00111 bytes() const; 00112 00113 /** \brief True if the string has no characters. 00114 * 00115 * @return True if the string has no elements, false otherwise. 00116 * @see length(), bytes(). 00117 */ 00118 bool 00119 empty() const; 00120 00121 bool 00122 operator==(const String& str) const; 00123 00124 bool 00125 operator!=(const String& str) const; 00126 00127 /** \brief Byte by byte comparison of two strings. 00128 * 00129 */ 00130 bool 00131 byteEqual(const char* aString, unsigned int aBytes) const; 00132 00133 /** \brief Locate in "this" the first occurrence of the "pattern" substring. 00134 * 00135 * @return The offset into this of the start of "pattern", or -1 if not found. 00136 */ 00137 int 00138 indexOf(const char* pattern) const; 00139 00140 /** \brief Locate in "this" the last occurrence of the "pattern" substring. 00141 * 00142 * @return The offset into this of the start of "pattern", or -1 if not found. 00143 */ 00144 int 00145 lastIndexOf(const char* pattern) const; 00146 00147 /** \brief Determine if "pattern" is a suffix of "this". 00148 * 00149 * @return True if "this" ends with "pattern". 00150 */ 00151 bool 00152 endsWith(const char* pattern) const; 00153 00154 /** \brief Append characters onto "this". 00155 * 00156 */ 00157 const String& 00158 append(const char* suffix); 00159 00160 /** \brief Convert to uppercase. 00161 * 00162 * @return String the String convert to uppercase. 00163 */ 00164 const String& 00165 uppercase(); 00166 00167 /** \brief Convert to lowercase. 00168 * 00169 * @return String the String convert to lowercase. 00170 */ 00171 const String& 00172 lowercase(); 00173 00174 /** \brief Returns the value of "this" with whitespace normalized by stripping leading 00175 * and trailing whitespace and replacing sequences of one or more than one 00176 * whitespace character with a single space, \#x20. 00177 * 00178 * @note whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA). 00179 * @return String the whitespace normalized String. 00180 */ 00181 const String& 00182 normalizeSpace(); 00183 00184 /** \brief Removes leading and trailing whitespace. 00185 * 00186 * @note If called with trim(" \t\r\n", 4) it will strip leading and trailing whitespaces. 00187 * Whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA). 00188 */ 00189 const String& 00190 trim(const char* start, int len); 00191 00192 /** \brief Removes leading and trailing whitespace. 00193 * 00194 * @note Space = " " meaning (\#x20). 00195 */ 00196 const String& 00197 trim(); 00198 00199 /** \brief Escape all characters except US-ASCII coded character set. 00200 * 00201 */ 00202 const String& 00203 formatAsXML(); 00204 00205 /** \brief Escape all characters except printable characters of the US-ASCII coded character set, 00206 * specifically the octets ranging from 32 to 126 (decimal). 00207 * 00208 */ 00209 const String& 00210 escapeHtmlUri(); 00211 00212 /** \brief Converts an String containing an IRI into a URI. 00213 * see Section 3.1 [http://www.ietf.org/rfc/rfc3987.txt] 00214 * 00215 */ 00216 const String& 00217 iriToUri(); 00218 00219 /** \brief Encodes reserved characters in an String that is intended to be used in the 00220 * path segment of a URI. 00221 * see Section 2 [http://www.ietf.org/rfc/rfc3986.txt] 00222 * 00223 */ 00224 const String& 00225 encodeForUri(); 00226 00227 private: 00228 friend class Unmarshaller; 00229 xqpStringStore *m_string; 00230 }; 00231 00232 ZORBA_EXTERN_DECL 00233 std::ostream& operator <<(std::ostream& os, const String& str); 00234 00235 } // namespace zorba 00236 00237 #endif