zorbastring.h

Go to the documentation of this file.
00001 /*
00002  * Copyright 2006-2008 The FLWOR Foundation.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  * 
00008  * http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 #ifndef ZORBA_STRING_API_H
00017 #define ZORBA_STRING_API_H
00018 
00019 #include <string>
00020 
00021 #include <zorba/config.h>
00022 
00023 namespace zorba {
00024 
00025 class xqpStringStore; // Forward declaration: this header refers to xqpStringStore only through pointers.
00026 
00027 /** \brief The Zorba String class.
00028  *
00029  * The interface of this class is similar to that of std::string. However, it contains Unicode 
00030  * characters encoded as UTF-8.
00031  * @see UTF-8 FAQ [http://unicode.org/faq/utf_bom.html#2].
00032  */
00033 class ZORBA_EXTERN_DECL String 
00034 {
00035 public:
00036   /** \brief Copy constructor.
00037    */
00038   String(const String& other);
00039 
00040   /** \brief Constructor that is used to construct Items in the Zorba engine itself.
00041    *
00042    * This constructor is for internal use only.
00043    */
00044   String(xqpStringStore* aString);
00045 
00046   /** \brief Constructor to construct a String from a const char*.
00047    *
00048    * @param aString the const char* to construct the String from.
00049    */
00050   String(const char* aString);
00051 
00052   /** \brief Constructor to construct a String from a std::string.
00053    *
00054    * @param aString the std::string to construct the String from.
00055    */
00056   String(const std::string& aString);
00057 
00058   /** \brief Destructor.
00059    */
00060   ~String();
00061 
00062   /** \brief Assignment operator.
00063    */
00064   const String& 
00065   operator =(const String& other);
00066 
00067   /** \brief Assignment operator that is used in the Zorba engine itself.
00068    *
00069    * This operator is for internal use only.
00070    */
00071   const String&
00072   operator =(xqpStringStore *other);
00073 
00074   /** \brief Returns a non-modifiable standard C character array version of the string.
00075    *
00076    * @return const char*  a const pointer to a UTF-8 encoded C string, identical to the current string. 
00077    *         The returned string is null-terminated.
00078    */
00079   const char*
00080   c_str() const;
00081 
00082   /** \brief Compares two strings.
00083    * @param aString the String to compare this String with.
00084    * @return -1 if this < aString, 0 if this == aString, 1 if this > aString.
00085    */
00086   int
00087   compare(const String& aString) const;
00088 
00089   /** \brief Compares two strings for equality.
00090    *
00091    * @return true if this == aString.
00092    * @see byteEqual().
00093    */
00094   bool
00095   equals(const String& aString) const;
00096 
00097   /** \brief Returns the number of characters in the string, not including any null-termination.
00098    *
00099    * @return The number of UTF-8 characters.
00100    * @see bytes(), empty().
00101    */
00102   size_t
00103   length() const;
00104 
00105   /** \brief Returns the number of bytes in the string, not including any null-termination.
00106    *
00107    * @return The number of bytes.
00108    * @see length(), empty().
00109    */
00110   size_t
00111   bytes() const;
00112 
00113   /** \brief True if the string has no characters.
00114    *
00115    * @return True if the string has no elements, false otherwise.
00116    * @see length(), bytes().
00117    */
00118   bool
00119   empty() const;
00120   /** \brief Equality operator; presumably equivalent to equals() -- confirm against the implementation. */
00121   bool
00122   operator==(const String& str) const;
00123   /** \brief Inequality operator; presumably the negation of operator== -- confirm against the implementation. */
00124   bool
00125   operator!=(const String& str) const;
00126 
00127   /** \brief Byte by byte comparison of two strings.
00128    * NOTE(review): aBytes is presumably the number of bytes of aString to compare -- confirm.
00129    */  
00130   bool
00131   byteEqual(const char* aString, unsigned int aBytes) const;
00132 
00133   /** \brief Locate in "this" the first occurrence of the "pattern" substring.
00134    *
00135    * @return The offset into this of the start of "pattern", or -1 if not found.
00136    */  
00137   int
00138   indexOf(const char* pattern) const;
00139 
00140   /** \brief Locate in "this" the last occurrence of the "pattern" substring.
00141    *
00142    * @return The offset into this of the start of "pattern", or -1 if not found.
00143    */  
00144   int
00145   lastIndexOf(const char* pattern) const;
00146 
00147   /** \brief Determine if "pattern" is a suffix of "this".
00148    *
00149    * @return True if "this" ends with "pattern".
00150    */  
00151   bool
00152   endsWith(const char* pattern) const;
00153 
00154   /** \brief Append characters onto "this".
00155    * @param suffix the characters to append.
00156    */  
00157   const String&
00158   append(const char* suffix);
00159 
00160   /** \brief Convert to uppercase.
00161    *
00162    * @return String the String converted to uppercase.
00163    */  
00164   const String&
00165   uppercase();
00166 
00167   /** \brief Convert to lowercase.
00168    *
00169    * @return String the String converted to lowercase.
00170    */  
00171   const String&
00172   lowercase();
00173 
00174   /** \brief Returns the value of "this" with whitespace normalized by stripping leading 
00175    *         and trailing whitespace and replacing sequences of one or more than one 
00176    *         whitespace character with a single space, \#x20.
00177    *
00178    * @note whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA).
00179    * @return String the whitespace normalized String.
00180    */  
00181   const String&
00182   normalizeSpace();
00183 
00184   /** \brief Removes leading and trailing whitespace.
00185    * NOTE(review): "start" is presumably the set of characters to strip and "len" its length -- confirm.
00186    * @note If called with trim(" \t\r\n", 4) it will strip leading and trailing whitespace.
00187    * Whitespace = " \t\r\n" meaning (\#x20) (\#x9) (\#xD) (\#xA).
00188    */  
00189   const String&
00190   trim(const char* start, int len);
00191 
00192   /** \brief Removes leading and trailing whitespace.
00193    *
00194    * @note Space = " " meaning (\#x20).
00195    */  
00196   const String&
00197   trim();
00198 
00199   /** \brief Escape all characters except those of the US-ASCII coded character set.
00200    *
00201    */
00202   const String&
00203   formatAsXML();
00204 
00205   /** \brief Escape all characters except printable characters of the US-ASCII coded character set, 
00206    *         specifically the octets ranging from 32 to 126 (decimal).
00207    *
00208    */
00209   const String&
00210   escapeHtmlUri();
00211 
00212   /** \brief Converts a String containing an IRI into a URI.
00213    *         see Section 3.1 [http://www.ietf.org/rfc/rfc3987.txt]
00214    *
00215    */
00216   const String&
00217   iriToUri();
00218 
00219   /** \brief Encodes reserved characters in a String that is intended to be used in the 
00220    *         path segment of a URI.
00221    *         see Section 2 [http://www.ietf.org/rfc/rfc3986.txt]
00222    *
00223    */
00224   const String&
00225   encodeForUri();
00226 
00227 private:
00228   friend class Unmarshaller;
00229   xqpStringStore *m_string; ///< Pointer to the internal xqpStringStore (only data member declared here).
00230 };
00231 /** \brief Stream insertion operator for String (presumably writes the string's UTF-8 content to os -- confirm). NOTE(review): std::ostream is not declared by a header visibly included here; consider <iosfwd>. */
00232 ZORBA_EXTERN_DECL
00233 std::ostream& operator <<(std::ostream& os, const String& str);
00234 
00235 } // namespace zorba
00236 
00237 #endif