michael@0: /* michael@0: * Copyright (C) 2005 The Android Open Source Project michael@0: * michael@0: * Licensed under the Apache License, Version 2.0 (the "License"); michael@0: * you may not use this file except in compliance with the License. michael@0: * You may obtain a copy of the License at michael@0: * michael@0: * http://www.apache.org/licenses/LICENSE-2.0 michael@0: * michael@0: * Unless required by applicable law or agreed to in writing, software michael@0: * distributed under the License is distributed on an "AS IS" BASIS, michael@0: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. michael@0: * See the License for the specific language governing permissions and michael@0: * limitations under the License. michael@0: */ michael@0: michael@0: #ifndef ANDROID_STRING8_H michael@0: #define ANDROID_STRING8_H michael@0: michael@0: #include michael@0: michael@0: // Need this for the char16_t type; String8.h should not michael@0: // be depedent on the String16 class. michael@0: #include michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: // --------------------------------------------------------------------------- michael@0: michael@0: extern "C" { michael@0: michael@0: #if !defined(__cplusplus) || __cplusplus == 199711L // C or C++98 michael@0: typedef uint32_t char32_t; michael@0: #endif michael@0: michael@0: size_t strlen32(const char32_t *); michael@0: size_t strnlen32(const char32_t *, size_t); michael@0: michael@0: /* michael@0: * Returns the length of "src" when "src" is valid UTF-8 string. michael@0: * Returns 0 if src is NULL, 0-length string or non UTF-8 string. michael@0: * This function should be used to determine whether "src" is valid UTF-8 michael@0: * characters with valid unicode codepoints. "src" must be null-terminated. michael@0: * michael@0: * If you are going to use other GetUtf... functions defined in this header michael@0: * with string which may not be valid UTF-8 with valid codepoint (form 0 to michael@0: * 0x10FFFF), you should use this function before calling others, since the michael@0: * other functions do not check whether the string is valid UTF-8 or not. michael@0: * michael@0: * If you do not care whether "src" is valid UTF-8 or not, you should use michael@0: * strlen() as usual, which should be much faster. michael@0: */ michael@0: size_t utf8_length(const char *src); michael@0: michael@0: /* michael@0: * Returns the UTF-32 length of "src". michael@0: */ michael@0: size_t utf32_length(const char *src, size_t src_len); michael@0: michael@0: /* michael@0: * Returns the UTF-8 length of "src". michael@0: */ michael@0: size_t utf8_length_from_utf16(const char16_t *src, size_t src_len); michael@0: michael@0: /* michael@0: * Returns the UTF-8 length of "src". michael@0: */ michael@0: size_t utf8_length_from_utf32(const char32_t *src, size_t src_len); michael@0: michael@0: /* michael@0: * Returns the unicode value at "index". michael@0: * Returns -1 when the index is invalid (equals to or more than "src_len"). michael@0: * If returned value is positive, it is able to be converted to char32_t, which michael@0: * is unsigned. Then, if "next_index" is not NULL, the next index to be used is michael@0: * stored in "next_index". "next_index" can be NULL. michael@0: */ michael@0: int32_t utf32_at(const char *src, size_t src_len, michael@0: size_t index, size_t *next_index); michael@0: michael@0: /* michael@0: * Stores a UTF-32 string converted from "src" in "dst", if "dst_length" is not michael@0: * large enough to store the string, the part of the "src" string is stored michael@0: * into "dst". michael@0: * Returns the size actually used for storing the string. michael@0: * "dst" is not null-terminated when dst_len is fully used (like strncpy). michael@0: */ michael@0: size_t utf8_to_utf32(const char* src, size_t src_len, michael@0: char32_t* dst, size_t dst_len); michael@0: michael@0: /* michael@0: * Stores a UTF-8 string converted from "src" in "dst", if "dst_length" is not michael@0: * large enough to store the string, the part of the "src" string is stored michael@0: * into "dst" as much as possible. See the examples for more detail. michael@0: * Returns the size actually used for storing the string. michael@0: * dst" is not null-terminated when dst_len is fully used (like strncpy). michael@0: * michael@0: * Example 1 michael@0: * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) michael@0: * "src_len" == 2 michael@0: * "dst_len" >= 7 michael@0: * -> michael@0: * Returned value == 6 michael@0: * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0 michael@0: * (note that "dst" is null-terminated) michael@0: * michael@0: * Example 2 michael@0: * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) michael@0: * "src_len" == 2 michael@0: * "dst_len" == 5 michael@0: * -> michael@0: * Returned value == 3 michael@0: * "dst" becomes \xE3\x81\x82\0 michael@0: * (note that "dst" is null-terminated, but \u3044 is not stored in "dst" michael@0: * since "dst" does not have enough size to store the character) michael@0: * michael@0: * Example 3 michael@0: * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) michael@0: * "src_len" == 2 michael@0: * "dst_len" == 6 michael@0: * -> michael@0: * Returned value == 6 michael@0: * "dst" becomes \xE3\x81\x82\xE3\x81\x84 michael@0: * (note that "dst" is NOT null-terminated, like strncpy) michael@0: */ michael@0: size_t utf32_to_utf8(const char32_t* src, size_t src_len, michael@0: char* dst, size_t dst_len); michael@0: michael@0: size_t utf16_to_utf8(const char16_t* src, size_t src_len, michael@0: char* dst, size_t dst_len); michael@0: michael@0: } michael@0: michael@0: // --------------------------------------------------------------------------- michael@0: michael@0: namespace android { michael@0: michael@0: class TextOutput; michael@0: michael@0: //! This is a string holding UTF-8 characters. Does not allow the value more michael@0: // than 0x10FFFF, which is not valid unicode codepoint. michael@0: class String8 michael@0: { michael@0: public: michael@0: String8(); michael@0: String8(const String8& o); michael@0: explicit String8(const char* o); michael@0: explicit String8(const char* o, size_t numChars); michael@0: michael@0: explicit String8(const String16& o); michael@0: explicit String8(const char16_t* o); michael@0: explicit String8(const char16_t* o, size_t numChars); michael@0: explicit String8(const char32_t* o); michael@0: explicit String8(const char32_t* o, size_t numChars); michael@0: ~String8(); michael@0: michael@0: inline const char* string() const; michael@0: inline size_t size() const; michael@0: inline size_t length() const; michael@0: inline size_t bytes() const; michael@0: michael@0: inline const SharedBuffer* sharedBuffer() const; michael@0: michael@0: void setTo(const String8& other); michael@0: status_t setTo(const char* other); michael@0: status_t setTo(const char* other, size_t numChars); michael@0: status_t setTo(const char16_t* other, size_t numChars); michael@0: status_t setTo(const char32_t* other, michael@0: size_t length); michael@0: michael@0: status_t append(const String8& other); michael@0: status_t append(const char* other); michael@0: status_t append(const char* other, size_t numChars); michael@0: michael@0: status_t appendFormat(const char* fmt, ...) michael@0: __attribute__((format (printf, 2, 3))); michael@0: michael@0: // Note that this function takes O(N) time to calculate the value. michael@0: // No cache value is stored. michael@0: size_t getUtf32Length() const; michael@0: int32_t getUtf32At(size_t index, michael@0: size_t *next_index) const; michael@0: size_t getUtf32(char32_t* dst, size_t dst_len) const; michael@0: michael@0: inline String8& operator=(const String8& other); michael@0: inline String8& operator=(const char* other); michael@0: michael@0: inline String8& operator+=(const String8& other); michael@0: inline String8 operator+(const String8& other) const; michael@0: michael@0: inline String8& operator+=(const char* other); michael@0: inline String8 operator+(const char* other) const; michael@0: michael@0: inline int compare(const String8& other) const; michael@0: michael@0: inline bool operator<(const String8& other) const; michael@0: inline bool operator<=(const String8& other) const; michael@0: inline bool operator==(const String8& other) const; michael@0: inline bool operator!=(const String8& other) const; michael@0: inline bool operator>=(const String8& other) const; michael@0: inline bool operator>(const String8& other) const; michael@0: michael@0: inline bool operator<(const char* other) const; michael@0: inline bool operator<=(const char* other) const; michael@0: inline bool operator==(const char* other) const; michael@0: inline bool operator!=(const char* other) const; michael@0: inline bool operator>=(const char* other) const; michael@0: inline bool operator>(const char* other) const; michael@0: michael@0: inline operator const char*() const; michael@0: michael@0: char* lockBuffer(size_t size); michael@0: void unlockBuffer(); michael@0: status_t unlockBuffer(size_t size); michael@0: michael@0: // return the index of the first byte of other in this at or after michael@0: // start, or -1 if not found michael@0: ssize_t find(const char* other, size_t start = 0) const; michael@0: michael@0: void toLower(); michael@0: void toLower(size_t start, size_t numChars); michael@0: void toUpper(); michael@0: void toUpper(size_t start, size_t numChars); michael@0: michael@0: /* michael@0: * These methods operate on the string as if it were a path name. michael@0: */ michael@0: michael@0: /* michael@0: * Set the filename field to a specific value. michael@0: * michael@0: * Normalizes the filename, removing a trailing '/' if present. michael@0: */ michael@0: void setPathName(const char* name); michael@0: void setPathName(const char* name, size_t numChars); michael@0: michael@0: /* michael@0: * Get just the filename component. michael@0: * michael@0: * "/tmp/foo/bar.c" --> "bar.c" michael@0: */ michael@0: String8 getPathLeaf(void) const; michael@0: michael@0: /* michael@0: * Remove the last (file name) component, leaving just the directory michael@0: * name. michael@0: * michael@0: * "/tmp/foo/bar.c" --> "/tmp/foo" michael@0: * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX michael@0: * "bar.c" --> "" michael@0: */ michael@0: String8 getPathDir(void) const; michael@0: michael@0: /* michael@0: * Retrieve the front (root dir) component. Optionally also return the michael@0: * remaining components. michael@0: * michael@0: * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c") michael@0: * "/tmp" --> "tmp" (remain = "") michael@0: * "bar.c" --> "bar.c" (remain = "") michael@0: */ michael@0: String8 walkPath(String8* outRemains = NULL) const; michael@0: michael@0: /* michael@0: * Return the filename extension. This is the last '.' and up to michael@0: * four characters that follow it. The '.' is included in case we michael@0: * decide to expand our definition of what constitutes an extension. michael@0: * michael@0: * "/tmp/foo/bar.c" --> ".c" michael@0: * "/tmp" --> "" michael@0: * "/tmp/foo.bar/baz" --> "" michael@0: * "foo.jpeg" --> ".jpeg" michael@0: * "foo." --> "" michael@0: */ michael@0: String8 getPathExtension(void) const; michael@0: michael@0: /* michael@0: * Return the path without the extension. Rules for what constitutes michael@0: * an extension are described in the comment for getPathExtension(). michael@0: * michael@0: * "/tmp/foo/bar.c" --> "/tmp/foo/bar" michael@0: */ michael@0: String8 getBasePath(void) const; michael@0: michael@0: /* michael@0: * Add a component to the pathname. We guarantee that there is michael@0: * exactly one path separator between the old path and the new. michael@0: * If there is no existing name, we just copy the new name in. michael@0: * michael@0: * If leaf is a fully qualified path (i.e. starts with '/', it michael@0: * replaces whatever was there before. michael@0: */ michael@0: String8& appendPath(const char* leaf); michael@0: String8& appendPath(const String8& leaf) { return appendPath(leaf.string()); } michael@0: michael@0: /* michael@0: * Like appendPath(), but does not affect this string. Returns a new one instead. michael@0: */ michael@0: String8 appendPathCopy(const char* leaf) const michael@0: { String8 p(*this); p.appendPath(leaf); return p; } michael@0: String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); } michael@0: michael@0: /* michael@0: * Converts all separators in this string to /, the default path separator. michael@0: * michael@0: * If the default OS separator is backslash, this converts all michael@0: * backslashes to slashes, in-place. Otherwise it does nothing. michael@0: * Returns self. michael@0: */ michael@0: String8& convertToResPath(); michael@0: michael@0: private: michael@0: status_t real_append(const char* other, size_t numChars); michael@0: char* find_extension(void) const; michael@0: michael@0: const char* mString; michael@0: }; michael@0: michael@0: TextOutput& operator<<(TextOutput& to, const String16& val); michael@0: michael@0: // --------------------------------------------------------------------------- michael@0: // No user servicable parts below. michael@0: michael@0: inline int compare_type(const String8& lhs, const String8& rhs) michael@0: { michael@0: return lhs.compare(rhs); michael@0: } michael@0: michael@0: inline int strictly_order_type(const String8& lhs, const String8& rhs) michael@0: { michael@0: return compare_type(lhs, rhs) < 0; michael@0: } michael@0: michael@0: inline const char* String8::string() const michael@0: { michael@0: return mString; michael@0: } michael@0: michael@0: inline size_t String8::length() const michael@0: { michael@0: return SharedBuffer::sizeFromData(mString)-1; michael@0: } michael@0: michael@0: inline size_t String8::size() const michael@0: { michael@0: return length(); michael@0: } michael@0: michael@0: inline size_t String8::bytes() const michael@0: { michael@0: return SharedBuffer::sizeFromData(mString)-1; michael@0: } michael@0: michael@0: inline const SharedBuffer* String8::sharedBuffer() const michael@0: { michael@0: return SharedBuffer::bufferFromData(mString); michael@0: } michael@0: michael@0: inline String8& String8::operator=(const String8& other) michael@0: { michael@0: setTo(other); michael@0: return *this; michael@0: } michael@0: michael@0: inline String8& String8::operator=(const char* other) michael@0: { michael@0: setTo(other); michael@0: return *this; michael@0: } michael@0: michael@0: inline String8& String8::operator+=(const String8& other) michael@0: { michael@0: append(other); michael@0: return *this; michael@0: } michael@0: michael@0: inline String8 String8::operator+(const String8& other) const michael@0: { michael@0: String8 tmp(*this); michael@0: tmp += other; michael@0: return tmp; michael@0: } michael@0: michael@0: inline String8& String8::operator+=(const char* other) michael@0: { michael@0: append(other); michael@0: return *this; michael@0: } michael@0: michael@0: inline String8 String8::operator+(const char* other) const michael@0: { michael@0: String8 tmp(*this); michael@0: tmp += other; michael@0: return tmp; michael@0: } michael@0: michael@0: inline int String8::compare(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString); michael@0: } michael@0: michael@0: inline bool String8::operator<(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString) < 0; michael@0: } michael@0: michael@0: inline bool String8::operator<=(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString) <= 0; michael@0: } michael@0: michael@0: inline bool String8::operator==(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString) == 0; michael@0: } michael@0: michael@0: inline bool String8::operator!=(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString) != 0; michael@0: } michael@0: michael@0: inline bool String8::operator>=(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString) >= 0; michael@0: } michael@0: michael@0: inline bool String8::operator>(const String8& other) const michael@0: { michael@0: return strcmp(mString, other.mString) > 0; michael@0: } michael@0: michael@0: inline bool String8::operator<(const char* other) const michael@0: { michael@0: return strcmp(mString, other) < 0; michael@0: } michael@0: michael@0: inline bool String8::operator<=(const char* other) const michael@0: { michael@0: return strcmp(mString, other) <= 0; michael@0: } michael@0: michael@0: inline bool String8::operator==(const char* other) const michael@0: { michael@0: return strcmp(mString, other) == 0; michael@0: } michael@0: michael@0: inline bool String8::operator!=(const char* other) const michael@0: { michael@0: return strcmp(mString, other) != 0; michael@0: } michael@0: michael@0: inline bool String8::operator>=(const char* other) const michael@0: { michael@0: return strcmp(mString, other) >= 0; michael@0: } michael@0: michael@0: inline bool String8::operator>(const char* other) const michael@0: { michael@0: return strcmp(mString, other) > 0; michael@0: } michael@0: michael@0: inline String8::operator const char*() const michael@0: { michael@0: return mString; michael@0: } michael@0: michael@0: } // namespace android michael@0: michael@0: // --------------------------------------------------------------------------- michael@0: michael@0: #endif // ANDROID_STRING8_H