|
1 /* |
|
2 * Copyright (C) 2005 The Android Open Source Project |
|
3 * |
|
4 * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 * you may not use this file except in compliance with the License. |
|
6 * You may obtain a copy of the License at |
|
7 * |
|
8 * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 * |
|
10 * Unless required by applicable law or agreed to in writing, software |
|
11 * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 * See the License for the specific language governing permissions and |
|
14 * limitations under the License. |
|
15 */ |
|
16 |
|
17 #ifndef ANDROID_STRING8_H |
|
18 #define ANDROID_STRING8_H |
|
19 |
|
20 #include <utils/Errors.h> |
|
21 |
|
22 // Need this for the char16_t type; String8.h should not |
|
23 // be dependent on the String16 class. |
|
24 #include <utils/String16.h> |
|
25 |
|
26 #include <stdint.h> |
|
27 #include <string.h> |
|
28 #include <sys/types.h> |
|
29 |
|
30 // --------------------------------------------------------------------------- |
|
31 |
|
extern "C" {

// C and C++98 have no built-in char32_t, so supply one for those dialects.
#if !defined(__cplusplus) || __cplusplus == 199711L
typedef uint32_t char32_t;
#endif

size_t strlen32(const char32_t* s);
size_t strnlen32(const char32_t* s, size_t maxlen);

/*
 * Validating length query for a null-terminated UTF-8 string.
 *
 * Returns the length of "src" when it is a valid UTF-8 string, and 0 when
 * "src" is NULL, empty, or not valid UTF-8.  Use it to check that "src"
 * consists of valid UTF-8 characters with valid unicode codepoints.
 *
 * The other GetUtf... functions declared in this header do not check whether
 * their input is valid UTF-8.  If a string may not be valid UTF-8 with valid
 * codepoints (from 0 to 0x10FFFF), call this function first.
 *
 * If validity does not matter, plain strlen() should be much faster.
 */
size_t utf8_length(const char* src);

/*
 * Returns the UTF-32 length of "src".
 */
size_t utf32_length(const char* src, size_t src_len);

/*
 * Returns the UTF-8 length of the UTF-16 string "src".
 */
size_t utf8_length_from_utf16(const char16_t* src, size_t src_len);

/*
 * Returns the UTF-8 length of the UTF-32 string "src".
 */
size_t utf8_length_from_utf32(const char32_t* src, size_t src_len);

/*
 * Returns the unicode value at "index".
 *
 * Returns -1 when "index" is invalid (equal to or greater than "src_len").
 * A positive return value can be converted to char32_t, which is unsigned.
 * In that case the index to use next is written to "next_index", unless
 * "next_index" is NULL (which is allowed).
 */
int32_t utf32_at(const char* src, size_t src_len,
                 size_t index, size_t* next_index);

/*
 * Converts the UTF-8 string "src" to UTF-32 and stores it in "dst".
 *
 * When "dst_len" is too small for the whole string, only part of "src" is
 * stored.  Returns the size actually used for storing the string.
 * "dst" is not null-terminated when "dst_len" is fully used (like strncpy).
 */
size_t utf8_to_utf32(const char* src, size_t src_len,
                     char32_t* dst, size_t dst_len);

/*
 * Converts the UTF-32 string "src" to UTF-8 and stores it in "dst".
 *
 * When "dst_len" is too small for the whole string, as much of "src" as
 * possible is stored; see the examples below.  Returns the size actually
 * used for storing the string.
 * "dst" is not null-terminated when "dst_len" is fully used (like strncpy).
 *
 * Example 1
 *   "src" == \u3042\u3044  (\xE3\x81\x82\xE3\x81\x84)
 *   "src_len" == 2
 *   "dst_len" >= 7
 *   ->
 *   Returned value == 6
 *   "dst" becomes \xE3\x81\x82\xE3\x81\x84\0
 *   ("dst" is null-terminated)
 *
 * Example 2
 *   "src" == \u3042\u3044  (\xE3\x81\x82\xE3\x81\x84)
 *   "src_len" == 2
 *   "dst_len" == 5
 *   ->
 *   Returned value == 3
 *   "dst" becomes \xE3\x81\x82\0
 *   ("dst" is null-terminated, but \u3044 is not stored because "dst" is
 *   too small to hold that character)
 *
 * Example 3
 *   "src" == \u3042\u3044  (\xE3\x81\x82\xE3\x81\x84)
 *   "src_len" == 2
 *   "dst_len" == 6
 *   ->
 *   Returned value == 6
 *   "dst" becomes \xE3\x81\x82\xE3\x81\x84
 *   ("dst" is NOT null-terminated, like strncpy)
 */
size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                     char* dst, size_t dst_len);

/*
 * UTF-16 input counterpart of utf32_to_utf8().
 * NOTE(review): presumably follows the same truncation and termination
 * rules as utf32_to_utf8() -- confirm against the implementation.
 */
size_t utf16_to_utf8(const char16_t* src, size_t src_len,
                     char* dst, size_t dst_len);

}
|
134 |
|
135 // --------------------------------------------------------------------------- |
|
136 |
|
137 namespace android { |
|
138 |
|
139 class TextOutput; |
|
140 |
|
141 //! This is a string holding UTF-8 characters. Does not allow the value more |
|
142 // than 0x10FFFF, which is not valid unicode codepoint. |
|
143 class String8 |
|
144 { |
|
145 public: |
|
146 String8(); |
|
147 String8(const String8& o); |
|
148 explicit String8(const char* o); |
|
149 explicit String8(const char* o, size_t numChars); |
|
150 |
|
151 explicit String8(const String16& o); |
|
152 explicit String8(const char16_t* o); |
|
153 explicit String8(const char16_t* o, size_t numChars); |
|
154 explicit String8(const char32_t* o); |
|
155 explicit String8(const char32_t* o, size_t numChars); |
|
156 ~String8(); |
|
157 |
|
158 inline const char* string() const; |
|
159 inline size_t size() const; |
|
160 inline size_t length() const; |
|
161 inline size_t bytes() const; |
|
162 |
|
163 inline const SharedBuffer* sharedBuffer() const; |
|
164 |
|
165 void setTo(const String8& other); |
|
166 status_t setTo(const char* other); |
|
167 status_t setTo(const char* other, size_t numChars); |
|
168 status_t setTo(const char16_t* other, size_t numChars); |
|
169 status_t setTo(const char32_t* other, |
|
170 size_t length); |
|
171 |
|
172 status_t append(const String8& other); |
|
173 status_t append(const char* other); |
|
174 status_t append(const char* other, size_t numChars); |
|
175 |
|
176 // Note that this function takes O(N) time to calculate the value. |
|
177 // No cache value is stored. |
|
178 size_t getUtf32Length() const; |
|
179 int32_t getUtf32At(size_t index, |
|
180 size_t *next_index) const; |
|
181 size_t getUtf32(char32_t* dst, size_t dst_len) const; |
|
182 |
|
183 inline String8& operator=(const String8& other); |
|
184 inline String8& operator=(const char* other); |
|
185 |
|
186 inline String8& operator+=(const String8& other); |
|
187 inline String8 operator+(const String8& other) const; |
|
188 |
|
189 inline String8& operator+=(const char* other); |
|
190 inline String8 operator+(const char* other) const; |
|
191 |
|
192 inline int compare(const String8& other) const; |
|
193 |
|
194 inline bool operator<(const String8& other) const; |
|
195 inline bool operator<=(const String8& other) const; |
|
196 inline bool operator==(const String8& other) const; |
|
197 inline bool operator!=(const String8& other) const; |
|
198 inline bool operator>=(const String8& other) const; |
|
199 inline bool operator>(const String8& other) const; |
|
200 |
|
201 inline bool operator<(const char* other) const; |
|
202 inline bool operator<=(const char* other) const; |
|
203 inline bool operator==(const char* other) const; |
|
204 inline bool operator!=(const char* other) const; |
|
205 inline bool operator>=(const char* other) const; |
|
206 inline bool operator>(const char* other) const; |
|
207 |
|
208 inline operator const char*() const; |
|
209 |
|
210 char* lockBuffer(size_t size); |
|
211 void unlockBuffer(); |
|
212 status_t unlockBuffer(size_t size); |
|
213 |
|
214 // return the index of the first byte of other in this at or after |
|
215 // start, or -1 if not found |
|
216 ssize_t find(const char* other, size_t start = 0) const; |
|
217 |
|
218 void toLower(); |
|
219 void toLower(size_t start, size_t numChars); |
|
220 void toUpper(); |
|
221 void toUpper(size_t start, size_t numChars); |
|
222 |
|
223 /* |
|
224 * These methods operate on the string as if it were a path name. |
|
225 */ |
|
226 |
|
227 /* |
|
228 * Set the filename field to a specific value. |
|
229 * |
|
230 * Normalizes the filename, removing a trailing '/' if present. |
|
231 */ |
|
232 void setPathName(const char* name); |
|
233 void setPathName(const char* name, size_t numChars); |
|
234 |
|
235 /* |
|
236 * Get just the filename component. |
|
237 * |
|
238 * "/tmp/foo/bar.c" --> "bar.c" |
|
239 */ |
|
240 String8 getPathLeaf(void) const; |
|
241 |
|
242 /* |
|
243 * Remove the last (file name) component, leaving just the directory |
|
244 * name. |
|
245 * |
|
246 * "/tmp/foo/bar.c" --> "/tmp/foo" |
|
247 * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX |
|
248 * "bar.c" --> "" |
|
249 */ |
|
250 String8 getPathDir(void) const; |
|
251 |
|
252 /* |
|
253 * Retrieve the front (root dir) component. Optionally also return the |
|
254 * remaining components. |
|
255 * |
|
256 * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c") |
|
257 * "/tmp" --> "tmp" (remain = "") |
|
258 * "bar.c" --> "bar.c" (remain = "") |
|
259 */ |
|
260 String8 walkPath(String8* outRemains = NULL) const; |
|
261 |
|
262 /* |
|
263 * Return the filename extension. This is the last '.' and up to |
|
264 * four characters that follow it. The '.' is included in case we |
|
265 * decide to expand our definition of what constitutes an extension. |
|
266 * |
|
267 * "/tmp/foo/bar.c" --> ".c" |
|
268 * "/tmp" --> "" |
|
269 * "/tmp/foo.bar/baz" --> "" |
|
270 * "foo.jpeg" --> ".jpeg" |
|
271 * "foo." --> "" |
|
272 */ |
|
273 String8 getPathExtension(void) const; |
|
274 |
|
275 /* |
|
276 * Return the path without the extension. Rules for what constitutes |
|
277 * an extension are described in the comment for getPathExtension(). |
|
278 * |
|
279 * "/tmp/foo/bar.c" --> "/tmp/foo/bar" |
|
280 */ |
|
281 String8 getBasePath(void) const; |
|
282 |
|
283 /* |
|
284 * Add a component to the pathname. We guarantee that there is |
|
285 * exactly one path separator between the old path and the new. |
|
286 * If there is no existing name, we just copy the new name in. |
|
287 * |
|
288 * If leaf is a fully qualified path (i.e. starts with '/', it |
|
289 * replaces whatever was there before. |
|
290 */ |
|
291 String8& appendPath(const char* leaf); |
|
292 String8& appendPath(const String8& leaf) { return appendPath(leaf.string()); } |
|
293 |
|
294 /* |
|
295 * Like appendPath(), but does not affect this string. Returns a new one instead. |
|
296 */ |
|
297 String8 appendPathCopy(const char* leaf) const |
|
298 { String8 p(*this); p.appendPath(leaf); return p; } |
|
299 String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); } |
|
300 |
|
301 /* |
|
302 * Converts all separators in this string to /, the default path separator. |
|
303 * |
|
304 * If the default OS separator is backslash, this converts all |
|
305 * backslashes to slashes, in-place. Otherwise it does nothing. |
|
306 * Returns self. |
|
307 */ |
|
308 String8& convertToResPath(); |
|
309 |
|
310 private: |
|
311 status_t real_append(const char* other, size_t numChars); |
|
312 char* find_extension(void) const; |
|
313 |
|
314 const char* mString; |
|
315 }; |
|
316 |
|
317 TextOutput& operator<<(TextOutput& to, const String16& val); |
|
318 |
|
319 // --------------------------------------------------------------------------- |
|
320 // No user serviceable parts below. |
|
321 |
|
322 inline int compare_type(const String8& lhs, const String8& rhs) |
|
323 { |
|
324 return lhs.compare(rhs); |
|
325 } |
|
326 |
|
327 inline int strictly_order_type(const String8& lhs, const String8& rhs) |
|
328 { |
|
329 return compare_type(lhs, rhs) < 0; |
|
330 } |
|
331 |
|
332 inline const char* String8::string() const |
|
333 { |
|
334 return mString; |
|
335 } |
|
336 |
|
337 inline size_t String8::length() const |
|
338 { |
|
339 return SharedBuffer::sizeFromData(mString)-1; |
|
340 } |
|
341 |
|
342 inline size_t String8::size() const |
|
343 { |
|
344 return length(); |
|
345 } |
|
346 |
|
347 inline size_t String8::bytes() const |
|
348 { |
|
349 return SharedBuffer::sizeFromData(mString)-1; |
|
350 } |
|
351 |
|
352 inline const SharedBuffer* String8::sharedBuffer() const |
|
353 { |
|
354 return SharedBuffer::bufferFromData(mString); |
|
355 } |
|
356 |
|
357 inline String8& String8::operator=(const String8& other) |
|
358 { |
|
359 setTo(other); |
|
360 return *this; |
|
361 } |
|
362 |
|
363 inline String8& String8::operator=(const char* other) |
|
364 { |
|
365 setTo(other); |
|
366 return *this; |
|
367 } |
|
368 |
|
369 inline String8& String8::operator+=(const String8& other) |
|
370 { |
|
371 append(other); |
|
372 return *this; |
|
373 } |
|
374 |
|
375 inline String8 String8::operator+(const String8& other) const |
|
376 { |
|
377 String8 tmp; |
|
378 tmp += other; |
|
379 return tmp; |
|
380 } |
|
381 |
|
382 inline String8& String8::operator+=(const char* other) |
|
383 { |
|
384 append(other); |
|
385 return *this; |
|
386 } |
|
387 |
|
388 inline String8 String8::operator+(const char* other) const |
|
389 { |
|
390 String8 tmp; |
|
391 tmp += other; |
|
392 return tmp; |
|
393 } |
|
394 |
|
395 inline int String8::compare(const String8& other) const |
|
396 { |
|
397 return strcmp(mString, other.mString); |
|
398 } |
|
399 |
|
400 inline bool String8::operator<(const String8& other) const |
|
401 { |
|
402 return strcmp(mString, other.mString) < 0; |
|
403 } |
|
404 |
|
405 inline bool String8::operator<=(const String8& other) const |
|
406 { |
|
407 return strcmp(mString, other.mString) <= 0; |
|
408 } |
|
409 |
|
410 inline bool String8::operator==(const String8& other) const |
|
411 { |
|
412 return strcmp(mString, other.mString) == 0; |
|
413 } |
|
414 |
|
415 inline bool String8::operator!=(const String8& other) const |
|
416 { |
|
417 return strcmp(mString, other.mString) != 0; |
|
418 } |
|
419 |
|
420 inline bool String8::operator>=(const String8& other) const |
|
421 { |
|
422 return strcmp(mString, other.mString) >= 0; |
|
423 } |
|
424 |
|
425 inline bool String8::operator>(const String8& other) const |
|
426 { |
|
427 return strcmp(mString, other.mString) > 0; |
|
428 } |
|
429 |
|
430 inline bool String8::operator<(const char* other) const |
|
431 { |
|
432 return strcmp(mString, other) < 0; |
|
433 } |
|
434 |
|
435 inline bool String8::operator<=(const char* other) const |
|
436 { |
|
437 return strcmp(mString, other) <= 0; |
|
438 } |
|
439 |
|
440 inline bool String8::operator==(const char* other) const |
|
441 { |
|
442 return strcmp(mString, other) == 0; |
|
443 } |
|
444 |
|
445 inline bool String8::operator!=(const char* other) const |
|
446 { |
|
447 return strcmp(mString, other) != 0; |
|
448 } |
|
449 |
|
450 inline bool String8::operator>=(const char* other) const |
|
451 { |
|
452 return strcmp(mString, other) >= 0; |
|
453 } |
|
454 |
|
455 inline bool String8::operator>(const char* other) const |
|
456 { |
|
457 return strcmp(mString, other) > 0; |
|
458 } |
|
459 |
|
460 inline String8::operator const char*() const |
|
461 { |
|
462 return mString; |
|
463 } |
|
464 |
|
465 } // namespace android |
|
466 |
|
467 // --------------------------------------------------------------------------- |
|
468 |
|
469 #endif // ANDROID_STRING8_H |