|
1 /* |
|
2 * Copyright (C) 2005 The Android Open Source Project |
|
3 * |
|
4 * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 * you may not use this file except in compliance with the License. |
|
6 * You may obtain a copy of the License at |
|
7 * |
|
8 * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 * |
|
10 * Unless required by applicable law or agreed to in writing, software |
|
11 * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 * See the License for the specific language governing permissions and |
|
14 * limitations under the License. |
|
15 */ |
|
16 |
|
17 #ifndef ANDROID_STRING8_H |
|
18 #define ANDROID_STRING8_H |
|
19 |
|
20 #include <utils/Errors.h> |
|
21 |
|
// Need this for the char16_t type; String8.h should not
// be dependent on the String16 class.
|
24 #include <utils/String16.h> |
|
25 |
|
26 #include <stdint.h> |
|
27 #include <string.h> |
|
28 #include <sys/types.h> |
|
29 |
|
30 // --------------------------------------------------------------------------- |
|
31 |
|
extern "C" {

// char32_t is not a built-in type for C or C++98, so supply it there.
#if !defined(__cplusplus) || __cplusplus == 199711L // C or C++98
typedef uint32_t char32_t;
#endif

size_t strlen32(const char32_t*);
size_t strnlen32(const char32_t*, size_t);

/*
 * Validating length query for a null-terminated UTF-8 string.
 *
 * Yields the length of "src" when it is a valid UTF-8 string, and 0 when
 * "src" is NULL, empty, or not valid UTF-8. Use it to find out whether
 * "src" consists of valid UTF-8 characters with valid unicode codepoints.
 *
 * The other GetUtf... functions in this header do not verify that their
 * input is valid UTF-8 with valid codepoints (from 0 to 0x10FFFF), so call
 * this function first whenever the input may be malformed.
 *
 * When validity does not matter, plain strlen() should be much faster.
 */
size_t utf8_length(const char* src);

/*
 * Yields the UTF-32 length of "src".
 */
size_t utf32_length(const char* src, size_t src_len);

/*
 * Yields the UTF-8 length of the UTF-16 string "src".
 */
size_t utf8_length_from_utf16(const char16_t* src, size_t src_len);

/*
 * Yields the UTF-8 length of the UTF-32 string "src".
 */
size_t utf8_length_from_utf32(const char32_t* src, size_t src_len);

/*
 * Yields the unicode value located at "index".
 *
 * The result is -1 when "index" is invalid, i.e. equal to or greater than
 * "src_len". A positive result can be converted to char32_t (which is
 * unsigned); in that case the index to use next is written to "next_index"
 * unless "next_index" is NULL, which is permitted.
 */
int32_t utf32_at(const char* src, size_t src_len,
                 size_t index, size_t* next_index);

/*
 * Converts "src" to UTF-32 and stores the result in "dst". When "dst_len"
 * is too small for the whole string, only part of "src" is stored.
 *
 * Yields the size actually used for storing the string.
 * "dst" is not null-terminated when dst_len is fully used (like strncpy).
 */
size_t utf8_to_utf32(const char* src, size_t src_len,
                     char32_t* dst, size_t dst_len);

/*
 * Converts "src" to UTF-8 and stores the result in "dst". When "dst_len"
 * is too small for the whole string, as much of "src" as possible is
 * stored; the examples below show the details.
 *
 * Yields the size actually used for storing the string.
 * "dst" is not null-terminated when dst_len is fully used (like strncpy).
 *
 * Example 1
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" >= 7
 * ->
 * Returned value == 6
 * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0
 * (note that "dst" is null-terminated)
 *
 * Example 2
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" == 5
 * ->
 * Returned value == 3
 * "dst" becomes \xE3\x81\x82\0
 * (note that "dst" is null-terminated, but \u3044 is not stored in "dst"
 * since "dst" does not have enough size to store the character)
 *
 * Example 3
 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
 * "src_len" == 2
 * "dst_len" == 6
 * ->
 * Returned value == 6
 * "dst" becomes \xE3\x81\x82\xE3\x81\x84
 * (note that "dst" is NOT null-terminated, like strncpy)
 */
size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                     char* dst, size_t dst_len);

/*
 * NOTE(review): no contract is documented for this function here; it is
 * presumably the UTF-16 counterpart of utf32_to_utf8() -- confirm against
 * the implementation.
 */
size_t utf16_to_utf8(const char16_t* src, size_t src_len,
                     char* dst, size_t dst_len);

}
|
134 |
|
135 // --------------------------------------------------------------------------- |
|
136 |
|
137 namespace android { |
|
138 |
|
139 class TextOutput; |
|
140 |
|
141 //! This is a string holding UTF-8 characters. Does not allow the value more |
|
142 // than 0x10FFFF, which is not valid unicode codepoint. |
|
143 class String8 |
|
144 { |
|
145 public: |
|
146 String8(); |
|
147 String8(const String8& o); |
|
148 explicit String8(const char* o); |
|
149 explicit String8(const char* o, size_t numChars); |
|
150 |
|
151 explicit String8(const String16& o); |
|
152 explicit String8(const char16_t* o); |
|
153 explicit String8(const char16_t* o, size_t numChars); |
|
154 explicit String8(const char32_t* o); |
|
155 explicit String8(const char32_t* o, size_t numChars); |
|
156 ~String8(); |
|
157 |
|
158 inline const char* string() const; |
|
159 inline size_t size() const; |
|
160 inline size_t length() const; |
|
161 inline size_t bytes() const; |
|
162 |
|
163 inline const SharedBuffer* sharedBuffer() const; |
|
164 |
|
165 void setTo(const String8& other); |
|
166 status_t setTo(const char* other); |
|
167 status_t setTo(const char* other, size_t numChars); |
|
168 status_t setTo(const char16_t* other, size_t numChars); |
|
169 status_t setTo(const char32_t* other, |
|
170 size_t length); |
|
171 |
|
172 status_t append(const String8& other); |
|
173 status_t append(const char* other); |
|
174 status_t append(const char* other, size_t numChars); |
|
175 |
|
176 status_t appendFormat(const char* fmt, ...) |
|
177 __attribute__((format (printf, 2, 3))); |
|
178 |
|
179 // Note that this function takes O(N) time to calculate the value. |
|
180 // No cache value is stored. |
|
181 size_t getUtf32Length() const; |
|
182 int32_t getUtf32At(size_t index, |
|
183 size_t *next_index) const; |
|
184 size_t getUtf32(char32_t* dst, size_t dst_len) const; |
|
185 |
|
186 inline String8& operator=(const String8& other); |
|
187 inline String8& operator=(const char* other); |
|
188 |
|
189 inline String8& operator+=(const String8& other); |
|
190 inline String8 operator+(const String8& other) const; |
|
191 |
|
192 inline String8& operator+=(const char* other); |
|
193 inline String8 operator+(const char* other) const; |
|
194 |
|
195 inline int compare(const String8& other) const; |
|
196 |
|
197 inline bool operator<(const String8& other) const; |
|
198 inline bool operator<=(const String8& other) const; |
|
199 inline bool operator==(const String8& other) const; |
|
200 inline bool operator!=(const String8& other) const; |
|
201 inline bool operator>=(const String8& other) const; |
|
202 inline bool operator>(const String8& other) const; |
|
203 |
|
204 inline bool operator<(const char* other) const; |
|
205 inline bool operator<=(const char* other) const; |
|
206 inline bool operator==(const char* other) const; |
|
207 inline bool operator!=(const char* other) const; |
|
208 inline bool operator>=(const char* other) const; |
|
209 inline bool operator>(const char* other) const; |
|
210 |
|
211 inline operator const char*() const; |
|
212 |
|
213 char* lockBuffer(size_t size); |
|
214 void unlockBuffer(); |
|
215 status_t unlockBuffer(size_t size); |
|
216 |
|
217 // return the index of the first byte of other in this at or after |
|
218 // start, or -1 if not found |
|
219 ssize_t find(const char* other, size_t start = 0) const; |
|
220 |
|
221 void toLower(); |
|
222 void toLower(size_t start, size_t numChars); |
|
223 void toUpper(); |
|
224 void toUpper(size_t start, size_t numChars); |
|
225 |
|
226 /* |
|
227 * These methods operate on the string as if it were a path name. |
|
228 */ |
|
229 |
|
230 /* |
|
231 * Set the filename field to a specific value. |
|
232 * |
|
233 * Normalizes the filename, removing a trailing '/' if present. |
|
234 */ |
|
235 void setPathName(const char* name); |
|
236 void setPathName(const char* name, size_t numChars); |
|
237 |
|
238 /* |
|
239 * Get just the filename component. |
|
240 * |
|
241 * "/tmp/foo/bar.c" --> "bar.c" |
|
242 */ |
|
243 String8 getPathLeaf(void) const; |
|
244 |
|
245 /* |
|
246 * Remove the last (file name) component, leaving just the directory |
|
247 * name. |
|
248 * |
|
249 * "/tmp/foo/bar.c" --> "/tmp/foo" |
|
250 * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX |
|
251 * "bar.c" --> "" |
|
252 */ |
|
253 String8 getPathDir(void) const; |
|
254 |
|
255 /* |
|
256 * Retrieve the front (root dir) component. Optionally also return the |
|
257 * remaining components. |
|
258 * |
|
259 * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c") |
|
260 * "/tmp" --> "tmp" (remain = "") |
|
261 * "bar.c" --> "bar.c" (remain = "") |
|
262 */ |
|
263 String8 walkPath(String8* outRemains = NULL) const; |
|
264 |
|
265 /* |
|
266 * Return the filename extension. This is the last '.' and up to |
|
267 * four characters that follow it. The '.' is included in case we |
|
268 * decide to expand our definition of what constitutes an extension. |
|
269 * |
|
270 * "/tmp/foo/bar.c" --> ".c" |
|
271 * "/tmp" --> "" |
|
272 * "/tmp/foo.bar/baz" --> "" |
|
273 * "foo.jpeg" --> ".jpeg" |
|
274 * "foo." --> "" |
|
275 */ |
|
276 String8 getPathExtension(void) const; |
|
277 |
|
278 /* |
|
279 * Return the path without the extension. Rules for what constitutes |
|
280 * an extension are described in the comment for getPathExtension(). |
|
281 * |
|
282 * "/tmp/foo/bar.c" --> "/tmp/foo/bar" |
|
283 */ |
|
284 String8 getBasePath(void) const; |
|
285 |
|
286 /* |
|
287 * Add a component to the pathname. We guarantee that there is |
|
288 * exactly one path separator between the old path and the new. |
|
289 * If there is no existing name, we just copy the new name in. |
|
290 * |
|
291 * If leaf is a fully qualified path (i.e. starts with '/', it |
|
292 * replaces whatever was there before. |
|
293 */ |
|
294 String8& appendPath(const char* leaf); |
|
295 String8& appendPath(const String8& leaf) { return appendPath(leaf.string()); } |
|
296 |
|
297 /* |
|
298 * Like appendPath(), but does not affect this string. Returns a new one instead. |
|
299 */ |
|
300 String8 appendPathCopy(const char* leaf) const |
|
301 { String8 p(*this); p.appendPath(leaf); return p; } |
|
302 String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); } |
|
303 |
|
304 /* |
|
305 * Converts all separators in this string to /, the default path separator. |
|
306 * |
|
307 * If the default OS separator is backslash, this converts all |
|
308 * backslashes to slashes, in-place. Otherwise it does nothing. |
|
309 * Returns self. |
|
310 */ |
|
311 String8& convertToResPath(); |
|
312 |
|
313 private: |
|
314 status_t real_append(const char* other, size_t numChars); |
|
315 char* find_extension(void) const; |
|
316 |
|
317 const char* mString; |
|
318 }; |
|
319 |
|
// Stream-insertion overload for writing a string to a TextOutput.
// NOTE(review): this header is String8.h, yet the overload declared here takes
// a String16. Presumably a String8 overload was intended -- confirm against
// the implementation file before changing the declaration.
TextOutput& operator<<(TextOutput& to, const String16& val);
|
321 |
|
322 // --------------------------------------------------------------------------- |
|
// No user serviceable parts below.
|
324 |
|
325 inline int compare_type(const String8& lhs, const String8& rhs) |
|
326 { |
|
327 return lhs.compare(rhs); |
|
328 } |
|
329 |
|
330 inline int strictly_order_type(const String8& lhs, const String8& rhs) |
|
331 { |
|
332 return compare_type(lhs, rhs) < 0; |
|
333 } |
|
334 |
|
335 inline const char* String8::string() const |
|
336 { |
|
337 return mString; |
|
338 } |
|
339 |
|
340 inline size_t String8::length() const |
|
341 { |
|
342 return SharedBuffer::sizeFromData(mString)-1; |
|
343 } |
|
344 |
|
345 inline size_t String8::size() const |
|
346 { |
|
347 return length(); |
|
348 } |
|
349 |
|
350 inline size_t String8::bytes() const |
|
351 { |
|
352 return SharedBuffer::sizeFromData(mString)-1; |
|
353 } |
|
354 |
|
355 inline const SharedBuffer* String8::sharedBuffer() const |
|
356 { |
|
357 return SharedBuffer::bufferFromData(mString); |
|
358 } |
|
359 |
|
360 inline String8& String8::operator=(const String8& other) |
|
361 { |
|
362 setTo(other); |
|
363 return *this; |
|
364 } |
|
365 |
|
366 inline String8& String8::operator=(const char* other) |
|
367 { |
|
368 setTo(other); |
|
369 return *this; |
|
370 } |
|
371 |
|
372 inline String8& String8::operator+=(const String8& other) |
|
373 { |
|
374 append(other); |
|
375 return *this; |
|
376 } |
|
377 |
|
378 inline String8 String8::operator+(const String8& other) const |
|
379 { |
|
380 String8 tmp(*this); |
|
381 tmp += other; |
|
382 return tmp; |
|
383 } |
|
384 |
|
385 inline String8& String8::operator+=(const char* other) |
|
386 { |
|
387 append(other); |
|
388 return *this; |
|
389 } |
|
390 |
|
391 inline String8 String8::operator+(const char* other) const |
|
392 { |
|
393 String8 tmp(*this); |
|
394 tmp += other; |
|
395 return tmp; |
|
396 } |
|
397 |
|
398 inline int String8::compare(const String8& other) const |
|
399 { |
|
400 return strcmp(mString, other.mString); |
|
401 } |
|
402 |
|
403 inline bool String8::operator<(const String8& other) const |
|
404 { |
|
405 return strcmp(mString, other.mString) < 0; |
|
406 } |
|
407 |
|
408 inline bool String8::operator<=(const String8& other) const |
|
409 { |
|
410 return strcmp(mString, other.mString) <= 0; |
|
411 } |
|
412 |
|
413 inline bool String8::operator==(const String8& other) const |
|
414 { |
|
415 return strcmp(mString, other.mString) == 0; |
|
416 } |
|
417 |
|
418 inline bool String8::operator!=(const String8& other) const |
|
419 { |
|
420 return strcmp(mString, other.mString) != 0; |
|
421 } |
|
422 |
|
423 inline bool String8::operator>=(const String8& other) const |
|
424 { |
|
425 return strcmp(mString, other.mString) >= 0; |
|
426 } |
|
427 |
|
428 inline bool String8::operator>(const String8& other) const |
|
429 { |
|
430 return strcmp(mString, other.mString) > 0; |
|
431 } |
|
432 |
|
433 inline bool String8::operator<(const char* other) const |
|
434 { |
|
435 return strcmp(mString, other) < 0; |
|
436 } |
|
437 |
|
438 inline bool String8::operator<=(const char* other) const |
|
439 { |
|
440 return strcmp(mString, other) <= 0; |
|
441 } |
|
442 |
|
443 inline bool String8::operator==(const char* other) const |
|
444 { |
|
445 return strcmp(mString, other) == 0; |
|
446 } |
|
447 |
|
448 inline bool String8::operator!=(const char* other) const |
|
449 { |
|
450 return strcmp(mString, other) != 0; |
|
451 } |
|
452 |
|
453 inline bool String8::operator>=(const char* other) const |
|
454 { |
|
455 return strcmp(mString, other) >= 0; |
|
456 } |
|
457 |
|
458 inline bool String8::operator>(const char* other) const |
|
459 { |
|
460 return strcmp(mString, other) > 0; |
|
461 } |
|
462 |
|
463 inline String8::operator const char*() const |
|
464 { |
|
465 return mString; |
|
466 } |
|
467 |
|
468 } // namespace android |
|
469 |
|
470 // --------------------------------------------------------------------------- |
|
471 |
|
472 #endif // ANDROID_STRING8_H |