|
1 // Copyright 2013 The Chromium Authors. All rights reserved. |
|
2 // Use of this source code is governed by a BSD-style license that can be |
|
3 // found in the LICENSE file. |
|
4 // |
|
5 // This file defines utility functions for working with strings. |
|
6 |
|
7 #ifndef BASE_STRINGS_STRING_UTIL_H_ |
|
8 #define BASE_STRINGS_STRING_UTIL_H_ |
|
9 |
|
10 #include <ctype.h> |
|
11 #include <stdarg.h> // va_list |
|
12 |
|
13 #include <string> |
|
14 #include <vector> |
|
15 |
|
16 #include "base/base_export.h" |
|
17 #include "base/basictypes.h" |
|
18 #include "base/compiler_specific.h" |
|
19 #include "base/strings/string16.h" |
|
20 #include "base/strings/string_piece.h" // For implicit conversions. |
|
21 |
|
22 // Safe standard library wrappers for all platforms. |
|
23 |
|
24 namespace base { |
|
25 |
|
26 // C standard-library functions like "strncasecmp" and "snprintf" that aren't |
|
27 // cross-platform are provided as "base::strncasecmp", and their prototypes |
|
28 // are listed below. These functions are then implemented as inline calls |
|
29 // to the platform-specific equivalents in the platform-specific headers. |
|
30 |
|
31 // Compares the two strings s1 and s2 without regard to case using |
|
32 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
|
33 // s2 > s1 according to a lexicographic comparison. |
|
34 int strcasecmp(const char* s1, const char* s2); |
|
35 |
|
36 // Compares up to count characters of s1 and s2 without regard to case using |
|
37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
|
38 // s2 > s1 according to a lexicographic comparison. |
|
39 int strncasecmp(const char* s1, const char* s2, size_t count); |
|
40 |
|
41 // Same as strncmp but for char16 strings. |
|
42 int strncmp16(const char16* s1, const char16* s2, size_t count); |
|
43 |
|
44 // Wrapper for vsnprintf that always null-terminates and always returns the |
|
45 // number of characters that would be in an untruncated formatted |
|
46 // string, even when truncation occurs. |
|
47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) |
|
48 PRINTF_FORMAT(3, 0); |
|
49 |
|
50 // vswprintf always null-terminates, but when truncation occurs, it will either |
|
51 // return -1 or the number of characters that would be in an untruncated |
|
52 // formatted string. The actual return value depends on the underlying |
|
53 // C library's vswprintf implementation. |
|
54 int vswprintf(wchar_t* buffer, size_t size, |
|
55 const wchar_t* format, va_list arguments) |
|
56 WPRINTF_FORMAT(3, 0); |
|
57 |
|
58 // Some of these implementations need to be inlined. |
|
59 |
|
60 // We separate the declaration from the implementation of this inline |
|
61 // function just so the PRINTF_FORMAT works. |
|
62 inline int snprintf(char* buffer, size_t size, const char* format, ...) |
|
63 PRINTF_FORMAT(3, 4); |
|
64 inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
|
65 va_list arguments; |
|
66 va_start(arguments, format); |
|
67 int result = vsnprintf(buffer, size, format, arguments); |
|
68 va_end(arguments); |
|
69 return result; |
|
70 } |
|
71 |
|
72 // We separate the declaration from the implementation of this inline |
|
73 // function just so the WPRINTF_FORMAT works. |
|
74 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) |
|
75 WPRINTF_FORMAT(3, 4); |
|
76 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { |
|
77 va_list arguments; |
|
78 va_start(arguments, format); |
|
79 int result = vswprintf(buffer, size, format, arguments); |
|
80 va_end(arguments); |
|
81 return result; |
|
82 } |
|
83 |
|
84 // BSD-style safe and consistent string copy functions. |
|
85 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. |
|
86 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as |
|
87 // long as |dst_size| is not 0. Returns the length of |src| in characters. |
|
88 // If the return value is >= dst_size, then the output was truncated. |
|
89 // NOTE: All sizes are in number of characters, NOT in bytes. |
|
90 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); |
|
91 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); |
|
92 |
|
93 // Scan a wprintf format string to determine whether it's portable across a |
|
94 // variety of systems. This function only checks that the conversion |
|
95 // specifiers used by the format string are supported and have the same meaning |
|
96 // on a variety of systems. It doesn't check for other errors that might occur |
|
97 // within a format string. |
|
98 // |
|
99 // Nonportable conversion specifiers for wprintf are: |
|
100 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char |
|
101 // data on all systems except Windows, which treat them as wchar_t data. |
|
102 // Use %ls and %lc for wchar_t data instead. |
|
103 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, |
|
104 // which treat them as char data. Use %ls and %lc for wchar_t data |
|
105 // instead. |
|
106 // - 'F', which is not identified by Windows wprintf documentation. |
|
107 // - 'D', 'O', and 'U', which are deprecated and not available on all systems. |
|
108 // Use %ld, %lo, and %lu instead. |
|
109 // |
|
110 // Note that there is no portable conversion specifier for char data when |
|
111 // working with wprintf. |
|
112 // |
|
113 // This function is intended to be called from base::vswprintf. |
|
114 BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); |
|
115 |
|
// Lower-cases |c| if it is one of the ASCII letters 'A'..'Z'; every other
// value is returned untouched.  The standard library's tolower is locale
// sensitive, which is why it is not used here.
template <class Char> inline Char ToLowerASCII(Char c) {
  if (c < 'A' || c > 'Z')
    return c;
  return static_cast<Char>(c + ('a' - 'A'));
}
|
121 |
|
// Upper-cases |c| if it is one of the ASCII letters 'a'..'z'; every other
// value is returned untouched.  The standard library's toupper is locale
// sensitive, which is why it is not used here.
template <class Char> inline Char ToUpperASCII(Char c) {
  if (c < 'a' || c > 'z')
    return c;
  return static_cast<Char>(c + ('A' - 'a'));
}
|
127 |
|
128 // Function objects to aid in comparing/searching strings. |
|
129 |
|
// Binary predicate that reports whether two characters are equal once each
// has been lower-cased with the C library's (locale sensitive) tolower().
template<typename Char> struct CaseInsensitiveCompare {
 public:
  bool operator()(Char x, Char y) const {
    // TODO(darin): Do we really want to do locale sensitive comparisons here?
    // See http://crbug.com/24917
    return Fold(x) == Fold(y);
  }

 private:
  // Lower-cases |c| via tolower() when that call is well defined.
  //
  // tolower() has undefined behavior unless its argument is representable as
  // unsigned char (or is EOF).  A plain char with the high bit set, or a wide
  // character above the unsigned char range, must therefore never be handed
  // to it directly.  Narrow characters are routed through unsigned char;
  // wider values that do not survive the round trip are returned as-is, so
  // they only compare equal to themselves.
  static Char Fold(Char c) {
    const unsigned char narrow = static_cast<unsigned char>(c);
    if (static_cast<Char>(narrow) != c)
      return c;
    return static_cast<Char>(tolower(narrow));
  }
};
|
138 |
|
// Binary predicate that reports whether two characters are equal once each
// has been passed through ToLowerASCII().
template<typename Char> struct CaseInsensitiveCompareASCII {
 public:
  bool operator()(Char x, Char y) const {
    const Char lowered_x = ToLowerASCII(x);
    const Char lowered_y = ToLowerASCII(y);
    return lowered_x == lowered_y;
  }
};
|
145 |
|
146 } // namespace base |
|
147 |
|
148 #if defined(OS_WIN) |
|
149 #include "base/strings/string_util_win.h" |
|
150 #elif defined(OS_POSIX) |
|
151 #include "base/strings/string_util_posix.h" |
|
152 #else |
|
153 #error Define string operations appropriately for your platform |
|
154 #endif |
|
155 |
|
156 // These threadsafe functions return references to globally unique empty |
|
157 // strings. |
|
158 // |
|
159 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. |
|
160 // There is only one case where you should use these: functions which need to |
|
161 // return a string by reference (e.g. as a class member accessor), and don't |
|
162 // have an empty string to use (e.g. in an error case). These should not be |
|
163 // used as initializers, function arguments, or return values for functions |
|
164 // which return by value or outparam. |
|
165 BASE_EXPORT const std::string& EmptyString(); |
|
166 BASE_EXPORT const std::wstring& EmptyWString(); |
|
167 BASE_EXPORT const string16& EmptyString16(); |
|
168 |
|
169 BASE_EXPORT extern const wchar_t kWhitespaceWide[]; |
|
170 BASE_EXPORT extern const char16 kWhitespaceUTF16[]; |
|
171 BASE_EXPORT extern const char kWhitespaceASCII[]; |
|
172 |
|
173 BASE_EXPORT extern const char kUtf8ByteOrderMark[]; |
|
174 |
|
175 // Removes characters in |remove_chars| from anywhere in |input|. Returns true |
|
176 // if any characters were removed. |remove_chars| must be null-terminated. |
|
177 // NOTE: Safe to use the same variable for both |input| and |output|. |
|
178 BASE_EXPORT bool RemoveChars(const string16& input, |
|
179 const char16 remove_chars[], |
|
180 string16* output); |
|
181 BASE_EXPORT bool RemoveChars(const std::string& input, |
|
182 const char remove_chars[], |
|
183 std::string* output); |
|
184 |
|
185 // Replaces characters in |replace_chars| from anywhere in |input| with |
|
186 // |replace_with|. Each character in |replace_chars| will be replaced with |
|
187 // the |replace_with| string. Returns true if any characters were replaced. |
|
188 // |replace_chars| must be null-terminated. |
|
189 // NOTE: Safe to use the same variable for both |input| and |output|. |
|
190 BASE_EXPORT bool ReplaceChars(const string16& input, |
|
191 const char16 replace_chars[], |
|
192 const string16& replace_with, |
|
193 string16* output); |
|
194 BASE_EXPORT bool ReplaceChars(const std::string& input, |
|
195 const char replace_chars[], |
|
196 const std::string& replace_with, |
|
197 std::string* output); |
|
198 |
|
199 // Removes characters in |trim_chars| from the beginning and end of |input|. |
|
200 // |trim_chars| must be null-terminated. |
|
201 // NOTE: Safe to use the same variable for both |input| and |output|. |
|
202 BASE_EXPORT bool TrimString(const std::wstring& input, |
|
203 const wchar_t trim_chars[], |
|
204 std::wstring* output); |
|
205 BASE_EXPORT bool TrimString(const string16& input, |
|
206 const char16 trim_chars[], |
|
207 string16* output); |
|
208 BASE_EXPORT bool TrimString(const std::string& input, |
|
209 const char trim_chars[], |
|
210 std::string* output); |
|
211 |
|
212 // Truncates a string to the nearest UTF-8 character that will leave |
|
213 // the string less than or equal to the specified byte size. |
|
214 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, |
|
215 const size_t byte_size, |
|
216 std::string* output); |
|
217 |
|
// Trims any whitespace from either end of the input string.  Returns where
// whitespace was found.
// The non-wide version is TrimWhitespaceASCII(); it is for ASCII strings and
// only looks for ASCII whitespace.
// NOTE: Safe to use the same variable for both input and output.
|
// Bit flags naming the ends of a string; TRIM_ALL is the union of the two
// single-end flags.
enum TrimPositions {
  TRIM_NONE     = 0x0,
  TRIM_LEADING  = 0x1,  // bit 0
  TRIM_TRAILING = 0x2,  // bit 1
  TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING,
};
|
231 BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, |
|
232 TrimPositions positions, |
|
233 string16* output); |
|
234 BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, |
|
235 TrimPositions positions, |
|
236 std::string* output); |
|
237 |
|
238 // Deprecated. This function is only for backward compatibility and calls |
|
239 // TrimWhitespaceASCII(). |
|
240 BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, |
|
241 TrimPositions positions, |
|
242 std::string* output); |
|
243 |
|
244 // Searches for CR or LF characters. Removes all contiguous whitespace |
|
245 // strings that contain them. This is useful when trying to deal with text |
|
246 // copied from terminals. |
|
247 // Returns |text|, with the following three transformations: |
|
248 // (1) Leading and trailing whitespace is trimmed. |
|
249 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace |
|
250 // sequences containing a CR or LF are trimmed. |
|
251 // (3) All other whitespace sequences are converted to single spaces. |
|
252 BASE_EXPORT std::wstring CollapseWhitespace( |
|
253 const std::wstring& text, |
|
254 bool trim_sequences_with_line_breaks); |
|
255 BASE_EXPORT string16 CollapseWhitespace( |
|
256 const string16& text, |
|
257 bool trim_sequences_with_line_breaks); |
|
258 BASE_EXPORT std::string CollapseWhitespaceASCII( |
|
259 const std::string& text, |
|
260 bool trim_sequences_with_line_breaks); |
|
261 |
|
262 // Returns true if the passed string is empty or contains only white-space |
|
263 // characters. |
|
264 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); |
|
265 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); |
|
266 |
|
267 // Returns true if |input| is empty or contains only characters found in |
|
268 // |characters|. |
|
269 BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, |
|
270 const std::wstring& characters); |
|
271 BASE_EXPORT bool ContainsOnlyChars(const string16& input, |
|
272 const string16& characters); |
|
273 BASE_EXPORT bool ContainsOnlyChars(const std::string& input, |
|
274 const std::string& characters); |
|
275 |
|
276 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII |
|
277 // beforehand. |
|
278 BASE_EXPORT std::string WideToASCII(const std::wstring& wide); |
|
279 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); |
|
280 |
|
281 // Converts the given wide string to the corresponding Latin1. This will fail |
|
282 // (return false) if any characters are more than 255. |
|
283 BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); |
|
284 |
|
285 // Returns true if the specified string matches the criteria. How can a wide |
|
286 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
|
287 // first case) or characters that use only 8-bits and whose 8-bit |
|
288 // representation looks like a UTF-8 string (the second case). |
|
289 // |
|
290 // Note that IsStringUTF8 checks not only if the input is structurally |
|
291 // valid but also if it doesn't contain any non-character codepoint |
|
292 // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
|
293 // to have the maximum 'discriminating' power from other encodings. If |
|
294 // there's a use case for just checking the structural validity, we have to |
|
295 // add a new function for that. |
|
296 BASE_EXPORT bool IsStringUTF8(const std::string& str); |
|
297 BASE_EXPORT bool IsStringASCII(const std::wstring& str); |
|
298 BASE_EXPORT bool IsStringASCII(const string16& str); |
|
299 |
|
300 // Converts the elements of the given string. This version uses a pointer to |
|
301 // clearly differentiate it from the non-pointer variant. |
|
302 template <class str> inline void StringToLowerASCII(str* s) { |
|
303 for (typename str::iterator i = s->begin(); i != s->end(); ++i) |
|
304 *i = base::ToLowerASCII(*i); |
|
305 } |
|
306 |
|
// Returns a copy of |s| whose elements have been lower-cased by the
// pointer-taking StringToLowerASCII() overload; |s| itself is not modified.
// Intended for std::string and std::wstring.
template <class str> inline str StringToLowerASCII(const str& s) {
  str lowered(s);
  StringToLowerASCII(&lowered);
  return lowered;
}
|
313 |
|
314 // Converts the elements of the given string. This version uses a pointer to |
|
315 // clearly differentiate it from the non-pointer variant. |
|
316 template <class str> inline void StringToUpperASCII(str* s) { |
|
317 for (typename str::iterator i = s->begin(); i != s->end(); ++i) |
|
318 *i = base::ToUpperASCII(*i); |
|
319 } |
|
320 |
|
// Returns a copy of |s| whose elements have been upper-cased by the
// pointer-taking StringToUpperASCII() overload; |s| itself is not modified.
// Intended for std::string and std::wstring.
template <class str> inline str StringToUpperASCII(const str& s) {
  str uppered(s);
  StringToUpperASCII(&uppered);
  return uppered;
}
|
327 |
|
// Compare the lower-case form of the given string against the given ASCII
// string.  This is useful for checking whether an input string matches some
// token, and it is optimized to avoid intermediate string copies.  This API is
// borrowed from the equivalent APIs in Mozilla.
|
332 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); |
|
333 BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); |
|
334 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); |
|
335 |
|
336 // Same thing, but with string iterators instead. |
|
337 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, |
|
338 std::string::const_iterator a_end, |
|
339 const char* b); |
|
340 BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, |
|
341 std::wstring::const_iterator a_end, |
|
342 const char* b); |
|
343 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, |
|
344 string16::const_iterator a_end, |
|
345 const char* b); |
|
346 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, |
|
347 const char* a_end, |
|
348 const char* b); |
|
349 BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, |
|
350 const wchar_t* a_end, |
|
351 const char* b); |
|
352 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, |
|
353 const char16* a_end, |
|
354 const char* b); |
|
355 |
|
// Performs a case-sensitive string compare. The behavior is undefined unless
// both strings are ASCII.
|
358 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); |
|
359 |
|
360 // Returns true if str starts with search, or false otherwise. |
|
361 BASE_EXPORT bool StartsWithASCII(const std::string& str, |
|
362 const std::string& search, |
|
363 bool case_sensitive); |
|
364 BASE_EXPORT bool StartsWith(const std::wstring& str, |
|
365 const std::wstring& search, |
|
366 bool case_sensitive); |
|
367 BASE_EXPORT bool StartsWith(const string16& str, |
|
368 const string16& search, |
|
369 bool case_sensitive); |
|
370 |
|
371 // Returns true if str ends with search, or false otherwise. |
|
372 BASE_EXPORT bool EndsWith(const std::string& str, |
|
373 const std::string& search, |
|
374 bool case_sensitive); |
|
375 BASE_EXPORT bool EndsWith(const std::wstring& str, |
|
376 const std::wstring& search, |
|
377 bool case_sensitive); |
|
378 BASE_EXPORT bool EndsWith(const string16& str, |
|
379 const string16& search, |
|
380 bool case_sensitive); |
|
381 |
|
382 |
|
// Determines the type of ASCII character, independent of locale (the C
// library versions will change based on locale).

// Returns true only for space, carriage return, line feed and horizontal tab.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  switch (c) {
    case ' ':
    case '\r':
    case '\n':
    case '\t':
      return true;
    default:
      return false;
  }
}
|
// Returns true when |c| lies in 'A'..'Z' or in 'a'..'z'.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  const bool is_upper = (c >= 'A') && (c <= 'Z');
  if (is_upper)
    return true;
  return (c >= 'a') && (c <= 'z');
}
|
// Returns true when |c| lies in '0'..'9'.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  const bool outside_range = (c < '0') || (c > '9');
  return !outside_range;
}
|
397 |
|
// Returns true when |c| lies in '0'..'9', 'A'..'F' or 'a'..'f'.
template <typename Char>
inline bool IsHexDigit(Char c) {
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'A' && c <= 'F')
    return true;
  return c >= 'a' && c <= 'f';
}
|
404 |
|
// Maps a hexadecimal digit character to its numeric value: '0'..'9' give
// 0..9, while 'A'..'F' and 'a'..'f' give 10..15.  The argument is DCHECKed
// with IsHexDigit(); any other value falls through to a result of 0.
template <typename Char>
inline Char HexDigitToInt(Char c) {
  DCHECK(IsHexDigit(c));
  Char value = 0;
  if (c >= '0' && c <= '9') {
    value = c - '0';
  } else if (c >= 'A' && c <= 'F') {
    value = c - 'A' + 10;
  } else if (c >= 'a' && c <= 'f') {
    value = c - 'a' + 10;
  }
  return value;
}
|
416 |
|
417 // Returns true if it's a whitespace character. |
|
418 inline bool IsWhitespace(wchar_t c) { |
|
419 return wcschr(kWhitespaceWide, c) != NULL; |
|
420 } |
|
421 |
|
422 // Return a byte string in human-readable format with a unit suffix. Not |
|
423 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is |
|
424 // highly recommended instead. TODO(avi): Figure out how to get callers to use |
|
425 // FormatBytes instead; remove this. |
|
426 BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); |
|
427 |
|
428 // Starting at |start_offset| (usually 0), replace the first instance of |
|
429 // |find_this| with |replace_with|. |
|
430 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
|
431 string16* str, |
|
432 string16::size_type start_offset, |
|
433 const string16& find_this, |
|
434 const string16& replace_with); |
|
435 BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
|
436 std::string* str, |
|
437 std::string::size_type start_offset, |
|
438 const std::string& find_this, |
|
439 const std::string& replace_with); |
|
440 |
|
441 // Starting at |start_offset| (usually 0), look through |str| and replace all |
|
442 // instances of |find_this| with |replace_with|. |
|
443 // |
|
444 // This does entire substrings; use std::replace in <algorithm> for single |
|
445 // characters, for example: |
|
446 // std::replace(str.begin(), str.end(), 'a', 'b'); |
|
447 BASE_EXPORT void ReplaceSubstringsAfterOffset( |
|
448 string16* str, |
|
449 string16::size_type start_offset, |
|
450 const string16& find_this, |
|
451 const string16& replace_with); |
|
452 BASE_EXPORT void ReplaceSubstringsAfterOffset( |
|
453 std::string* str, |
|
454 std::string::size_type start_offset, |
|
455 const std::string& find_this, |
|
456 const std::string& replace_with); |
|
457 |
|
458 // Reserves enough memory in |str| to accommodate |length_with_null| characters, |
|
459 // sets the size of |str| to |length_with_null - 1| characters, and returns a |
|
460 // pointer to the underlying contiguous array of characters. This is typically |
|
461 // used when calling a function that writes results into a character array, but |
|
462 // the caller wants the data to be managed by a string-like object. It is |
|
463 // convenient in that is can be used inline in the call, and fast in that it |
|
464 // avoids copying the results of the call from a char* into a string. |
|
465 // |
|
466 // |length_with_null| must be at least 2, since otherwise the underlying string |
|
467 // would have size 0, and trying to access &((*str)[0]) in that case can result |
|
468 // in a number of problems. |
|
469 // |
|
470 // Internally, this takes linear time because the resize() call 0-fills the |
|
471 // underlying array for potentially all |
|
472 // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we |
|
473 // could avoid this aspect of the resize() call, as we expect the caller to |
|
474 // immediately write over this memory, but there is no other way to set the size |
|
475 // of the string, and not doing that will mean people who access |str| rather |
|
476 // than str.c_str() will get back a string of whatever size |str| had on entry |
|
477 // to this function (probably 0). |
|
478 template <class string_type> |
|
479 inline typename string_type::value_type* WriteInto(string_type* str, |
|
480 size_t length_with_null) { |
|
481 DCHECK_GT(length_with_null, 1u); |
|
482 str->reserve(length_with_null); |
|
483 str->resize(length_with_null - 1); |
|
484 return &((*str)[0]); |
|
485 } |
|
486 |
|
487 //----------------------------------------------------------------------------- |
|
488 |
|
489 // Splits a string into its fields delimited by any of the characters in |
|
490 // |delimiters|. Each field is added to the |tokens| vector. Returns the |
|
491 // number of tokens found. |
|
492 BASE_EXPORT size_t Tokenize(const std::wstring& str, |
|
493 const std::wstring& delimiters, |
|
494 std::vector<std::wstring>* tokens); |
|
495 BASE_EXPORT size_t Tokenize(const string16& str, |
|
496 const string16& delimiters, |
|
497 std::vector<string16>* tokens); |
|
498 BASE_EXPORT size_t Tokenize(const std::string& str, |
|
499 const std::string& delimiters, |
|
500 std::vector<std::string>* tokens); |
|
501 BASE_EXPORT size_t Tokenize(const base::StringPiece& str, |
|
502 const base::StringPiece& delimiters, |
|
503 std::vector<base::StringPiece>* tokens); |
|
504 |
|
505 // Does the opposite of SplitString(). |
|
506 BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); |
|
507 BASE_EXPORT std::string JoinString( |
|
508 const std::vector<std::string>& parts, char s); |
|
509 |
|
510 // Join |parts| using |separator|. |
|
511 BASE_EXPORT std::string JoinString( |
|
512 const std::vector<std::string>& parts, |
|
513 const std::string& separator); |
|
514 BASE_EXPORT string16 JoinString( |
|
515 const std::vector<string16>& parts, |
|
516 const string16& separator); |
|
517 |
|
518 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. |
|
519 // Additionally, any number of consecutive '$' characters is replaced by that |
|
520 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be |
|
521 // NULL. This only allows you to use up to nine replacements. |
|
522 BASE_EXPORT string16 ReplaceStringPlaceholders( |
|
523 const string16& format_string, |
|
524 const std::vector<string16>& subst, |
|
525 std::vector<size_t>* offsets); |
|
526 |
|
527 BASE_EXPORT std::string ReplaceStringPlaceholders( |
|
528 const base::StringPiece& format_string, |
|
529 const std::vector<std::string>& subst, |
|
530 std::vector<size_t>* offsets); |
|
531 |
|
// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
|
533 BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, |
|
534 const string16& a, |
|
535 size_t* offset); |
|
536 |
|
537 // Returns true if the string passed in matches the pattern. The pattern |
|
538 // string can contain wildcards like * and ? |
|
539 // The backslash character (\) is an escape character for * and ? |
|
540 // We limit the patterns to having a max of 16 * or ? characters. |
|
541 // ? matches 0 or 1 character, while * matches 0 or more characters. |
|
542 BASE_EXPORT bool MatchPattern(const base::StringPiece& string, |
|
543 const base::StringPiece& pattern); |
|
544 BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); |
|
545 |
|
// Hack to convert any char-like type to its unsigned counterpart.
// For example, it will convert char, signed char and unsigned char to unsigned
// char.

// Primary template: a type with no specialization below maps to itself.
template <typename T>
struct ToUnsigned {
  typedef T Unsigned;
};

// char -> unsigned char.
template <>
struct ToUnsigned<char> {
  typedef unsigned char Unsigned;
};

// signed char -> unsigned char.
template <>
struct ToUnsigned<signed char> {
  typedef unsigned char Unsigned;
};

// wchar_t -> an unsigned type picked by the WCHAR_T_IS_UTF16 /
// WCHAR_T_IS_UTF32 macros; no typedef is provided when neither is defined.
template <>
struct ToUnsigned<wchar_t> {
#if defined(WCHAR_T_IS_UTF16)
  typedef unsigned short Unsigned;
#elif defined(WCHAR_T_IS_UTF32)
  typedef uint32 Unsigned;
#endif
};

// short -> unsigned short.
template <>
struct ToUnsigned<short> {
  typedef unsigned short Unsigned;
};
|
574 |
|
575 #endif // BASE_STRINGS_STRING_UTIL_H_ |