Wed, 31 Dec 2014 07:16:47 +0100
Revert simplistic fix pending revisit of Mozilla integration attempt.
michael@0 | 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
michael@0 | 2 | // Use of this source code is governed by a BSD-style license that can be |
michael@0 | 3 | // found in the LICENSE file. |
michael@0 | 4 | // |
michael@0 | 5 | // This file defines utility functions for working with strings. |
michael@0 | 6 | |
michael@0 | 7 | #ifndef BASE_STRINGS_STRING_UTIL_H_ |
michael@0 | 8 | #define BASE_STRINGS_STRING_UTIL_H_ |
michael@0 | 9 | |
michael@0 | 10 | #include <ctype.h> |
michael@0 | 11 | #include <stdarg.h> // va_list |
michael@0 | 12 | |
michael@0 | 13 | #include <string> |
michael@0 | 14 | #include <vector> |
michael@0 | 15 | |
michael@0 | 16 | #include "base/base_export.h" |
michael@0 | 17 | #include "base/basictypes.h" |
michael@0 | 18 | #include "base/compiler_specific.h" |
michael@0 | 19 | #include "base/strings/string16.h" |
michael@0 | 20 | #include "base/strings/string_piece.h" // For implicit conversions. |
michael@0 | 21 | |
michael@0 | 22 | // Safe standard library wrappers for all platforms. |
michael@0 | 23 | |
michael@0 | 24 | namespace base { |
michael@0 | 25 | |
michael@0 | 26 | // C standard-library functions like "strncasecmp" and "snprintf" that aren't |
michael@0 | 27 | // cross-platform are provided as "base::strncasecmp", and their prototypes |
michael@0 | 28 | // are listed below. These functions are then implemented as inline calls |
michael@0 | 29 | // to the platform-specific equivalents in the platform-specific headers. |
michael@0 | 30 | |
michael@0 | 31 | // Compares the two strings s1 and s2 without regard to case using |
michael@0 | 32 | // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
michael@0 | 33 | // s2 > s1 according to a lexicographic comparison. |
michael@0 | 34 | int strcasecmp(const char* s1, const char* s2); |
michael@0 | 35 | |
michael@0 | 36 | // Compares up to count characters of s1 and s2 without regard to case using |
michael@0 | 37 | // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if |
michael@0 | 38 | // s2 > s1 according to a lexicographic comparison. |
michael@0 | 39 | int strncasecmp(const char* s1, const char* s2, size_t count); |
michael@0 | 40 | |
michael@0 | 41 | // Same as strncmp but for char16 strings. |
michael@0 | 42 | int strncmp16(const char16* s1, const char16* s2, size_t count); |
michael@0 | 43 | |
michael@0 | 44 | // Wrapper for vsnprintf that always null-terminates and always returns the |
michael@0 | 45 | // number of characters that would be in an untruncated formatted |
michael@0 | 46 | // string, even when truncation occurs. |
michael@0 | 47 | int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) |
michael@0 | 48 | PRINTF_FORMAT(3, 0); |
michael@0 | 49 | |
michael@0 | 50 | // vswprintf always null-terminates, but when truncation occurs, it will either |
michael@0 | 51 | // return -1 or the number of characters that would be in an untruncated |
michael@0 | 52 | // formatted string. The actual return value depends on the underlying |
michael@0 | 53 | // C library's vswprintf implementation. |
michael@0 | 54 | int vswprintf(wchar_t* buffer, size_t size, |
michael@0 | 55 | const wchar_t* format, va_list arguments) |
michael@0 | 56 | WPRINTF_FORMAT(3, 0); |
michael@0 | 57 | |
michael@0 | 58 | // Some of these implementations need to be inlined. |
michael@0 | 59 | |
michael@0 | 60 | // printf-style wrapper that forwards to vsnprintf() above. The declaration |
michael@0 | 61 | // is kept apart from the definition only so that PRINTF_FORMAT can be applied. |
michael@0 | 62 | inline int snprintf(char* buffer, size_t size, const char* format, ...) |
michael@0 | 63 | PRINTF_FORMAT(3, 4); |
michael@0 | 64 | inline int snprintf(char* buffer, size_t size, const char* format, ...) { |
michael@0 | 65 | va_list args; |
michael@0 | 66 | va_start(args, format); |
michael@0 | 67 | const int length = vsnprintf(buffer, size, format, args); |
michael@0 | 68 | va_end(args); |
michael@0 | 69 | return length; |
michael@0 | 70 | } |
michael@0 | 71 | |
michael@0 | 72 | // wprintf-style wrapper that forwards to vswprintf() above. The declaration |
michael@0 | 73 | // is kept apart from the definition only so that WPRINTF_FORMAT can be applied. |
michael@0 | 74 | inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) |
michael@0 | 75 | WPRINTF_FORMAT(3, 4); |
michael@0 | 76 | inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { |
michael@0 | 77 | va_list args; |
michael@0 | 78 | va_start(args, format); |
michael@0 | 79 | const int length = vswprintf(buffer, size, format, args); |
michael@0 | 80 | va_end(args); |
michael@0 | 81 | return length; |
michael@0 | 82 | } |
michael@0 | 83 | |
michael@0 | 84 | // BSD-style safe and consistent string copy functions. |
michael@0 | 85 | // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. |
michael@0 | 86 | // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as |
michael@0 | 87 | // long as |dst_size| is not 0. Returns the length of |src| in characters. |
michael@0 | 88 | // If the return value is >= dst_size, then the output was truncated. |
michael@0 | 89 | // NOTE: All sizes are in number of characters, NOT in bytes. |
michael@0 | 90 | BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); |
michael@0 | 91 | BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); |
michael@0 | 92 | |
michael@0 | 93 | // Scan a wprintf format string to determine whether it's portable across a |
michael@0 | 94 | // variety of systems. This function only checks that the conversion |
michael@0 | 95 | // specifiers used by the format string are supported and have the same meaning |
michael@0 | 96 | // on a variety of systems. It doesn't check for other errors that might occur |
michael@0 | 97 | // within a format string. |
michael@0 | 98 | // |
michael@0 | 99 | // Nonportable conversion specifiers for wprintf are: |
michael@0 | 100 | // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char |
michael@0 | 101 | // data on all systems except Windows, which treats them as wchar_t data. |
michael@0 | 102 | // Use %ls and %lc for wchar_t data instead. |
michael@0 | 103 | // - 'S' and 'C', which operate on wchar_t data on all systems except Windows, |
michael@0 | 104 | // which treats them as char data. Use %ls and %lc for wchar_t data |
michael@0 | 105 | // instead. |
michael@0 | 106 | // - 'F', which is not identified by Windows wprintf documentation. |
michael@0 | 107 | // - 'D', 'O', and 'U', which are deprecated and not available on all systems. |
michael@0 | 108 | // Use %ld, %lo, and %lu instead. |
michael@0 | 109 | // |
michael@0 | 110 | // Note that there is no portable conversion specifier for char data when |
michael@0 | 111 | // working with wprintf. |
michael@0 | 112 | // |
michael@0 | 113 | // This function is intended to be called from base::vswprintf. |
michael@0 | 114 | BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); |
michael@0 | 115 | |
michael@0 | 116 | // ASCII-only tolower; unlike the C library's tolower it ignores the locale. |
michael@0 | 117 | template <class Char> inline Char ToLowerASCII(Char c) { |
michael@0 | 118 | if (c < 'A' || c > 'Z') return c; |
michael@0 | 119 | return c + ('a' - 'A'); |
michael@0 | 120 | } |
michael@0 | 121 | |
michael@0 | 122 | // ASCII-only toupper; unlike the C library's toupper it ignores the locale. |
michael@0 | 123 | template <class Char> inline Char ToUpperASCII(Char c) { |
michael@0 | 124 | if (c < 'a' || c > 'z') return c; |
michael@0 | 125 | return c + ('A' - 'a'); |
michael@0 | 126 | } |
michael@0 | 127 | |
michael@0 | 128 | // Function objects to aid in comparing/searching strings. |
michael@0 | 129 | |
michael@0 | 130 | template<typename Char> struct CaseInsensitiveCompare { |
michael@0 | 131 | // True when |lhs| and |rhs| compare equal after tolower() is applied to both. |
michael@0 | 132 | bool operator()(Char lhs, Char rhs) const { |
michael@0 | 133 | // TODO(darin): Do we really want to do locale sensitive comparisons here? |
michael@0 | 134 | // See http://crbug.com/24917 |
michael@0 | 135 | return tolower(lhs) == tolower(rhs); |
michael@0 | 136 | } |
michael@0 | 137 | }; |
michael@0 | 138 | |
michael@0 | 139 | template<typename Char> struct CaseInsensitiveCompareASCII { |
michael@0 | 140 | // Locale-independent: both sides are folded with ToLowerASCII() first. |
michael@0 | 141 | bool operator()(Char lhs, Char rhs) const { |
michael@0 | 142 | return ToLowerASCII(lhs) == ToLowerASCII(rhs); |
michael@0 | 143 | } |
michael@0 | 144 | }; |
michael@0 | 145 | |
michael@0 | 146 | } // namespace base |
michael@0 | 147 | |
michael@0 | 148 | #if defined(OS_WIN) |
michael@0 | 149 | #include "base/strings/string_util_win.h" |
michael@0 | 150 | #elif defined(OS_POSIX) |
michael@0 | 151 | #include "base/strings/string_util_posix.h" |
michael@0 | 152 | #else |
michael@0 | 153 | #error Define string operations appropriately for your platform |
michael@0 | 154 | #endif |
michael@0 | 155 | |
michael@0 | 156 | // These threadsafe functions return references to globally unique empty |
michael@0 | 157 | // strings. |
michael@0 | 158 | // |
michael@0 | 159 | // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. |
michael@0 | 160 | // There is only one case where you should use these: functions which need to |
michael@0 | 161 | // return a string by reference (e.g. as a class member accessor), and don't |
michael@0 | 162 | // have an empty string to use (e.g. in an error case). These should not be |
michael@0 | 163 | // used as initializers, function arguments, or return values for functions |
michael@0 | 164 | // which return by value or outparam. |
michael@0 | 165 | BASE_EXPORT const std::string& EmptyString(); |
michael@0 | 166 | BASE_EXPORT const std::wstring& EmptyWString(); |
michael@0 | 167 | BASE_EXPORT const string16& EmptyString16(); |
michael@0 | 168 | |
michael@0 | 169 | BASE_EXPORT extern const wchar_t kWhitespaceWide[]; |
michael@0 | 170 | BASE_EXPORT extern const char16 kWhitespaceUTF16[]; |
michael@0 | 171 | BASE_EXPORT extern const char kWhitespaceASCII[]; |
michael@0 | 172 | |
michael@0 | 173 | BASE_EXPORT extern const char kUtf8ByteOrderMark[]; |
michael@0 | 174 | |
michael@0 | 175 | // Removes characters in |remove_chars| from anywhere in |input|. Returns true |
michael@0 | 176 | // if any characters were removed. |remove_chars| must be null-terminated. |
michael@0 | 177 | // NOTE: Safe to use the same variable for both |input| and |output|. |
michael@0 | 178 | BASE_EXPORT bool RemoveChars(const string16& input, |
michael@0 | 179 | const char16 remove_chars[], |
michael@0 | 180 | string16* output); |
michael@0 | 181 | BASE_EXPORT bool RemoveChars(const std::string& input, |
michael@0 | 182 | const char remove_chars[], |
michael@0 | 183 | std::string* output); |
michael@0 | 184 | |
michael@0 | 185 | // Replaces characters in |replace_chars| from anywhere in |input| with |
michael@0 | 186 | // |replace_with|. Each character in |replace_chars| will be replaced with |
michael@0 | 187 | // the |replace_with| string. Returns true if any characters were replaced. |
michael@0 | 188 | // |replace_chars| must be null-terminated. |
michael@0 | 189 | // NOTE: Safe to use the same variable for both |input| and |output|. |
michael@0 | 190 | BASE_EXPORT bool ReplaceChars(const string16& input, |
michael@0 | 191 | const char16 replace_chars[], |
michael@0 | 192 | const string16& replace_with, |
michael@0 | 193 | string16* output); |
michael@0 | 194 | BASE_EXPORT bool ReplaceChars(const std::string& input, |
michael@0 | 195 | const char replace_chars[], |
michael@0 | 196 | const std::string& replace_with, |
michael@0 | 197 | std::string* output); |
michael@0 | 198 | |
michael@0 | 199 | // Removes characters in |trim_chars| from the beginning and end of |input|. |
michael@0 | 200 | // |trim_chars| must be null-terminated. |
michael@0 | 201 | // NOTE: Safe to use the same variable for both |input| and |output|. |
michael@0 | 202 | BASE_EXPORT bool TrimString(const std::wstring& input, |
michael@0 | 203 | const wchar_t trim_chars[], |
michael@0 | 204 | std::wstring* output); |
michael@0 | 205 | BASE_EXPORT bool TrimString(const string16& input, |
michael@0 | 206 | const char16 trim_chars[], |
michael@0 | 207 | string16* output); |
michael@0 | 208 | BASE_EXPORT bool TrimString(const std::string& input, |
michael@0 | 209 | const char trim_chars[], |
michael@0 | 210 | std::string* output); |
michael@0 | 211 | |
michael@0 | 212 | // Truncates a string to the nearest UTF-8 character that will leave |
michael@0 | 213 | // the string less than or equal to the specified byte size. |
michael@0 | 214 | BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, |
michael@0 | 215 | const size_t byte_size, |
michael@0 | 216 | std::string* output); |
michael@0 | 217 | |
michael@0 | 218 | // Trims any whitespace from either end of the input string. Returns, as a |
michael@0 | 219 | // TrimPositions value, where whitespace was found. |
michael@0 | 220 | // For std::string input, use: |
michael@0 | 221 | // * TrimWhitespaceASCII() |
michael@0 | 222 | // This function is for ASCII strings and only looks for ASCII whitespace. |
michael@0 | 223 | // For string16 input, use the TrimWhitespace() overload that takes string16. |
michael@0 | 224 | // NOTE: Safe to use the same variable for both input and output. |
michael@0 | 225 | enum TrimPositions { |
michael@0 | 226 | TRIM_NONE = 0, |
michael@0 | 227 | TRIM_LEADING = 1 << 0, |
michael@0 | 228 | TRIM_TRAILING = 1 << 1, |
michael@0 | 229 | TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, |
michael@0 | 230 | }; |
michael@0 | 231 | BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, |
michael@0 | 232 | TrimPositions positions, |
michael@0 | 233 | string16* output); |
michael@0 | 234 | BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, |
michael@0 | 235 | TrimPositions positions, |
michael@0 | 236 | std::string* output); |
michael@0 | 237 | |
michael@0 | 238 | // Deprecated. This function is only for backward compatibility and calls |
michael@0 | 239 | // TrimWhitespaceASCII(). |
michael@0 | 240 | BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, |
michael@0 | 241 | TrimPositions positions, |
michael@0 | 242 | std::string* output); |
michael@0 | 243 | |
michael@0 | 244 | // Searches for CR or LF characters. Removes all contiguous whitespace |
michael@0 | 245 | // strings that contain them. This is useful when trying to deal with text |
michael@0 | 246 | // copied from terminals. |
michael@0 | 247 | // Returns |text|, with the following three transformations: |
michael@0 | 248 | // (1) Leading and trailing whitespace is trimmed. |
michael@0 | 249 | // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace |
michael@0 | 250 | // sequences containing a CR or LF are trimmed. |
michael@0 | 251 | // (3) All other whitespace sequences are converted to single spaces. |
michael@0 | 252 | BASE_EXPORT std::wstring CollapseWhitespace( |
michael@0 | 253 | const std::wstring& text, |
michael@0 | 254 | bool trim_sequences_with_line_breaks); |
michael@0 | 255 | BASE_EXPORT string16 CollapseWhitespace( |
michael@0 | 256 | const string16& text, |
michael@0 | 257 | bool trim_sequences_with_line_breaks); |
michael@0 | 258 | BASE_EXPORT std::string CollapseWhitespaceASCII( |
michael@0 | 259 | const std::string& text, |
michael@0 | 260 | bool trim_sequences_with_line_breaks); |
michael@0 | 261 | |
michael@0 | 262 | // Returns true if the passed string is empty or contains only white-space |
michael@0 | 263 | // characters. |
michael@0 | 264 | BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); |
michael@0 | 265 | BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); |
michael@0 | 266 | |
michael@0 | 267 | // Returns true if |input| is empty or contains only characters found in |
michael@0 | 268 | // |characters|. |
michael@0 | 269 | BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, |
michael@0 | 270 | const std::wstring& characters); |
michael@0 | 271 | BASE_EXPORT bool ContainsOnlyChars(const string16& input, |
michael@0 | 272 | const string16& characters); |
michael@0 | 273 | BASE_EXPORT bool ContainsOnlyChars(const std::string& input, |
michael@0 | 274 | const std::string& characters); |
michael@0 | 275 | |
michael@0 | 276 | // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII |
michael@0 | 277 | // beforehand. |
michael@0 | 278 | BASE_EXPORT std::string WideToASCII(const std::wstring& wide); |
michael@0 | 279 | BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); |
michael@0 | 280 | |
michael@0 | 281 | // Converts the given wide string to the corresponding Latin1. This will fail |
michael@0 | 282 | // (return false) if any characters are more than 255. |
michael@0 | 283 | BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); |
michael@0 | 284 | |
michael@0 | 285 | // Returns true if the specified string matches the criteria. How can a wide |
michael@0 | 286 | // string be 8-bit or UTF8? It contains only characters that are < 256 (in the |
michael@0 | 287 | // first case) or characters that use only 8-bits and whose 8-bit |
michael@0 | 288 | // representation looks like a UTF-8 string (the second case). |
michael@0 | 289 | // |
michael@0 | 290 | // Note that IsStringUTF8 checks not only if the input is structurally |
michael@0 | 291 | // valid but also if it doesn't contain any non-character codepoint |
michael@0 | 292 | // (e.g. U+FFFE). It's done on purpose because all the existing callers want |
michael@0 | 293 | // to have the maximum 'discriminating' power from other encodings. If |
michael@0 | 294 | // there's a use case for just checking the structural validity, we have to |
michael@0 | 295 | // add a new function for that. |
michael@0 | 296 | BASE_EXPORT bool IsStringUTF8(const std::string& str); |
michael@0 | 297 | BASE_EXPORT bool IsStringASCII(const std::wstring& str); |
michael@0 | 298 | BASE_EXPORT bool IsStringASCII(const string16& str); |
michael@0 | 299 | |
michael@0 | 300 | // In-place variant (takes a pointer): lower-cases every element of |*s|. |
michael@0 | 301 | template <class str> inline void StringToLowerASCII(str* s) { |
michael@0 | 302 | for (typename str::iterator it = s->begin(); it != s->end(); ++it) { |
michael@0 | 303 | *it = base::ToLowerASCII(*it); |
michael@0 | 304 | } |
michael@0 | 305 | } |
michael@0 | 306 | |
michael@0 | 307 | template <class str> inline str StringToLowerASCII(const str& s) { |
michael@0 | 308 | // Works on a copy, so |s| is left untouched (std::string and std::wstring). |
michael@0 | 309 | str lowered(s); |
michael@0 | 310 | StringToLowerASCII(&lowered); |
michael@0 | 311 | return lowered; |
michael@0 | 312 | } |
michael@0 | 313 | |
michael@0 | 314 | // In-place variant (takes a pointer): upper-cases every element of |*s|. |
michael@0 | 315 | template <class str> inline void StringToUpperASCII(str* s) { |
michael@0 | 316 | for (typename str::iterator it = s->begin(); it != s->end(); ++it) { |
michael@0 | 317 | *it = base::ToUpperASCII(*it); |
michael@0 | 318 | } |
michael@0 | 319 | } |
michael@0 | 320 | |
michael@0 | 321 | template <class str> inline str StringToUpperASCII(const str& s) { |
michael@0 | 322 | // Works on a copy, so |s| is left untouched (std::string and std::wstring). |
michael@0 | 323 | str uppered(s); |
michael@0 | 324 | StringToUpperASCII(&uppered); |
michael@0 | 325 | return uppered; |
michael@0 | 326 | } |
michael@0 | 327 | |
michael@0 | 328 | // Compare the lower-case form of the given string against the given ASCII |
michael@0 | 329 | // string. This is useful for doing checking if an input string matches some |
michael@0 | 330 | // token, and it is optimized to avoid intermediate string copies. This API is |
michael@0 | 331 | // borrowed from the equivalent APIs in Mozilla. |
michael@0 | 332 | BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); |
michael@0 | 333 | BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); |
michael@0 | 334 | BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); |
michael@0 | 335 | |
michael@0 | 336 | // Same thing, but with string iterators instead. |
michael@0 | 337 | BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, |
michael@0 | 338 | std::string::const_iterator a_end, |
michael@0 | 339 | const char* b); |
michael@0 | 340 | BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, |
michael@0 | 341 | std::wstring::const_iterator a_end, |
michael@0 | 342 | const char* b); |
michael@0 | 343 | BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, |
michael@0 | 344 | string16::const_iterator a_end, |
michael@0 | 345 | const char* b); |
michael@0 | 346 | BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, |
michael@0 | 347 | const char* a_end, |
michael@0 | 348 | const char* b); |
michael@0 | 349 | BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, |
michael@0 | 350 | const wchar_t* a_end, |
michael@0 | 351 | const char* b); |
michael@0 | 352 | BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, |
michael@0 | 353 | const char16* a_end, |
michael@0 | 354 | const char* b); |
michael@0 | 355 | |
michael@0 | 356 | // Performs a case-sensitive string compare. The behavior is undefined if both |
michael@0 | 357 | // strings are not ASCII. |
michael@0 | 358 | BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); |
michael@0 | 359 | |
michael@0 | 360 | // Returns true if str starts with search, or false otherwise. |
michael@0 | 361 | BASE_EXPORT bool StartsWithASCII(const std::string& str, |
michael@0 | 362 | const std::string& search, |
michael@0 | 363 | bool case_sensitive); |
michael@0 | 364 | BASE_EXPORT bool StartsWith(const std::wstring& str, |
michael@0 | 365 | const std::wstring& search, |
michael@0 | 366 | bool case_sensitive); |
michael@0 | 367 | BASE_EXPORT bool StartsWith(const string16& str, |
michael@0 | 368 | const string16& search, |
michael@0 | 369 | bool case_sensitive); |
michael@0 | 370 | |
michael@0 | 371 | // Returns true if str ends with search, or false otherwise. |
michael@0 | 372 | BASE_EXPORT bool EndsWith(const std::string& str, |
michael@0 | 373 | const std::string& search, |
michael@0 | 374 | bool case_sensitive); |
michael@0 | 375 | BASE_EXPORT bool EndsWith(const std::wstring& str, |
michael@0 | 376 | const std::wstring& search, |
michael@0 | 377 | bool case_sensitive); |
michael@0 | 378 | BASE_EXPORT bool EndsWith(const string16& str, |
michael@0 | 379 | const string16& search, |
michael@0 | 380 | bool case_sensitive); |
michael@0 | 381 | |
michael@0 | 382 | |
michael@0 | 383 | // Determines the type of ASCII character, independent of locale (the C |
michael@0 | 384 | // library versions will change based on locale). |
michael@0 | 385 | // True for space, tab, carriage return and line feed only. |
michael@0 | 386 | template <typename Char> inline bool IsAsciiWhitespace(Char c) { |
michael@0 | 387 | return c == ' ' || c == '\t' || c == '\r' || c == '\n'; |
michael@0 | 388 | } |
michael@0 | 389 | template <typename Char> inline bool IsAsciiAlpha(Char c) { |
michael@0 | 390 | if (c >= 'a' && c <= 'z') return true; |
michael@0 | 391 | return c >= 'A' && c <= 'Z'; |
michael@0 | 392 | } |
michael@0 | 393 | template <typename Char> inline bool IsAsciiDigit(Char c) { |
michael@0 | 394 | if (c < '0') return false; |
michael@0 | 395 | return c <= '9'; |
michael@0 | 396 | } |
michael@0 | 397 | |
michael@0 | 398 | // True for ASCII '0'-'9', 'A'-'F' and 'a'-'f'. |
michael@0 | 399 | template <typename Char> inline bool IsHexDigit(Char c) { |
michael@0 | 400 | if (c >= '0' && c <= '9') return true; |
michael@0 | 401 | if (c >= 'a' && c <= 'f') return true; |
michael@0 | 402 | return c >= 'A' && c <= 'F'; |
michael@0 | 403 | } |
michael@0 | 404 | |
michael@0 | 405 | // Numeric value of hex digit |c|; 0 if |c| is not one (DCHECKed beforehand). |
michael@0 | 406 | template <typename Char> inline Char HexDigitToInt(Char c) { |
michael@0 | 407 | DCHECK(IsHexDigit(c)); |
michael@0 | 408 | if (c >= 'a' && c <= 'f') |
michael@0 | 409 | return c - 'a' + 10; |
michael@0 | 410 | if (c >= 'A' && c <= 'F') |
michael@0 | 411 | return c - 'A' + 10; |
michael@0 | 412 | if (c >= '0' && c <= '9') |
michael@0 | 413 | return c - '0'; |
michael@0 | 414 | return 0; |
michael@0 | 415 | } |
michael@0 | 416 | |
michael@0 | 417 | // Returns true for whitespace; NUL must not match wcschr()'s terminator. |
michael@0 | 418 | inline bool IsWhitespace(wchar_t c) { |
michael@0 | 419 | return c != L'\0' && wcschr(kWhitespaceWide, c) != NULL; |
michael@0 | 420 | } |
michael@0 | 421 | |
michael@0 | 422 | // Return a byte string in human-readable format with a unit suffix. Not |
michael@0 | 423 | // appropriate for use in any UI; use of FormatBytes and friends in ui/base is |
michael@0 | 424 | // highly recommended instead. TODO(avi): Figure out how to get callers to use |
michael@0 | 425 | // FormatBytes instead; remove this. |
michael@0 | 426 | BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); |
michael@0 | 427 | |
michael@0 | 428 | // Starting at |start_offset| (usually 0), replace the first instance of |
michael@0 | 429 | // |find_this| with |replace_with|. |
michael@0 | 430 | BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
michael@0 | 431 | string16* str, |
michael@0 | 432 | string16::size_type start_offset, |
michael@0 | 433 | const string16& find_this, |
michael@0 | 434 | const string16& replace_with); |
michael@0 | 435 | BASE_EXPORT void ReplaceFirstSubstringAfterOffset( |
michael@0 | 436 | std::string* str, |
michael@0 | 437 | std::string::size_type start_offset, |
michael@0 | 438 | const std::string& find_this, |
michael@0 | 439 | const std::string& replace_with); |
michael@0 | 440 | |
michael@0 | 441 | // Starting at |start_offset| (usually 0), look through |str| and replace all |
michael@0 | 442 | // instances of |find_this| with |replace_with|. |
michael@0 | 443 | // |
michael@0 | 444 | // This does entire substrings; use std::replace in <algorithm> for single |
michael@0 | 445 | // characters, for example: |
michael@0 | 446 | // std::replace(str.begin(), str.end(), 'a', 'b'); |
michael@0 | 447 | BASE_EXPORT void ReplaceSubstringsAfterOffset( |
michael@0 | 448 | string16* str, |
michael@0 | 449 | string16::size_type start_offset, |
michael@0 | 450 | const string16& find_this, |
michael@0 | 451 | const string16& replace_with); |
michael@0 | 452 | BASE_EXPORT void ReplaceSubstringsAfterOffset( |
michael@0 | 453 | std::string* str, |
michael@0 | 454 | std::string::size_type start_offset, |
michael@0 | 455 | const std::string& find_this, |
michael@0 | 456 | const std::string& replace_with); |
michael@0 | 457 | |
michael@0 | 458 | // Reserves enough memory in |str| to accommodate |length_with_null| characters, |
michael@0 | 459 | // sets the size of |str| to |length_with_null - 1| characters, and returns a |
michael@0 | 460 | // pointer to the underlying contiguous array of characters. This is typically |
michael@0 | 461 | // used when calling a function that writes results into a character array, but |
michael@0 | 462 | // the caller wants the data to be managed by a string-like object. It is |
michael@0 | 463 | // convenient in that it can be used inline in the call, and fast in that it |
michael@0 | 464 | // avoids copying the results of the call from a char* into a string. |
michael@0 | 465 | // |
michael@0 | 466 | // |length_with_null| must be at least 2, since otherwise the underlying string |
michael@0 | 467 | // would have size 0, and trying to access &((*str)[0]) in that case can result |
michael@0 | 468 | // in a number of problems. |
michael@0 | 469 | // |
michael@0 | 470 | // Internally, this takes linear time because the resize() call 0-fills the |
michael@0 | 471 | // underlying array for potentially all |
michael@0 | 472 | // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we |
michael@0 | 473 | // could avoid this aspect of the resize() call, as we expect the caller to |
michael@0 | 474 | // immediately write over this memory, but there is no other way to set the size |
michael@0 | 475 | // of the string, and not doing that will mean people who access |str| rather |
michael@0 | 476 | // than str.c_str() will get back a string of whatever size |str| had on entry |
michael@0 | 477 | // to this function (probably 0). |
michael@0 | 478 | template <class string_type> inline typename string_type::value_type* |
michael@0 | 479 | WriteInto(string_type* str, size_t length_with_null) { |
michael@0 | 480 | DCHECK_GT(length_with_null, 1u); |
michael@0 | 481 | string_type& target = *str; |
michael@0 | 482 | target.reserve(length_with_null); |
michael@0 | 483 | target.resize(length_with_null - 1); |
michael@0 | 484 | return &target[0]; |
michael@0 | 485 | } |
michael@0 | 486 | |
michael@0 | 487 | //----------------------------------------------------------------------------- |
michael@0 | 488 | |
michael@0 | 489 | // Splits a string into its fields delimited by any of the characters in |
michael@0 | 490 | // |delimiters|. Each field is added to the |tokens| vector. Returns the |
michael@0 | 491 | // number of tokens found. |
michael@0 | 492 | BASE_EXPORT size_t Tokenize(const std::wstring& str, |
michael@0 | 493 | const std::wstring& delimiters, |
michael@0 | 494 | std::vector<std::wstring>* tokens); |
michael@0 | 495 | BASE_EXPORT size_t Tokenize(const string16& str, |
michael@0 | 496 | const string16& delimiters, |
michael@0 | 497 | std::vector<string16>* tokens); |
michael@0 | 498 | BASE_EXPORT size_t Tokenize(const std::string& str, |
michael@0 | 499 | const std::string& delimiters, |
michael@0 | 500 | std::vector<std::string>* tokens); |
michael@0 | 501 | BASE_EXPORT size_t Tokenize(const base::StringPiece& str, |
michael@0 | 502 | const base::StringPiece& delimiters, |
michael@0 | 503 | std::vector<base::StringPiece>* tokens); |
michael@0 | 504 | |
michael@0 | 505 | // Does the opposite of SplitString(). |
michael@0 | 506 | BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); |
michael@0 | 507 | BASE_EXPORT std::string JoinString( |
michael@0 | 508 | const std::vector<std::string>& parts, char s); |
michael@0 | 509 | |
michael@0 | 510 | // Join |parts| using |separator|. |
michael@0 | 511 | BASE_EXPORT std::string JoinString( |
michael@0 | 512 | const std::vector<std::string>& parts, |
michael@0 | 513 | const std::string& separator); |
michael@0 | 514 | BASE_EXPORT string16 JoinString( |
michael@0 | 515 | const std::vector<string16>& parts, |
michael@0 | 516 | const string16& separator); |
michael@0 | 517 | |
michael@0 | 518 | // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. |
michael@0 | 519 | // Additionally, any number of consecutive '$' characters is replaced by that |
michael@0 | 520 | // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be |
michael@0 | 521 | // NULL. This only allows you to use up to nine replacements. |
michael@0 | 522 | BASE_EXPORT string16 ReplaceStringPlaceholders( |
michael@0 | 523 | const string16& format_string, |
michael@0 | 524 | const std::vector<string16>& subst, |
michael@0 | 525 | std::vector<size_t>* offsets); |
michael@0 | 526 | |
michael@0 | 527 | BASE_EXPORT std::string ReplaceStringPlaceholders( |
michael@0 | 528 | const base::StringPiece& format_string, |
michael@0 | 529 | const std::vector<std::string>& subst, |
michael@0 | 530 | std::vector<size_t>* offsets); |
michael@0 | 531 | |
michael@0 | 532 | // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL. |
michael@0 | 533 | BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, |
michael@0 | 534 | const string16& a, |
michael@0 | 535 | size_t* offset); |
michael@0 | 536 | |
michael@0 | 537 | // Returns true if the string passed in matches the pattern. The pattern |
michael@0 | 538 | // string can contain wildcards like * and ? |
michael@0 | 539 | // The backslash character (\) is an escape character for * and ? |
michael@0 | 540 | // We limit the patterns to having a max of 16 * or ? characters. |
michael@0 | 541 | // ? matches 0 or 1 character, while * matches 0 or more characters. |
michael@0 | 542 | BASE_EXPORT bool MatchPattern(const base::StringPiece& string, |
michael@0 | 543 | const base::StringPiece& pattern); |
michael@0 | 544 | BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); |
michael@0 | 545 | |
michael@0 | 546 | // Hack to convert any char-like type T to its unsigned counterpart, exposed as |
michael@0 | 547 | // ToUnsigned<T>::Unsigned: char and signed char become unsigned char, short |
michael@0 | 548 | // becomes unsigned short, wchar_t follows WCHAR_T_IS_UTF16/32, others stay T. |
michael@0 | 549 | template<typename T> |
michael@0 | 550 | struct ToUnsigned { |
michael@0 | 551 | typedef T Unsigned; |
michael@0 | 552 | }; |
michael@0 | 553 | |
michael@0 | 554 | template<> |
michael@0 | 555 | struct ToUnsigned<char> { |
michael@0 | 556 | typedef unsigned char Unsigned; |
michael@0 | 557 | }; |
michael@0 | 558 | template<> |
michael@0 | 559 | struct ToUnsigned<signed char> { |
michael@0 | 560 | typedef unsigned char Unsigned; |
michael@0 | 561 | }; |
michael@0 | 562 | template<> |
michael@0 | 563 | struct ToUnsigned<wchar_t> { |
michael@0 | 564 | #if defined(WCHAR_T_IS_UTF16) |
michael@0 | 565 | typedef unsigned short Unsigned; |
michael@0 | 566 | #elif defined(WCHAR_T_IS_UTF32) |
michael@0 | 567 | typedef uint32 Unsigned; |
michael@0 | 568 | #endif |
michael@0 | 569 | }; |
michael@0 | 570 | template<> |
michael@0 | 571 | struct ToUnsigned<short> { |
michael@0 | 572 | typedef unsigned short Unsigned; |
michael@0 | 573 | }; |
michael@0 | 574 | |
michael@0 | 575 | #endif // BASE_STRINGS_STRING_UTIL_H_ |