security/sandbox/chromium/base/strings/string_util.h

Wed, 31 Dec 2014 07:16:47 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:16:47 +0100
branch
TOR_BUG_9701
changeset 3
141e0f1194b1
permissions
-rw-r--r--

Revert simplistic fix pending revisit of Mozilla integration attempt.

michael@0 1 // Copyright 2013 The Chromium Authors. All rights reserved.
michael@0 2 // Use of this source code is governed by a BSD-style license that can be
michael@0 3 // found in the LICENSE file.
michael@0 4 //
michael@0 5 // This file defines utility functions for working with strings.
michael@0 6
michael@0 7 #ifndef BASE_STRINGS_STRING_UTIL_H_
michael@0 8 #define BASE_STRINGS_STRING_UTIL_H_
michael@0 9
michael@0 10 #include <ctype.h>
michael@0 11 #include <stdarg.h> // va_list
michael@0 12
michael@0 13 #include <string>
michael@0 14 #include <vector>
michael@0 15
michael@0 16 #include "base/base_export.h"
michael@0 17 #include "base/basictypes.h"
michael@0 18 #include "base/compiler_specific.h"
michael@0 19 #include "base/strings/string16.h"
michael@0 20 #include "base/strings/string_piece.h" // For implicit conversions.
michael@0 21
michael@0 22 // Safe standard library wrappers for all platforms.
michael@0 23
michael@0 24 namespace base {
michael@0 25
michael@0 26 // C standard-library functions like "strncasecmp" and "snprintf" that aren't
michael@0 27 // cross-platform are provided as "base::strncasecmp", and their prototypes
michael@0 28 // are listed below. These functions are then implemented as inline calls
michael@0 29 // to the platform-specific equivalents in the platform-specific headers.
michael@0 30
michael@0 31 // Compares the two strings s1 and s2 without regard to case using
michael@0 32 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
michael@0 33 // s2 > s1 according to a lexicographic comparison.
michael@0 34 int strcasecmp(const char* s1, const char* s2);
michael@0 35
michael@0 36 // Compares up to count characters of s1 and s2 without regard to case using
michael@0 37 // the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
michael@0 38 // s2 > s1 according to a lexicographic comparison.
michael@0 39 int strncasecmp(const char* s1, const char* s2, size_t count);
michael@0 40
michael@0 41 // Same as strncmp but for char16 strings.
michael@0 42 int strncmp16(const char16* s1, const char16* s2, size_t count);
michael@0 43
michael@0 44 // Wrapper for vsnprintf that always null-terminates and always returns the
michael@0 45 // number of characters that would be in an untruncated formatted
michael@0 46 // string, even when truncation occurs.
michael@0 47 int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
michael@0 48 PRINTF_FORMAT(3, 0);
michael@0 49
michael@0 50 // vswprintf always null-terminates, but when truncation occurs, it will either
michael@0 51 // return -1 or the number of characters that would be in an untruncated
michael@0 52 // formatted string. The actual return value depends on the underlying
michael@0 53 // C library's vswprintf implementation.
michael@0 54 int vswprintf(wchar_t* buffer, size_t size,
michael@0 55 const wchar_t* format, va_list arguments)
michael@0 56 WPRINTF_FORMAT(3, 0);
michael@0 57
michael@0 58 // Some of these implementations need to be inlined.
michael@0 59
michael@0 60 // We separate the declaration from the implementation of this inline
michael@0 61 // function just so the PRINTF_FORMAT works.
michael@0 62 inline int snprintf(char* buffer, size_t size, const char* format, ...)
michael@0 63 PRINTF_FORMAT(3, 4);
michael@0 64 inline int snprintf(char* buffer, size_t size, const char* format, ...) {
michael@0 65 va_list arguments;
michael@0 66 va_start(arguments, format);
michael@0 67 int result = vsnprintf(buffer, size, format, arguments);
michael@0 68 va_end(arguments);
michael@0 69 return result;
michael@0 70 }
michael@0 71
michael@0 72 // We separate the declaration from the implementation of this inline
michael@0 73 // function just so the WPRINTF_FORMAT works.
michael@0 74 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...)
michael@0 75 WPRINTF_FORMAT(3, 4);
michael@0 76 inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) {
michael@0 77 va_list arguments;
michael@0 78 va_start(arguments, format);
michael@0 79 int result = vswprintf(buffer, size, format, arguments);
michael@0 80 va_end(arguments);
michael@0 81 return result;
michael@0 82 }
michael@0 83
michael@0 84 // BSD-style safe and consistent string copy functions.
michael@0 85 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
michael@0 86 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
michael@0 87 // long as |dst_size| is not 0. Returns the length of |src| in characters.
michael@0 88 // If the return value is >= dst_size, then the output was truncated.
michael@0 89 // NOTE: All sizes are in number of characters, NOT in bytes.
michael@0 90 BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
michael@0 91 BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
michael@0 92
michael@0 93 // Scan a wprintf format string to determine whether it's portable across a
michael@0 94 // variety of systems. This function only checks that the conversion
michael@0 95 // specifiers used by the format string are supported and have the same meaning
michael@0 96 // on a variety of systems. It doesn't check for other errors that might occur
michael@0 97 // within a format string.
michael@0 98 //
michael@0 99 // Nonportable conversion specifiers for wprintf are:
michael@0 100 // - 's' and 'c' without an 'l' length modifier. %s and %c operate on char
michael@0 101 // data on all systems except Windows, which treat them as wchar_t data.
michael@0 102 // Use %ls and %lc for wchar_t data instead.
michael@0 103 // - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
michael@0 104 // which treat them as char data. Use %ls and %lc for wchar_t data
michael@0 105 // instead.
michael@0 106 // - 'F', which is not identified by Windows wprintf documentation.
michael@0 107 // - 'D', 'O', and 'U', which are deprecated and not available on all systems.
michael@0 108 // Use %ld, %lo, and %lu instead.
michael@0 109 //
michael@0 110 // Note that there is no portable conversion specifier for char data when
michael@0 111 // working with wprintf.
michael@0 112 //
michael@0 113 // This function is intended to be called from base::vswprintf.
michael@0 114 BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
michael@0 115
// Locale-independent lower-casing: maps 'A'..'Z' onto 'a'..'z' and returns
// every other value untouched. The standard library's tolower() consults the
// current locale, which is not wanted here.
template <class Char> inline Char ToLowerASCII(Char c) {
  if (c < 'A' || c > 'Z')
    return c;
  return static_cast<Char>(c + ('a' - 'A'));
}
michael@0 121
// Locale-independent upper-casing: maps 'a'..'z' onto 'A'..'Z' and returns
// every other value untouched. The standard library's toupper() consults the
// current locale, which is not wanted here.
template <class Char> inline Char ToUpperASCII(Char c) {
  if (c < 'a' || c > 'z')
    return c;
  return static_cast<Char>(c + ('A' - 'a'));
}
michael@0 127
michael@0 128 // Function objects to aid in comparing/searching strings.
michael@0 129
// Equality predicate that compares two characters after folding both with
// the C library's (locale-sensitive) tolower().
template<typename Char> struct CaseInsensitiveCompare {
 public:
  // Returns true when |x| and |y| lower-case to the same value.
  bool operator()(Char x, Char y) const {
    // TODO(darin): Do we really want to do locale sensitive comparisons here?
    // See http://crbug.com/24917
    return tolower(ToCtypeArg(x)) == tolower(ToCtypeArg(y));
  }

 private:
  // <ctype.h> functions require an argument that is representable as
  // unsigned char (or EOF). A plain |char| with the high bit set would be
  // passed as a negative int, which is undefined behaviour, so single-byte
  // characters are routed through unsigned char first. Wider character types
  // are converted to int exactly as the implicit conversion did before.
  static int ToCtypeArg(Char c) {
    return sizeof(Char) == 1
        ? static_cast<int>(static_cast<unsigned char>(c))
        : static_cast<int>(c);
  }
};
michael@0 138
// Equality predicate that compares two characters after folding both with
// ToLowerASCII(), i.e. without consulting the locale.
template<typename Char> struct CaseInsensitiveCompareASCII {
 public:
  // Returns true when |x| and |y| are equal once ASCII-lower-cased.
  bool operator()(Char x, Char y) const {
    const Char folded_x = ToLowerASCII(x);
    const Char folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
michael@0 145
michael@0 146 } // namespace base
michael@0 147
michael@0 148 #if defined(OS_WIN)
michael@0 149 #include "base/strings/string_util_win.h"
michael@0 150 #elif defined(OS_POSIX)
michael@0 151 #include "base/strings/string_util_posix.h"
michael@0 152 #else
michael@0 153 #error Define string operations appropriately for your platform
michael@0 154 #endif
michael@0 155
michael@0 156 // These threadsafe functions return references to globally unique empty
michael@0 157 // strings.
michael@0 158 //
michael@0 159 // DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS.
michael@0 160 // There is only one case where you should use these: functions which need to
michael@0 161 // return a string by reference (e.g. as a class member accessor), and don't
michael@0 162 // have an empty string to use (e.g. in an error case). These should not be
michael@0 163 // used as initializers, function arguments, or return values for functions
michael@0 164 // which return by value or outparam.
michael@0 165 BASE_EXPORT const std::string& EmptyString();
michael@0 166 BASE_EXPORT const std::wstring& EmptyWString();
michael@0 167 BASE_EXPORT const string16& EmptyString16();
michael@0 168
michael@0 169 BASE_EXPORT extern const wchar_t kWhitespaceWide[];
michael@0 170 BASE_EXPORT extern const char16 kWhitespaceUTF16[];
michael@0 171 BASE_EXPORT extern const char kWhitespaceASCII[];
michael@0 172
michael@0 173 BASE_EXPORT extern const char kUtf8ByteOrderMark[];
michael@0 174
michael@0 175 // Removes characters in |remove_chars| from anywhere in |input|. Returns true
michael@0 176 // if any characters were removed. |remove_chars| must be null-terminated.
michael@0 177 // NOTE: Safe to use the same variable for both |input| and |output|.
michael@0 178 BASE_EXPORT bool RemoveChars(const string16& input,
michael@0 179 const char16 remove_chars[],
michael@0 180 string16* output);
michael@0 181 BASE_EXPORT bool RemoveChars(const std::string& input,
michael@0 182 const char remove_chars[],
michael@0 183 std::string* output);
michael@0 184
michael@0 185 // Replaces characters in |replace_chars| from anywhere in |input| with
michael@0 186 // |replace_with|. Each character in |replace_chars| will be replaced with
michael@0 187 // the |replace_with| string. Returns true if any characters were replaced.
michael@0 188 // |replace_chars| must be null-terminated.
michael@0 189 // NOTE: Safe to use the same variable for both |input| and |output|.
michael@0 190 BASE_EXPORT bool ReplaceChars(const string16& input,
michael@0 191 const char16 replace_chars[],
michael@0 192 const string16& replace_with,
michael@0 193 string16* output);
michael@0 194 BASE_EXPORT bool ReplaceChars(const std::string& input,
michael@0 195 const char replace_chars[],
michael@0 196 const std::string& replace_with,
michael@0 197 std::string* output);
michael@0 198
michael@0 199 // Removes characters in |trim_chars| from the beginning and end of |input|.
michael@0 200 // |trim_chars| must be null-terminated.
michael@0 201 // NOTE: Safe to use the same variable for both |input| and |output|.
michael@0 202 BASE_EXPORT bool TrimString(const std::wstring& input,
michael@0 203 const wchar_t trim_chars[],
michael@0 204 std::wstring* output);
michael@0 205 BASE_EXPORT bool TrimString(const string16& input,
michael@0 206 const char16 trim_chars[],
michael@0 207 string16* output);
michael@0 208 BASE_EXPORT bool TrimString(const std::string& input,
michael@0 209 const char trim_chars[],
michael@0 210 std::string* output);
michael@0 211
michael@0 212 // Truncates a string to the nearest UTF-8 character that will leave
michael@0 213 // the string less than or equal to the specified byte size.
michael@0 214 BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
michael@0 215 const size_t byte_size,
michael@0 216 std::string* output);
michael@0 217
michael@0 218 // Trims any whitespace from either end of the input string. Returns where
michael@0 219 // whitespace was found.
michael@0 220 // The non-wide (std::string) version is:
michael@0 221 // * TrimWhitespaceASCII()
michael@0 222 // This function is for ASCII strings and only looks for ASCII whitespace.
michael@0 223 // Please choose the best one according to your usage.
michael@0 224 // NOTE: Safe to use the same variable for both input and output.
// Bit flags naming the ends of a string. The trimming functions declared
// below take one as their |positions| argument and also return one.
enum TrimPositions {
  TRIM_NONE     = 0x0,                           // Neither end.
  TRIM_LEADING  = 0x1,                           // Start of the string.
  TRIM_TRAILING = 0x2,                           // End of the string.
  TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING   // Both ends.
};
michael@0 231 BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
michael@0 232 TrimPositions positions,
michael@0 233 string16* output);
michael@0 234 BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
michael@0 235 TrimPositions positions,
michael@0 236 std::string* output);
michael@0 237
michael@0 238 // Deprecated. This function is only for backward compatibility and calls
michael@0 239 // TrimWhitespaceASCII().
michael@0 240 BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input,
michael@0 241 TrimPositions positions,
michael@0 242 std::string* output);
michael@0 243
michael@0 244 // Searches for CR or LF characters. Removes all contiguous whitespace
michael@0 245 // strings that contain them. This is useful when trying to deal with text
michael@0 246 // copied from terminals.
michael@0 247 // Returns |text|, with the following three transformations:
michael@0 248 // (1) Leading and trailing whitespace is trimmed.
michael@0 249 // (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
michael@0 250 // sequences containing a CR or LF are trimmed.
michael@0 251 // (3) All other whitespace sequences are converted to single spaces.
michael@0 252 BASE_EXPORT std::wstring CollapseWhitespace(
michael@0 253 const std::wstring& text,
michael@0 254 bool trim_sequences_with_line_breaks);
michael@0 255 BASE_EXPORT string16 CollapseWhitespace(
michael@0 256 const string16& text,
michael@0 257 bool trim_sequences_with_line_breaks);
michael@0 258 BASE_EXPORT std::string CollapseWhitespaceASCII(
michael@0 259 const std::string& text,
michael@0 260 bool trim_sequences_with_line_breaks);
michael@0 261
michael@0 262 // Returns true if the passed string is empty or contains only white-space
michael@0 263 // characters.
michael@0 264 BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str);
michael@0 265 BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str);
michael@0 266
michael@0 267 // Returns true if |input| is empty or contains only characters found in
michael@0 268 // |characters|.
michael@0 269 BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input,
michael@0 270 const std::wstring& characters);
michael@0 271 BASE_EXPORT bool ContainsOnlyChars(const string16& input,
michael@0 272 const string16& characters);
michael@0 273 BASE_EXPORT bool ContainsOnlyChars(const std::string& input,
michael@0 274 const std::string& characters);
michael@0 275
michael@0 276 // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
michael@0 277 // beforehand.
michael@0 278 BASE_EXPORT std::string WideToASCII(const std::wstring& wide);
michael@0 279 BASE_EXPORT std::string UTF16ToASCII(const string16& utf16);
michael@0 280
michael@0 281 // Converts the given wide string to the corresponding Latin1. This will fail
michael@0 282 // (return false) if any characters are more than 255.
michael@0 283 BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1);
michael@0 284
michael@0 285 // Returns true if the specified string matches the criteria. How can a wide
michael@0 286 // string be 8-bit or UTF8? It contains only characters that are < 256 (in the
michael@0 287 // first case) or characters that use only 8-bits and whose 8-bit
michael@0 288 // representation looks like a UTF-8 string (the second case).
michael@0 289 //
michael@0 290 // Note that IsStringUTF8 checks not only if the input is structurally
michael@0 291 // valid but also if it doesn't contain any non-character codepoint
michael@0 292 // (e.g. U+FFFE). It's done on purpose because all the existing callers want
michael@0 293 // to have the maximum 'discriminating' power from other encodings. If
michael@0 294 // there's a use case for just checking the structural validity, we have to
michael@0 295 // add a new function for that.
michael@0 296 BASE_EXPORT bool IsStringUTF8(const std::string& str);
michael@0 297 BASE_EXPORT bool IsStringASCII(const std::wstring& str);
michael@0 298 BASE_EXPORT bool IsStringASCII(const string16& str);
michael@0 299
michael@0 300 // Converts the elements of the given string. This version uses a pointer to
michael@0 301 // clearly differentiate it from the non-pointer variant.
michael@0 302 template <class str> inline void StringToLowerASCII(str* s) {
michael@0 303 for (typename str::iterator i = s->begin(); i != s->end(); ++i)
michael@0 304 *i = base::ToLowerASCII(*i);
michael@0 305 }
michael@0 306
// Returns an ASCII-lower-cased copy of |s|; the argument itself is left
// unmodified. Intended for std::string and std::wstring.
template <class str> inline str StringToLowerASCII(const str& s) {
  str lowered(s);
  StringToLowerASCII(&lowered);  // In-place overload does the actual work.
  return lowered;
}
michael@0 313
michael@0 314 // Converts the elements of the given string. This version uses a pointer to
michael@0 315 // clearly differentiate it from the non-pointer variant.
michael@0 316 template <class str> inline void StringToUpperASCII(str* s) {
michael@0 317 for (typename str::iterator i = s->begin(); i != s->end(); ++i)
michael@0 318 *i = base::ToUpperASCII(*i);
michael@0 319 }
michael@0 320
// Returns an ASCII-upper-cased copy of |s|; the argument itself is left
// unmodified. Intended for std::string and std::wstring.
template <class str> inline str StringToUpperASCII(const str& s) {
  str raised(s);
  StringToUpperASCII(&raised);  // In-place overload does the actual work.
  return raised;
}
michael@0 327
michael@0 328 // Compare the lower-case form of the given string against the given ASCII
michael@0 329 // string. This is useful for doing checking if an input string matches some
michael@0 330 // token, and it is optimized to avoid intermediate string copies. This API is
michael@0 331 // borrowed from the equivalent APIs in Mozilla.
michael@0 332 BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b);
michael@0 333 BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b);
michael@0 334 BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b);
michael@0 335
michael@0 336 // Same thing, but with string iterators instead.
michael@0 337 BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
michael@0 338 std::string::const_iterator a_end,
michael@0 339 const char* b);
michael@0 340 BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
michael@0 341 std::wstring::const_iterator a_end,
michael@0 342 const char* b);
michael@0 343 BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
michael@0 344 string16::const_iterator a_end,
michael@0 345 const char* b);
michael@0 346 BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
michael@0 347 const char* a_end,
michael@0 348 const char* b);
michael@0 349 BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin,
michael@0 350 const wchar_t* a_end,
michael@0 351 const char* b);
michael@0 352 BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
michael@0 353 const char16* a_end,
michael@0 354 const char* b);
michael@0 355
michael@0 356 // Performs a case-sensitive string compare. The behavior is undefined if both
michael@0 357 // strings are not ASCII.
michael@0 358 BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b);
michael@0 359
michael@0 360 // Returns true if str starts with search, or false otherwise.
michael@0 361 BASE_EXPORT bool StartsWithASCII(const std::string& str,
michael@0 362 const std::string& search,
michael@0 363 bool case_sensitive);
michael@0 364 BASE_EXPORT bool StartsWith(const std::wstring& str,
michael@0 365 const std::wstring& search,
michael@0 366 bool case_sensitive);
michael@0 367 BASE_EXPORT bool StartsWith(const string16& str,
michael@0 368 const string16& search,
michael@0 369 bool case_sensitive);
michael@0 370
michael@0 371 // Returns true if str ends with search, or false otherwise.
michael@0 372 BASE_EXPORT bool EndsWith(const std::string& str,
michael@0 373 const std::string& search,
michael@0 374 bool case_sensitive);
michael@0 375 BASE_EXPORT bool EndsWith(const std::wstring& str,
michael@0 376 const std::wstring& search,
michael@0 377 bool case_sensitive);
michael@0 378 BASE_EXPORT bool EndsWith(const string16& str,
michael@0 379 const string16& search,
michael@0 380 bool case_sensitive);
michael@0 381
michael@0 382
// Locale-independent ASCII character classification. (The C library
// equivalents change their answers based on the current locale.)

// True for space, horizontal tab, line feed and carriage return only.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  if (c == ' ' || c == '\t')
    return true;
  return c == '\n' || c == '\r';
}
// True when |c| lies in 'A'..'Z' or 'a'..'z'.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  const bool is_upper = (c >= 'A') && (c <= 'Z');
  const bool is_lower = (c >= 'a') && (c <= 'z');
  return is_upper || is_lower;
}
// True when |c| lies in '0'..'9'.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  if (c < '0')
    return false;
  return c <= '9';
}
michael@0 397
// True when |c| is a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F'.
template <typename Char>
inline bool IsHexDigit(Char c) {
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'a' && c <= 'f')
    return true;
  return c >= 'A' && c <= 'F';
}
michael@0 404
// Converts a hexadecimal digit character to its numeric value (0..15),
// returned in the same character type. |c| is DCHECKed to be a hex digit;
// if it is not, 0 is returned.
template <typename Char>
inline Char HexDigitToInt(Char c) {
  DCHECK(IsHexDigit(c));
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  } else if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  } else if (c >= '0' && c <= '9') {
    return c - '0';
  }
  return 0;
}
michael@0 416
michael@0 417 // Returns true if it's a whitespace character.
michael@0 418 inline bool IsWhitespace(wchar_t c) {
michael@0 419 return wcschr(kWhitespaceWide, c) != NULL;
michael@0 420 }
michael@0 421
michael@0 422 // Return a byte string in human-readable format with a unit suffix. Not
michael@0 423 // appropriate for use in any UI; use of FormatBytes and friends in ui/base is
michael@0 424 // highly recommended instead. TODO(avi): Figure out how to get callers to use
michael@0 425 // FormatBytes instead; remove this.
michael@0 426 BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);
michael@0 427
michael@0 428 // Starting at |start_offset| (usually 0), replace the first instance of
michael@0 429 // |find_this| with |replace_with|.
michael@0 430 BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
michael@0 431 string16* str,
michael@0 432 string16::size_type start_offset,
michael@0 433 const string16& find_this,
michael@0 434 const string16& replace_with);
michael@0 435 BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
michael@0 436 std::string* str,
michael@0 437 std::string::size_type start_offset,
michael@0 438 const std::string& find_this,
michael@0 439 const std::string& replace_with);
michael@0 440
michael@0 441 // Starting at |start_offset| (usually 0), look through |str| and replace all
michael@0 442 // instances of |find_this| with |replace_with|.
michael@0 443 //
michael@0 444 // This does entire substrings; use std::replace in <algorithm> for single
michael@0 445 // characters, for example:
michael@0 446 // std::replace(str.begin(), str.end(), 'a', 'b');
michael@0 447 BASE_EXPORT void ReplaceSubstringsAfterOffset(
michael@0 448 string16* str,
michael@0 449 string16::size_type start_offset,
michael@0 450 const string16& find_this,
michael@0 451 const string16& replace_with);
michael@0 452 BASE_EXPORT void ReplaceSubstringsAfterOffset(
michael@0 453 std::string* str,
michael@0 454 std::string::size_type start_offset,
michael@0 455 const std::string& find_this,
michael@0 456 const std::string& replace_with);
michael@0 457
michael@0 458 // Reserves enough memory in |str| to accommodate |length_with_null| characters,
michael@0 459 // sets the size of |str| to |length_with_null - 1| characters, and returns a
michael@0 460 // pointer to the underlying contiguous array of characters. This is typically
michael@0 461 // used when calling a function that writes results into a character array, but
michael@0 462 // the caller wants the data to be managed by a string-like object. It is
michael@0 463 // convenient in that it can be used inline in the call, and fast in that it
michael@0 464 // avoids copying the results of the call from a char* into a string.
michael@0 465 //
michael@0 466 // |length_with_null| must be at least 2, since otherwise the underlying string
michael@0 467 // would have size 0, and trying to access &((*str)[0]) in that case can result
michael@0 468 // in a number of problems.
michael@0 469 //
michael@0 470 // Internally, this takes linear time because the resize() call 0-fills the
michael@0 471 // underlying array for potentially all
michael@0 472 // (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
michael@0 473 // could avoid this aspect of the resize() call, as we expect the caller to
michael@0 474 // immediately write over this memory, but there is no other way to set the size
michael@0 475 // of the string, and not doing that will mean people who access |str| rather
michael@0 476 // than str.c_str() will get back a string of whatever size |str| had on entry
michael@0 477 // to this function (probably 0).
michael@0 478 template <class string_type>
michael@0 479 inline typename string_type::value_type* WriteInto(string_type* str,
michael@0 480 size_t length_with_null) {
michael@0 481 DCHECK_GT(length_with_null, 1u);
michael@0 482 str->reserve(length_with_null);
michael@0 483 str->resize(length_with_null - 1);
michael@0 484 return &((*str)[0]);
michael@0 485 }
michael@0 486
michael@0 487 //-----------------------------------------------------------------------------
michael@0 488
michael@0 489 // Splits a string into its fields delimited by any of the characters in
michael@0 490 // |delimiters|. Each field is added to the |tokens| vector. Returns the
michael@0 491 // number of tokens found.
michael@0 492 BASE_EXPORT size_t Tokenize(const std::wstring& str,
michael@0 493 const std::wstring& delimiters,
michael@0 494 std::vector<std::wstring>* tokens);
michael@0 495 BASE_EXPORT size_t Tokenize(const string16& str,
michael@0 496 const string16& delimiters,
michael@0 497 std::vector<string16>* tokens);
michael@0 498 BASE_EXPORT size_t Tokenize(const std::string& str,
michael@0 499 const std::string& delimiters,
michael@0 500 std::vector<std::string>* tokens);
michael@0 501 BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
michael@0 502 const base::StringPiece& delimiters,
michael@0 503 std::vector<base::StringPiece>* tokens);
michael@0 504
michael@0 505 // Does the opposite of SplitString().
michael@0 506 BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s);
michael@0 507 BASE_EXPORT std::string JoinString(
michael@0 508 const std::vector<std::string>& parts, char s);
michael@0 509
michael@0 510 // Join |parts| using |separator|.
michael@0 511 BASE_EXPORT std::string JoinString(
michael@0 512 const std::vector<std::string>& parts,
michael@0 513 const std::string& separator);
michael@0 514 BASE_EXPORT string16 JoinString(
michael@0 515 const std::vector<string16>& parts,
michael@0 516 const string16& separator);
michael@0 517
michael@0 518 // Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.
michael@0 519 // Additionally, any number of consecutive '$' characters is replaced by that
michael@0 520 // number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
michael@0 521 // NULL. This only allows you to use up to nine replacements.
michael@0 522 BASE_EXPORT string16 ReplaceStringPlaceholders(
michael@0 523 const string16& format_string,
michael@0 524 const std::vector<string16>& subst,
michael@0 525 std::vector<size_t>* offsets);
michael@0 526
michael@0 527 BASE_EXPORT std::string ReplaceStringPlaceholders(
michael@0 528 const base::StringPiece& format_string,
michael@0 529 const std::vector<std::string>& subst,
michael@0 530 std::vector<size_t>* offsets);
michael@0 531
michael@0 532 // Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
michael@0 533 BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
michael@0 534 const string16& a,
michael@0 535 size_t* offset);
michael@0 536
michael@0 537 // Returns true if the string passed in matches the pattern. The pattern
michael@0 538 // string can contain wildcards like * and ?
michael@0 539 // The backslash character (\) is an escape character for * and ?
michael@0 540 // We limit the patterns to having a max of 16 * or ? characters.
michael@0 541 // ? matches 0 or 1 character, while * matches 0 or more characters.
michael@0 542 BASE_EXPORT bool MatchPattern(const base::StringPiece& string,
michael@0 543 const base::StringPiece& pattern);
michael@0 544 BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern);
michael@0 545
// Trait that maps a char-like type to its unsigned counterpart, exposed as
// the nested typedef |Unsigned|. For example char, signed char and unsigned
// char all map to unsigned char.

// Primary template: any type without a specialization below maps to itself.
template <typename CharT>
struct ToUnsigned {
  typedef CharT Unsigned;
};

template <>
struct ToUnsigned<char> {
  typedef unsigned char Unsigned;
};

template <>
struct ToUnsigned<signed char> {
  typedef unsigned char Unsigned;
};

// wchar_t maps to an unsigned type chosen by the WCHAR_T_IS_* configuration
// macros. If neither macro is defined, no |Unsigned| typedef is provided.
template <>
struct ToUnsigned<wchar_t> {
#if defined(WCHAR_T_IS_UTF16)
  typedef unsigned short Unsigned;
#elif defined(WCHAR_T_IS_UTF32)
  typedef uint32 Unsigned;
#endif
};

template <>
struct ToUnsigned<short> {
  typedef unsigned short Unsigned;
};
michael@0 574
michael@0 575 #endif // BASE_STRINGS_STRING_UTIL_H_

mercurial