security/sandbox/chromium/base/strings/string_util.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/sandbox/chromium/base/strings/string_util.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,575 @@
     1.4 +// Copyright 2013 The Chromium Authors. All rights reserved.
     1.5 +// Use of this source code is governed by a BSD-style license that can be
     1.6 +// found in the LICENSE file.
     1.7 +//
     1.8 +// This file defines utility functions for working with strings.
     1.9 +
    1.10 +#ifndef BASE_STRINGS_STRING_UTIL_H_
    1.11 +#define BASE_STRINGS_STRING_UTIL_H_
    1.12 +
    1.13 +#include <ctype.h>
    1.14 +#include <stdarg.h>   // va_list
    1.15 +
    1.16 +#include <string>
    1.17 +#include <vector>
    1.18 +
    1.19 +#include "base/base_export.h"
    1.20 +#include "base/basictypes.h"
    1.21 +#include "base/compiler_specific.h"
    1.22 +#include "base/strings/string16.h"
    1.23 +#include "base/strings/string_piece.h"  // For implicit conversions.
    1.24 +
    1.25 +// Safe standard library wrappers for all platforms.
    1.26 +
    1.27 +namespace base {
    1.28 +
    1.29 +// C standard-library functions like "strncasecmp" and "snprintf" that aren't
    1.30 +// cross-platform are provided as "base::strncasecmp", and their prototypes
    1.31 +// are listed below.  These functions are then implemented as inline calls
    1.32 +// to the platform-specific equivalents in the platform-specific headers.
    1.33 +
    1.34 +// Compares the two strings s1 and s2 without regard to case using
    1.35 +// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
    1.36 +// s2 > s1 according to a lexicographic comparison.
    1.37 +int strcasecmp(const char* s1, const char* s2);
    1.38 +
    1.39 +// Compares up to count characters of s1 and s2 without regard to case using
    1.40 +// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
    1.41 +// s2 > s1 according to a lexicographic comparison.
    1.42 +int strncasecmp(const char* s1, const char* s2, size_t count);
    1.43 +
    1.44 +// Same as strncmp but for char16 strings.
    1.45 +int strncmp16(const char16* s1, const char16* s2, size_t count);
    1.46 +
    1.47 +// Wrapper for vsnprintf that always null-terminates and always returns the
    1.48 +// number of characters that would be in an untruncated formatted
    1.49 +// string, even when truncation occurs.
    1.50 +int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
    1.51 +    PRINTF_FORMAT(3, 0);
    1.52 +
    1.53 +// vswprintf always null-terminates, but when truncation occurs, it will either
    1.54 +// return -1 or the number of characters that would be in an untruncated
    1.55 +// formatted string.  The actual return value depends on the underlying
    1.56 +// C library's vswprintf implementation.
    1.57 +int vswprintf(wchar_t* buffer, size_t size,
    1.58 +              const wchar_t* format, va_list arguments)
    1.59 +    WPRINTF_FORMAT(3, 0);
    1.60 +
    1.61 +// Some of these implementations need to be inlined.
    1.62 +
    1.63 +// Variadic front end for base::vsnprintf.  The declaration is kept apart
    1.64 +// from the definition so that PRINTF_FORMAT can be attached to it.
    1.65 +inline int snprintf(char* buffer, size_t size, const char* format, ...)
    1.66 +    PRINTF_FORMAT(3, 4);
    1.67 +inline int snprintf(char* buffer, size_t size, const char* format, ...) {
    1.68 +  va_list args;
    1.69 +  va_start(args, format);
    1.70 +  const int formatted_length = vsnprintf(buffer, size, format, args);
    1.71 +  va_end(args);
    1.72 +  return formatted_length;
    1.73 +}
    1.74 +
    1.75 +// Variadic front end for base::vswprintf.  The declaration is kept apart
    1.76 +// from the definition so that WPRINTF_FORMAT can be attached to it.
    1.77 +inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...)
    1.78 +    WPRINTF_FORMAT(3, 4);
    1.79 +inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) {
    1.80 +  va_list args;
    1.81 +  va_start(args, format);
    1.82 +  const int status = vswprintf(buffer, size, format, args);
    1.83 +  va_end(args);
    1.84 +  return status;
    1.85 +}
    1.86 +
    1.87 +// BSD-style safe and consistent string copy functions.
    1.88 +// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
    1.89 +// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
    1.90 +// long as |dst_size| is not 0.  Returns the length of |src| in characters.
    1.91 +// If the return value is >= dst_size, then the output was truncated.
    1.92 +// NOTE: All sizes are in number of characters, NOT in bytes.
    1.93 +BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
    1.94 +BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
    1.95 +
    1.96 +// Scan a wprintf format string to determine whether it's portable across a
    1.97 +// variety of systems.  This function only checks that the conversion
    1.98 +// specifiers used by the format string are supported and have the same meaning
    1.99 +// on a variety of systems.  It doesn't check for other errors that might occur
   1.100 +// within a format string.
   1.101 +//
   1.102 +// Nonportable conversion specifiers for wprintf are:
   1.103 +//  - 's' and 'c' without an 'l' length modifier.  %s and %c operate on char
   1.104 +//     data on all systems except Windows, which treat them as wchar_t data.
   1.105 +//     Use %ls and %lc for wchar_t data instead.
   1.106 +//  - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
   1.107 +//     which treat them as char data.  Use %ls and %lc for wchar_t data
   1.108 +//     instead.
   1.109 +//  - 'F', which is not identified by Windows wprintf documentation.
   1.110 +//  - 'D', 'O', and 'U', which are deprecated and not available on all systems.
   1.111 +//     Use %ld, %lo, and %lu instead.
   1.112 +//
   1.113 +// Note that there is no portable conversion specifier for char data when
   1.114 +// working with wprintf.
   1.115 +//
   1.116 +// This function is intended to be called from base::vswprintf.
   1.117 +BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
   1.118 +
   1.119 +// Locale-independent tolower for ASCII: only 'A'..'Z' are mapped; every
   1.120 +// other value is returned unchanged (the C library's tolower uses the locale).
   1.121 +template <class Char> inline Char ToLowerASCII(Char c) {
   1.122 +  return (c < 'A' || c > 'Z') ? c : (c + ('a' - 'A'));
   1.123 +}
   1.124 +
   1.125 +// Locale-independent toupper for ASCII: only 'a'..'z' are mapped; every
   1.126 +// other value is returned unchanged (the C library's toupper uses the locale).
   1.127 +template <class Char> inline Char ToUpperASCII(Char c) {
   1.128 +  return (c < 'a' || c > 'z') ? c : (c + ('A' - 'a'));
   1.129 +}
   1.130 +
   1.131 +// Function objects to aid in comparing/searching strings.
   1.132 +
   1.133 +template<typename Char> struct CaseInsensitiveCompare {
   1.134 +  // TODO(darin): tolower() is locale sensitive; is that really wanted here?
   1.135 +  // See http://crbug.com/24917
   1.136 +  bool operator()(Char x, Char y) const {
   1.137 +    const int folded_x = tolower(x);
   1.138 +    return folded_x == tolower(y);
   1.139 +  }
   1.140 +};
   1.141 +
   1.142 +template<typename Char> struct CaseInsensitiveCompareASCII {
   1.143 +  // True when |x| and |y| are equal after ASCII-only lower-casing.
   1.144 +  bool operator()(Char x, Char y) const {
   1.145 +    return ToLowerASCII(y) == ToLowerASCII(x);
   1.146 +  }
   1.147 +};
   1.148 +
   1.149 +}  // namespace base
   1.150 +
   1.151 +#if defined(OS_WIN)
   1.152 +#include "base/strings/string_util_win.h"
   1.153 +#elif defined(OS_POSIX)
   1.154 +#include "base/strings/string_util_posix.h"
   1.155 +#else
   1.156 +#error Define string operations appropriately for your platform
   1.157 +#endif
   1.158 +
   1.159 +// These threadsafe functions return references to globally unique empty
   1.160 +// strings.
   1.161 +//
   1.162 +// DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS.
   1.163 +// There is only one case where you should use these: functions which need to
   1.164 +// return a string by reference (e.g. as a class member accessor), and don't
   1.165 +// have an empty string to use (e.g. in an error case).  These should not be
   1.166 +// used as initializers, function arguments, or return values for functions
   1.167 +// which return by value or outparam.
   1.168 +BASE_EXPORT const std::string& EmptyString();
   1.169 +BASE_EXPORT const std::wstring& EmptyWString();
   1.170 +BASE_EXPORT const string16& EmptyString16();
   1.171 +
   1.172 +BASE_EXPORT extern const wchar_t kWhitespaceWide[];
   1.173 +BASE_EXPORT extern const char16 kWhitespaceUTF16[];
   1.174 +BASE_EXPORT extern const char kWhitespaceASCII[];
   1.175 +
   1.176 +BASE_EXPORT extern const char kUtf8ByteOrderMark[];
   1.177 +
   1.178 +// Removes characters in |remove_chars| from anywhere in |input|.  Returns true
   1.179 +// if any characters were removed.  |remove_chars| must be null-terminated.
   1.180 +// NOTE: Safe to use the same variable for both |input| and |output|.
   1.181 +BASE_EXPORT bool RemoveChars(const string16& input,
   1.182 +                             const char16 remove_chars[],
   1.183 +                             string16* output);
   1.184 +BASE_EXPORT bool RemoveChars(const std::string& input,
   1.185 +                             const char remove_chars[],
   1.186 +                             std::string* output);
   1.187 +
   1.188 +// Replaces characters in |replace_chars| from anywhere in |input| with
   1.189 +// |replace_with|.  Each character in |replace_chars| will be replaced with
   1.190 +// the |replace_with| string.  Returns true if any characters were replaced.
   1.191 +// |replace_chars| must be null-terminated.
   1.192 +// NOTE: Safe to use the same variable for both |input| and |output|.
   1.193 +BASE_EXPORT bool ReplaceChars(const string16& input,
   1.194 +                              const char16 replace_chars[],
   1.195 +                              const string16& replace_with,
   1.196 +                              string16* output);
   1.197 +BASE_EXPORT bool ReplaceChars(const std::string& input,
   1.198 +                              const char replace_chars[],
   1.199 +                              const std::string& replace_with,
   1.200 +                              std::string* output);
   1.201 +
   1.202 +// Removes characters in |trim_chars| from the beginning and end of |input|.
   1.203 +// |trim_chars| must be null-terminated.
   1.204 +// NOTE: Safe to use the same variable for both |input| and |output|.
   1.205 +BASE_EXPORT bool TrimString(const std::wstring& input,
   1.206 +                            const wchar_t trim_chars[],
   1.207 +                            std::wstring* output);
   1.208 +BASE_EXPORT bool TrimString(const string16& input,
   1.209 +                            const char16 trim_chars[],
   1.210 +                            string16* output);
   1.211 +BASE_EXPORT bool TrimString(const std::string& input,
   1.212 +                            const char trim_chars[],
   1.213 +                            std::string* output);
   1.214 +
   1.215 +// Truncates a string to the nearest UTF-8 character that will leave
   1.216 +// the string less than or equal to the specified byte size.
   1.217 +BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
   1.218 +                                        const size_t byte_size,
   1.219 +                                        std::string* output);
   1.220 +
   1.221 +// Trims any whitespace from either end of the input string.  Returns where
   1.222 +// whitespace was found.
   1.223 +// The non-wide (std::string) variant is TrimWhitespaceASCII(), which is for
   1.224 +// ASCII strings and only looks for ASCII whitespace.  The std::string
   1.225 +// overload of TrimWhitespace() is deprecated and simply calls
   1.226 +// TrimWhitespaceASCII().
   1.227 +// NOTE: Safe to use the same variable for both input and output.
   1.228 +enum TrimPositions {
   1.229 +  TRIM_NONE     = 0,                             // Neither end.
   1.230 +  TRIM_LEADING  = 1 << 0,                        // Bit 0: the leading end.
   1.231 +  TRIM_TRAILING = 1 << 1,                        // Bit 1: the trailing end.
   1.232 +  TRIM_ALL      = TRIM_LEADING | TRIM_TRAILING,  // Both bits set.
   1.233 +};
   1.234 +BASE_EXPORT TrimPositions TrimWhitespace(const string16& input,
   1.235 +                                         TrimPositions positions,
   1.236 +                                         string16* output);
   1.237 +BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input,
   1.238 +                                              TrimPositions positions,
   1.239 +                                              std::string* output);
   1.240 +
   1.241 +// Deprecated. This function is only for backward compatibility and calls
   1.242 +// TrimWhitespaceASCII().
   1.243 +BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input,
   1.244 +                                         TrimPositions positions,
   1.245 +                                         std::string* output);
   1.246 +
   1.247 +// Searches for CR or LF characters.  Removes all contiguous whitespace
   1.248 +// strings that contain them.  This is useful when trying to deal with text
   1.249 +// copied from terminals.
   1.250 +// Returns |text|, with the following three transformations:
   1.251 +// (1) Leading and trailing whitespace is trimmed.
   1.252 +// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
   1.253 +//     sequences containing a CR or LF are trimmed.
   1.254 +// (3) All other whitespace sequences are converted to single spaces.
   1.255 +BASE_EXPORT std::wstring CollapseWhitespace(
   1.256 +    const std::wstring& text,
   1.257 +    bool trim_sequences_with_line_breaks);
   1.258 +BASE_EXPORT string16 CollapseWhitespace(
   1.259 +    const string16& text,
   1.260 +    bool trim_sequences_with_line_breaks);
   1.261 +BASE_EXPORT std::string CollapseWhitespaceASCII(
   1.262 +    const std::string& text,
   1.263 +    bool trim_sequences_with_line_breaks);
   1.264 +
   1.265 +// Returns true if the passed string is empty or contains only white-space
   1.266 +// characters.
   1.267 +BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str);
   1.268 +BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str);
   1.269 +
   1.270 +// Returns true if |input| is empty or contains only characters found in
   1.271 +// |characters|.
   1.272 +BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input,
   1.273 +                                   const std::wstring& characters);
   1.274 +BASE_EXPORT bool ContainsOnlyChars(const string16& input,
   1.275 +                                   const string16& characters);
   1.276 +BASE_EXPORT bool ContainsOnlyChars(const std::string& input,
   1.277 +                                   const std::string& characters);
   1.278 +
   1.279 +// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
   1.280 +// beforehand.
   1.281 +BASE_EXPORT std::string WideToASCII(const std::wstring& wide);
   1.282 +BASE_EXPORT std::string UTF16ToASCII(const string16& utf16);
   1.283 +
   1.284 +// Converts the given wide string to the corresponding Latin1. This will fail
   1.285 +// (return false) if any characters are more than 255.
   1.286 +BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1);
   1.287 +
   1.288 +// Returns true if the specified string matches the criteria. How can a wide
   1.289 +// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
   1.290 +// first case) or characters that use only 8-bits and whose 8-bit
   1.291 +// representation looks like a UTF-8 string (the second case).
   1.292 +//
   1.293 +// Note that IsStringUTF8 checks not only if the input is structurally
   1.294 +// valid but also if it doesn't contain any non-character codepoint
   1.295 +// (e.g. U+FFFE). It's done on purpose because all the existing callers want
   1.296 +// to have the maximum 'discriminating' power from other encodings. If
   1.297 +// there's a use case for just checking the structural validity, we have to
   1.298 +// add a new function for that.
   1.299 +BASE_EXPORT bool IsStringUTF8(const std::string& str);
   1.300 +BASE_EXPORT bool IsStringASCII(const std::wstring& str);
   1.301 +BASE_EXPORT bool IsStringASCII(const string16& str);
   1.302 +
   1.303 +// Lower-cases each element of |*s| in place via base::ToLowerASCII.  The
   1.304 +// pointer parameter sets this overload apart from the by-value variant.
   1.305 +template <class str> inline void StringToLowerASCII(str* s) {
   1.306 +  typename str::iterator it = s->begin();
   1.307 +  for (; it != s->end(); ++it) *it = base::ToLowerASCII(*it);
   1.308 +}
   1.309 +
   1.310 +template <class str> inline str StringToLowerASCII(const str& s) {
   1.311 +  // Used with std::string and std::wstring: lower-case a copy, return it.
   1.312 +  str lowered(s);
   1.313 +  StringToLowerASCII(&lowered);
   1.314 +  return lowered;
   1.315 +}
   1.316 +
   1.317 +// Upper-cases each element of |*s| in place via base::ToUpperASCII.  The
   1.318 +// pointer parameter sets this overload apart from the by-value variant.
   1.319 +template <class str> inline void StringToUpperASCII(str* s) {
   1.320 +  typename str::iterator it = s->begin();
   1.321 +  for (; it != s->end(); ++it) *it = base::ToUpperASCII(*it);
   1.322 +}
   1.323 +
   1.324 +template <class str> inline str StringToUpperASCII(const str& s) {
   1.325 +  // Used with std::string and std::wstring: upper-case a copy, return it.
   1.326 +  str uppered(s);
   1.327 +  StringToUpperASCII(&uppered);
   1.328 +  return uppered;
   1.329 +}
   1.330 +
   1.331 +// Compare the lower-case form of the given string against the given ASCII
   1.332 +// string.  This is useful for doing checking if an input string matches some
   1.333 +// token, and it is optimized to avoid intermediate string copies.  This API is
   1.334 +// borrowed from the equivalent APIs in Mozilla.
   1.335 +BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b);
   1.336 +BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b);
   1.337 +BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b);
   1.338 +
   1.339 +// Same thing, but with string iterators instead.
   1.340 +BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
   1.341 +                                      std::string::const_iterator a_end,
   1.342 +                                      const char* b);
   1.343 +BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
   1.344 +                                      std::wstring::const_iterator a_end,
   1.345 +                                      const char* b);
   1.346 +BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
   1.347 +                                      string16::const_iterator a_end,
   1.348 +                                      const char* b);
   1.349 +BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
   1.350 +                                      const char* a_end,
   1.351 +                                      const char* b);
   1.352 +BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin,
   1.353 +                                      const wchar_t* a_end,
   1.354 +                                      const char* b);
   1.355 +BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin,
   1.356 +                                      const char16* a_end,
   1.357 +                                      const char* b);
   1.358 +
   1.359 +// Performs a case-sensitive string compare. The behavior is undefined unless
   1.360 +// both strings are ASCII.
   1.361 +BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b);
   1.362 +
   1.363 +// Returns true if str starts with search, or false otherwise.
   1.364 +BASE_EXPORT bool StartsWithASCII(const std::string& str,
   1.365 +                                 const std::string& search,
   1.366 +                                 bool case_sensitive);
   1.367 +BASE_EXPORT bool StartsWith(const std::wstring& str,
   1.368 +                            const std::wstring& search,
   1.369 +                            bool case_sensitive);
   1.370 +BASE_EXPORT bool StartsWith(const string16& str,
   1.371 +                            const string16& search,
   1.372 +                            bool case_sensitive);
   1.373 +
   1.374 +// Returns true if str ends with search, or false otherwise.
   1.375 +BASE_EXPORT bool EndsWith(const std::string& str,
   1.376 +                          const std::string& search,
   1.377 +                          bool case_sensitive);
   1.378 +BASE_EXPORT bool EndsWith(const std::wstring& str,
   1.379 +                          const std::wstring& search,
   1.380 +                          bool case_sensitive);
   1.381 +BASE_EXPORT bool EndsWith(const string16& str,
   1.382 +                          const string16& search,
   1.383 +                          bool case_sensitive);
   1.384 +
   1.385 +
   1.386 +// Locale-independent ASCII character classification (the C library
   1.387 +// versions change with the locale).  Whitespace: tab, LF, CR and space.
   1.388 +template <typename Char>
   1.389 +inline bool IsAsciiWhitespace(Char c) {
   1.390 +  return c == '\t' || c == '\n' || c == '\r' || c == ' ';
   1.391 +}
   1.392 +template <typename Char>
   1.393 +inline bool IsAsciiAlpha(Char c) {
   1.394 +  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
   1.395 +}
   1.396 +template <typename Char>
   1.397 +inline bool IsAsciiDigit(Char c) {
   1.398 +  return !(c < '0' || c > '9');
   1.399 +}
   1.400 +
   1.401 +template <typename Char>
   1.402 +inline bool IsHexDigit(Char c) {
   1.403 +  return (c >= 'a' && c <= 'f') ||
   1.404 +         (c >= 'A' && c <= 'F') ||
   1.405 +         (c >= '0' && c <= '9');
   1.406 +}
   1.407 +
   1.408 +template <typename Char>
   1.409 +inline Char HexDigitToInt(Char c) {
   1.410 +  DCHECK(IsHexDigit(c));
   1.411 +  if (c >= 'a' && c <= 'f')
   1.412 +    return c - 'a' + 10;
   1.413 +  if (c >= 'A' && c <= 'F')
   1.414 +    return c - 'A' + 10;
   1.415 +  if (c >= '0' && c <= '9')
   1.416 +    return c - '0';
   1.417 +  return 0;  // Not a hex digit.
   1.418 +}
   1.419 +
   1.420 +// True if |c| is in kWhitespaceWide; L'\0' is excluded (wcschr matches it).
   1.421 +inline bool IsWhitespace(wchar_t c) {
   1.422 +  return c != L'\0' && wcschr(kWhitespaceWide, c) != NULL;
   1.423 +}
   1.424 +
   1.425 +// Return a byte string in human-readable format with a unit suffix. Not
   1.426 +// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
   1.427 +// highly recommended instead. TODO(avi): Figure out how to get callers to use
   1.428 +// FormatBytes instead; remove this.
   1.429 +BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes);
   1.430 +
   1.431 +// Starting at |start_offset| (usually 0), replace the first instance of
   1.432 +// |find_this| with |replace_with|.
   1.433 +BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
   1.434 +    string16* str,
   1.435 +    string16::size_type start_offset,
   1.436 +    const string16& find_this,
   1.437 +    const string16& replace_with);
   1.438 +BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
   1.439 +    std::string* str,
   1.440 +    std::string::size_type start_offset,
   1.441 +    const std::string& find_this,
   1.442 +    const std::string& replace_with);
   1.443 +
   1.444 +// Starting at |start_offset| (usually 0), look through |str| and replace all
   1.445 +// instances of |find_this| with |replace_with|.
   1.446 +//
   1.447 +// This does entire substrings; use std::replace in <algorithm> for single
   1.448 +// characters, for example:
   1.449 +//   std::replace(str.begin(), str.end(), 'a', 'b');
   1.450 +BASE_EXPORT void ReplaceSubstringsAfterOffset(
   1.451 +    string16* str,
   1.452 +    string16::size_type start_offset,
   1.453 +    const string16& find_this,
   1.454 +    const string16& replace_with);
   1.455 +BASE_EXPORT void ReplaceSubstringsAfterOffset(
   1.456 +    std::string* str,
   1.457 +    std::string::size_type start_offset,
   1.458 +    const std::string& find_this,
   1.459 +    const std::string& replace_with);
   1.460 +
   1.461 +// Reserves enough memory in |str| to accommodate |length_with_null| characters,
   1.462 +// sets the size of |str| to |length_with_null - 1| characters, and returns a
   1.463 +// pointer to the underlying contiguous array of characters.  This is typically
   1.464 +// used when calling a function that writes results into a character array, but
   1.465 +// the caller wants the data to be managed by a string-like object.  It is
   1.466 +// convenient in that it can be used inline in the call, and fast in that it
   1.467 +// avoids copying the results of the call from a char* into a string.
   1.468 +//
   1.469 +// |length_with_null| must be at least 2, since otherwise the underlying string
   1.470 +// would have size 0, and trying to access &((*str)[0]) in that case can result
   1.471 +// in a number of problems.
   1.472 +//
   1.473 +// Internally, this takes linear time because the resize() call 0-fills the
   1.474 +// underlying array for potentially all
   1.475 +// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes.  Ideally we
   1.476 +// could avoid this aspect of the resize() call, as we expect the caller to
   1.477 +// immediately write over this memory, but there is no other way to set the size
   1.478 +// of the string, and not doing that will mean people who access |str| rather
   1.479 +// than str.c_str() will get back a string of whatever size |str| had on entry
   1.480 +// to this function (probably 0).
   1.481 +template <class string_type>
   1.482 +inline typename string_type::value_type* WriteInto(string_type* str,
   1.483 +                                                   size_t length_with_null) {
   1.484 +  DCHECK_GT(length_with_null, 1u);  // A size-0 string has no usable [0].
   1.485 +  str->reserve(length_with_null);  // Capacity for the data plus the null.
   1.486 +  str->resize(length_with_null - 1);  // Size excludes the null.
   1.487 +  return &((*str)[0]);  // Address of the first character of |*str|.
   1.488 +}
   1.489 +
   1.490 +//-----------------------------------------------------------------------------
   1.491 +
   1.492 +// Splits a string into its fields delimited by any of the characters in
   1.493 +// |delimiters|.  Each field is added to the |tokens| vector.  Returns the
   1.494 +// number of tokens found.
   1.495 +BASE_EXPORT size_t Tokenize(const std::wstring& str,
   1.496 +                            const std::wstring& delimiters,
   1.497 +                            std::vector<std::wstring>* tokens);
   1.498 +BASE_EXPORT size_t Tokenize(const string16& str,
   1.499 +                            const string16& delimiters,
   1.500 +                            std::vector<string16>* tokens);
   1.501 +BASE_EXPORT size_t Tokenize(const std::string& str,
   1.502 +                            const std::string& delimiters,
   1.503 +                            std::vector<std::string>* tokens);
   1.504 +BASE_EXPORT size_t Tokenize(const base::StringPiece& str,
   1.505 +                            const base::StringPiece& delimiters,
   1.506 +                            std::vector<base::StringPiece>* tokens);
   1.507 +
   1.508 +// Does the opposite of SplitString().
   1.509 +BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s);
   1.510 +BASE_EXPORT std::string JoinString(
   1.511 +    const std::vector<std::string>& parts, char s);
   1.512 +
   1.513 +// Join |parts| using |separator|.
   1.514 +BASE_EXPORT std::string JoinString(
   1.515 +    const std::vector<std::string>& parts,
   1.516 +    const std::string& separator);
   1.517 +BASE_EXPORT string16 JoinString(
   1.518 +    const std::vector<string16>& parts,
   1.519 +    const string16& separator);
   1.520 +
   1.521 +// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively.
   1.522 +// Additionally, any number of consecutive '$' characters is replaced by that
   1.523 +// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
   1.524 +// NULL. This only allows you to use up to nine replacements.
   1.525 +BASE_EXPORT string16 ReplaceStringPlaceholders(
   1.526 +    const string16& format_string,
   1.527 +    const std::vector<string16>& subst,
   1.528 +    std::vector<size_t>* offsets);
   1.529 +
   1.530 +BASE_EXPORT std::string ReplaceStringPlaceholders(
   1.531 +    const base::StringPiece& format_string,
   1.532 +    const std::vector<std::string>& subst,
   1.533 +    std::vector<size_t>* offsets);
   1.534 +
   1.535 +// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
   1.536 +BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
   1.537 +                                               const string16& a,
   1.538 +                                               size_t* offset);
   1.539 +
   1.540 +// Returns true if the string passed in matches the pattern. The pattern
   1.541 +// string can contain wildcards like * and ?
   1.542 +// The backslash character (\) is an escape character for * and ?
   1.543 +// We limit the patterns to having a max of 16 * or ? characters.
   1.544 +// ? matches 0 or 1 character, while * matches 0 or more characters.
   1.545 +BASE_EXPORT bool MatchPattern(const base::StringPiece& string,
   1.546 +                              const base::StringPiece& pattern);
   1.547 +BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern);
   1.548 +
   1.549 +// Hack to map a char-like type T to its unsigned counterpart, exposed as
   1.550 +// ToUnsigned<T>::Unsigned.  For example char, signed char and unsigned char
   1.551 +// all map to unsigned char.
   1.552 +template<typename T>
   1.553 +struct ToUnsigned {
   1.554 +  typedef T Unsigned;  // Default: T is used unchanged.
   1.555 +};
   1.556 +
   1.557 +template<>
   1.558 +struct ToUnsigned<char> {
   1.559 +  typedef unsigned char Unsigned;
   1.560 +};
   1.561 +template<>
   1.562 +struct ToUnsigned<signed char> {
   1.563 +  typedef unsigned char Unsigned;
   1.564 +};
   1.565 +template<>
   1.566 +struct ToUnsigned<wchar_t> {  // No typedef if neither WCHAR_T_IS_* is set.
   1.567 +#if defined(WCHAR_T_IS_UTF16)
   1.568 +  typedef unsigned short Unsigned;
   1.569 +#elif defined(WCHAR_T_IS_UTF32)
   1.570 +  typedef uint32 Unsigned;
   1.571 +#endif  // WCHAR_T_IS_UTF16 / WCHAR_T_IS_UTF32
   1.572 +};
   1.573 +template<>
   1.574 +struct ToUnsigned<short> {
   1.575 +  typedef unsigned short Unsigned;
   1.576 +};
   1.577 +
   1.578 +#endif  // BASE_STRINGS_STRING_UTIL_H_

mercurial