1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/sandbox/chromium/base/strings/string_util.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,575 @@ 1.4 +// Copyright 2013 The Chromium Authors. All rights reserved. 1.5 +// Use of this source code is governed by a BSD-style license that can be 1.6 +// found in the LICENSE file. 1.7 +// 1.8 +// This file defines utility functions for working with strings. 1.9 + 1.10 +#ifndef BASE_STRINGS_STRING_UTIL_H_ 1.11 +#define BASE_STRINGS_STRING_UTIL_H_ 1.12 + 1.13 +#include <ctype.h> 1.14 +#include <stdarg.h> // va_list 1.15 + 1.16 +#include <string> 1.17 +#include <vector> 1.18 + 1.19 +#include "base/base_export.h" 1.20 +#include "base/basictypes.h" 1.21 +#include "base/compiler_specific.h" 1.22 +#include "base/strings/string16.h" 1.23 +#include "base/strings/string_piece.h" // For implicit conversions. 1.24 + 1.25 +// Safe standard library wrappers for all platforms. 1.26 + 1.27 +namespace base { 1.28 + 1.29 +// C standard-library functions like "strncasecmp" and "snprintf" that aren't 1.30 +// cross-platform are provided as "base::strncasecmp", and their prototypes 1.31 +// are listed below. These functions are then implemented as inline calls 1.32 +// to the platform-specific equivalents in the platform-specific headers. 1.33 + 1.34 +// Compares the two strings s1 and s2 without regard to case using 1.35 +// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 1.36 +// s2 > s1 according to a lexicographic comparison. 1.37 +int strcasecmp(const char* s1, const char* s2); 1.38 + 1.39 +// Compares up to count characters of s1 and s2 without regard to case using 1.40 +// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 1.41 +// s2 > s1 according to a lexicographic comparison. 1.42 +int strncasecmp(const char* s1, const char* s2, size_t count); 1.43 + 1.44 +// Same as strncmp but for char16 strings. 1.45 +int strncmp16(const char16* s1, const char16* s2, size_t count); 1.46 + 1.47 +// Wrapper for vsnprintf that always null-terminates and always returns the 1.48 +// number of characters that would be in an untruncated formatted 1.49 +// string, even when truncation occurs. 1.50 +int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 1.51 + PRINTF_FORMAT(3, 0); 1.52 + 1.53 +// vswprintf always null-terminates, but when truncation occurs, it will either 1.54 +// return -1 or the number of characters that would be in an untruncated 1.55 +// formatted string. The actual return value depends on the underlying 1.56 +// C library's vswprintf implementation. 1.57 +int vswprintf(wchar_t* buffer, size_t size, 1.58 + const wchar_t* format, va_list arguments) 1.59 + WPRINTF_FORMAT(3, 0); 1.60 + 1.61 +// Some of these implementations need to be inlined. 1.62 + 1.63 +// We separate the declaration from the implementation of this inline 1.64 +// function just so the PRINTF_FORMAT works. 1.65 +inline int snprintf(char* buffer, size_t size, const char* format, ...) 1.66 + PRINTF_FORMAT(3, 4); 1.67 +inline int snprintf(char* buffer, size_t size, const char* format, ...) { 1.68 + va_list arguments; 1.69 + va_start(arguments, format); 1.70 + int result = vsnprintf(buffer, size, format, arguments); 1.71 + va_end(arguments); 1.72 + return result; 1.73 +} 1.74 + 1.75 +// We separate the declaration from the implementation of this inline 1.76 +// function just so the WPRINTF_FORMAT works. 1.77 +inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) 1.78 + WPRINTF_FORMAT(3, 4); 1.79 +inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { 1.80 + va_list arguments; 1.81 + va_start(arguments, format); 1.82 + int result = vswprintf(buffer, size, format, arguments); 1.83 + va_end(arguments); 1.84 + return result; 1.85 +} 1.86 + 1.87 +// BSD-style safe and consistent string copy functions. 1.88 +// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 1.89 +// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as 1.90 +// long as |dst_size| is not 0. Returns the length of |src| in characters. 1.91 +// If the return value is >= dst_size, then the output was truncated. 1.92 +// NOTE: All sizes are in number of characters, NOT in bytes. 1.93 +BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); 1.94 +BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); 1.95 + 1.96 +// Scan a wprintf format string to determine whether it's portable across a 1.97 +// variety of systems. This function only checks that the conversion 1.98 +// specifiers used by the format string are supported and have the same meaning 1.99 +// on a variety of systems. It doesn't check for other errors that might occur 1.100 +// within a format string. 1.101 +// 1.102 +// Nonportable conversion specifiers for wprintf are: 1.103 +// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char 1.104 +// data on all systems except Windows, which treat them as wchar_t data. 1.105 +// Use %ls and %lc for wchar_t data instead. 1.106 +// - 'S' and 'C', which operate on wchar_t data on all systems except Windows, 1.107 +// which treat them as char data. Use %ls and %lc for wchar_t data 1.108 +// instead. 1.109 +// - 'F', which is not identified by Windows wprintf documentation. 1.110 +// - 'D', 'O', and 'U', which are deprecated and not available on all systems. 1.111 +// Use %ld, %lo, and %lu instead. 1.112 +// 1.113 +// Note that there is no portable conversion specifier for char data when 1.114 +// working with wprintf. 1.115 +// 1.116 +// This function is intended to be called from base::vswprintf. 1.117 +BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); 1.118 + 1.119 +// ASCII-specific tolower. The standard library's tolower is locale sensitive, 1.120 +// so we don't want to use it here. 1.121 +template <class Char> inline Char ToLowerASCII(Char c) { 1.122 + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 1.123 +} 1.124 + 1.125 +// ASCII-specific toupper. The standard library's toupper is locale sensitive, 1.126 +// so we don't want to use it here. 1.127 +template <class Char> inline Char ToUpperASCII(Char c) { 1.128 + return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; 1.129 +} 1.130 + 1.131 +// Function objects to aid in comparing/searching strings. 1.132 + 1.133 +template<typename Char> struct CaseInsensitiveCompare { 1.134 + public: 1.135 + bool operator()(Char x, Char y) const { 1.136 + // TODO(darin): Do we really want to do locale sensitive comparisons here? 1.137 + // See http://crbug.com/24917 1.138 + return tolower(x) == tolower(y); 1.139 + } 1.140 +}; 1.141 + 1.142 +template<typename Char> struct CaseInsensitiveCompareASCII { 1.143 + public: 1.144 + bool operator()(Char x, Char y) const { 1.145 + return ToLowerASCII(x) == ToLowerASCII(y); 1.146 + } 1.147 +}; 1.148 + 1.149 +} // namespace base 1.150 + 1.151 +#if defined(OS_WIN) 1.152 +#include "base/strings/string_util_win.h" 1.153 +#elif defined(OS_POSIX) 1.154 +#include "base/strings/string_util_posix.h" 1.155 +#else 1.156 +#error Define string operations appropriately for your platform 1.157 +#endif 1.158 + 1.159 +// These threadsafe functions return references to globally unique empty 1.160 +// strings. 1.161 +// 1.162 +// DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. 1.163 +// There is only one case where you should use these: functions which need to 1.164 +// return a string by reference (e.g. as a class member accessor), and don't 1.165 +// have an empty string to use (e.g. in an error case). These should not be 1.166 +// used as initializers, function arguments, or return values for functions 1.167 +// which return by value or outparam. 1.168 +BASE_EXPORT const std::string& EmptyString(); 1.169 +BASE_EXPORT const std::wstring& EmptyWString(); 1.170 +BASE_EXPORT const string16& EmptyString16(); 1.171 + 1.172 +BASE_EXPORT extern const wchar_t kWhitespaceWide[]; 1.173 +BASE_EXPORT extern const char16 kWhitespaceUTF16[]; 1.174 +BASE_EXPORT extern const char kWhitespaceASCII[]; 1.175 + 1.176 +BASE_EXPORT extern const char kUtf8ByteOrderMark[]; 1.177 + 1.178 +// Removes characters in |remove_chars| from anywhere in |input|. Returns true 1.179 +// if any characters were removed. |remove_chars| must be null-terminated. 1.180 +// NOTE: Safe to use the same variable for both |input| and |output|. 1.181 +BASE_EXPORT bool RemoveChars(const string16& input, 1.182 + const char16 remove_chars[], 1.183 + string16* output); 1.184 +BASE_EXPORT bool RemoveChars(const std::string& input, 1.185 + const char remove_chars[], 1.186 + std::string* output); 1.187 + 1.188 +// Replaces characters in |replace_chars| from anywhere in |input| with 1.189 +// |replace_with|. Each character in |replace_chars| will be replaced with 1.190 +// the |replace_with| string. Returns true if any characters were replaced. 1.191 +// |replace_chars| must be null-terminated. 1.192 +// NOTE: Safe to use the same variable for both |input| and |output|. 1.193 +BASE_EXPORT bool ReplaceChars(const string16& input, 1.194 + const char16 replace_chars[], 1.195 + const string16& replace_with, 1.196 + string16* output); 1.197 +BASE_EXPORT bool ReplaceChars(const std::string& input, 1.198 + const char replace_chars[], 1.199 + const std::string& replace_with, 1.200 + std::string* output); 1.201 + 1.202 +// Removes characters in |trim_chars| from the beginning and end of |input|. 1.203 +// |trim_chars| must be null-terminated. 1.204 +// NOTE: Safe to use the same variable for both |input| and |output|. 1.205 +BASE_EXPORT bool TrimString(const std::wstring& input, 1.206 + const wchar_t trim_chars[], 1.207 + std::wstring* output); 1.208 +BASE_EXPORT bool TrimString(const string16& input, 1.209 + const char16 trim_chars[], 1.210 + string16* output); 1.211 +BASE_EXPORT bool TrimString(const std::string& input, 1.212 + const char trim_chars[], 1.213 + std::string* output); 1.214 + 1.215 +// Truncates a string to the nearest UTF-8 character that will leave 1.216 +// the string less than or equal to the specified byte size. 1.217 +BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, 1.218 + const size_t byte_size, 1.219 + std::string* output); 1.220 + 1.221 +// Trims any whitespace from either end of the input string. Returns where 1.222 +// whitespace was found. 1.223 +// The non-wide version has two functions: 1.224 +// * TrimWhitespaceASCII() 1.225 +// This function is for ASCII strings and only looks for ASCII whitespace; 1.226 +// Please choose the best one according to your usage. 1.227 +// NOTE: Safe to use the same variable for both input and output. 1.228 +enum TrimPositions { 1.229 + TRIM_NONE = 0, 1.230 + TRIM_LEADING = 1 << 0, 1.231 + TRIM_TRAILING = 1 << 1, 1.232 + TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, 1.233 +}; 1.234 +BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, 1.235 + TrimPositions positions, 1.236 + string16* output); 1.237 +BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, 1.238 + TrimPositions positions, 1.239 + std::string* output); 1.240 + 1.241 +// Deprecated. This function is only for backward compatibility and calls 1.242 +// TrimWhitespaceASCII(). 1.243 +BASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, 1.244 + TrimPositions positions, 1.245 + std::string* output); 1.246 + 1.247 +// Searches for CR or LF characters. Removes all contiguous whitespace 1.248 +// strings that contain them. This is useful when trying to deal with text 1.249 +// copied from terminals. 1.250 +// Returns |text|, with the following three transformations: 1.251 +// (1) Leading and trailing whitespace is trimmed. 1.252 +// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace 1.253 +// sequences containing a CR or LF are trimmed. 1.254 +// (3) All other whitespace sequences are converted to single spaces. 1.255 +BASE_EXPORT std::wstring CollapseWhitespace( 1.256 + const std::wstring& text, 1.257 + bool trim_sequences_with_line_breaks); 1.258 +BASE_EXPORT string16 CollapseWhitespace( 1.259 + const string16& text, 1.260 + bool trim_sequences_with_line_breaks); 1.261 +BASE_EXPORT std::string CollapseWhitespaceASCII( 1.262 + const std::string& text, 1.263 + bool trim_sequences_with_line_breaks); 1.264 + 1.265 +// Returns true if the passed string is empty or contains only white-space 1.266 +// characters. 1.267 +BASE_EXPORT bool ContainsOnlyWhitespaceASCII(const std::string& str); 1.268 +BASE_EXPORT bool ContainsOnlyWhitespace(const string16& str); 1.269 + 1.270 +// Returns true if |input| is empty or contains only characters found in 1.271 +// |characters|. 1.272 +BASE_EXPORT bool ContainsOnlyChars(const std::wstring& input, 1.273 + const std::wstring& characters); 1.274 +BASE_EXPORT bool ContainsOnlyChars(const string16& input, 1.275 + const string16& characters); 1.276 +BASE_EXPORT bool ContainsOnlyChars(const std::string& input, 1.277 + const std::string& characters); 1.278 + 1.279 +// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII 1.280 +// beforehand. 1.281 +BASE_EXPORT std::string WideToASCII(const std::wstring& wide); 1.282 +BASE_EXPORT std::string UTF16ToASCII(const string16& utf16); 1.283 + 1.284 +// Converts the given wide string to the corresponding Latin1. This will fail 1.285 +// (return false) if any characters are more than 255. 1.286 +BASE_EXPORT bool WideToLatin1(const std::wstring& wide, std::string* latin1); 1.287 + 1.288 +// Returns true if the specified string matches the criteria. How can a wide 1.289 +// string be 8-bit or UTF8? It contains only characters that are < 256 (in the 1.290 +// first case) or characters that use only 8-bits and whose 8-bit 1.291 +// representation looks like a UTF-8 string (the second case). 1.292 +// 1.293 +// Note that IsStringUTF8 checks not only if the input is structurally 1.294 +// valid but also if it doesn't contain any non-character codepoint 1.295 +// (e.g. U+FFFE). It's done on purpose because all the existing callers want 1.296 +// to have the maximum 'discriminating' power from other encodings. If 1.297 +// there's a use case for just checking the structural validity, we have to 1.298 +// add a new function for that. 1.299 +BASE_EXPORT bool IsStringUTF8(const std::string& str); 1.300 +BASE_EXPORT bool IsStringASCII(const std::wstring& str); 1.301 +BASE_EXPORT bool IsStringASCII(const string16& str); 1.302 + 1.303 +// Converts the elements of the given string. This version uses a pointer to 1.304 +// clearly differentiate it from the non-pointer variant. 1.305 +template <class str> inline void StringToLowerASCII(str* s) { 1.306 + for (typename str::iterator i = s->begin(); i != s->end(); ++i) 1.307 + *i = base::ToLowerASCII(*i); 1.308 +} 1.309 + 1.310 +template <class str> inline str StringToLowerASCII(const str& s) { 1.311 + // for std::string and std::wstring 1.312 + str output(s); 1.313 + StringToLowerASCII(&output); 1.314 + return output; 1.315 +} 1.316 + 1.317 +// Converts the elements of the given string. This version uses a pointer to 1.318 +// clearly differentiate it from the non-pointer variant. 1.319 +template <class str> inline void StringToUpperASCII(str* s) { 1.320 + for (typename str::iterator i = s->begin(); i != s->end(); ++i) 1.321 + *i = base::ToUpperASCII(*i); 1.322 +} 1.323 + 1.324 +template <class str> inline str StringToUpperASCII(const str& s) { 1.325 + // for std::string and std::wstring 1.326 + str output(s); 1.327 + StringToUpperASCII(&output); 1.328 + return output; 1.329 +} 1.330 + 1.331 +// Compare the lower-case form of the given string against the given ASCII 1.332 +// string. This is useful for doing checking if an input string matches some 1.333 +// token, and it is optimized to avoid intermediate string copies. This API is 1.334 +// borrowed from the equivalent APIs in Mozilla. 1.335 +BASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); 1.336 +BASE_EXPORT bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); 1.337 +BASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); 1.338 + 1.339 +// Same thing, but with string iterators instead. 1.340 +BASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 1.341 + std::string::const_iterator a_end, 1.342 + const char* b); 1.343 +BASE_EXPORT bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, 1.344 + std::wstring::const_iterator a_end, 1.345 + const char* b); 1.346 +BASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 1.347 + string16::const_iterator a_end, 1.348 + const char* b); 1.349 +BASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 1.350 + const char* a_end, 1.351 + const char* b); 1.352 +BASE_EXPORT bool LowerCaseEqualsASCII(const wchar_t* a_begin, 1.353 + const wchar_t* a_end, 1.354 + const char* b); 1.355 +BASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, 1.356 + const char16* a_end, 1.357 + const char* b); 1.358 + 1.359 +// Performs a case-sensitive string compare. The behavior is undefined if both 1.360 +// strings are not ASCII. 1.361 +BASE_EXPORT bool EqualsASCII(const string16& a, const base::StringPiece& b); 1.362 + 1.363 +// Returns true if str starts with search, or false otherwise. 1.364 +BASE_EXPORT bool StartsWithASCII(const std::string& str, 1.365 + const std::string& search, 1.366 + bool case_sensitive); 1.367 +BASE_EXPORT bool StartsWith(const std::wstring& str, 1.368 + const std::wstring& search, 1.369 + bool case_sensitive); 1.370 +BASE_EXPORT bool StartsWith(const string16& str, 1.371 + const string16& search, 1.372 + bool case_sensitive); 1.373 + 1.374 +// Returns true if str ends with search, or false otherwise. 1.375 +BASE_EXPORT bool EndsWith(const std::string& str, 1.376 + const std::string& search, 1.377 + bool case_sensitive); 1.378 +BASE_EXPORT bool EndsWith(const std::wstring& str, 1.379 + const std::wstring& search, 1.380 + bool case_sensitive); 1.381 +BASE_EXPORT bool EndsWith(const string16& str, 1.382 + const string16& search, 1.383 + bool case_sensitive); 1.384 + 1.385 + 1.386 +// Determines the type of ASCII character, independent of locale (the C 1.387 +// library versions will change based on locale). 1.388 +template <typename Char> 1.389 +inline bool IsAsciiWhitespace(Char c) { 1.390 + return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 1.391 +} 1.392 +template <typename Char> 1.393 +inline bool IsAsciiAlpha(Char c) { 1.394 + return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); 1.395 +} 1.396 +template <typename Char> 1.397 +inline bool IsAsciiDigit(Char c) { 1.398 + return c >= '0' && c <= '9'; 1.399 +} 1.400 + 1.401 +template <typename Char> 1.402 +inline bool IsHexDigit(Char c) { 1.403 + return (c >= '0' && c <= '9') || 1.404 + (c >= 'A' && c <= 'F') || 1.405 + (c >= 'a' && c <= 'f'); 1.406 +} 1.407 + 1.408 +template <typename Char> 1.409 +inline Char HexDigitToInt(Char c) { 1.410 + DCHECK(IsHexDigit(c)); 1.411 + if (c >= '0' && c <= '9') 1.412 + return c - '0'; 1.413 + if (c >= 'A' && c <= 'F') 1.414 + return c - 'A' + 10; 1.415 + if (c >= 'a' && c <= 'f') 1.416 + return c - 'a' + 10; 1.417 + return 0; 1.418 +} 1.419 + 1.420 +// Returns true if it's a whitespace character. 1.421 +inline bool IsWhitespace(wchar_t c) { 1.422 + return wcschr(kWhitespaceWide, c) != NULL; 1.423 +} 1.424 + 1.425 +// Return a byte string in human-readable format with a unit suffix. Not 1.426 +// appropriate for use in any UI; use of FormatBytes and friends in ui/base is 1.427 +// highly recommended instead. TODO(avi): Figure out how to get callers to use 1.428 +// FormatBytes instead; remove this. 1.429 +BASE_EXPORT string16 FormatBytesUnlocalized(int64 bytes); 1.430 + 1.431 +// Starting at |start_offset| (usually 0), replace the first instance of 1.432 +// |find_this| with |replace_with|. 1.433 +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( 1.434 + string16* str, 1.435 + string16::size_type start_offset, 1.436 + const string16& find_this, 1.437 + const string16& replace_with); 1.438 +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( 1.439 + std::string* str, 1.440 + std::string::size_type start_offset, 1.441 + const std::string& find_this, 1.442 + const std::string& replace_with); 1.443 + 1.444 +// Starting at |start_offset| (usually 0), look through |str| and replace all 1.445 +// instances of |find_this| with |replace_with|. 1.446 +// 1.447 +// This does entire substrings; use std::replace in <algorithm> for single 1.448 +// characters, for example: 1.449 +// std::replace(str.begin(), str.end(), 'a', 'b'); 1.450 +BASE_EXPORT void ReplaceSubstringsAfterOffset( 1.451 + string16* str, 1.452 + string16::size_type start_offset, 1.453 + const string16& find_this, 1.454 + const string16& replace_with); 1.455 +BASE_EXPORT void ReplaceSubstringsAfterOffset( 1.456 + std::string* str, 1.457 + std::string::size_type start_offset, 1.458 + const std::string& find_this, 1.459 + const std::string& replace_with); 1.460 + 1.461 +// Reserves enough memory in |str| to accommodate |length_with_null| characters, 1.462 +// sets the size of |str| to |length_with_null - 1| characters, and returns a 1.463 +// pointer to the underlying contiguous array of characters. This is typically 1.464 +// used when calling a function that writes results into a character array, but 1.465 +// the caller wants the data to be managed by a string-like object. It is 1.466 +// convenient in that is can be used inline in the call, and fast in that it 1.467 +// avoids copying the results of the call from a char* into a string. 1.468 +// 1.469 +// |length_with_null| must be at least 2, since otherwise the underlying string 1.470 +// would have size 0, and trying to access &((*str)[0]) in that case can result 1.471 +// in a number of problems. 1.472 +// 1.473 +// Internally, this takes linear time because the resize() call 0-fills the 1.474 +// underlying array for potentially all 1.475 +// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we 1.476 +// could avoid this aspect of the resize() call, as we expect the caller to 1.477 +// immediately write over this memory, but there is no other way to set the size 1.478 +// of the string, and not doing that will mean people who access |str| rather 1.479 +// than str.c_str() will get back a string of whatever size |str| had on entry 1.480 +// to this function (probably 0). 1.481 +template <class string_type> 1.482 +inline typename string_type::value_type* WriteInto(string_type* str, 1.483 + size_t length_with_null) { 1.484 + DCHECK_GT(length_with_null, 1u); 1.485 + str->reserve(length_with_null); 1.486 + str->resize(length_with_null - 1); 1.487 + return &((*str)[0]); 1.488 +} 1.489 + 1.490 +//----------------------------------------------------------------------------- 1.491 + 1.492 +// Splits a string into its fields delimited by any of the characters in 1.493 +// |delimiters|. Each field is added to the |tokens| vector. Returns the 1.494 +// number of tokens found. 1.495 +BASE_EXPORT size_t Tokenize(const std::wstring& str, 1.496 + const std::wstring& delimiters, 1.497 + std::vector<std::wstring>* tokens); 1.498 +BASE_EXPORT size_t Tokenize(const string16& str, 1.499 + const string16& delimiters, 1.500 + std::vector<string16>* tokens); 1.501 +BASE_EXPORT size_t Tokenize(const std::string& str, 1.502 + const std::string& delimiters, 1.503 + std::vector<std::string>* tokens); 1.504 +BASE_EXPORT size_t Tokenize(const base::StringPiece& str, 1.505 + const base::StringPiece& delimiters, 1.506 + std::vector<base::StringPiece>* tokens); 1.507 + 1.508 +// Does the opposite of SplitString(). 1.509 +BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, char16 s); 1.510 +BASE_EXPORT std::string JoinString( 1.511 + const std::vector<std::string>& parts, char s); 1.512 + 1.513 +// Join |parts| using |separator|. 1.514 +BASE_EXPORT std::string JoinString( 1.515 + const std::vector<std::string>& parts, 1.516 + const std::string& separator); 1.517 +BASE_EXPORT string16 JoinString( 1.518 + const std::vector<string16>& parts, 1.519 + const string16& separator); 1.520 + 1.521 +// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. 1.522 +// Additionally, any number of consecutive '$' characters is replaced by that 1.523 +// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be 1.524 +// NULL. This only allows you to use up to nine replacements. 1.525 +BASE_EXPORT string16 ReplaceStringPlaceholders( 1.526 + const string16& format_string, 1.527 + const std::vector<string16>& subst, 1.528 + std::vector<size_t>* offsets); 1.529 + 1.530 +BASE_EXPORT std::string ReplaceStringPlaceholders( 1.531 + const base::StringPiece& format_string, 1.532 + const std::vector<std::string>& subst, 1.533 + std::vector<size_t>* offsets); 1.534 + 1.535 +// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. 1.536 +BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, 1.537 + const string16& a, 1.538 + size_t* offset); 1.539 + 1.540 +// Returns true if the string passed in matches the pattern. The pattern 1.541 +// string can contain wildcards like * and ? 1.542 +// The backslash character (\) is an escape character for * and ? 1.543 +// We limit the patterns to having a max of 16 * or ? characters. 1.544 +// ? matches 0 or 1 character, while * matches 0 or more characters. 1.545 +BASE_EXPORT bool MatchPattern(const base::StringPiece& string, 1.546 + const base::StringPiece& pattern); 1.547 +BASE_EXPORT bool MatchPattern(const string16& string, const string16& pattern); 1.548 + 1.549 +// Hack to convert any char-like type to its unsigned counterpart. 1.550 +// For example, it will convert char, signed char and unsigned char to unsigned 1.551 +// char. 1.552 +template<typename T> 1.553 +struct ToUnsigned { 1.554 + typedef T Unsigned; 1.555 +}; 1.556 + 1.557 +template<> 1.558 +struct ToUnsigned<char> { 1.559 + typedef unsigned char Unsigned; 1.560 +}; 1.561 +template<> 1.562 +struct ToUnsigned<signed char> { 1.563 + typedef unsigned char Unsigned; 1.564 +}; 1.565 +template<> 1.566 +struct ToUnsigned<wchar_t> { 1.567 +#if defined(WCHAR_T_IS_UTF16) 1.568 + typedef unsigned short Unsigned; 1.569 +#elif defined(WCHAR_T_IS_UTF32) 1.570 + typedef uint32 Unsigned; 1.571 +#endif 1.572 +}; 1.573 +template<> 1.574 +struct ToUnsigned<short> { 1.575 + typedef unsigned short Unsigned; 1.576 +}; 1.577 + 1.578 +#endif // BASE_STRINGS_STRING_UTIL_H_