security/sandbox/chromium/base/strings/utf_string_conversions.cc

Wed, 31 Dec 2014 06:55:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:50 +0100
changeset 2
7e26c7da4463
permissions
-rw-r--r--

Added tag UPSTREAM_283F7C6 for changeset ca08bd8f51b2

michael@0 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
michael@0 2 // Use of this source code is governed by a BSD-style license that can be
michael@0 3 // found in the LICENSE file.
michael@0 4
michael@0 5 #include "base/strings/utf_string_conversions.h"
michael@0 6
michael@0 7 #include "base/strings/string_piece.h"
michael@0 8 #include "base/strings/string_util.h"
michael@0 9 #include "base/strings/utf_string_conversion_utils.h"
michael@0 10
michael@0 11 namespace base {
michael@0 12
michael@0 13 namespace {
michael@0 14
michael@0 15 // Generalized Unicode converter -----------------------------------------------
michael@0 16
michael@0 17 // Converts the given source Unicode character type to the given destination
michael@0 18 // Unicode character type as a STL string. The given input buffer and size
michael@0 19 // determine the source, and the given output STL string will be replaced by
michael@0 20 // the result.
michael@0 21 template<typename SRC_CHAR, typename DEST_STRING>
michael@0 22 bool ConvertUnicode(const SRC_CHAR* src,
michael@0 23 size_t src_len,
michael@0 24 DEST_STRING* output) {
michael@0 25 // ICU requires 32-bit numbers.
michael@0 26 bool success = true;
michael@0 27 int32 src_len32 = static_cast<int32>(src_len);
michael@0 28 for (int32 i = 0; i < src_len32; i++) {
michael@0 29 uint32 code_point;
michael@0 30 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
michael@0 31 WriteUnicodeCharacter(code_point, output);
michael@0 32 } else {
michael@0 33 WriteUnicodeCharacter(0xFFFD, output);
michael@0 34 success = false;
michael@0 35 }
michael@0 36 }
michael@0 37
michael@0 38 return success;
michael@0 39 }
michael@0 40
michael@0 41 } // namespace
michael@0 42
michael@0 43 // UTF-8 <-> Wide --------------------------------------------------------------
michael@0 44
michael@0 45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
michael@0 46 PrepareForUTF8Output(src, src_len, output);
michael@0 47 return ConvertUnicode(src, src_len, output);
michael@0 48 }
michael@0 49
michael@0 50 std::string WideToUTF8(const std::wstring& wide) {
michael@0 51 std::string ret;
michael@0 52 // Ignore the success flag of this call, it will do the best it can for
michael@0 53 // invalid input, which is what we want here.
michael@0 54 WideToUTF8(wide.data(), wide.length(), &ret);
michael@0 55 return ret;
michael@0 56 }
michael@0 57
michael@0 58 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
michael@0 59 PrepareForUTF16Or32Output(src, src_len, output);
michael@0 60 return ConvertUnicode(src, src_len, output);
michael@0 61 }
michael@0 62
michael@0 63 std::wstring UTF8ToWide(const StringPiece& utf8) {
michael@0 64 std::wstring ret;
michael@0 65 UTF8ToWide(utf8.data(), utf8.length(), &ret);
michael@0 66 return ret;
michael@0 67 }
michael@0 68
michael@0 69 // UTF-16 <-> Wide -------------------------------------------------------------
michael@0 70
michael@0 71 #if defined(WCHAR_T_IS_UTF16)
michael@0 72
michael@0 73 // When wide == UTF-16, then conversions are a NOP.
michael@0 74 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
michael@0 75 output->assign(src, src_len);
michael@0 76 return true;
michael@0 77 }
michael@0 78
michael@0 79 string16 WideToUTF16(const std::wstring& wide) {
michael@0 80 return wide;
michael@0 81 }
michael@0 82
michael@0 83 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
michael@0 84 output->assign(src, src_len);
michael@0 85 return true;
michael@0 86 }
michael@0 87
michael@0 88 std::wstring UTF16ToWide(const string16& utf16) {
michael@0 89 return utf16;
michael@0 90 }
michael@0 91
michael@0 92 #elif defined(WCHAR_T_IS_UTF32)
michael@0 93
michael@0 94 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
michael@0 95 output->clear();
michael@0 96 // Assume that normally we won't have any non-BMP characters so the counts
michael@0 97 // will be the same.
michael@0 98 output->reserve(src_len);
michael@0 99 return ConvertUnicode(src, src_len, output);
michael@0 100 }
michael@0 101
michael@0 102 string16 WideToUTF16(const std::wstring& wide) {
michael@0 103 string16 ret;
michael@0 104 WideToUTF16(wide.data(), wide.length(), &ret);
michael@0 105 return ret;
michael@0 106 }
michael@0 107
michael@0 108 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
michael@0 109 output->clear();
michael@0 110 // Assume that normally we won't have any non-BMP characters so the counts
michael@0 111 // will be the same.
michael@0 112 output->reserve(src_len);
michael@0 113 return ConvertUnicode(src, src_len, output);
michael@0 114 }
michael@0 115
michael@0 116 std::wstring UTF16ToWide(const string16& utf16) {
michael@0 117 std::wstring ret;
michael@0 118 UTF16ToWide(utf16.data(), utf16.length(), &ret);
michael@0 119 return ret;
michael@0 120 }
michael@0 121
michael@0 122 #endif // defined(WCHAR_T_IS_UTF32)
michael@0 123
michael@0 124 // UTF16 <-> UTF8 --------------------------------------------------------------
michael@0 125
michael@0 126 #if defined(WCHAR_T_IS_UTF32)
michael@0 127
michael@0 128 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
michael@0 129 PrepareForUTF16Or32Output(src, src_len, output);
michael@0 130 return ConvertUnicode(src, src_len, output);
michael@0 131 }
michael@0 132
michael@0 133 string16 UTF8ToUTF16(const StringPiece& utf8) {
michael@0 134 string16 ret;
michael@0 135 // Ignore the success flag of this call, it will do the best it can for
michael@0 136 // invalid input, which is what we want here.
michael@0 137 UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
michael@0 138 return ret;
michael@0 139 }
michael@0 140
michael@0 141 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
michael@0 142 PrepareForUTF8Output(src, src_len, output);
michael@0 143 return ConvertUnicode(src, src_len, output);
michael@0 144 }
michael@0 145
michael@0 146 std::string UTF16ToUTF8(const string16& utf16) {
michael@0 147 std::string ret;
michael@0 148 // Ignore the success flag of this call, it will do the best it can for
michael@0 149 // invalid input, which is what we want here.
michael@0 150 UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
michael@0 151 return ret;
michael@0 152 }
michael@0 153
michael@0 154 #elif defined(WCHAR_T_IS_UTF16)
michael@0 155 // Easy case since we can use the "wide" versions we already wrote above.
michael@0 156
michael@0 157 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
michael@0 158 return UTF8ToWide(src, src_len, output);
michael@0 159 }
michael@0 160
michael@0 161 string16 UTF8ToUTF16(const StringPiece& utf8) {
michael@0 162 return UTF8ToWide(utf8);
michael@0 163 }
michael@0 164
michael@0 165 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
michael@0 166 return WideToUTF8(src, src_len, output);
michael@0 167 }
michael@0 168
michael@0 169 std::string UTF16ToUTF8(const string16& utf16) {
michael@0 170 return WideToUTF8(utf16);
michael@0 171 }
michael@0 172
michael@0 173 #endif
michael@0 174
michael@0 175 std::wstring ASCIIToWide(const StringPiece& ascii) {
michael@0 176 DCHECK(IsStringASCII(ascii)) << ascii;
michael@0 177 return std::wstring(ascii.begin(), ascii.end());
michael@0 178 }
michael@0 179
michael@0 180 string16 ASCIIToUTF16(const StringPiece& ascii) {
michael@0 181 DCHECK(IsStringASCII(ascii)) << ascii;
michael@0 182 return string16(ascii.begin(), ascii.end());
michael@0 183 }
michael@0 184
michael@0 185 } // namespace base

mercurial