Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
michael@0 | 2 | // Use of this source code is governed by a BSD-style license that can be |
michael@0 | 3 | // found in the LICENSE file. |
michael@0 | 4 | |
michael@0 | 5 | #include "base/strings/utf_string_conversions.h" |
michael@0 | 6 | |
michael@0 | 7 | #include "base/strings/string_piece.h" |
michael@0 | 8 | #include "base/strings/string_util.h" |
michael@0 | 9 | #include "base/strings/utf_string_conversion_utils.h" |
michael@0 | 10 | |
michael@0 | 11 | namespace base { |
michael@0 | 12 | |
michael@0 | 13 | namespace { |
michael@0 | 14 | |
michael@0 | 15 | // Generalized Unicode converter ----------------------------------------------- |
michael@0 | 16 | |
michael@0 | 17 | // Converts the given source Unicode character type to the given destination |
michael@0 | 18 | // Unicode character type as a STL string. The given input buffer and size |
michael@0 | 19 | // determine the source, and the given output STL string will be replaced by |
michael@0 | 20 | // the result. |
michael@0 | 21 | template<typename SRC_CHAR, typename DEST_STRING> |
michael@0 | 22 | bool ConvertUnicode(const SRC_CHAR* src, |
michael@0 | 23 | size_t src_len, |
michael@0 | 24 | DEST_STRING* output) { |
michael@0 | 25 | // ICU requires 32-bit numbers. |
michael@0 | 26 | bool success = true; |
michael@0 | 27 | int32 src_len32 = static_cast<int32>(src_len); |
michael@0 | 28 | for (int32 i = 0; i < src_len32; i++) { |
michael@0 | 29 | uint32 code_point; |
michael@0 | 30 | if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { |
michael@0 | 31 | WriteUnicodeCharacter(code_point, output); |
michael@0 | 32 | } else { |
michael@0 | 33 | WriteUnicodeCharacter(0xFFFD, output); |
michael@0 | 34 | success = false; |
michael@0 | 35 | } |
michael@0 | 36 | } |
michael@0 | 37 | |
michael@0 | 38 | return success; |
michael@0 | 39 | } |
michael@0 | 40 | |
michael@0 | 41 | } // namespace |
michael@0 | 42 | |
michael@0 | 43 | // UTF-8 <-> Wide -------------------------------------------------------------- |
michael@0 | 44 | |
michael@0 | 45 | bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { |
michael@0 | 46 | PrepareForUTF8Output(src, src_len, output); |
michael@0 | 47 | return ConvertUnicode(src, src_len, output); |
michael@0 | 48 | } |
michael@0 | 49 | |
michael@0 | 50 | std::string WideToUTF8(const std::wstring& wide) { |
michael@0 | 51 | std::string ret; |
michael@0 | 52 | // Ignore the success flag of this call, it will do the best it can for |
michael@0 | 53 | // invalid input, which is what we want here. |
michael@0 | 54 | WideToUTF8(wide.data(), wide.length(), &ret); |
michael@0 | 55 | return ret; |
michael@0 | 56 | } |
michael@0 | 57 | |
michael@0 | 58 | bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { |
michael@0 | 59 | PrepareForUTF16Or32Output(src, src_len, output); |
michael@0 | 60 | return ConvertUnicode(src, src_len, output); |
michael@0 | 61 | } |
michael@0 | 62 | |
michael@0 | 63 | std::wstring UTF8ToWide(const StringPiece& utf8) { |
michael@0 | 64 | std::wstring ret; |
michael@0 | 65 | UTF8ToWide(utf8.data(), utf8.length(), &ret); |
michael@0 | 66 | return ret; |
michael@0 | 67 | } |
michael@0 | 68 | |
michael@0 | 69 | // UTF-16 <-> Wide ------------------------------------------------------------- |
michael@0 | 70 | |
michael@0 | 71 | #if defined(WCHAR_T_IS_UTF16) |
michael@0 | 72 | |
michael@0 | 73 | // When wide == UTF-16, then conversions are a NOP. |
michael@0 | 74 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { |
michael@0 | 75 | output->assign(src, src_len); |
michael@0 | 76 | return true; |
michael@0 | 77 | } |
michael@0 | 78 | |
michael@0 | 79 | string16 WideToUTF16(const std::wstring& wide) { |
michael@0 | 80 | return wide; |
michael@0 | 81 | } |
michael@0 | 82 | |
michael@0 | 83 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { |
michael@0 | 84 | output->assign(src, src_len); |
michael@0 | 85 | return true; |
michael@0 | 86 | } |
michael@0 | 87 | |
michael@0 | 88 | std::wstring UTF16ToWide(const string16& utf16) { |
michael@0 | 89 | return utf16; |
michael@0 | 90 | } |
michael@0 | 91 | |
michael@0 | 92 | #elif defined(WCHAR_T_IS_UTF32) |
michael@0 | 93 | |
michael@0 | 94 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { |
michael@0 | 95 | output->clear(); |
michael@0 | 96 | // Assume that normally we won't have any non-BMP characters so the counts |
michael@0 | 97 | // will be the same. |
michael@0 | 98 | output->reserve(src_len); |
michael@0 | 99 | return ConvertUnicode(src, src_len, output); |
michael@0 | 100 | } |
michael@0 | 101 | |
michael@0 | 102 | string16 WideToUTF16(const std::wstring& wide) { |
michael@0 | 103 | string16 ret; |
michael@0 | 104 | WideToUTF16(wide.data(), wide.length(), &ret); |
michael@0 | 105 | return ret; |
michael@0 | 106 | } |
michael@0 | 107 | |
michael@0 | 108 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { |
michael@0 | 109 | output->clear(); |
michael@0 | 110 | // Assume that normally we won't have any non-BMP characters so the counts |
michael@0 | 111 | // will be the same. |
michael@0 | 112 | output->reserve(src_len); |
michael@0 | 113 | return ConvertUnicode(src, src_len, output); |
michael@0 | 114 | } |
michael@0 | 115 | |
michael@0 | 116 | std::wstring UTF16ToWide(const string16& utf16) { |
michael@0 | 117 | std::wstring ret; |
michael@0 | 118 | UTF16ToWide(utf16.data(), utf16.length(), &ret); |
michael@0 | 119 | return ret; |
michael@0 | 120 | } |
michael@0 | 121 | |
michael@0 | 122 | #endif // defined(WCHAR_T_IS_UTF32) |
michael@0 | 123 | |
michael@0 | 124 | // UTF16 <-> UTF8 -------------------------------------------------------------- |
michael@0 | 125 | |
michael@0 | 126 | #if defined(WCHAR_T_IS_UTF32) |
michael@0 | 127 | |
michael@0 | 128 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { |
michael@0 | 129 | PrepareForUTF16Or32Output(src, src_len, output); |
michael@0 | 130 | return ConvertUnicode(src, src_len, output); |
michael@0 | 131 | } |
michael@0 | 132 | |
michael@0 | 133 | string16 UTF8ToUTF16(const StringPiece& utf8) { |
michael@0 | 134 | string16 ret; |
michael@0 | 135 | // Ignore the success flag of this call, it will do the best it can for |
michael@0 | 136 | // invalid input, which is what we want here. |
michael@0 | 137 | UTF8ToUTF16(utf8.data(), utf8.length(), &ret); |
michael@0 | 138 | return ret; |
michael@0 | 139 | } |
michael@0 | 140 | |
michael@0 | 141 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { |
michael@0 | 142 | PrepareForUTF8Output(src, src_len, output); |
michael@0 | 143 | return ConvertUnicode(src, src_len, output); |
michael@0 | 144 | } |
michael@0 | 145 | |
michael@0 | 146 | std::string UTF16ToUTF8(const string16& utf16) { |
michael@0 | 147 | std::string ret; |
michael@0 | 148 | // Ignore the success flag of this call, it will do the best it can for |
michael@0 | 149 | // invalid input, which is what we want here. |
michael@0 | 150 | UTF16ToUTF8(utf16.data(), utf16.length(), &ret); |
michael@0 | 151 | return ret; |
michael@0 | 152 | } |
michael@0 | 153 | |
michael@0 | 154 | #elif defined(WCHAR_T_IS_UTF16) |
michael@0 | 155 | // Easy case since we can use the "wide" versions we already wrote above. |
michael@0 | 156 | |
michael@0 | 157 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { |
michael@0 | 158 | return UTF8ToWide(src, src_len, output); |
michael@0 | 159 | } |
michael@0 | 160 | |
michael@0 | 161 | string16 UTF8ToUTF16(const StringPiece& utf8) { |
michael@0 | 162 | return UTF8ToWide(utf8); |
michael@0 | 163 | } |
michael@0 | 164 | |
michael@0 | 165 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { |
michael@0 | 166 | return WideToUTF8(src, src_len, output); |
michael@0 | 167 | } |
michael@0 | 168 | |
michael@0 | 169 | std::string UTF16ToUTF8(const string16& utf16) { |
michael@0 | 170 | return WideToUTF8(utf16); |
michael@0 | 171 | } |
michael@0 | 172 | |
michael@0 | 173 | #endif |
michael@0 | 174 | |
michael@0 | 175 | std::wstring ASCIIToWide(const StringPiece& ascii) { |
michael@0 | 176 | DCHECK(IsStringASCII(ascii)) << ascii; |
michael@0 | 177 | return std::wstring(ascii.begin(), ascii.end()); |
michael@0 | 178 | } |
michael@0 | 179 | |
michael@0 | 180 | string16 ASCIIToUTF16(const StringPiece& ascii) { |
michael@0 | 181 | DCHECK(IsStringASCII(ascii)) << ascii; |
michael@0 | 182 | return string16(ascii.begin(), ascii.end()); |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | } // namespace base |