michael@0: // Copyright (c) 2010 The Chromium Authors. All rights reserved. michael@0: // Use of this source code is governed by a BSD-style license that can be michael@0: // found in the LICENSE file. michael@0: michael@0: #include "base/strings/utf_string_conversions.h" michael@0: michael@0: #include "base/strings/string_piece.h" michael@0: #include "base/strings/string_util.h" michael@0: #include "base/strings/utf_string_conversion_utils.h" michael@0: michael@0: namespace base { michael@0: michael@0: namespace { michael@0: michael@0: // Generalized Unicode converter ----------------------------------------------- michael@0: michael@0: // Converts the given source Unicode character type to the given destination michael@0: // Unicode character type as a STL string. The given input buffer and size michael@0: // determine the source, and the given output STL string will be replaced by michael@0: // the result. michael@0: template michael@0: bool ConvertUnicode(const SRC_CHAR* src, michael@0: size_t src_len, michael@0: DEST_STRING* output) { michael@0: // ICU requires 32-bit numbers. michael@0: bool success = true; michael@0: int32 src_len32 = static_cast(src_len); michael@0: for (int32 i = 0; i < src_len32; i++) { michael@0: uint32 code_point; michael@0: if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { michael@0: WriteUnicodeCharacter(code_point, output); michael@0: } else { michael@0: WriteUnicodeCharacter(0xFFFD, output); michael@0: success = false; michael@0: } michael@0: } michael@0: michael@0: return success; michael@0: } michael@0: michael@0: } // namespace michael@0: michael@0: // UTF-8 <-> Wide -------------------------------------------------------------- michael@0: michael@0: bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { michael@0: PrepareForUTF8Output(src, src_len, output); michael@0: return ConvertUnicode(src, src_len, output); michael@0: } michael@0: michael@0: std::string WideToUTF8(const std::wstring& wide) { michael@0: std::string ret; michael@0: // Ignore the success flag of this call, it will do the best it can for michael@0: // invalid input, which is what we want here. michael@0: WideToUTF8(wide.data(), wide.length(), &ret); michael@0: return ret; michael@0: } michael@0: michael@0: bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { michael@0: PrepareForUTF16Or32Output(src, src_len, output); michael@0: return ConvertUnicode(src, src_len, output); michael@0: } michael@0: michael@0: std::wstring UTF8ToWide(const StringPiece& utf8) { michael@0: std::wstring ret; michael@0: UTF8ToWide(utf8.data(), utf8.length(), &ret); michael@0: return ret; michael@0: } michael@0: michael@0: // UTF-16 <-> Wide ------------------------------------------------------------- michael@0: michael@0: #if defined(WCHAR_T_IS_UTF16) michael@0: michael@0: // When wide == UTF-16, then conversions are a NOP. michael@0: bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { michael@0: output->assign(src, src_len); michael@0: return true; michael@0: } michael@0: michael@0: string16 WideToUTF16(const std::wstring& wide) { michael@0: return wide; michael@0: } michael@0: michael@0: bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { michael@0: output->assign(src, src_len); michael@0: return true; michael@0: } michael@0: michael@0: std::wstring UTF16ToWide(const string16& utf16) { michael@0: return utf16; michael@0: } michael@0: michael@0: #elif defined(WCHAR_T_IS_UTF32) michael@0: michael@0: bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { michael@0: output->clear(); michael@0: // Assume that normally we won't have any non-BMP characters so the counts michael@0: // will be the same. michael@0: output->reserve(src_len); michael@0: return ConvertUnicode(src, src_len, output); michael@0: } michael@0: michael@0: string16 WideToUTF16(const std::wstring& wide) { michael@0: string16 ret; michael@0: WideToUTF16(wide.data(), wide.length(), &ret); michael@0: return ret; michael@0: } michael@0: michael@0: bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { michael@0: output->clear(); michael@0: // Assume that normally we won't have any non-BMP characters so the counts michael@0: // will be the same. michael@0: output->reserve(src_len); michael@0: return ConvertUnicode(src, src_len, output); michael@0: } michael@0: michael@0: std::wstring UTF16ToWide(const string16& utf16) { michael@0: std::wstring ret; michael@0: UTF16ToWide(utf16.data(), utf16.length(), &ret); michael@0: return ret; michael@0: } michael@0: michael@0: #endif // defined(WCHAR_T_IS_UTF32) michael@0: michael@0: // UTF16 <-> UTF8 -------------------------------------------------------------- michael@0: michael@0: #if defined(WCHAR_T_IS_UTF32) michael@0: michael@0: bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { michael@0: PrepareForUTF16Or32Output(src, src_len, output); michael@0: return ConvertUnicode(src, src_len, output); michael@0: } michael@0: michael@0: string16 UTF8ToUTF16(const StringPiece& utf8) { michael@0: string16 ret; michael@0: // Ignore the success flag of this call, it will do the best it can for michael@0: // invalid input, which is what we want here. michael@0: UTF8ToUTF16(utf8.data(), utf8.length(), &ret); michael@0: return ret; michael@0: } michael@0: michael@0: bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { michael@0: PrepareForUTF8Output(src, src_len, output); michael@0: return ConvertUnicode(src, src_len, output); michael@0: } michael@0: michael@0: std::string UTF16ToUTF8(const string16& utf16) { michael@0: std::string ret; michael@0: // Ignore the success flag of this call, it will do the best it can for michael@0: // invalid input, which is what we want here. michael@0: UTF16ToUTF8(utf16.data(), utf16.length(), &ret); michael@0: return ret; michael@0: } michael@0: michael@0: #elif defined(WCHAR_T_IS_UTF16) michael@0: // Easy case since we can use the "wide" versions we already wrote above. michael@0: michael@0: bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { michael@0: return UTF8ToWide(src, src_len, output); michael@0: } michael@0: michael@0: string16 UTF8ToUTF16(const StringPiece& utf8) { michael@0: return UTF8ToWide(utf8); michael@0: } michael@0: michael@0: bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { michael@0: return WideToUTF8(src, src_len, output); michael@0: } michael@0: michael@0: std::string UTF16ToUTF8(const string16& utf16) { michael@0: return WideToUTF8(utf16); michael@0: } michael@0: michael@0: #endif michael@0: michael@0: std::wstring ASCIIToWide(const StringPiece& ascii) { michael@0: DCHECK(IsStringASCII(ascii)) << ascii; michael@0: return std::wstring(ascii.begin(), ascii.end()); michael@0: } michael@0: michael@0: string16 ASCIIToUTF16(const StringPiece& ascii) { michael@0: DCHECK(IsStringASCII(ascii)) << ascii; michael@0: return string16(ascii.begin(), ascii.end()); michael@0: } michael@0: michael@0: } // namespace base