1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/common/string_conversion.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,155 @@ 1.4 +// Copyright (c) 2006, Google Inc. 1.5 +// All rights reserved. 1.6 +// 1.7 +// Redistribution and use in source and binary forms, with or without 1.8 +// modification, are permitted provided that the following conditions are 1.9 +// met: 1.10 +// 1.11 +// * Redistributions of source code must retain the above copyright 1.12 +// notice, this list of conditions and the following disclaimer. 1.13 +// * Redistributions in binary form must reproduce the above 1.14 +// copyright notice, this list of conditions and the following disclaimer 1.15 +// in the documentation and/or other materials provided with the 1.16 +// distribution. 1.17 +// * Neither the name of Google Inc. nor the names of its 1.18 +// contributors may be used to endorse or promote products derived from 1.19 +// this software without specific prior written permission. 1.20 +// 1.21 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.22 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.23 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.24 +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 1.25 +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 1.26 +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 1.27 +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 1.28 +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 1.29 +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.30 +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.31 +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.32 + 1.33 +#include <string.h> 1.34 + 1.35 +#include "common/convert_UTF.h" 1.36 +#include "common/scoped_ptr.h" 1.37 +#include "common/string_conversion.h" 1.38 +#include "common/using_std_string.h" 1.39 + 1.40 +namespace google_breakpad { 1.41 + 1.42 +using std::vector; 1.43 + 1.44 +void UTF8ToUTF16(const char *in, vector<uint16_t> *out) { 1.45 + size_t source_length = strlen(in); 1.46 + const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); 1.47 + const UTF8 *source_end_ptr = source_ptr + source_length; 1.48 + // Erase the contents and zero fill to the expected size 1.49 + out->clear(); 1.50 + out->insert(out->begin(), source_length, 0); 1.51 + uint16_t *target_ptr = &(*out)[0]; 1.52 + uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); 1.53 + ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 1.54 + &target_ptr, target_end_ptr, 1.55 + strictConversion); 1.56 + 1.57 + // Resize to be the size of the # of converted characters + NULL 1.58 + out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 1.59 +} 1.60 + 1.61 +int UTF8ToUTF16Char(const char *in, int in_length, uint16_t out[2]) { 1.62 + const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); 1.63 + const UTF8 *source_end_ptr = source_ptr + sizeof(char); 1.64 + uint16_t *target_ptr = out; 1.65 + uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); 1.66 + out[0] = out[1] = 0; 1.67 + 1.68 + // Process one character at a time 1.69 + while (1) { 1.70 + ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, 1.71 + &target_ptr, target_end_ptr, 1.72 + strictConversion); 1.73 + 1.74 + if (result == conversionOK) 1.75 + return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in)); 1.76 + 1.77 + // Add another character to the input stream and try again 1.78 + source_ptr = reinterpret_cast<const UTF8 *>(in); 1.79 + ++source_end_ptr; 1.80 + 1.81 + if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length) 1.82 + break; 1.83 + } 1.84 + 1.85 + return 0; 1.86 +} 1.87 + 1.88 +void UTF32ToUTF16(const wchar_t *in, vector<uint16_t> *out) { 1.89 + size_t source_length = wcslen(in); 1.90 + const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in); 1.91 + const UTF32 *source_end_ptr = source_ptr + source_length; 1.92 + // Erase the contents and zero fill to the expected size 1.93 + out->clear(); 1.94 + out->insert(out->begin(), source_length, 0); 1.95 + uint16_t *target_ptr = &(*out)[0]; 1.96 + uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); 1.97 + ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 1.98 + &target_ptr, target_end_ptr, 1.99 + strictConversion); 1.100 + 1.101 + // Resize to be the size of the # of converted characters + NULL 1.102 + out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); 1.103 +} 1.104 + 1.105 +void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) { 1.106 + const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in); 1.107 + const UTF32 *source_end_ptr = source_ptr + 1; 1.108 + uint16_t *target_ptr = out; 1.109 + uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); 1.110 + out[0] = out[1] = 0; 1.111 + ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, 1.112 + &target_ptr, target_end_ptr, 1.113 + strictConversion); 1.114 + 1.115 + if (result != conversionOK) { 1.116 + out[0] = out[1] = 0; 1.117 + } 1.118 +} 1.119 + 1.120 +static inline uint16_t Swap(uint16_t value) { 1.121 + return (value >> 8) | static_cast<uint16_t>(value << 8); 1.122 +} 1.123 + 1.124 +string UTF16ToUTF8(const vector<uint16_t> &in, bool swap) { 1.125 + const UTF16 *source_ptr = &in[0]; 1.126 + scoped_ptr<uint16_t> source_buffer; 1.127 + 1.128 + // If we're to swap, we need to make a local copy and swap each byte pair 1.129 + if (swap) { 1.130 + int idx = 0; 1.131 + source_buffer.reset(new uint16_t[in.size()]); 1.132 + UTF16 *source_buffer_ptr = source_buffer.get(); 1.133 + for (vector<uint16_t>::const_iterator it = in.begin(); 1.134 + it != in.end(); ++it, ++idx) 1.135 + source_buffer_ptr[idx] = Swap(*it); 1.136 + 1.137 + source_ptr = source_buffer.get(); 1.138 + } 1.139 + 1.140 + // The maximum expansion would be 4x the size of the input string. 1.141 + const UTF16 *source_end_ptr = source_ptr + in.size(); 1.142 + size_t target_capacity = in.size() * 4; 1.143 + scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); 1.144 + UTF8 *target_ptr = target_buffer.get(); 1.145 + UTF8 *target_end_ptr = target_ptr + target_capacity; 1.146 + ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, 1.147 + &target_ptr, target_end_ptr, 1.148 + strictConversion); 1.149 + 1.150 + if (result == conversionOK) { 1.151 + const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get()); 1.152 + return targetPtr; 1.153 + } 1.154 + 1.155 + return ""; 1.156 +} 1.157 + 1.158 +} // namespace google_breakpad