Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | // Copyright (c) 2006, Google Inc. |
michael@0 | 2 | // All rights reserved. |
michael@0 | 3 | // |
michael@0 | 4 | // Redistribution and use in source and binary forms, with or without |
michael@0 | 5 | // modification, are permitted provided that the following conditions are |
michael@0 | 6 | // met: |
michael@0 | 7 | // |
michael@0 | 8 | // * Redistributions of source code must retain the above copyright |
michael@0 | 9 | // notice, this list of conditions and the following disclaimer. |
michael@0 | 10 | // * Redistributions in binary form must reproduce the above |
michael@0 | 11 | // copyright notice, this list of conditions and the following disclaimer |
michael@0 | 12 | // in the documentation and/or other materials provided with the |
michael@0 | 13 | // distribution. |
michael@0 | 14 | // * Neither the name of Google Inc. nor the names of its |
michael@0 | 15 | // contributors may be used to endorse or promote products derived from |
michael@0 | 16 | // this software without specific prior written permission. |
michael@0 | 17 | // |
michael@0 | 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
michael@0 | 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
michael@0 | 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
michael@0 | 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
michael@0 | 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
michael@0 | 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
michael@0 | 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
michael@0 | 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
michael@0 | 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 29 | |
michael@0 | 30 | #include <string.h> |
michael@0 | 31 | |
michael@0 | 32 | #include "common/convert_UTF.h" |
michael@0 | 33 | #include "common/scoped_ptr.h" |
michael@0 | 34 | #include "common/string_conversion.h" |
michael@0 | 35 | #include "common/using_std_string.h" |
michael@0 | 36 | |
michael@0 | 37 | namespace google_breakpad { |
michael@0 | 38 | |
michael@0 | 39 | using std::vector; |
michael@0 | 40 | |
michael@0 | 41 | void UTF8ToUTF16(const char *in, vector<uint16_t> *out) { |
michael@0 | 42 | size_t source_length = strlen(in); |
michael@0 | 43 | const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); |
michael@0 | 44 | const UTF8 *source_end_ptr = source_ptr + source_length; |
michael@0 | 45 | // Erase the contents and zero fill to the expected size |
michael@0 | 46 | out->clear(); |
michael@0 | 47 | out->insert(out->begin(), source_length, 0); |
michael@0 | 48 | uint16_t *target_ptr = &(*out)[0]; |
michael@0 | 49 | uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); |
michael@0 | 50 | ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, |
michael@0 | 51 | &target_ptr, target_end_ptr, |
michael@0 | 52 | strictConversion); |
michael@0 | 53 | |
michael@0 | 54 | // Resize to be the size of the # of converted characters + NULL |
michael@0 | 55 | out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); |
michael@0 | 56 | } |
michael@0 | 57 | |
michael@0 | 58 | int UTF8ToUTF16Char(const char *in, int in_length, uint16_t out[2]) { |
michael@0 | 59 | const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in); |
michael@0 | 60 | const UTF8 *source_end_ptr = source_ptr + sizeof(char); |
michael@0 | 61 | uint16_t *target_ptr = out; |
michael@0 | 62 | uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); |
michael@0 | 63 | out[0] = out[1] = 0; |
michael@0 | 64 | |
michael@0 | 65 | // Process one character at a time |
michael@0 | 66 | while (1) { |
michael@0 | 67 | ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, |
michael@0 | 68 | &target_ptr, target_end_ptr, |
michael@0 | 69 | strictConversion); |
michael@0 | 70 | |
michael@0 | 71 | if (result == conversionOK) |
michael@0 | 72 | return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in)); |
michael@0 | 73 | |
michael@0 | 74 | // Add another character to the input stream and try again |
michael@0 | 75 | source_ptr = reinterpret_cast<const UTF8 *>(in); |
michael@0 | 76 | ++source_end_ptr; |
michael@0 | 77 | |
michael@0 | 78 | if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length) |
michael@0 | 79 | break; |
michael@0 | 80 | } |
michael@0 | 81 | |
michael@0 | 82 | return 0; |
michael@0 | 83 | } |
michael@0 | 84 | |
michael@0 | 85 | void UTF32ToUTF16(const wchar_t *in, vector<uint16_t> *out) { |
michael@0 | 86 | size_t source_length = wcslen(in); |
michael@0 | 87 | const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in); |
michael@0 | 88 | const UTF32 *source_end_ptr = source_ptr + source_length; |
michael@0 | 89 | // Erase the contents and zero fill to the expected size |
michael@0 | 90 | out->clear(); |
michael@0 | 91 | out->insert(out->begin(), source_length, 0); |
michael@0 | 92 | uint16_t *target_ptr = &(*out)[0]; |
michael@0 | 93 | uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t); |
michael@0 | 94 | ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, |
michael@0 | 95 | &target_ptr, target_end_ptr, |
michael@0 | 96 | strictConversion); |
michael@0 | 97 | |
michael@0 | 98 | // Resize to be the size of the # of converted characters + NULL |
michael@0 | 99 | out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); |
michael@0 | 100 | } |
michael@0 | 101 | |
michael@0 | 102 | void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) { |
michael@0 | 103 | const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in); |
michael@0 | 104 | const UTF32 *source_end_ptr = source_ptr + 1; |
michael@0 | 105 | uint16_t *target_ptr = out; |
michael@0 | 106 | uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t); |
michael@0 | 107 | out[0] = out[1] = 0; |
michael@0 | 108 | ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, |
michael@0 | 109 | &target_ptr, target_end_ptr, |
michael@0 | 110 | strictConversion); |
michael@0 | 111 | |
michael@0 | 112 | if (result != conversionOK) { |
michael@0 | 113 | out[0] = out[1] = 0; |
michael@0 | 114 | } |
michael@0 | 115 | } |
michael@0 | 116 | |
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t low_moved_up = static_cast<uint16_t>(value << 8);
  const uint16_t high_moved_down = static_cast<uint16_t>(value >> 8);
  return static_cast<uint16_t>(low_moved_up | high_moved_down);
}
michael@0 | 120 | |
michael@0 | 121 | string UTF16ToUTF8(const vector<uint16_t> &in, bool swap) { |
michael@0 | 122 | const UTF16 *source_ptr = &in[0]; |
michael@0 | 123 | scoped_ptr<uint16_t> source_buffer; |
michael@0 | 124 | |
michael@0 | 125 | // If we're to swap, we need to make a local copy and swap each byte pair |
michael@0 | 126 | if (swap) { |
michael@0 | 127 | int idx = 0; |
michael@0 | 128 | source_buffer.reset(new uint16_t[in.size()]); |
michael@0 | 129 | UTF16 *source_buffer_ptr = source_buffer.get(); |
michael@0 | 130 | for (vector<uint16_t>::const_iterator it = in.begin(); |
michael@0 | 131 | it != in.end(); ++it, ++idx) |
michael@0 | 132 | source_buffer_ptr[idx] = Swap(*it); |
michael@0 | 133 | |
michael@0 | 134 | source_ptr = source_buffer.get(); |
michael@0 | 135 | } |
michael@0 | 136 | |
michael@0 | 137 | // The maximum expansion would be 4x the size of the input string. |
michael@0 | 138 | const UTF16 *source_end_ptr = source_ptr + in.size(); |
michael@0 | 139 | size_t target_capacity = in.size() * 4; |
michael@0 | 140 | scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); |
michael@0 | 141 | UTF8 *target_ptr = target_buffer.get(); |
michael@0 | 142 | UTF8 *target_end_ptr = target_ptr + target_capacity; |
michael@0 | 143 | ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, |
michael@0 | 144 | &target_ptr, target_end_ptr, |
michael@0 | 145 | strictConversion); |
michael@0 | 146 | |
michael@0 | 147 | if (result == conversionOK) { |
michael@0 | 148 | const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get()); |
michael@0 | 149 | return targetPtr; |
michael@0 | 150 | } |
michael@0 | 151 | |
michael@0 | 152 | return ""; |
michael@0 | 153 | } |
michael@0 | 154 | |
michael@0 | 155 | } // namespace google_breakpad |