intl/uconv/src/nsUnicodeToUTF8.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 //----------------------------------------------------------------------
michael@0 7 // Global functions and data [declaration]
michael@0 8 #include "nsUnicodeToUTF8.h"
michael@0 9
michael@0 10 NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
michael@0 11
michael@0 12 //----------------------------------------------------------------------
michael@0 13 // nsUnicodeToUTF8 class [implementation]
michael@0 14
michael@0 15 NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc,
michael@0 16 int32_t aSrcLength,
michael@0 17 int32_t * aDestLength)
michael@0 18 {
michael@0 19 // aSrc is interpreted as UTF16, 3 is normally enough.
michael@0 20 // But when previous buffer only contains part of the surrogate pair, we
michael@0 21 // need to complete it here. If the first word in following buffer is not
michael@0 22 // in valid surrogate range, we need to convert the remaining of last buffer
michael@0 23 // to 3 bytes.
michael@0 24 *aDestLength = 3*aSrcLength + 3;
michael@0 25 return NS_OK;
michael@0 26 }
michael@0 27
michael@0 28 NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc,
michael@0 29 int32_t * aSrcLength,
michael@0 30 char * aDest,
michael@0 31 int32_t * aDestLength)
michael@0 32 {
michael@0 33 const char16_t * src = aSrc;
michael@0 34 const char16_t * srcEnd = aSrc + *aSrcLength;
michael@0 35 char * dest = aDest;
michael@0 36 int32_t destLen = *aDestLength;
michael@0 37 uint32_t n;
michael@0 38
michael@0 39 //complete remaining of last conversion
michael@0 40 if (mHighSurrogate) {
michael@0 41 if (src < srcEnd) {
michael@0 42 *aDestLength = 0;
michael@0 43 return NS_OK_UENC_MOREINPUT;
michael@0 44 }
michael@0 45 if (*aDestLength < 4) {
michael@0 46 *aSrcLength = 0;
michael@0 47 *aDestLength = 0;
michael@0 48 return NS_OK_UENC_MOREOUTPUT;
michael@0 49 }
michael@0 50 if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
michael@0 51 *dest++ = (char)0xef; //replacement character
michael@0 52 *dest++ = (char)0xbf;
michael@0 53 *dest++ = (char)0xbd;
michael@0 54 destLen -= 3;
michael@0 55 } else {
michael@0 56 n = ((mHighSurrogate - (char16_t)0xd800) << 10) +
michael@0 57 (*src - (char16_t)0xdc00) + 0x10000;
michael@0 58 *dest++ = (char)0xf0 | (n >> 18);
michael@0 59 *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
michael@0 60 *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
michael@0 61 *dest++ = (char)0x80 | (n & 0x3f);
michael@0 62 ++src;
michael@0 63 destLen -= 4;
michael@0 64 }
michael@0 65 mHighSurrogate = 0;
michael@0 66 }
michael@0 67
michael@0 68 while (src < srcEnd) {
michael@0 69 if ( *src <= 0x007f) {
michael@0 70 if (destLen < 1)
michael@0 71 goto error_more_output;
michael@0 72 *dest++ = (char)*src;
michael@0 73 --destLen;
michael@0 74 } else if (*src <= 0x07ff) {
michael@0 75 if (destLen < 2)
michael@0 76 goto error_more_output;
michael@0 77 *dest++ = (char)0xc0 | (*src >> 6);
michael@0 78 *dest++ = (char)0x80 | (*src & 0x003f);
michael@0 79 destLen -= 2;
michael@0 80 } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
michael@0 81 if (*src >= (char16_t)0xdc00) { //not a pair
michael@0 82 if (destLen < 3)
michael@0 83 goto error_more_output;
michael@0 84 *dest++ = (char)0xef; //replacement character
michael@0 85 *dest++ = (char)0xbf;
michael@0 86 *dest++ = (char)0xbd;
michael@0 87 destLen -= 3;
michael@0 88 ++src;
michael@0 89 continue;
michael@0 90 }
michael@0 91 if ((src+1) >= srcEnd) {
michael@0 92 //we need another surrogate to complete this unicode char
michael@0 93 mHighSurrogate = *src;
michael@0 94 *aDestLength = dest - aDest;
michael@0 95 return NS_OK_UENC_MOREINPUT;
michael@0 96 }
michael@0 97 //handle surrogate
michael@0 98 if (destLen < 4)
michael@0 99 goto error_more_output;
michael@0 100 if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
michael@0 101 *dest++ = (char)0xef; //replacement character
michael@0 102 *dest++ = (char)0xbf;
michael@0 103 *dest++ = (char)0xbd;
michael@0 104 destLen -= 3;
michael@0 105 } else {
michael@0 106 n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
michael@0 107 *dest++ = (char)0xf0 | (n >> 18);
michael@0 108 *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
michael@0 109 *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
michael@0 110 *dest++ = (char)0x80 | (n & 0x3f);
michael@0 111 destLen -= 4;
michael@0 112 ++src;
michael@0 113 }
michael@0 114 } else {
michael@0 115 if (destLen < 3)
michael@0 116 goto error_more_output;
michael@0 117 //treat rest of the character as BMP
michael@0 118 *dest++ = (char)0xe0 | (*src >> 12);
michael@0 119 *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
michael@0 120 *dest++ = (char)0x80 | (*src & 0x003f);
michael@0 121 destLen -= 3;
michael@0 122 }
michael@0 123 ++src;
michael@0 124 }
michael@0 125
michael@0 126 *aDestLength = dest - aDest;
michael@0 127 return NS_OK;
michael@0 128
michael@0 129 error_more_output:
michael@0 130 *aSrcLength = src - aSrc;
michael@0 131 *aDestLength = dest - aDest;
michael@0 132 return NS_OK_UENC_MOREOUTPUT;
michael@0 133 }
michael@0 134
michael@0 135 NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
michael@0 136 {
michael@0 137 char * dest = aDest;
michael@0 138
michael@0 139 if (mHighSurrogate) {
michael@0 140 if (*aDestLength < 3) {
michael@0 141 *aDestLength = 0;
michael@0 142 return NS_OK_UENC_MOREOUTPUT;
michael@0 143 }
michael@0 144 *dest++ = (char)0xef; //replacement character
michael@0 145 *dest++ = (char)0xbf;
michael@0 146 *dest++ = (char)0xbd;
michael@0 147 mHighSurrogate = 0;
michael@0 148 *aDestLength = 3;
michael@0 149 return NS_OK;
michael@0 150 }
michael@0 151
michael@0 152 *aDestLength = 0;
michael@0 153 return NS_OK;
michael@0 154 }

mercurial