1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/src/nsUnicodeToUTF8.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,154 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +//---------------------------------------------------------------------- 1.10 +// Global functions and data [declaration] 1.11 +#include "nsUnicodeToUTF8.h" 1.12 + 1.13 +NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder) 1.14 + 1.15 +//---------------------------------------------------------------------- 1.16 +// nsUnicodeToUTF8 class [implementation] 1.17 + 1.18 +NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc, 1.19 + int32_t aSrcLength, 1.20 + int32_t * aDestLength) 1.21 +{ 1.22 + // aSrc is interpreted as UTF16, 3 is normally enough. 1.23 + // But when previous buffer only contains part of the surrogate pair, we 1.24 + // need to complete it here. If the first word in following buffer is not 1.25 + // in valid surrogate range, we need to convert the remaining of last buffer 1.26 + // to 3 bytes. 1.27 + *aDestLength = 3*aSrcLength + 3; 1.28 + return NS_OK; 1.29 +} 1.30 + 1.31 +NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc, 1.32 + int32_t * aSrcLength, 1.33 + char * aDest, 1.34 + int32_t * aDestLength) 1.35 +{ 1.36 + const char16_t * src = aSrc; 1.37 + const char16_t * srcEnd = aSrc + *aSrcLength; 1.38 + char * dest = aDest; 1.39 + int32_t destLen = *aDestLength; 1.40 + uint32_t n; 1.41 + 1.42 + //complete remaining of last conversion 1.43 + if (mHighSurrogate) { 1.44 + if (src < srcEnd) { 1.45 + *aDestLength = 0; 1.46 + return NS_OK_UENC_MOREINPUT; 1.47 + } 1.48 + if (*aDestLength < 4) { 1.49 + *aSrcLength = 0; 1.50 + *aDestLength = 0; 1.51 + return NS_OK_UENC_MOREOUTPUT; 1.52 + } 1.53 + if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair 1.54 + *dest++ = (char)0xef; //replacement character 1.55 + *dest++ = (char)0xbf; 1.56 + *dest++ = (char)0xbd; 1.57 + destLen -= 3; 1.58 + } else { 1.59 + n = ((mHighSurrogate - (char16_t)0xd800) << 10) + 1.60 + (*src - (char16_t)0xdc00) + 0x10000; 1.61 + *dest++ = (char)0xf0 | (n >> 18); 1.62 + *dest++ = (char)0x80 | ((n >> 12) & 0x3f); 1.63 + *dest++ = (char)0x80 | ((n >> 6) & 0x3f); 1.64 + *dest++ = (char)0x80 | (n & 0x3f); 1.65 + ++src; 1.66 + destLen -= 4; 1.67 + } 1.68 + mHighSurrogate = 0; 1.69 + } 1.70 + 1.71 + while (src < srcEnd) { 1.72 + if ( *src <= 0x007f) { 1.73 + if (destLen < 1) 1.74 + goto error_more_output; 1.75 + *dest++ = (char)*src; 1.76 + --destLen; 1.77 + } else if (*src <= 0x07ff) { 1.78 + if (destLen < 2) 1.79 + goto error_more_output; 1.80 + *dest++ = (char)0xc0 | (*src >> 6); 1.81 + *dest++ = (char)0x80 | (*src & 0x003f); 1.82 + destLen -= 2; 1.83 + } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) { 1.84 + if (*src >= (char16_t)0xdc00) { //not a pair 1.85 + if (destLen < 3) 1.86 + goto error_more_output; 1.87 + *dest++ = (char)0xef; //replacement character 1.88 + *dest++ = (char)0xbf; 1.89 + *dest++ = (char)0xbd; 1.90 + destLen -= 3; 1.91 + ++src; 1.92 + continue; 1.93 + } 1.94 + if ((src+1) >= srcEnd) { 1.95 + //we need another surrogate to complete this unicode char 1.96 + mHighSurrogate = *src; 1.97 + *aDestLength = dest - aDest; 1.98 + return NS_OK_UENC_MOREINPUT; 1.99 + } 1.100 + //handle surrogate 1.101 + if (destLen < 4) 1.102 + goto error_more_output; 1.103 + if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair 1.104 + *dest++ = (char)0xef; //replacement character 1.105 + *dest++ = (char)0xbf; 1.106 + *dest++ = (char)0xbd; 1.107 + destLen -= 3; 1.108 + } else { 1.109 + n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000; 1.110 + *dest++ = (char)0xf0 | (n >> 18); 1.111 + *dest++ = (char)0x80 | ((n >> 12) & 0x3f); 1.112 + *dest++ = (char)0x80 | ((n >> 6) & 0x3f); 1.113 + *dest++ = (char)0x80 | (n & 0x3f); 1.114 + destLen -= 4; 1.115 + ++src; 1.116 + } 1.117 + } else { 1.118 + if (destLen < 3) 1.119 + goto error_more_output; 1.120 + //treat rest of the character as BMP 1.121 + *dest++ = (char)0xe0 | (*src >> 12); 1.122 + *dest++ = (char)0x80 | ((*src >> 6) & 0x003f); 1.123 + *dest++ = (char)0x80 | (*src & 0x003f); 1.124 + destLen -= 3; 1.125 + } 1.126 + ++src; 1.127 + } 1.128 + 1.129 + *aDestLength = dest - aDest; 1.130 + return NS_OK; 1.131 + 1.132 +error_more_output: 1.133 + *aSrcLength = src - aSrc; 1.134 + *aDestLength = dest - aDest; 1.135 + return NS_OK_UENC_MOREOUTPUT; 1.136 +} 1.137 + 1.138 +NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength) 1.139 +{ 1.140 + char * dest = aDest; 1.141 + 1.142 + if (mHighSurrogate) { 1.143 + if (*aDestLength < 3) { 1.144 + *aDestLength = 0; 1.145 + return NS_OK_UENC_MOREOUTPUT; 1.146 + } 1.147 + *dest++ = (char)0xef; //replacement character 1.148 + *dest++ = (char)0xbf; 1.149 + *dest++ = (char)0xbd; 1.150 + mHighSurrogate = 0; 1.151 + *aDestLength = 3; 1.152 + return NS_OK; 1.153 + } 1.154 + 1.155 + *aDestLength = 0; 1.156 + return NS_OK; 1.157 +}