michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // Global functions and data [declaration] michael@0: #include "nsUnicodeToUTF8.h" michael@0: michael@0: NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder) michael@0: michael@0: //---------------------------------------------------------------------- michael@0: // nsUnicodeToUTF8 class [implementation] michael@0: michael@0: NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc, michael@0: int32_t aSrcLength, michael@0: int32_t * aDestLength) michael@0: { michael@0: // aSrc is interpreted as UTF16, 3 is normally enough. michael@0: // But when previous buffer only contains part of the surrogate pair, we michael@0: // need to complete it here. If the first word in following buffer is not michael@0: // in valid surrogate range, we need to convert the remaining of last buffer michael@0: // to 3 bytes. michael@0: *aDestLength = 3*aSrcLength + 3; michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc, michael@0: int32_t * aSrcLength, michael@0: char * aDest, michael@0: int32_t * aDestLength) michael@0: { michael@0: const char16_t * src = aSrc; michael@0: const char16_t * srcEnd = aSrc + *aSrcLength; michael@0: char * dest = aDest; michael@0: int32_t destLen = *aDestLength; michael@0: uint32_t n; michael@0: michael@0: //complete remaining of last conversion michael@0: if (mHighSurrogate) { michael@0: if (src < srcEnd) { michael@0: *aDestLength = 0; michael@0: return NS_OK_UENC_MOREINPUT; michael@0: } michael@0: if (*aDestLength < 4) { michael@0: *aSrcLength = 0; michael@0: *aDestLength = 0; michael@0: return NS_OK_UENC_MOREOUTPUT; michael@0: } michael@0: if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair michael@0: *dest++ = (char)0xef; //replacement character michael@0: *dest++ = (char)0xbf; michael@0: *dest++ = (char)0xbd; michael@0: destLen -= 3; michael@0: } else { michael@0: n = ((mHighSurrogate - (char16_t)0xd800) << 10) + michael@0: (*src - (char16_t)0xdc00) + 0x10000; michael@0: *dest++ = (char)0xf0 | (n >> 18); michael@0: *dest++ = (char)0x80 | ((n >> 12) & 0x3f); michael@0: *dest++ = (char)0x80 | ((n >> 6) & 0x3f); michael@0: *dest++ = (char)0x80 | (n & 0x3f); michael@0: ++src; michael@0: destLen -= 4; michael@0: } michael@0: mHighSurrogate = 0; michael@0: } michael@0: michael@0: while (src < srcEnd) { michael@0: if ( *src <= 0x007f) { michael@0: if (destLen < 1) michael@0: goto error_more_output; michael@0: *dest++ = (char)*src; michael@0: --destLen; michael@0: } else if (*src <= 0x07ff) { michael@0: if (destLen < 2) michael@0: goto error_more_output; michael@0: *dest++ = (char)0xc0 | (*src >> 6); michael@0: *dest++ = (char)0x80 | (*src & 0x003f); michael@0: destLen -= 2; michael@0: } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) { michael@0: if (*src >= (char16_t)0xdc00) { //not a pair michael@0: if (destLen < 3) michael@0: goto error_more_output; michael@0: *dest++ = (char)0xef; //replacement character michael@0: *dest++ = (char)0xbf; michael@0: *dest++ = (char)0xbd; michael@0: destLen -= 3; michael@0: ++src; michael@0: continue; michael@0: } michael@0: if ((src+1) >= srcEnd) { michael@0: //we need another surrogate to complete this unicode char michael@0: mHighSurrogate = *src; michael@0: *aDestLength = dest - aDest; michael@0: return NS_OK_UENC_MOREINPUT; michael@0: } michael@0: //handle surrogate michael@0: if (destLen < 4) michael@0: goto error_more_output; michael@0: if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair michael@0: *dest++ = (char)0xef; //replacement character michael@0: *dest++ = (char)0xbf; michael@0: *dest++ = (char)0xbd; michael@0: destLen -= 3; michael@0: } else { michael@0: n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000; michael@0: *dest++ = (char)0xf0 | (n >> 18); michael@0: *dest++ = (char)0x80 | ((n >> 12) & 0x3f); michael@0: *dest++ = (char)0x80 | ((n >> 6) & 0x3f); michael@0: *dest++ = (char)0x80 | (n & 0x3f); michael@0: destLen -= 4; michael@0: ++src; michael@0: } michael@0: } else { michael@0: if (destLen < 3) michael@0: goto error_more_output; michael@0: //treat rest of the character as BMP michael@0: *dest++ = (char)0xe0 | (*src >> 12); michael@0: *dest++ = (char)0x80 | ((*src >> 6) & 0x003f); michael@0: *dest++ = (char)0x80 | (*src & 0x003f); michael@0: destLen -= 3; michael@0: } michael@0: ++src; michael@0: } michael@0: michael@0: *aDestLength = dest - aDest; michael@0: return NS_OK; michael@0: michael@0: error_more_output: michael@0: *aSrcLength = src - aSrc; michael@0: *aDestLength = dest - aDest; michael@0: return NS_OK_UENC_MOREOUTPUT; michael@0: } michael@0: michael@0: NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength) michael@0: { michael@0: char * dest = aDest; michael@0: michael@0: if (mHighSurrogate) { michael@0: if (*aDestLength < 3) { michael@0: *aDestLength = 0; michael@0: return NS_OK_UENC_MOREOUTPUT; michael@0: } michael@0: *dest++ = (char)0xef; //replacement character michael@0: *dest++ = (char)0xbf; michael@0: *dest++ = (char)0xbd; michael@0: mHighSurrogate = 0; michael@0: *aDestLength = 3; michael@0: return NS_OK; michael@0: } michael@0: michael@0: *aDestLength = 0; michael@0: return NS_OK; michael@0: }