michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: //----------------------------------------------------------------------
michael@0: // Global functions and data [declaration]
michael@0: #include "nsUnicodeToUTF8.h"
michael@0: 
michael@0: NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
michael@0: 
michael@0: //----------------------------------------------------------------------
michael@0: // nsUnicodeToUTF8 class [implementation]
michael@0: 
michael@0: NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc, 
michael@0:                                               int32_t aSrcLength,
michael@0:                                               int32_t * aDestLength)
michael@0: {
michael@0:   // aSrc is interpreted as UTF16, 3 is normally enough.
michael@0:   // But when previous buffer only contains part of the surrogate pair, we 
michael@0:   // need to complete it here. If the first word in following buffer is not
michael@0:   // in valid surrogate range, we need to convert the remaining of last buffer
michael@0:   // to 3 bytes.
michael@0:   *aDestLength = 3*aSrcLength + 3;
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc, 
michael@0:                                 int32_t * aSrcLength, 
michael@0:                                 char * aDest, 
michael@0:                                 int32_t * aDestLength)
michael@0: {
michael@0:   const char16_t * src = aSrc;
michael@0:   const char16_t * srcEnd = aSrc + *aSrcLength;
michael@0:   char * dest = aDest;
michael@0:   int32_t destLen = *aDestLength;
michael@0:   uint32_t n;
michael@0: 
michael@0:   //complete remaining of last conversion
michael@0:   if (mHighSurrogate) {
michael@0:     if (src < srcEnd) {
michael@0:       *aDestLength = 0;
michael@0:       return NS_OK_UENC_MOREINPUT;
michael@0:     }
michael@0:     if (*aDestLength < 4) {
michael@0:       *aSrcLength = 0;
michael@0:       *aDestLength = 0;
michael@0:       return NS_OK_UENC_MOREOUTPUT;
michael@0:     }
michael@0:     if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
michael@0:       *dest++ = (char)0xef; //replacement character
michael@0:       *dest++ = (char)0xbf;
michael@0:       *dest++ = (char)0xbd;
michael@0:       destLen -= 3;
michael@0:     } else { 
michael@0:       n = ((mHighSurrogate - (char16_t)0xd800) << 10) + 
michael@0:               (*src - (char16_t)0xdc00) + 0x10000;
michael@0:       *dest++ = (char)0xf0 | (n >> 18);
michael@0:       *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
michael@0:       *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
michael@0:       *dest++ = (char)0x80 | (n & 0x3f);
michael@0:       ++src;
michael@0:       destLen -= 4;
michael@0:     }
michael@0:     mHighSurrogate = 0;
michael@0:   }
michael@0: 
michael@0:   while (src < srcEnd) {
michael@0:     if ( *src <= 0x007f) {
michael@0:       if (destLen < 1)
michael@0:         goto error_more_output;
michael@0:       *dest++ = (char)*src;
michael@0:       --destLen;
michael@0:     } else if (*src <= 0x07ff) {
michael@0:       if (destLen < 2)
michael@0:         goto error_more_output;
michael@0:       *dest++ = (char)0xc0 | (*src >> 6);
michael@0:       *dest++ = (char)0x80 | (*src & 0x003f);
michael@0:       destLen -= 2;
michael@0:     } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
michael@0:       if (*src >= (char16_t)0xdc00) { //not a pair
michael@0:         if (destLen < 3)
michael@0:           goto error_more_output;
michael@0:         *dest++ = (char)0xef; //replacement character
michael@0:         *dest++ = (char)0xbf;
michael@0:         *dest++ = (char)0xbd;
michael@0:         destLen -= 3;
michael@0:         ++src;
michael@0:         continue;
michael@0:       }
michael@0:       if ((src+1) >= srcEnd) {
michael@0:         //we need another surrogate to complete this unicode char
michael@0:         mHighSurrogate = *src;
michael@0:         *aDestLength = dest - aDest;
michael@0:         return NS_OK_UENC_MOREINPUT;
michael@0:       }
michael@0:       //handle surrogate
michael@0:       if (destLen < 4)
michael@0:         goto error_more_output;
michael@0:       if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
michael@0:         *dest++ = (char)0xef; //replacement character
michael@0:         *dest++ = (char)0xbf;
michael@0:         *dest++ = (char)0xbd;
michael@0:         destLen -= 3;
michael@0:       } else {
michael@0:         n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
michael@0:         *dest++ = (char)0xf0 | (n >> 18);
michael@0:         *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
michael@0:         *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
michael@0:         *dest++ = (char)0x80 | (n & 0x3f);
michael@0:         destLen -= 4;
michael@0:         ++src;
michael@0:       }
michael@0:     } else { 
michael@0:       if (destLen < 3)
michael@0:         goto error_more_output;
michael@0:       //treat rest of the character as BMP
michael@0:       *dest++ = (char)0xe0 | (*src >> 12);
michael@0:       *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
michael@0:       *dest++ = (char)0x80 | (*src & 0x003f);
michael@0:       destLen -= 3;
michael@0:     }
michael@0:     ++src;
michael@0:   }
michael@0: 
michael@0:   *aDestLength = dest - aDest;
michael@0:   return NS_OK;
michael@0: 
michael@0: error_more_output:
michael@0:   *aSrcLength = src - aSrc;
michael@0:   *aDestLength = dest - aDest;
michael@0:   return NS_OK_UENC_MOREOUTPUT;
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
michael@0: {
michael@0:   char * dest = aDest;
michael@0: 
michael@0:   if (mHighSurrogate) {
michael@0:     if (*aDestLength < 3) {
michael@0:       *aDestLength = 0;
michael@0:       return NS_OK_UENC_MOREOUTPUT;
michael@0:     }
michael@0:     *dest++ = (char)0xef; //replacement character
michael@0:     *dest++ = (char)0xbf;
michael@0:     *dest++ = (char)0xbd;
michael@0:     mHighSurrogate = 0;
michael@0:     *aDestLength = 3;
michael@0:     return NS_OK;
michael@0:   } 
michael@0: 
michael@0:   *aDestLength  = 0;
michael@0:   return NS_OK;
michael@0: }