intl/uconv/src/nsUnicodeToUTF8.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/src/nsUnicodeToUTF8.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,154 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +//----------------------------------------------------------------------
    1.10 +// Global functions and data [declaration]
    1.11 +#include "nsUnicodeToUTF8.h"
    1.12 +
    1.13 +NS_IMPL_ISUPPORTS(nsUnicodeToUTF8, nsIUnicodeEncoder)
    1.14 +
    1.15 +//----------------------------------------------------------------------
    1.16 +// nsUnicodeToUTF8 class [implementation]
    1.17 +
    1.18 +NS_IMETHODIMP nsUnicodeToUTF8::GetMaxLength(const char16_t * aSrc, 
    1.19 +                                              int32_t aSrcLength,
    1.20 +                                              int32_t * aDestLength)
    1.21 +{
    1.22 +  // aSrc is interpreted as UTF16, 3 is normally enough.
    1.23 +  // But when previous buffer only contains part of the surrogate pair, we 
    1.24 +  // need to complete it here. If the first word in following buffer is not
    1.25 +  // in valid surrogate range, we need to convert the remaining of last buffer
    1.26 +  // to 3 bytes.
    1.27 +  *aDestLength = 3*aSrcLength + 3;
    1.28 +  return NS_OK;
    1.29 +}
    1.30 +
    1.31 +NS_IMETHODIMP nsUnicodeToUTF8::Convert(const char16_t * aSrc, 
    1.32 +                                int32_t * aSrcLength, 
    1.33 +                                char * aDest, 
    1.34 +                                int32_t * aDestLength)
    1.35 +{
    1.36 +  const char16_t * src = aSrc;
    1.37 +  const char16_t * srcEnd = aSrc + *aSrcLength;
    1.38 +  char * dest = aDest;
    1.39 +  int32_t destLen = *aDestLength;
    1.40 +  uint32_t n;
    1.41 +
    1.42 +  //complete remaining of last conversion
    1.43 +  if (mHighSurrogate) {
    1.44 +    if (src < srcEnd) {
    1.45 +      *aDestLength = 0;
    1.46 +      return NS_OK_UENC_MOREINPUT;
    1.47 +    }
    1.48 +    if (*aDestLength < 4) {
    1.49 +      *aSrcLength = 0;
    1.50 +      *aDestLength = 0;
    1.51 +      return NS_OK_UENC_MOREOUTPUT;
    1.52 +    }
    1.53 +    if (*src < (char16_t)0xdc00 || *src > (char16_t)0xdfff) { //not a pair
    1.54 +      *dest++ = (char)0xef; //replacement character
    1.55 +      *dest++ = (char)0xbf;
    1.56 +      *dest++ = (char)0xbd;
    1.57 +      destLen -= 3;
    1.58 +    } else { 
    1.59 +      n = ((mHighSurrogate - (char16_t)0xd800) << 10) + 
    1.60 +              (*src - (char16_t)0xdc00) + 0x10000;
    1.61 +      *dest++ = (char)0xf0 | (n >> 18);
    1.62 +      *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
    1.63 +      *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
    1.64 +      *dest++ = (char)0x80 | (n & 0x3f);
    1.65 +      ++src;
    1.66 +      destLen -= 4;
    1.67 +    }
    1.68 +    mHighSurrogate = 0;
    1.69 +  }
    1.70 +
    1.71 +  while (src < srcEnd) {
    1.72 +    if ( *src <= 0x007f) {
    1.73 +      if (destLen < 1)
    1.74 +        goto error_more_output;
    1.75 +      *dest++ = (char)*src;
    1.76 +      --destLen;
    1.77 +    } else if (*src <= 0x07ff) {
    1.78 +      if (destLen < 2)
    1.79 +        goto error_more_output;
    1.80 +      *dest++ = (char)0xc0 | (*src >> 6);
    1.81 +      *dest++ = (char)0x80 | (*src & 0x003f);
    1.82 +      destLen -= 2;
    1.83 +    } else if (*src >= (char16_t)0xd800 && *src <= (char16_t)0xdfff) {
    1.84 +      if (*src >= (char16_t)0xdc00) { //not a pair
    1.85 +        if (destLen < 3)
    1.86 +          goto error_more_output;
    1.87 +        *dest++ = (char)0xef; //replacement character
    1.88 +        *dest++ = (char)0xbf;
    1.89 +        *dest++ = (char)0xbd;
    1.90 +        destLen -= 3;
    1.91 +        ++src;
    1.92 +        continue;
    1.93 +      }
    1.94 +      if ((src+1) >= srcEnd) {
    1.95 +        //we need another surrogate to complete this unicode char
    1.96 +        mHighSurrogate = *src;
    1.97 +        *aDestLength = dest - aDest;
    1.98 +        return NS_OK_UENC_MOREINPUT;
    1.99 +      }
   1.100 +      //handle surrogate
   1.101 +      if (destLen < 4)
   1.102 +        goto error_more_output;
   1.103 +      if (*(src+1) < (char16_t)0xdc00 || *(src+1) > 0xdfff) { //not a pair
   1.104 +        *dest++ = (char)0xef; //replacement character
   1.105 +        *dest++ = (char)0xbf;
   1.106 +        *dest++ = (char)0xbd;
   1.107 +        destLen -= 3;
   1.108 +      } else {
   1.109 +        n = ((*src - (char16_t)0xd800) << 10) + (*(src+1) - (char16_t)0xdc00) + (uint32_t)0x10000;
   1.110 +        *dest++ = (char)0xf0 | (n >> 18);
   1.111 +        *dest++ = (char)0x80 | ((n >> 12) & 0x3f);
   1.112 +        *dest++ = (char)0x80 | ((n >> 6) & 0x3f);
   1.113 +        *dest++ = (char)0x80 | (n & 0x3f);
   1.114 +        destLen -= 4;
   1.115 +        ++src;
   1.116 +      }
   1.117 +    } else { 
   1.118 +      if (destLen < 3)
   1.119 +        goto error_more_output;
   1.120 +      //treat rest of the character as BMP
   1.121 +      *dest++ = (char)0xe0 | (*src >> 12);
   1.122 +      *dest++ = (char)0x80 | ((*src >> 6) & 0x003f);
   1.123 +      *dest++ = (char)0x80 | (*src & 0x003f);
   1.124 +      destLen -= 3;
   1.125 +    }
   1.126 +    ++src;
   1.127 +  }
   1.128 +
   1.129 +  *aDestLength = dest - aDest;
   1.130 +  return NS_OK;
   1.131 +
   1.132 +error_more_output:
   1.133 +  *aSrcLength = src - aSrc;
   1.134 +  *aDestLength = dest - aDest;
   1.135 +  return NS_OK_UENC_MOREOUTPUT;
   1.136 +}
   1.137 +
   1.138 +NS_IMETHODIMP nsUnicodeToUTF8::Finish(char * aDest, int32_t * aDestLength)
   1.139 +{
   1.140 +  char * dest = aDest;
   1.141 +
   1.142 +  if (mHighSurrogate) {
   1.143 +    if (*aDestLength < 3) {
   1.144 +      *aDestLength = 0;
   1.145 +      return NS_OK_UENC_MOREOUTPUT;
   1.146 +    }
   1.147 +    *dest++ = (char)0xef; //replacement character
   1.148 +    *dest++ = (char)0xbf;
   1.149 +    *dest++ = (char)0xbd;
   1.150 +    mHighSurrogate = 0;
   1.151 +    *aDestLength = 3;
   1.152 +    return NS_OK;
   1.153 +  } 
   1.154 +
   1.155 +  *aDestLength  = 0;
   1.156 +  return NS_OK;
   1.157 +}

mercurial