intl/uconv/ucvko/nsISO2022KRToUnicode.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/ucvko/nsISO2022KRToUnicode.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,189 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#include "nsISO2022KRToUnicode.h"
     1.9 +#include "nsUCSupport.h"
    1.10 +#include "nsICharsetConverterManager.h"
    1.11 +#include "nsServiceManagerUtils.h"
    1.12 +
    1.13 +NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
    1.14 +{
    1.15 +  static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
    1.16 +
    1.17 +  const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
    1.18 +  const unsigned char* src =(unsigned char*) aSrc;
    1.19 +  char16_t* destEnd = aDest + *aDestLen;
    1.20 +  char16_t* dest = aDest;
    1.21 +  while((src < srcEnd))
    1.22 +  {
    1.23 +    // if LF/CR, return to US-ASCII unconditionally.
    1.24 +    if ( *src == 0x0a || *src == 0x0d )
    1.25 +      mState = mState_Init;
    1.26 +
    1.27 +    switch(mState)
    1.28 +    {
    1.29 +      case mState_Init:
    1.30 +        if(0x1b == *src) {
    1.31 +          mLastLegalState = mState_ASCII;
    1.32 +          mState = mState_ESC;
    1.33 +          break;
    1.34 +        }
    1.35 +        mState = mState_ASCII;
    1.36 +        // fall through
    1.37 +
    1.38 +      case mState_ASCII:
    1.39 +        if(0x0e == *src) { // Shift-Out 
    1.40 +          mState = mState_KSX1001_1992;
    1.41 +          mRunLength = 0;
    1.42 +        } 
    1.43 +        else if(*src & 0x80) {
    1.44 +          if (CHECK_OVERRUN(dest, destEnd, 1))
    1.45 +            goto error1;
    1.46 +          *dest++ = 0xFFFD;
    1.47 +        } 
    1.48 +        else {
    1.49 +          if (CHECK_OVERRUN(dest, destEnd, 1))
    1.50 +            goto error1;
    1.51 +          *dest++ = (char16_t) *src;
    1.52 +        }
    1.53 +        break;
    1.54 +          
    1.55 +      case mState_ESC:
    1.56 +        if('$' == *src) {
    1.57 +          mState = mState_ESC_24;
    1.58 +        } 
    1.59 +        else  {
    1.60 +          if (CHECK_OVERRUN(dest, destEnd, 2))
    1.61 +            goto error1;
    1.62 +          *dest++ = (char16_t) 0x1b;
    1.63 +          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
    1.64 +          mState =  mLastLegalState;
    1.65 +        }
    1.66 +        break;
    1.67 +
    1.68 +      case mState_ESC_24: // ESC $
    1.69 +        if(')' == *src) {
    1.70 +          mState = mState_ESC_24_29;
    1.71 +        } 
    1.72 +        else  {
    1.73 +          if (CHECK_OVERRUN(dest, destEnd, 3))
    1.74 +            goto error1;
    1.75 +          *dest++ = (char16_t) 0x1b;
    1.76 +          *dest++ = (char16_t) '$';
    1.77 +          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
    1.78 +          mState = mLastLegalState;
    1.79 +        }
    1.80 +        break;
    1.81 +
    1.82 +      case mState_ESC_24_29: // ESC $ )
    1.83 +        mState = mLastLegalState;
    1.84 +        if('C' == *src) {
    1.85 +          mState = mState_ASCII;
    1.86 +          mRunLength = 0;
    1.87 +        } 
    1.88 +        else  {
    1.89 +          if (CHECK_OVERRUN(dest, destEnd, 4))
    1.90 +            goto error1;
    1.91 +          *dest++ = (char16_t) 0x1b;
    1.92 +          *dest++ = (char16_t) '$';
    1.93 +          *dest++ = (char16_t) ')';
    1.94 +          *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
    1.95 +          mState = mLastLegalState;
    1.96 +        }
    1.97 +        break;
    1.98 +
    1.99 +      case mState_KSX1001_1992:
   1.100 +        if (0x20 < (uint8_t) *src  && (uint8_t) *src < 0x7f) {
   1.101 +          mData = (uint8_t) *src;
   1.102 +          mState = mState_KSX1001_1992_2ndbyte;
   1.103 +        } 
   1.104 +        else if (0x0f == *src) { // Shift-In (SI)
   1.105 +          mState = mState_ASCII;
   1.106 +          if (mRunLength == 0) {
   1.107 +            if (CHECK_OVERRUN(dest, destEnd, 1))
   1.108 +              goto error1;
   1.109 +            *dest++ = 0xFFFD;
   1.110 +          }
   1.111 +          mRunLength = 0;
   1.112 +        } 
   1.113 +        else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) {
   1.114 +          // Allow space and tab between SO and SI (i.e. in Hangul segment)
   1.115 +          if (CHECK_OVERRUN(dest, destEnd, 1))
   1.116 +            goto error1;
   1.117 +          mState = mState_KSX1001_1992;
   1.118 +          *dest++ = (char16_t) *src;
   1.119 +          ++mRunLength;
   1.120 +        } 
   1.121 +        else {         // Everything else is invalid.
   1.122 +          if (CHECK_OVERRUN(dest, destEnd, 1))
   1.123 +            goto error1;
   1.124 +          *dest++ = 0xFFFD;
   1.125 +        }
   1.126 +        break;
   1.127 +
   1.128 +      case mState_KSX1001_1992_2ndbyte:
   1.129 +        if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f  ) {
   1.130 +          if (!mEUCKRDecoder) {
   1.131 +            // creating a delegate converter (EUC-KR)
   1.132 +            nsresult rv;
   1.133 +            nsCOMPtr<nsICharsetConverterManager> ccm = 
   1.134 +                  do_GetService(kCharsetConverterManagerCID, &rv);
   1.135 +            if (NS_SUCCEEDED(rv)) {
   1.136 +              rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
   1.137 +            }
   1.138 +          }
   1.139 +
   1.140 +          if (!mEUCKRDecoder) {// failed creating a delegate converter
   1.141 +           *dest++ = 0xFFFD;
   1.142 +          } 
   1.143 +          else {              
   1.144 +            if (CHECK_OVERRUN(dest, destEnd, 1))
   1.145 +              goto error1;
   1.146 +            unsigned char ksx[2];
   1.147 +            char16_t uni;
   1.148 +            int32_t ksxLen = 2, uniLen = 1;
   1.149 +            // mData is the original 1st byte.
   1.150 +            // *src is the present 2nd byte.
   1.151 +            // Put 2 bytes (one character) to ksx[] with EUC-KR encoding.
   1.152 +            ksx[0] = mData | 0x80;
   1.153 +            ksx[1] = *src | 0x80;
   1.154 +            // Convert EUC-KR to unicode.
   1.155 +            mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen);
   1.156 +            *dest++ = uni;
   1.157 +            ++mRunLength;
   1.158 +          }
   1.159 +          mState = mState_KSX1001_1992;
   1.160 +        } 
   1.161 +        else {        // Invalid 
   1.162 +          if ( 0x0f == *src ) {   // Shift-In (SI)
   1.163 +            mState = mState_ASCII;
   1.164 +          } 
   1.165 +          else {
   1.166 +            mState = mState_KSX1001_1992;
   1.167 +          }
   1.168 +          if (CHECK_OVERRUN(dest, destEnd, 1))
   1.169 +            goto error1;
   1.170 +          *dest++ = 0xFFFD;
   1.171 +        }
   1.172 +        break;
   1.173 +
   1.174 +      case mState_ERROR:
   1.175 +        mState = mLastLegalState;
   1.176 +        if (CHECK_OVERRUN(dest, destEnd, 1))
   1.177 +          goto error1;
   1.178 +        *dest++ = 0xFFFD;
   1.179 +        break;
   1.180 +
   1.181 +    } // switch
   1.182 +    src++;
   1.183 +  }
   1.184 +  *aDestLen = dest - aDest;
   1.185 +  return NS_OK;
   1.186 +
   1.187 +error1:
   1.188 +  *aDestLen = dest-aDest;
   1.189 +  *aSrcLen = src-(unsigned char*)aSrc;
   1.190 +  return NS_OK_UDEC_MOREOUTPUT;
   1.191 +}
   1.192 +

mercurial