michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include "nsISO2022KRToUnicode.h" michael@0: #include "nsUCSupport.h" michael@0: #include "nsICharsetConverterManager.h" michael@0: #include "nsServiceManagerUtils.h" michael@0: michael@0: NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen) michael@0: { michael@0: static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); michael@0: michael@0: const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen; michael@0: const unsigned char* src =(unsigned char*) aSrc; michael@0: char16_t* destEnd = aDest + *aDestLen; michael@0: char16_t* dest = aDest; michael@0: while((src < srcEnd)) michael@0: { michael@0: // if LF/CR, return to US-ASCII unconditionally. michael@0: if ( *src == 0x0a || *src == 0x0d ) michael@0: mState = mState_Init; michael@0: michael@0: switch(mState) michael@0: { michael@0: case mState_Init: michael@0: if(0x1b == *src) { michael@0: mLastLegalState = mState_ASCII; michael@0: mState = mState_ESC; michael@0: break; michael@0: } michael@0: mState = mState_ASCII; michael@0: // fall through michael@0: michael@0: case mState_ASCII: michael@0: if(0x0e == *src) { // Shift-Out michael@0: mState = mState_KSX1001_1992; michael@0: mRunLength = 0; michael@0: } michael@0: else if(*src & 0x80) { michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: *dest++ = 0xFFFD; michael@0: } michael@0: else { michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: *dest++ = (char16_t) *src; michael@0: } michael@0: break; michael@0: michael@0: case mState_ESC: michael@0: if('$' == *src) { michael@0: mState = mState_ESC_24; michael@0: } michael@0: else { michael@0: if (CHECK_OVERRUN(dest, destEnd, 2)) michael@0: goto error1; michael@0: *dest++ = (char16_t) 0x1b; michael@0: *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; michael@0: mState = mLastLegalState; michael@0: } michael@0: break; michael@0: michael@0: case mState_ESC_24: // ESC $ michael@0: if(')' == *src) { michael@0: mState = mState_ESC_24_29; michael@0: } michael@0: else { michael@0: if (CHECK_OVERRUN(dest, destEnd, 3)) michael@0: goto error1; michael@0: *dest++ = (char16_t) 0x1b; michael@0: *dest++ = (char16_t) '$'; michael@0: *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; michael@0: mState = mLastLegalState; michael@0: } michael@0: break; michael@0: michael@0: case mState_ESC_24_29: // ESC $ ) michael@0: mState = mLastLegalState; michael@0: if('C' == *src) { michael@0: mState = mState_ASCII; michael@0: mRunLength = 0; michael@0: } michael@0: else { michael@0: if (CHECK_OVERRUN(dest, destEnd, 4)) michael@0: goto error1; michael@0: *dest++ = (char16_t) 0x1b; michael@0: *dest++ = (char16_t) '$'; michael@0: *dest++ = (char16_t) ')'; michael@0: *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; michael@0: mState = mLastLegalState; michael@0: } michael@0: break; michael@0: michael@0: case mState_KSX1001_1992: michael@0: if (0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f) { michael@0: mData = (uint8_t) *src; michael@0: mState = mState_KSX1001_1992_2ndbyte; michael@0: } michael@0: else if (0x0f == *src) { // Shift-In (SI) michael@0: mState = mState_ASCII; michael@0: if (mRunLength == 0) { michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: *dest++ = 0xFFFD; michael@0: } michael@0: mRunLength = 0; michael@0: } michael@0: else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) { michael@0: // Allow space and tab between SO and SI (i.e. in Hangul segment) michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: mState = mState_KSX1001_1992; michael@0: *dest++ = (char16_t) *src; michael@0: ++mRunLength; michael@0: } michael@0: else { // Everything else is invalid. michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: *dest++ = 0xFFFD; michael@0: } michael@0: break; michael@0: michael@0: case mState_KSX1001_1992_2ndbyte: michael@0: if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f ) { michael@0: if (!mEUCKRDecoder) { michael@0: // creating a delegate converter (EUC-KR) michael@0: nsresult rv; michael@0: nsCOMPtr ccm = michael@0: do_GetService(kCharsetConverterManagerCID, &rv); michael@0: if (NS_SUCCEEDED(rv)) { michael@0: rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder); michael@0: } michael@0: } michael@0: michael@0: if (!mEUCKRDecoder) {// failed creating a delegate converter michael@0: *dest++ = 0xFFFD; michael@0: } michael@0: else { michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: unsigned char ksx[2]; michael@0: char16_t uni; michael@0: int32_t ksxLen = 2, uniLen = 1; michael@0: // mData is the original 1st byte. michael@0: // *src is the present 2nd byte. michael@0: // Put 2 bytes (one character) to ksx[] with EUC-KR encoding. michael@0: ksx[0] = mData | 0x80; michael@0: ksx[1] = *src | 0x80; michael@0: // Convert EUC-KR to unicode. michael@0: mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen); michael@0: *dest++ = uni; michael@0: ++mRunLength; michael@0: } michael@0: mState = mState_KSX1001_1992; michael@0: } michael@0: else { // Invalid michael@0: if ( 0x0f == *src ) { // Shift-In (SI) michael@0: mState = mState_ASCII; michael@0: } michael@0: else { michael@0: mState = mState_KSX1001_1992; michael@0: } michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: *dest++ = 0xFFFD; michael@0: } michael@0: break; michael@0: michael@0: case mState_ERROR: michael@0: mState = mLastLegalState; michael@0: if (CHECK_OVERRUN(dest, destEnd, 1)) michael@0: goto error1; michael@0: *dest++ = 0xFFFD; michael@0: break; michael@0: michael@0: } // switch michael@0: src++; michael@0: } michael@0: *aDestLen = dest - aDest; michael@0: return NS_OK; michael@0: michael@0: error1: michael@0: *aDestLen = dest-aDest; michael@0: *aSrcLen = src-(unsigned char*)aSrc; michael@0: return NS_OK_UDEC_MOREOUTPUT; michael@0: } michael@0: