1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/ucvko/nsISO2022KRToUnicode.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,189 @@ 1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 +#include "nsISO2022KRToUnicode.h" 1.9 +#include "nsUCSupport.h" 1.10 +#include "nsICharsetConverterManager.h" 1.11 +#include "nsServiceManagerUtils.h" 1.12 + 1.13 +NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen) 1.14 +{ 1.15 + static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); 1.16 + 1.17 + const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen; 1.18 + const unsigned char* src =(unsigned char*) aSrc; 1.19 + char16_t* destEnd = aDest + *aDestLen; 1.20 + char16_t* dest = aDest; 1.21 + while((src < srcEnd)) 1.22 + { 1.23 + // if LF/CR, return to US-ASCII unconditionally. 1.24 + if ( *src == 0x0a || *src == 0x0d ) 1.25 + mState = mState_Init; 1.26 + 1.27 + switch(mState) 1.28 + { 1.29 + case mState_Init: 1.30 + if(0x1b == *src) { 1.31 + mLastLegalState = mState_ASCII; 1.32 + mState = mState_ESC; 1.33 + break; 1.34 + } 1.35 + mState = mState_ASCII; 1.36 + // fall through 1.37 + 1.38 + case mState_ASCII: 1.39 + if(0x0e == *src) { // Shift-Out 1.40 + mState = mState_KSX1001_1992; 1.41 + mRunLength = 0; 1.42 + } 1.43 + else if(*src & 0x80) { 1.44 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.45 + goto error1; 1.46 + *dest++ = 0xFFFD; 1.47 + } 1.48 + else { 1.49 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.50 + goto error1; 1.51 + *dest++ = (char16_t) *src; 1.52 + } 1.53 + break; 1.54 + 1.55 + case mState_ESC: 1.56 + if('$' == *src) { 1.57 + mState = mState_ESC_24; 1.58 + } 1.59 + else { 1.60 + if (CHECK_OVERRUN(dest, destEnd, 2)) 1.61 + goto error1; 1.62 + *dest++ = (char16_t) 0x1b; 1.63 + *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; 1.64 + mState = mLastLegalState; 1.65 + } 1.66 + break; 1.67 + 1.68 + case mState_ESC_24: // ESC $ 1.69 + if(')' == *src) { 1.70 + mState = mState_ESC_24_29; 1.71 + } 1.72 + else { 1.73 + if (CHECK_OVERRUN(dest, destEnd, 3)) 1.74 + goto error1; 1.75 + *dest++ = (char16_t) 0x1b; 1.76 + *dest++ = (char16_t) '$'; 1.77 + *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; 1.78 + mState = mLastLegalState; 1.79 + } 1.80 + break; 1.81 + 1.82 + case mState_ESC_24_29: // ESC $ ) 1.83 + mState = mLastLegalState; 1.84 + if('C' == *src) { 1.85 + mState = mState_ASCII; 1.86 + mRunLength = 0; 1.87 + } 1.88 + else { 1.89 + if (CHECK_OVERRUN(dest, destEnd, 4)) 1.90 + goto error1; 1.91 + *dest++ = (char16_t) 0x1b; 1.92 + *dest++ = (char16_t) '$'; 1.93 + *dest++ = (char16_t) ')'; 1.94 + *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; 1.95 + mState = mLastLegalState; 1.96 + } 1.97 + break; 1.98 + 1.99 + case mState_KSX1001_1992: 1.100 + if (0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f) { 1.101 + mData = (uint8_t) *src; 1.102 + mState = mState_KSX1001_1992_2ndbyte; 1.103 + } 1.104 + else if (0x0f == *src) { // Shift-In (SI) 1.105 + mState = mState_ASCII; 1.106 + if (mRunLength == 0) { 1.107 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.108 + goto error1; 1.109 + *dest++ = 0xFFFD; 1.110 + } 1.111 + mRunLength = 0; 1.112 + } 1.113 + else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) { 1.114 + // Allow space and tab between SO and SI (i.e. in Hangul segment) 1.115 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.116 + goto error1; 1.117 + mState = mState_KSX1001_1992; 1.118 + *dest++ = (char16_t) *src; 1.119 + ++mRunLength; 1.120 + } 1.121 + else { // Everything else is invalid. 1.122 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.123 + goto error1; 1.124 + *dest++ = 0xFFFD; 1.125 + } 1.126 + break; 1.127 + 1.128 + case mState_KSX1001_1992_2ndbyte: 1.129 + if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f ) { 1.130 + if (!mEUCKRDecoder) { 1.131 + // creating a delegate converter (EUC-KR) 1.132 + nsresult rv; 1.133 + nsCOMPtr<nsICharsetConverterManager> ccm = 1.134 + do_GetService(kCharsetConverterManagerCID, &rv); 1.135 + if (NS_SUCCEEDED(rv)) { 1.136 + rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder); 1.137 + } 1.138 + } 1.139 + 1.140 + if (!mEUCKRDecoder) {// failed creating a delegate converter 1.141 + *dest++ = 0xFFFD; 1.142 + } 1.143 + else { 1.144 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.145 + goto error1; 1.146 + unsigned char ksx[2]; 1.147 + char16_t uni; 1.148 + int32_t ksxLen = 2, uniLen = 1; 1.149 + // mData is the original 1st byte. 1.150 + // *src is the present 2nd byte. 1.151 + // Put 2 bytes (one character) to ksx[] with EUC-KR encoding. 1.152 + ksx[0] = mData | 0x80; 1.153 + ksx[1] = *src | 0x80; 1.154 + // Convert EUC-KR to unicode. 1.155 + mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen); 1.156 + *dest++ = uni; 1.157 + ++mRunLength; 1.158 + } 1.159 + mState = mState_KSX1001_1992; 1.160 + } 1.161 + else { // Invalid 1.162 + if ( 0x0f == *src ) { // Shift-In (SI) 1.163 + mState = mState_ASCII; 1.164 + } 1.165 + else { 1.166 + mState = mState_KSX1001_1992; 1.167 + } 1.168 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.169 + goto error1; 1.170 + *dest++ = 0xFFFD; 1.171 + } 1.172 + break; 1.173 + 1.174 + case mState_ERROR: 1.175 + mState = mLastLegalState; 1.176 + if (CHECK_OVERRUN(dest, destEnd, 1)) 1.177 + goto error1; 1.178 + *dest++ = 0xFFFD; 1.179 + break; 1.180 + 1.181 + } // switch 1.182 + src++; 1.183 + } 1.184 + *aDestLen = dest - aDest; 1.185 + return NS_OK; 1.186 + 1.187 +error1: 1.188 + *aDestLen = dest-aDest; 1.189 + *aSrcLen = src-(unsigned char*)aSrc; 1.190 + return NS_OK_UDEC_MOREOUTPUT; 1.191 +} 1.192 +