|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 #include "nsISO2022KRToUnicode.h" |
|
6 #include "nsUCSupport.h" |
|
7 #include "nsICharsetConverterManager.h" |
|
8 #include "nsServiceManagerUtils.h" |
|
9 |
|
10 NS_IMETHODIMP nsISO2022KRToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen) |
|
11 { |
|
12 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); |
|
13 |
|
14 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen; |
|
15 const unsigned char* src =(unsigned char*) aSrc; |
|
16 char16_t* destEnd = aDest + *aDestLen; |
|
17 char16_t* dest = aDest; |
|
18 while((src < srcEnd)) |
|
19 { |
|
20 // if LF/CR, return to US-ASCII unconditionally. |
|
21 if ( *src == 0x0a || *src == 0x0d ) |
|
22 mState = mState_Init; |
|
23 |
|
24 switch(mState) |
|
25 { |
|
26 case mState_Init: |
|
27 if(0x1b == *src) { |
|
28 mLastLegalState = mState_ASCII; |
|
29 mState = mState_ESC; |
|
30 break; |
|
31 } |
|
32 mState = mState_ASCII; |
|
33 // fall through |
|
34 |
|
35 case mState_ASCII: |
|
36 if(0x0e == *src) { // Shift-Out |
|
37 mState = mState_KSX1001_1992; |
|
38 mRunLength = 0; |
|
39 } |
|
40 else if(*src & 0x80) { |
|
41 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
42 goto error1; |
|
43 *dest++ = 0xFFFD; |
|
44 } |
|
45 else { |
|
46 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
47 goto error1; |
|
48 *dest++ = (char16_t) *src; |
|
49 } |
|
50 break; |
|
51 |
|
52 case mState_ESC: |
|
53 if('$' == *src) { |
|
54 mState = mState_ESC_24; |
|
55 } |
|
56 else { |
|
57 if (CHECK_OVERRUN(dest, destEnd, 2)) |
|
58 goto error1; |
|
59 *dest++ = (char16_t) 0x1b; |
|
60 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; |
|
61 mState = mLastLegalState; |
|
62 } |
|
63 break; |
|
64 |
|
65 case mState_ESC_24: // ESC $ |
|
66 if(')' == *src) { |
|
67 mState = mState_ESC_24_29; |
|
68 } |
|
69 else { |
|
70 if (CHECK_OVERRUN(dest, destEnd, 3)) |
|
71 goto error1; |
|
72 *dest++ = (char16_t) 0x1b; |
|
73 *dest++ = (char16_t) '$'; |
|
74 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; |
|
75 mState = mLastLegalState; |
|
76 } |
|
77 break; |
|
78 |
|
79 case mState_ESC_24_29: // ESC $ ) |
|
80 mState = mLastLegalState; |
|
81 if('C' == *src) { |
|
82 mState = mState_ASCII; |
|
83 mRunLength = 0; |
|
84 } |
|
85 else { |
|
86 if (CHECK_OVERRUN(dest, destEnd, 4)) |
|
87 goto error1; |
|
88 *dest++ = (char16_t) 0x1b; |
|
89 *dest++ = (char16_t) '$'; |
|
90 *dest++ = (char16_t) ')'; |
|
91 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src; |
|
92 mState = mLastLegalState; |
|
93 } |
|
94 break; |
|
95 |
|
96 case mState_KSX1001_1992: |
|
97 if (0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f) { |
|
98 mData = (uint8_t) *src; |
|
99 mState = mState_KSX1001_1992_2ndbyte; |
|
100 } |
|
101 else if (0x0f == *src) { // Shift-In (SI) |
|
102 mState = mState_ASCII; |
|
103 if (mRunLength == 0) { |
|
104 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
105 goto error1; |
|
106 *dest++ = 0xFFFD; |
|
107 } |
|
108 mRunLength = 0; |
|
109 } |
|
110 else if ((uint8_t) *src == 0x20 || (uint8_t) *src == 0x09) { |
|
111 // Allow space and tab between SO and SI (i.e. in Hangul segment) |
|
112 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
113 goto error1; |
|
114 mState = mState_KSX1001_1992; |
|
115 *dest++ = (char16_t) *src; |
|
116 ++mRunLength; |
|
117 } |
|
118 else { // Everything else is invalid. |
|
119 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
120 goto error1; |
|
121 *dest++ = 0xFFFD; |
|
122 } |
|
123 break; |
|
124 |
|
125 case mState_KSX1001_1992_2ndbyte: |
|
126 if ( 0x20 < (uint8_t) *src && (uint8_t) *src < 0x7f ) { |
|
127 if (!mEUCKRDecoder) { |
|
128 // creating a delegate converter (EUC-KR) |
|
129 nsresult rv; |
|
130 nsCOMPtr<nsICharsetConverterManager> ccm = |
|
131 do_GetService(kCharsetConverterManagerCID, &rv); |
|
132 if (NS_SUCCEEDED(rv)) { |
|
133 rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder); |
|
134 } |
|
135 } |
|
136 |
|
137 if (!mEUCKRDecoder) {// failed creating a delegate converter |
|
138 *dest++ = 0xFFFD; |
|
139 } |
|
140 else { |
|
141 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
142 goto error1; |
|
143 unsigned char ksx[2]; |
|
144 char16_t uni; |
|
145 int32_t ksxLen = 2, uniLen = 1; |
|
146 // mData is the original 1st byte. |
|
147 // *src is the present 2nd byte. |
|
148 // Put 2 bytes (one character) to ksx[] with EUC-KR encoding. |
|
149 ksx[0] = mData | 0x80; |
|
150 ksx[1] = *src | 0x80; |
|
151 // Convert EUC-KR to unicode. |
|
152 mEUCKRDecoder->Convert((const char *)ksx, &ksxLen, &uni, &uniLen); |
|
153 *dest++ = uni; |
|
154 ++mRunLength; |
|
155 } |
|
156 mState = mState_KSX1001_1992; |
|
157 } |
|
158 else { // Invalid |
|
159 if ( 0x0f == *src ) { // Shift-In (SI) |
|
160 mState = mState_ASCII; |
|
161 } |
|
162 else { |
|
163 mState = mState_KSX1001_1992; |
|
164 } |
|
165 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
166 goto error1; |
|
167 *dest++ = 0xFFFD; |
|
168 } |
|
169 break; |
|
170 |
|
171 case mState_ERROR: |
|
172 mState = mLastLegalState; |
|
173 if (CHECK_OVERRUN(dest, destEnd, 1)) |
|
174 goto error1; |
|
175 *dest++ = 0xFFFD; |
|
176 break; |
|
177 |
|
178 } // switch |
|
179 src++; |
|
180 } |
|
181 *aDestLen = dest - aDest; |
|
182 return NS_OK; |
|
183 |
|
184 error1: |
|
185 *aDestLen = dest-aDest; |
|
186 *aSrcLen = src-(unsigned char*)aSrc; |
|
187 return NS_OK_UDEC_MOREOUTPUT; |
|
188 } |
|
189 |