|
1 /* |
|
2 ***************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1998-2007, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ***************************************************************************** |
|
8 * |
|
9 * ucnv_err.c |
|
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode |
|
11 * |
|
12 * |
|
13 * Change history: |
|
14 * |
|
15 * 06/29/2000 helena Major rewrite of the callback APIs. |
|
16 */ |
|
17 |
|
18 #include "unicode/utypes.h" |
|
19 |
|
20 #if !UCONFIG_NO_CONVERSION |
|
21 |
|
22 #include "unicode/ucnv_err.h" |
|
23 #include "unicode/ucnv_cb.h" |
|
24 #include "ucnv_cnv.h" |
|
25 #include "cmemory.h" |
|
26 #include "unicode/ucnv.h" |
|
27 #include "ustrfmt.h" |
|
28 |
|
/*
 * Capacity, in UChars, of the stack buffers in which the ESCAPE callbacks
 * build their replacement text.
 * 48 = 6 (longest escape written for one input byte: "&#255;" or "&#xFF;")
 *    * 8 (max number of bytes per char for any converter).
 * A capacity of 32 is only enough for the 4-UChar "%XHH" form and lets the
 * XML escape variants of UCNV_TO_U_CALLBACK_ESCAPE run past the buffer.
 */
#define VALUE_STRING_LENGTH 48

/* Code points of the ASCII characters used to assemble escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025  /* % */
#define UNICODE_U_CODEPOINT             0x0055  /* U */
#define UNICODE_X_CODEPOINT             0x0058  /* X */
#define UNICODE_RS_CODEPOINT            0x005C  /* \ (reverse solidus) */
#define UNICODE_U_LOW_CODEPOINT         0x0075  /* u */
#define UNICODE_X_LOW_CODEPOINT         0x0078  /* x */
#define UNICODE_AMP_CODEPOINT           0x0026  /* & */
#define UNICODE_HASH_CODEPOINT          0x0023  /* # */
#define UNICODE_SEMICOLON_CODEPOINT     0x003B  /* ; */
#define UNICODE_PLUS_CODEPOINT          0x002B  /* + */
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B  /* { */
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D  /* } */
#define UNICODE_SPACE_CODEPOINT         0x0020  /* space */

/*
 * Values compared against the first char that a callback's context pointer
 * refers to; they select the escape notation or the stop-on-illegal option.
 */
#define UCNV_PRV_ESCAPE_ICU      0
#define UCNV_PRV_ESCAPE_C        'C'
#define UCNV_PRV_ESCAPE_XML_DEC  'D'
#define UCNV_PRV_ESCAPE_XML_HEX  'X'
#define UCNV_PRV_ESCAPE_JAVA     'J'
#define UCNV_PRV_ESCAPE_UNICODE  'U'
#define UCNV_PRV_ESCAPE_CSS2     'S'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
|
52 |
|
53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ |
|
54 U_CAPI void U_EXPORT2 |
|
55 UCNV_FROM_U_CALLBACK_STOP ( |
|
56 const void *context, |
|
57 UConverterFromUnicodeArgs *fromUArgs, |
|
58 const UChar* codeUnits, |
|
59 int32_t length, |
|
60 UChar32 codePoint, |
|
61 UConverterCallbackReason reason, |
|
62 UErrorCode * err) |
|
63 { |
|
64 /* the caller must have set the error code accordingly */ |
|
65 return; |
|
66 } |
|
67 |
|
68 |
|
69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ |
|
70 U_CAPI void U_EXPORT2 |
|
71 UCNV_TO_U_CALLBACK_STOP ( |
|
72 const void *context, |
|
73 UConverterToUnicodeArgs *toUArgs, |
|
74 const char* codePoints, |
|
75 int32_t length, |
|
76 UConverterCallbackReason reason, |
|
77 UErrorCode * err) |
|
78 { |
|
79 /* the caller must have set the error code accordingly */ |
|
80 return; |
|
81 } |
|
82 |
|
83 U_CAPI void U_EXPORT2 |
|
84 UCNV_FROM_U_CALLBACK_SKIP ( |
|
85 const void *context, |
|
86 UConverterFromUnicodeArgs *fromUArgs, |
|
87 const UChar* codeUnits, |
|
88 int32_t length, |
|
89 UChar32 codePoint, |
|
90 UConverterCallbackReason reason, |
|
91 UErrorCode * err) |
|
92 { |
|
93 if (reason <= UCNV_IRREGULAR) |
|
94 { |
|
95 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
|
96 { |
|
97 *err = U_ZERO_ERROR; |
|
98 } |
|
99 /* else the caller must have set the error code accordingly. */ |
|
100 } |
|
101 /* else ignore the reset, close and clone calls. */ |
|
102 } |
|
103 |
|
104 U_CAPI void U_EXPORT2 |
|
105 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( |
|
106 const void *context, |
|
107 UConverterFromUnicodeArgs *fromArgs, |
|
108 const UChar* codeUnits, |
|
109 int32_t length, |
|
110 UChar32 codePoint, |
|
111 UConverterCallbackReason reason, |
|
112 UErrorCode * err) |
|
113 { |
|
114 if (reason <= UCNV_IRREGULAR) |
|
115 { |
|
116 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
|
117 { |
|
118 *err = U_ZERO_ERROR; |
|
119 ucnv_cbFromUWriteSub(fromArgs, 0, err); |
|
120 } |
|
121 /* else the caller must have set the error code accordingly. */ |
|
122 } |
|
123 /* else ignore the reset, close and clone calls. */ |
|
124 } |
|
125 |
|
126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, |
|
127 *uses a clean copy (resetted) of the converter, to convert that unicode |
|
128 *escape sequence to the target codepage (if conversion failure happens then |
|
129 *we revert to substituting with subchar) |
|
130 */ |
|
131 U_CAPI void U_EXPORT2 |
|
132 UCNV_FROM_U_CALLBACK_ESCAPE ( |
|
133 const void *context, |
|
134 UConverterFromUnicodeArgs *fromArgs, |
|
135 const UChar *codeUnits, |
|
136 int32_t length, |
|
137 UChar32 codePoint, |
|
138 UConverterCallbackReason reason, |
|
139 UErrorCode * err) |
|
140 { |
|
141 |
|
142 UChar valueString[VALUE_STRING_LENGTH]; |
|
143 int32_t valueStringLength = 0; |
|
144 int32_t i = 0; |
|
145 |
|
146 const UChar *myValueSource = NULL; |
|
147 UErrorCode err2 = U_ZERO_ERROR; |
|
148 UConverterFromUCallback original = NULL; |
|
149 const void *originalContext; |
|
150 |
|
151 UConverterFromUCallback ignoredCallback = NULL; |
|
152 const void *ignoredContext; |
|
153 |
|
154 if (reason > UCNV_IRREGULAR) |
|
155 { |
|
156 return; |
|
157 } |
|
158 |
|
159 ucnv_setFromUCallBack (fromArgs->converter, |
|
160 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, |
|
161 NULL, |
|
162 &original, |
|
163 &originalContext, |
|
164 &err2); |
|
165 |
|
166 if (U_FAILURE (err2)) |
|
167 { |
|
168 *err = err2; |
|
169 return; |
|
170 } |
|
171 if(context==NULL) |
|
172 { |
|
173 while (i < length) |
|
174 { |
|
175 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ |
|
176 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ |
|
177 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); |
|
178 } |
|
179 } |
|
180 else |
|
181 { |
|
182 switch(*((char*)context)) |
|
183 { |
|
184 case UCNV_PRV_ESCAPE_JAVA: |
|
185 while (i < length) |
|
186 { |
|
187 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ |
|
188 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ |
|
189 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); |
|
190 } |
|
191 break; |
|
192 |
|
193 case UCNV_PRV_ESCAPE_C: |
|
194 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ |
|
195 |
|
196 if(length==2){ |
|
197 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ |
|
198 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); |
|
199 |
|
200 } |
|
201 else{ |
|
202 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ |
|
203 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); |
|
204 } |
|
205 break; |
|
206 |
|
207 case UCNV_PRV_ESCAPE_XML_DEC: |
|
208 |
|
209 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
|
210 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ |
|
211 if(length==2){ |
|
212 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); |
|
213 } |
|
214 else{ |
|
215 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); |
|
216 } |
|
217 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ |
|
218 break; |
|
219 |
|
220 case UCNV_PRV_ESCAPE_XML_HEX: |
|
221 |
|
222 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
|
223 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ |
|
224 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ |
|
225 if(length==2){ |
|
226 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); |
|
227 } |
|
228 else{ |
|
229 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); |
|
230 } |
|
231 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ |
|
232 break; |
|
233 |
|
234 case UCNV_PRV_ESCAPE_UNICODE: |
|
235 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ |
|
236 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ |
|
237 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ |
|
238 if (length == 2) { |
|
239 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); |
|
240 } else { |
|
241 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); |
|
242 } |
|
243 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ |
|
244 break; |
|
245 |
|
246 case UCNV_PRV_ESCAPE_CSS2: |
|
247 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ |
|
248 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); |
|
249 /* Always add space character, becase the next character might be whitespace, |
|
250 which would erroneously be considered the termination of the escape sequence. */ |
|
251 valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; |
|
252 break; |
|
253 |
|
254 default: |
|
255 while (i < length) |
|
256 { |
|
257 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ |
|
258 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ |
|
259 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); |
|
260 } |
|
261 } |
|
262 } |
|
263 myValueSource = valueString; |
|
264 |
|
265 /* reset the error */ |
|
266 *err = U_ZERO_ERROR; |
|
267 |
|
268 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); |
|
269 |
|
270 ucnv_setFromUCallBack (fromArgs->converter, |
|
271 original, |
|
272 originalContext, |
|
273 &ignoredCallback, |
|
274 &ignoredContext, |
|
275 &err2); |
|
276 if (U_FAILURE (err2)) |
|
277 { |
|
278 *err = err2; |
|
279 return; |
|
280 } |
|
281 |
|
282 return; |
|
283 } |
|
284 |
|
285 |
|
286 |
|
287 U_CAPI void U_EXPORT2 |
|
288 UCNV_TO_U_CALLBACK_SKIP ( |
|
289 const void *context, |
|
290 UConverterToUnicodeArgs *toArgs, |
|
291 const char* codeUnits, |
|
292 int32_t length, |
|
293 UConverterCallbackReason reason, |
|
294 UErrorCode * err) |
|
295 { |
|
296 if (reason <= UCNV_IRREGULAR) |
|
297 { |
|
298 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
|
299 { |
|
300 *err = U_ZERO_ERROR; |
|
301 } |
|
302 /* else the caller must have set the error code accordingly. */ |
|
303 } |
|
304 /* else ignore the reset, close and clone calls. */ |
|
305 } |
|
306 |
|
307 U_CAPI void U_EXPORT2 |
|
308 UCNV_TO_U_CALLBACK_SUBSTITUTE ( |
|
309 const void *context, |
|
310 UConverterToUnicodeArgs *toArgs, |
|
311 const char* codeUnits, |
|
312 int32_t length, |
|
313 UConverterCallbackReason reason, |
|
314 UErrorCode * err) |
|
315 { |
|
316 if (reason <= UCNV_IRREGULAR) |
|
317 { |
|
318 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) |
|
319 { |
|
320 *err = U_ZERO_ERROR; |
|
321 ucnv_cbToUWriteSub(toArgs,0,err); |
|
322 } |
|
323 /* else the caller must have set the error code accordingly. */ |
|
324 } |
|
325 /* else ignore the reset, close and clone calls. */ |
|
326 } |
|
327 |
|
328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, |
|
329 *and uses that as the substitution sequence |
|
330 */ |
|
331 U_CAPI void U_EXPORT2 |
|
332 UCNV_TO_U_CALLBACK_ESCAPE ( |
|
333 const void *context, |
|
334 UConverterToUnicodeArgs *toArgs, |
|
335 const char* codeUnits, |
|
336 int32_t length, |
|
337 UConverterCallbackReason reason, |
|
338 UErrorCode * err) |
|
339 { |
|
340 UChar uniValueString[VALUE_STRING_LENGTH]; |
|
341 int32_t valueStringLength = 0; |
|
342 int32_t i = 0; |
|
343 |
|
344 if (reason > UCNV_IRREGULAR) |
|
345 { |
|
346 return; |
|
347 } |
|
348 |
|
349 if(context==NULL) |
|
350 { |
|
351 while (i < length) |
|
352 { |
|
353 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ |
|
354 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ |
|
355 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); |
|
356 } |
|
357 } |
|
358 else |
|
359 { |
|
360 switch(*((char*)context)) |
|
361 { |
|
362 case UCNV_PRV_ESCAPE_XML_DEC: |
|
363 while (i < length) |
|
364 { |
|
365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
|
366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ |
|
367 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); |
|
368 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ |
|
369 } |
|
370 break; |
|
371 |
|
372 case UCNV_PRV_ESCAPE_XML_HEX: |
|
373 while (i < length) |
|
374 { |
|
375 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ |
|
376 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ |
|
377 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ |
|
378 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); |
|
379 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ |
|
380 } |
|
381 break; |
|
382 case UCNV_PRV_ESCAPE_C: |
|
383 while (i < length) |
|
384 { |
|
385 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ |
|
386 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ |
|
387 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); |
|
388 } |
|
389 break; |
|
390 default: |
|
391 while (i < length) |
|
392 { |
|
393 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ |
|
394 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ |
|
395 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); |
|
396 valueStringLength += 2; |
|
397 } |
|
398 } |
|
399 } |
|
400 /* reset the error */ |
|
401 *err = U_ZERO_ERROR; |
|
402 |
|
403 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); |
|
404 } |
|
405 |
|
406 #endif |