|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1998-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * |
|
9 * ucnv.c: |
|
10 * Implements APIs for the ICU's codeset conversion library; |
|
11 * mostly calls through internal functions; |
|
12 * created by Bertrand A. Damiba |
|
13 * |
|
14 * Modification History: |
|
15 * |
|
16 * Date Name Description |
|
17 * 04/04/99 helena Fixed internal header inclusion. |
|
18 * 05/09/00 helena Added implementation to handle fallback mappings. |
|
19 * 06/20/2000 helena OS/400 port changes; mostly typecast. |
|
20 */ |
|
21 |
|
22 #include "unicode/utypes.h" |
|
23 |
|
24 #if !UCONFIG_NO_CONVERSION |
|
25 |
|
26 #include "unicode/ustring.h" |
|
27 #include "unicode/ucnv.h" |
|
28 #include "unicode/ucnv_err.h" |
|
29 #include "unicode/uset.h" |
|
30 #include "unicode/utf.h" |
|
31 #include "unicode/utf16.h" |
|
32 #include "putilimp.h" |
|
33 #include "cmemory.h" |
|
34 #include "cstring.h" |
|
35 #include "uassert.h" |
|
36 #include "utracimp.h" |
|
37 #include "ustr_imp.h" |
|
38 #include "ucnv_imp.h" |
|
39 #include "ucnv_cnv.h" |
|
40 #include "ucnv_bld.h" |
|
41 |
|
42 /* size of intermediate and preflighting buffers in ucnv_convert() */ |
|
43 #define CHUNK_SIZE 1024 |
|
44 |
|
45 typedef struct UAmbiguousConverter { |
|
46 const char *name; |
|
47 const UChar variant5c; |
|
48 } UAmbiguousConverter; |
|
49 |
|
50 static const UAmbiguousConverter ambiguousConverters[]={ |
|
51 { "ibm-897_P100-1995", 0xa5 }, |
|
52 { "ibm-942_P120-1999", 0xa5 }, |
|
53 { "ibm-943_P130-1999", 0xa5 }, |
|
54 { "ibm-946_P100-1995", 0xa5 }, |
|
55 { "ibm-33722_P120-1999", 0xa5 }, |
|
56 { "ibm-1041_P100-1995", 0xa5 }, |
|
57 /*{ "ibm-54191_P100-2006", 0xa5 },*/ |
|
58 /*{ "ibm-62383_P100-2007", 0xa5 },*/ |
|
59 /*{ "ibm-891_P100-1995", 0x20a9 },*/ |
|
60 { "ibm-944_P100-1995", 0x20a9 }, |
|
61 { "ibm-949_P110-1999", 0x20a9 }, |
|
62 { "ibm-1363_P110-1997", 0x20a9 }, |
|
63 { "ISO_2022,locale=ko,version=0", 0x20a9 }, |
|
64 { "ibm-1088_P100-1995", 0x20a9 } |
|
65 }; |
|
66 |
|
67 /*Calls through createConverter */ |
|
68 U_CAPI UConverter* U_EXPORT2 |
|
69 ucnv_open (const char *name, |
|
70 UErrorCode * err) |
|
71 { |
|
72 UConverter *r; |
|
73 |
|
74 if (err == NULL || U_FAILURE (*err)) { |
|
75 return NULL; |
|
76 } |
|
77 |
|
78 r = ucnv_createConverter(NULL, name, err); |
|
79 return r; |
|
80 } |
|
81 |
|
82 U_CAPI UConverter* U_EXPORT2 |
|
83 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) |
|
84 { |
|
85 return ucnv_createConverterFromPackage(packageName, converterName, err); |
|
86 } |
|
87 |
|
88 /*Extracts the UChar* to a char* and calls through createConverter */ |
|
89 U_CAPI UConverter* U_EXPORT2 |
|
90 ucnv_openU (const UChar * name, |
|
91 UErrorCode * err) |
|
92 { |
|
93 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
|
94 |
|
95 if (err == NULL || U_FAILURE(*err)) |
|
96 return NULL; |
|
97 if (name == NULL) |
|
98 return ucnv_open (NULL, err); |
|
99 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) |
|
100 { |
|
101 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
102 return NULL; |
|
103 } |
|
104 return ucnv_open(u_austrcpy(asciiName, name), err); |
|
105 } |
|
106 |
|
107 /* Copy the string that is represented by the UConverterPlatform enum |
|
108 * @param platformString An output buffer |
|
109 * @param platform An enum representing a platform |
|
110 * @return the length of the copied string. |
|
111 */ |
|
112 static int32_t |
|
113 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) |
|
114 { |
|
115 switch (pltfrm) |
|
116 { |
|
117 case UCNV_IBM: |
|
118 uprv_strcpy(platformString, "ibm-"); |
|
119 return 4; |
|
120 case UCNV_UNKNOWN: |
|
121 break; |
|
122 } |
|
123 |
|
124 /* default to empty string */ |
|
125 *platformString = 0; |
|
126 return 0; |
|
127 } |
|
128 |
|
129 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls |
|
130 *through createConverter*/ |
|
131 U_CAPI UConverter* U_EXPORT2 |
|
132 ucnv_openCCSID (int32_t codepage, |
|
133 UConverterPlatform platform, |
|
134 UErrorCode * err) |
|
135 { |
|
136 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
|
137 int32_t myNameLen; |
|
138 |
|
139 if (err == NULL || U_FAILURE (*err)) |
|
140 return NULL; |
|
141 |
|
142 /* ucnv_copyPlatformString could return "ibm-" or "cp" */ |
|
143 myNameLen = ucnv_copyPlatformString(myName, platform); |
|
144 T_CString_integerToString(myName + myNameLen, codepage, 10); |
|
145 |
|
146 return ucnv_createConverter(NULL, myName, err); |
|
147 } |
|
148 |
|
149 /* Creating a temporary stack-based object that can be used in one thread, |
|
150 and created from a converter that is shared across threads. |
|
151 */ |
|
152 |
|
153 U_CAPI UConverter* U_EXPORT2 |
|
154 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) |
|
155 { |
|
156 UConverter *localConverter, *allocatedConverter; |
|
157 int32_t stackBufferSize; |
|
158 int32_t bufferSizeNeeded; |
|
159 char *stackBufferChars = (char *)stackBuffer; |
|
160 UErrorCode cbErr; |
|
161 UConverterToUnicodeArgs toUArgs = { |
|
162 sizeof(UConverterToUnicodeArgs), |
|
163 TRUE, |
|
164 NULL, |
|
165 NULL, |
|
166 NULL, |
|
167 NULL, |
|
168 NULL, |
|
169 NULL |
|
170 }; |
|
171 UConverterFromUnicodeArgs fromUArgs = { |
|
172 sizeof(UConverterFromUnicodeArgs), |
|
173 TRUE, |
|
174 NULL, |
|
175 NULL, |
|
176 NULL, |
|
177 NULL, |
|
178 NULL, |
|
179 NULL |
|
180 }; |
|
181 |
|
182 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); |
|
183 |
|
184 if (status == NULL || U_FAILURE(*status)){ |
|
185 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); |
|
186 return NULL; |
|
187 } |
|
188 |
|
189 if (cnv == NULL) { |
|
190 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
191 UTRACE_EXIT_STATUS(*status); |
|
192 return NULL; |
|
193 } |
|
194 |
|
195 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", |
|
196 ucnv_getName(cnv, status), cnv, stackBuffer); |
|
197 |
|
198 if (cnv->sharedData->impl->safeClone != NULL) { |
|
199 /* call the custom safeClone function for sizing */ |
|
200 bufferSizeNeeded = 0; |
|
201 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); |
|
202 if (U_FAILURE(*status)) { |
|
203 UTRACE_EXIT_STATUS(*status); |
|
204 return NULL; |
|
205 } |
|
206 } |
|
207 else |
|
208 { |
|
209 /* inherent sizing */ |
|
210 bufferSizeNeeded = sizeof(UConverter); |
|
211 } |
|
212 |
|
213 if (pBufferSize == NULL) { |
|
214 stackBufferSize = 1; |
|
215 pBufferSize = &stackBufferSize; |
|
216 } else { |
|
217 stackBufferSize = *pBufferSize; |
|
218 if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
|
219 *pBufferSize = bufferSizeNeeded; |
|
220 UTRACE_EXIT_VALUE(bufferSizeNeeded); |
|
221 return NULL; |
|
222 } |
|
223 } |
|
224 |
|
225 |
|
226 /* Pointers on 64-bit platforms need to be aligned |
|
227 * on a 64-bit boundary in memory. |
|
228 */ |
|
229 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { |
|
230 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); |
|
231 if(stackBufferSize > offsetUp) { |
|
232 stackBufferSize -= offsetUp; |
|
233 stackBufferChars += offsetUp; |
|
234 } else { |
|
235 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ |
|
236 stackBufferSize = 1; |
|
237 } |
|
238 } |
|
239 |
|
240 stackBuffer = (void *)stackBufferChars; |
|
241 |
|
242 /* Now, see if we must allocate any memory */ |
|
243 if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) |
|
244 { |
|
245 /* allocate one here...*/ |
|
246 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); |
|
247 |
|
248 if(localConverter == NULL) { |
|
249 *status = U_MEMORY_ALLOCATION_ERROR; |
|
250 UTRACE_EXIT_STATUS(*status); |
|
251 return NULL; |
|
252 } |
|
253 *status = U_SAFECLONE_ALLOCATED_WARNING; |
|
254 |
|
255 /* record the fact that memory was allocated */ |
|
256 *pBufferSize = bufferSizeNeeded; |
|
257 } else { |
|
258 /* just use the stack buffer */ |
|
259 localConverter = (UConverter*) stackBuffer; |
|
260 allocatedConverter = NULL; |
|
261 } |
|
262 |
|
263 uprv_memset(localConverter, 0, bufferSizeNeeded); |
|
264 |
|
265 /* Copy initial state */ |
|
266 uprv_memcpy(localConverter, cnv, sizeof(UConverter)); |
|
267 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; |
|
268 |
|
269 /* copy the substitution string */ |
|
270 if (cnv->subChars == (uint8_t *)cnv->subUChars) { |
|
271 localConverter->subChars = (uint8_t *)localConverter->subUChars; |
|
272 } else { |
|
273 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
|
274 if (localConverter->subChars == NULL) { |
|
275 uprv_free(allocatedConverter); |
|
276 UTRACE_EXIT_STATUS(*status); |
|
277 return NULL; |
|
278 } |
|
279 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
|
280 } |
|
281 |
|
282 /* now either call the safeclone fcn or not */ |
|
283 if (cnv->sharedData->impl->safeClone != NULL) { |
|
284 /* call the custom safeClone function */ |
|
285 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); |
|
286 } |
|
287 |
|
288 if(localConverter==NULL || U_FAILURE(*status)) { |
|
289 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { |
|
290 uprv_free(allocatedConverter->subChars); |
|
291 } |
|
292 uprv_free(allocatedConverter); |
|
293 UTRACE_EXIT_STATUS(*status); |
|
294 return NULL; |
|
295 } |
|
296 |
|
297 /* increment refcount of shared data if needed */ |
|
298 /* |
|
299 Checking whether it's an algorithic converter is okay |
|
300 in multithreaded applications because the value never changes. |
|
301 Don't check referenceCounter for any other value. |
|
302 */ |
|
303 if (cnv->sharedData->referenceCounter != ~0) { |
|
304 ucnv_incrementRefCount(cnv->sharedData); |
|
305 } |
|
306 |
|
307 if(localConverter == (UConverter*)stackBuffer) { |
|
308 /* we're using user provided data - set to not destroy */ |
|
309 localConverter->isCopyLocal = TRUE; |
|
310 } |
|
311 |
|
312 /* allow callback functions to handle any memory allocation */ |
|
313 toUArgs.converter = fromUArgs.converter = localConverter; |
|
314 cbErr = U_ZERO_ERROR; |
|
315 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); |
|
316 cbErr = U_ZERO_ERROR; |
|
317 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); |
|
318 |
|
319 UTRACE_EXIT_PTR_STATUS(localConverter, *status); |
|
320 return localConverter; |
|
321 } |
|
322 |
|
323 |
|
324 |
|
325 /*Decreases the reference counter in the shared immutable section of the object |
|
326 *and frees the mutable part*/ |
|
327 |
|
328 U_CAPI void U_EXPORT2 |
|
329 ucnv_close (UConverter * converter) |
|
330 { |
|
331 UErrorCode errorCode = U_ZERO_ERROR; |
|
332 |
|
333 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); |
|
334 |
|
335 if (converter == NULL) |
|
336 { |
|
337 UTRACE_EXIT(); |
|
338 return; |
|
339 } |
|
340 |
|
341 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", |
|
342 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); |
|
343 |
|
344 /* In order to speed up the close, only call the callbacks when they have been changed. |
|
345 This performance check will only work when the callbacks are set within a shared library |
|
346 or from user code that statically links this code. */ |
|
347 /* first, notify the callback functions that the converter is closed */ |
|
348 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { |
|
349 UConverterToUnicodeArgs toUArgs = { |
|
350 sizeof(UConverterToUnicodeArgs), |
|
351 TRUE, |
|
352 NULL, |
|
353 NULL, |
|
354 NULL, |
|
355 NULL, |
|
356 NULL, |
|
357 NULL |
|
358 }; |
|
359 |
|
360 toUArgs.converter = converter; |
|
361 errorCode = U_ZERO_ERROR; |
|
362 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); |
|
363 } |
|
364 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { |
|
365 UConverterFromUnicodeArgs fromUArgs = { |
|
366 sizeof(UConverterFromUnicodeArgs), |
|
367 TRUE, |
|
368 NULL, |
|
369 NULL, |
|
370 NULL, |
|
371 NULL, |
|
372 NULL, |
|
373 NULL |
|
374 }; |
|
375 fromUArgs.converter = converter; |
|
376 errorCode = U_ZERO_ERROR; |
|
377 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); |
|
378 } |
|
379 |
|
380 if (converter->sharedData->impl->close != NULL) { |
|
381 converter->sharedData->impl->close(converter); |
|
382 } |
|
383 |
|
384 if (converter->subChars != (uint8_t *)converter->subUChars) { |
|
385 uprv_free(converter->subChars); |
|
386 } |
|
387 |
|
388 /* |
|
389 Checking whether it's an algorithic converter is okay |
|
390 in multithreaded applications because the value never changes. |
|
391 Don't check referenceCounter for any other value. |
|
392 */ |
|
393 if (converter->sharedData->referenceCounter != ~0) { |
|
394 ucnv_unloadSharedDataIfReady(converter->sharedData); |
|
395 } |
|
396 |
|
397 if(!converter->isCopyLocal){ |
|
398 uprv_free(converter); |
|
399 } |
|
400 |
|
401 UTRACE_EXIT(); |
|
402 } |
|
403 |
|
404 /*returns a single Name from the list, will return NULL if out of bounds |
|
405 */ |
|
406 U_CAPI const char* U_EXPORT2 |
|
407 ucnv_getAvailableName (int32_t n) |
|
408 { |
|
409 if (0 <= n && n <= 0xffff) { |
|
410 UErrorCode err = U_ZERO_ERROR; |
|
411 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); |
|
412 if (U_SUCCESS(err)) { |
|
413 return name; |
|
414 } |
|
415 } |
|
416 return NULL; |
|
417 } |
|
418 |
|
419 U_CAPI int32_t U_EXPORT2 |
|
420 ucnv_countAvailable () |
|
421 { |
|
422 UErrorCode err = U_ZERO_ERROR; |
|
423 return ucnv_bld_countAvailableConverters(&err); |
|
424 } |
|
425 |
|
426 U_CAPI void U_EXPORT2 |
|
427 ucnv_getSubstChars (const UConverter * converter, |
|
428 char *mySubChar, |
|
429 int8_t * len, |
|
430 UErrorCode * err) |
|
431 { |
|
432 if (U_FAILURE (*err)) |
|
433 return; |
|
434 |
|
435 if (converter->subCharLen <= 0) { |
|
436 /* Unicode string or empty string from ucnv_setSubstString(). */ |
|
437 *len = 0; |
|
438 return; |
|
439 } |
|
440 |
|
441 if (*len < converter->subCharLen) /*not enough space in subChars */ |
|
442 { |
|
443 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
|
444 return; |
|
445 } |
|
446 |
|
447 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ |
|
448 *len = converter->subCharLen; /*store # of bytes copied to buffer */ |
|
449 } |
|
450 |
|
451 U_CAPI void U_EXPORT2 |
|
452 ucnv_setSubstChars (UConverter * converter, |
|
453 const char *mySubChar, |
|
454 int8_t len, |
|
455 UErrorCode * err) |
|
456 { |
|
457 if (U_FAILURE (*err)) |
|
458 return; |
|
459 |
|
460 /*Makes sure that the subChar is within the codepages char length boundaries */ |
|
461 if ((len > converter->sharedData->staticData->maxBytesPerChar) |
|
462 || (len < converter->sharedData->staticData->minBytesPerChar)) |
|
463 { |
|
464 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
465 return; |
|
466 } |
|
467 |
|
468 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ |
|
469 converter->subCharLen = len; /*sets the new len */ |
|
470 |
|
471 /* |
|
472 * There is currently (2001Feb) no separate API to set/get subChar1. |
|
473 * In order to always have subChar written after it is explicitly set, |
|
474 * we set subChar1 to 0. |
|
475 */ |
|
476 converter->subChar1 = 0; |
|
477 |
|
478 return; |
|
479 } |
|
480 |
|
481 U_CAPI void U_EXPORT2 |
|
482 ucnv_setSubstString(UConverter *cnv, |
|
483 const UChar *s, |
|
484 int32_t length, |
|
485 UErrorCode *err) { |
|
486 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; |
|
487 char chars[UCNV_ERROR_BUFFER_LENGTH]; |
|
488 |
|
489 UConverter *clone; |
|
490 uint8_t *subChars; |
|
491 int32_t cloneSize, length8; |
|
492 |
|
493 /* Let the following functions check all arguments. */ |
|
494 cloneSize = sizeof(cloneBuffer); |
|
495 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); |
|
496 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); |
|
497 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); |
|
498 ucnv_close(clone); |
|
499 if (U_FAILURE(*err)) { |
|
500 return; |
|
501 } |
|
502 |
|
503 if (cnv->sharedData->impl->writeSub == NULL |
|
504 #if !UCONFIG_NO_LEGACY_CONVERSION |
|
505 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && |
|
506 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) |
|
507 #endif |
|
508 ) { |
|
509 /* The converter is not stateful. Store the charset bytes as a fixed string. */ |
|
510 subChars = (uint8_t *)chars; |
|
511 } else { |
|
512 /* |
|
513 * The converter has a non-default writeSub() function, indicating |
|
514 * that it is stateful. |
|
515 * Store the Unicode string for on-the-fly conversion for correct |
|
516 * state handling. |
|
517 */ |
|
518 if (length > UCNV_ERROR_BUFFER_LENGTH) { |
|
519 /* |
|
520 * Should not occur. The converter should output at least one byte |
|
521 * per UChar, which means that ucnv_fromUChars() should catch all |
|
522 * overflows. |
|
523 */ |
|
524 *err = U_BUFFER_OVERFLOW_ERROR; |
|
525 return; |
|
526 } |
|
527 subChars = (uint8_t *)s; |
|
528 if (length < 0) { |
|
529 length = u_strlen(s); |
|
530 } |
|
531 length8 = length * U_SIZEOF_UCHAR; |
|
532 } |
|
533 |
|
534 /* |
|
535 * For storing the substitution string, select either the small buffer inside |
|
536 * UConverter or allocate a subChars buffer. |
|
537 */ |
|
538 if (length8 > UCNV_MAX_SUBCHAR_LEN) { |
|
539 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ |
|
540 if (cnv->subChars == (uint8_t *)cnv->subUChars) { |
|
541 /* Allocate a new buffer for the string. */ |
|
542 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
|
543 if (cnv->subChars == NULL) { |
|
544 cnv->subChars = (uint8_t *)cnv->subUChars; |
|
545 *err = U_MEMORY_ALLOCATION_ERROR; |
|
546 return; |
|
547 } |
|
548 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
|
549 } |
|
550 } |
|
551 |
|
552 /* Copy the substitution string into the UConverter or its subChars buffer. */ |
|
553 if (length8 == 0) { |
|
554 cnv->subCharLen = 0; |
|
555 } else { |
|
556 uprv_memcpy(cnv->subChars, subChars, length8); |
|
557 if (subChars == (uint8_t *)chars) { |
|
558 cnv->subCharLen = (int8_t)length8; |
|
559 } else /* subChars == s */ { |
|
560 cnv->subCharLen = (int8_t)-length; |
|
561 } |
|
562 } |
|
563 |
|
564 /* See comment in ucnv_setSubstChars(). */ |
|
565 cnv->subChar1 = 0; |
|
566 } |
|
567 |
|
568 /*resets the internal states of a converter |
|
569 *goal : have the same behaviour than a freshly created converter |
|
570 */ |
|
571 static void _reset(UConverter *converter, UConverterResetChoice choice, |
|
572 UBool callCallback) { |
|
573 if(converter == NULL) { |
|
574 return; |
|
575 } |
|
576 |
|
577 if(callCallback) { |
|
578 /* first, notify the callback functions that the converter is reset */ |
|
579 UErrorCode errorCode; |
|
580 |
|
581 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { |
|
582 UConverterToUnicodeArgs toUArgs = { |
|
583 sizeof(UConverterToUnicodeArgs), |
|
584 TRUE, |
|
585 NULL, |
|
586 NULL, |
|
587 NULL, |
|
588 NULL, |
|
589 NULL, |
|
590 NULL |
|
591 }; |
|
592 toUArgs.converter = converter; |
|
593 errorCode = U_ZERO_ERROR; |
|
594 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); |
|
595 } |
|
596 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { |
|
597 UConverterFromUnicodeArgs fromUArgs = { |
|
598 sizeof(UConverterFromUnicodeArgs), |
|
599 TRUE, |
|
600 NULL, |
|
601 NULL, |
|
602 NULL, |
|
603 NULL, |
|
604 NULL, |
|
605 NULL |
|
606 }; |
|
607 fromUArgs.converter = converter; |
|
608 errorCode = U_ZERO_ERROR; |
|
609 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); |
|
610 } |
|
611 } |
|
612 |
|
613 /* now reset the converter itself */ |
|
614 if(choice<=UCNV_RESET_TO_UNICODE) { |
|
615 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; |
|
616 converter->mode = 0; |
|
617 converter->toULength = 0; |
|
618 converter->invalidCharLength = converter->UCharErrorBufferLength = 0; |
|
619 converter->preToULength = 0; |
|
620 } |
|
621 if(choice!=UCNV_RESET_TO_UNICODE) { |
|
622 converter->fromUnicodeStatus = 0; |
|
623 converter->fromUChar32 = 0; |
|
624 converter->invalidUCharLength = converter->charErrorBufferLength = 0; |
|
625 converter->preFromUFirstCP = U_SENTINEL; |
|
626 converter->preFromULength = 0; |
|
627 } |
|
628 |
|
629 if (converter->sharedData->impl->reset != NULL) { |
|
630 /* call the custom reset function */ |
|
631 converter->sharedData->impl->reset(converter, choice); |
|
632 } |
|
633 } |
|
634 |
|
635 U_CAPI void U_EXPORT2 |
|
636 ucnv_reset(UConverter *converter) |
|
637 { |
|
638 _reset(converter, UCNV_RESET_BOTH, TRUE); |
|
639 } |
|
640 |
|
641 U_CAPI void U_EXPORT2 |
|
642 ucnv_resetToUnicode(UConverter *converter) |
|
643 { |
|
644 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); |
|
645 } |
|
646 |
|
647 U_CAPI void U_EXPORT2 |
|
648 ucnv_resetFromUnicode(UConverter *converter) |
|
649 { |
|
650 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); |
|
651 } |
|
652 |
|
653 U_CAPI int8_t U_EXPORT2 |
|
654 ucnv_getMaxCharSize (const UConverter * converter) |
|
655 { |
|
656 return converter->maxBytesPerUChar; |
|
657 } |
|
658 |
|
659 |
|
660 U_CAPI int8_t U_EXPORT2 |
|
661 ucnv_getMinCharSize (const UConverter * converter) |
|
662 { |
|
663 return converter->sharedData->staticData->minBytesPerChar; |
|
664 } |
|
665 |
|
666 U_CAPI const char* U_EXPORT2 |
|
667 ucnv_getName (const UConverter * converter, UErrorCode * err) |
|
668 |
|
669 { |
|
670 if (U_FAILURE (*err)) |
|
671 return NULL; |
|
672 if(converter->sharedData->impl->getName){ |
|
673 const char* temp= converter->sharedData->impl->getName(converter); |
|
674 if(temp) |
|
675 return temp; |
|
676 } |
|
677 return converter->sharedData->staticData->name; |
|
678 } |
|
679 |
|
680 U_CAPI int32_t U_EXPORT2 |
|
681 ucnv_getCCSID(const UConverter * converter, |
|
682 UErrorCode * err) |
|
683 { |
|
684 int32_t ccsid; |
|
685 if (U_FAILURE (*err)) |
|
686 return -1; |
|
687 |
|
688 ccsid = converter->sharedData->staticData->codepage; |
|
689 if (ccsid == 0) { |
|
690 /* Rare case. This is for cases like gb18030, |
|
691 which doesn't have an IBM canonical name, but does have an IBM alias. */ |
|
692 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); |
|
693 if (U_SUCCESS(*err) && standardName) { |
|
694 const char *ccsidStr = uprv_strchr(standardName, '-'); |
|
695 if (ccsidStr) { |
|
696 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ |
|
697 } |
|
698 } |
|
699 } |
|
700 return ccsid; |
|
701 } |
|
702 |
|
703 |
|
704 U_CAPI UConverterPlatform U_EXPORT2 |
|
705 ucnv_getPlatform (const UConverter * converter, |
|
706 UErrorCode * err) |
|
707 { |
|
708 if (U_FAILURE (*err)) |
|
709 return UCNV_UNKNOWN; |
|
710 |
|
711 return (UConverterPlatform)converter->sharedData->staticData->platform; |
|
712 } |
|
713 |
|
714 U_CAPI void U_EXPORT2 |
|
715 ucnv_getToUCallBack (const UConverter * converter, |
|
716 UConverterToUCallback *action, |
|
717 const void **context) |
|
718 { |
|
719 *action = converter->fromCharErrorBehaviour; |
|
720 *context = converter->toUContext; |
|
721 } |
|
722 |
|
723 U_CAPI void U_EXPORT2 |
|
724 ucnv_getFromUCallBack (const UConverter * converter, |
|
725 UConverterFromUCallback *action, |
|
726 const void **context) |
|
727 { |
|
728 *action = converter->fromUCharErrorBehaviour; |
|
729 *context = converter->fromUContext; |
|
730 } |
|
731 |
|
732 U_CAPI void U_EXPORT2 |
|
733 ucnv_setToUCallBack (UConverter * converter, |
|
734 UConverterToUCallback newAction, |
|
735 const void* newContext, |
|
736 UConverterToUCallback *oldAction, |
|
737 const void** oldContext, |
|
738 UErrorCode * err) |
|
739 { |
|
740 if (U_FAILURE (*err)) |
|
741 return; |
|
742 if (oldAction) *oldAction = converter->fromCharErrorBehaviour; |
|
743 converter->fromCharErrorBehaviour = newAction; |
|
744 if (oldContext) *oldContext = converter->toUContext; |
|
745 converter->toUContext = newContext; |
|
746 } |
|
747 |
|
748 U_CAPI void U_EXPORT2 |
|
749 ucnv_setFromUCallBack (UConverter * converter, |
|
750 UConverterFromUCallback newAction, |
|
751 const void* newContext, |
|
752 UConverterFromUCallback *oldAction, |
|
753 const void** oldContext, |
|
754 UErrorCode * err) |
|
755 { |
|
756 if (U_FAILURE (*err)) |
|
757 return; |
|
758 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; |
|
759 converter->fromUCharErrorBehaviour = newAction; |
|
760 if (oldContext) *oldContext = converter->fromUContext; |
|
761 converter->fromUContext = newContext; |
|
762 } |
|
763 |
|
/*
 * Adjusts an array of offsets after a conversion step.
 *
 * The adjustment value is sourceIndex-errorInputLength when sourceIndex>=0,
 * and -1 otherwise.
 *  - adjustment == 0: the array is left alone,
 *  - adjustment  > 0: it is added to every offset that is not negative,
 *  - adjustment  < 0: every offset is set to -1 (the conversion function does
 *    not handle offsets, or the error input sequence started in a previous
 *    buffer).
 *
 * @param offsets          The offsets to adjust in place.
 * @param length           Number of entries in offsets.
 * @param sourceIndex      Source index before this step, or negative.
 * @param errorInputLength Length of the input sequence that caused an error,
 *                         if any.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift;
    int32_t i;

    shift = (sourceIndex>=0) ? sourceIndex-errorInputLength : -1;

    if(shift>0) {
        /* shift every valid (non-negative) offset */
        for(i=0; i<length; ++i) {
            if(offsets[i]>=0) {
                offsets[i]+=shift;
            }
        }
    } else if(shift<0) {
        /* no usable offsets: mark all of them as unknown */
        for(i=0; i<length; ++i) {
            offsets[i]=-1;
        }
    }
    /* shift==0 is the most common case and needs no work */
}
|
808 |
|
809 /* ucnv_fromUnicode --------------------------------------------------------- */ |
|
810 |
|
811 /* |
|
812 * Implementation note for m:n conversions |
|
813 * |
|
814 * While collecting source units to find the longest match for m:n conversion, |
|
815 * some source units may need to be stored for a partial match. |
|
816 * When a second buffer does not yield a match on all of the previously stored |
|
817 * source units, then they must be "replayed", i.e., fed back into the converter. |
|
818 * |
|
819 * The code relies on the fact that replaying will not nest - |
|
820 * converting a replay buffer will not result in a replay. |
|
821 * This is because a replay is necessary only after the _continuation_ of a |
|
822 * partial match failed, but a replay buffer is converted as a whole. |
|
823 * It may result in some of its units being stored again for a partial match, |
|
824 * but there will not be a continuation _during_ the replay which could fail. |
|
825 * |
|
826 * It is conceivable that a callback function could call the converter |
|
827 * recursively in a way that causes another replay to be stored, but that |
|
828 * would be an error in the callback function. |
|
829 * Such violations will cause assertion failures in a debug build, |
|
830 * and wrong output, but they will not cause a crash. |
|
831 */ |
|
832 |
|
833 static void |
|
834 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) { |
|
835 UConverterFromUnicode fromUnicode; |
|
836 UConverter *cnv; |
|
837 const UChar *s; |
|
838 char *t; |
|
839 int32_t *offsets; |
|
840 int32_t sourceIndex; |
|
841 int32_t errorInputLength; |
|
842 UBool converterSawEndOfInput, calledCallback; |
|
843 |
|
844 /* variables for m:n conversion */ |
|
845 UChar replay[UCNV_EXT_MAX_UCHARS]; |
|
846 const UChar *realSource, *realSourceLimit; |
|
847 int32_t realSourceIndex; |
|
848 UBool realFlush; |
|
849 |
|
850 cnv=pArgs->converter; |
|
851 s=pArgs->source; |
|
852 t=pArgs->target; |
|
853 offsets=pArgs->offsets; |
|
854 |
|
855 /* get the converter implementation function */ |
|
856 sourceIndex=0; |
|
857 if(offsets==NULL) { |
|
858 fromUnicode=cnv->sharedData->impl->fromUnicode; |
|
859 } else { |
|
860 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; |
|
861 if(fromUnicode==NULL) { |
|
862 /* there is no WithOffsets implementation */ |
|
863 fromUnicode=cnv->sharedData->impl->fromUnicode; |
|
864 /* we will write -1 for each offset */ |
|
865 sourceIndex=-1; |
|
866 } |
|
867 } |
|
868 |
|
869 if(cnv->preFromULength>=0) { |
|
870 /* normal mode */ |
|
871 realSource=NULL; |
|
872 |
|
873 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ |
|
874 realSourceLimit=NULL; |
|
875 realFlush=FALSE; |
|
876 realSourceIndex=0; |
|
877 } else { |
|
878 /* |
|
879 * Previous m:n conversion stored source units from a partial match |
|
880 * and failed to consume all of them. |
|
881 * We need to "replay" them from a temporary buffer and convert them first. |
|
882 */ |
|
883 realSource=pArgs->source; |
|
884 realSourceLimit=pArgs->sourceLimit; |
|
885 realFlush=pArgs->flush; |
|
886 realSourceIndex=sourceIndex; |
|
887 |
|
888 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); |
|
889 pArgs->source=replay; |
|
890 pArgs->sourceLimit=replay-cnv->preFromULength; |
|
891 pArgs->flush=FALSE; |
|
892 sourceIndex=-1; |
|
893 |
|
894 cnv->preFromULength=0; |
|
895 } |
|
896 |
|
897 /* |
|
898 * loop for conversion and error handling |
|
899 * |
|
900 * loop { |
|
901 * convert |
|
902 * loop { |
|
903 * update offsets |
|
904 * handle end of input |
|
905 * handle errors/call callback |
|
906 * } |
|
907 * } |
|
908 */ |
|
909 for(;;) { |
|
910 if(U_SUCCESS(*err)) { |
|
911 /* convert */ |
|
912 fromUnicode(pArgs, err); |
|
913 |
|
914 /* |
|
915 * set a flag for whether the converter |
|
916 * successfully processed the end of the input |
|
917 * |
|
918 * need not check cnv->preFromULength==0 because a replay (<0) will cause |
|
919 * s<sourceLimit before converterSawEndOfInput is checked |
|
920 */ |
|
921 converterSawEndOfInput= |
|
922 (UBool)(U_SUCCESS(*err) && |
|
923 pArgs->flush && pArgs->source==pArgs->sourceLimit && |
|
924 cnv->fromUChar32==0); |
|
925 } else { |
|
926 /* handle error from ucnv_convertEx() */ |
|
927 converterSawEndOfInput=FALSE; |
|
928 } |
|
929 |
|
930 /* no callback called yet for this iteration */ |
|
931 calledCallback=FALSE; |
|
932 |
|
933 /* no sourceIndex adjustment for conversion, only for callback output */ |
|
934 errorInputLength=0; |
|
935 |
|
936 /* |
|
937 * loop for offsets and error handling |
|
938 * |
|
939 * iterates at most 3 times: |
|
940 * 1. to clean up after the conversion function |
|
941 * 2. after the callback |
|
942 * 3. after the callback again if there was truncated input |
|
943 */ |
|
944 for(;;) { |
|
945 /* update offsets if we write any */ |
|
946 if(offsets!=NULL) { |
|
947 int32_t length=(int32_t)(pArgs->target-t); |
|
948 if(length>0) { |
|
949 _updateOffsets(offsets, length, sourceIndex, errorInputLength); |
|
950 |
|
951 /* |
|
952 * if a converter handles offsets and updates the offsets |
|
953 * pointer at the end, then pArgs->offset should not change |
|
954 * here; |
|
955 * however, some converters do not handle offsets at all |
|
956 * (sourceIndex<0) or may not update the offsets pointer |
|
957 */ |
|
958 pArgs->offsets=offsets+=length; |
|
959 } |
|
960 |
|
961 if(sourceIndex>=0) { |
|
962 sourceIndex+=(int32_t)(pArgs->source-s); |
|
963 } |
|
964 } |
|
965 |
|
966 if(cnv->preFromULength<0) { |
|
967 /* |
|
968 * switch the source to new replay units (cannot occur while replaying) |
|
969 * after offset handling and before end-of-input and callback handling |
|
970 */ |
|
971 if(realSource==NULL) { |
|
972 realSource=pArgs->source; |
|
973 realSourceLimit=pArgs->sourceLimit; |
|
974 realFlush=pArgs->flush; |
|
975 realSourceIndex=sourceIndex; |
|
976 |
|
977 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); |
|
978 pArgs->source=replay; |
|
979 pArgs->sourceLimit=replay-cnv->preFromULength; |
|
980 pArgs->flush=FALSE; |
|
981 if((sourceIndex+=cnv->preFromULength)<0) { |
|
982 sourceIndex=-1; |
|
983 } |
|
984 |
|
985 cnv->preFromULength=0; |
|
986 } else { |
|
987 /* see implementation note before _fromUnicodeWithCallback() */ |
|
988 U_ASSERT(realSource==NULL); |
|
989 *err=U_INTERNAL_PROGRAM_ERROR; |
|
990 } |
|
991 } |
|
992 |
|
993 /* update pointers */ |
|
994 s=pArgs->source; |
|
995 t=pArgs->target; |
|
996 |
|
997 if(U_SUCCESS(*err)) { |
|
998 if(s<pArgs->sourceLimit) { |
|
999 /* |
|
1000 * continue with the conversion loop while there is still input left |
|
1001 * (continue converting by breaking out of only the inner loop) |
|
1002 */ |
|
1003 break; |
|
1004 } else if(realSource!=NULL) { |
|
1005 /* switch back from replaying to the real source and continue */ |
|
1006 pArgs->source=realSource; |
|
1007 pArgs->sourceLimit=realSourceLimit; |
|
1008 pArgs->flush=realFlush; |
|
1009 sourceIndex=realSourceIndex; |
|
1010 |
|
1011 realSource=NULL; |
|
1012 break; |
|
1013 } else if(pArgs->flush && cnv->fromUChar32!=0) { |
|
1014 /* |
|
1015 * the entire input stream is consumed |
|
1016 * and there is a partial, truncated input sequence left |
|
1017 */ |
|
1018 |
|
1019 /* inject an error and continue with callback handling */ |
|
1020 *err=U_TRUNCATED_CHAR_FOUND; |
|
1021 calledCallback=FALSE; /* new error condition */ |
|
1022 } else { |
|
1023 /* input consumed */ |
|
1024 if(pArgs->flush) { |
|
1025 /* |
|
1026 * return to the conversion loop once more if the flush |
|
1027 * flag is set and the conversion function has not |
|
1028 * successfully processed the end of the input yet |
|
1029 * |
|
1030 * (continue converting by breaking out of only the inner loop) |
|
1031 */ |
|
1032 if(!converterSawEndOfInput) { |
|
1033 break; |
|
1034 } |
|
1035 |
|
1036 /* reset the converter without calling the callback function */ |
|
1037 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); |
|
1038 } |
|
1039 |
|
1040 /* done successfully */ |
|
1041 return; |
|
1042 } |
|
1043 } |
|
1044 |
|
1045 /* U_FAILURE(*err) */ |
|
1046 { |
|
1047 UErrorCode e; |
|
1048 |
|
1049 if( calledCallback || |
|
1050 (e=*err)==U_BUFFER_OVERFLOW_ERROR || |
|
1051 (e!=U_INVALID_CHAR_FOUND && |
|
1052 e!=U_ILLEGAL_CHAR_FOUND && |
|
1053 e!=U_TRUNCATED_CHAR_FOUND) |
|
1054 ) { |
|
1055 /* |
|
1056 * the callback did not or cannot resolve the error: |
|
1057 * set output pointers and return |
|
1058 * |
|
1059 * the check for buffer overflow is redundant but it is |
|
1060 * a high-runner case and hopefully documents the intent |
|
1061 * well |
|
1062 * |
|
1063 * if we were replaying, then the replay buffer must be |
|
1064 * copied back into the UConverter |
|
1065 * and the real arguments must be restored |
|
1066 */ |
|
1067 if(realSource!=NULL) { |
|
1068 int32_t length; |
|
1069 |
|
1070 U_ASSERT(cnv->preFromULength==0); |
|
1071 |
|
1072 length=(int32_t)(pArgs->sourceLimit-pArgs->source); |
|
1073 if(length>0) { |
|
1074 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR); |
|
1075 cnv->preFromULength=(int8_t)-length; |
|
1076 } |
|
1077 |
|
1078 pArgs->source=realSource; |
|
1079 pArgs->sourceLimit=realSourceLimit; |
|
1080 pArgs->flush=realFlush; |
|
1081 } |
|
1082 |
|
1083 return; |
|
1084 } |
|
1085 } |
|
1086 |
|
1087 /* callback handling */ |
|
1088 { |
|
1089 UChar32 codePoint; |
|
1090 |
|
1091 /* get and write the code point */ |
|
1092 codePoint=cnv->fromUChar32; |
|
1093 errorInputLength=0; |
|
1094 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); |
|
1095 cnv->invalidUCharLength=(int8_t)errorInputLength; |
|
1096 |
|
1097 /* set the converter state to deal with the next character */ |
|
1098 cnv->fromUChar32=0; |
|
1099 |
|
1100 /* call the callback function */ |
|
1101 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, |
|
1102 cnv->invalidUCharBuffer, errorInputLength, codePoint, |
|
1103 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, |
|
1104 err); |
|
1105 } |
|
1106 |
|
1107 /* |
|
1108 * loop back to the offset handling |
|
1109 * |
|
1110 * this flag will indicate after offset handling |
|
1111 * that a callback was called; |
|
1112 * if the callback did not resolve the error, then we return |
|
1113 */ |
|
1114 calledCallback=TRUE; |
|
1115 } |
|
1116 } |
|
1117 } |
|
1118 |
|
1119 /* |
|
1120 * Output the fromUnicode overflow buffer. |
|
1121 * Call this function if(cnv->charErrorBufferLength>0). |
|
1122 * @return TRUE if overflow |
|
1123 */ |
|
1124 static UBool |
|
1125 ucnv_outputOverflowFromUnicode(UConverter *cnv, |
|
1126 char **target, const char *targetLimit, |
|
1127 int32_t **pOffsets, |
|
1128 UErrorCode *err) { |
|
1129 int32_t *offsets; |
|
1130 char *overflow, *t; |
|
1131 int32_t i, length; |
|
1132 |
|
1133 t=*target; |
|
1134 if(pOffsets!=NULL) { |
|
1135 offsets=*pOffsets; |
|
1136 } else { |
|
1137 offsets=NULL; |
|
1138 } |
|
1139 |
|
1140 overflow=(char *)cnv->charErrorBuffer; |
|
1141 length=cnv->charErrorBufferLength; |
|
1142 i=0; |
|
1143 while(i<length) { |
|
1144 if(t==targetLimit) { |
|
1145 /* the overflow buffer contains too much, keep the rest */ |
|
1146 int32_t j=0; |
|
1147 |
|
1148 do { |
|
1149 overflow[j++]=overflow[i++]; |
|
1150 } while(i<length); |
|
1151 |
|
1152 cnv->charErrorBufferLength=(int8_t)j; |
|
1153 *target=t; |
|
1154 if(offsets!=NULL) { |
|
1155 *pOffsets=offsets; |
|
1156 } |
|
1157 *err=U_BUFFER_OVERFLOW_ERROR; |
|
1158 return TRUE; |
|
1159 } |
|
1160 |
|
1161 /* copy the overflow contents to the target */ |
|
1162 *t++=overflow[i++]; |
|
1163 if(offsets!=NULL) { |
|
1164 *offsets++=-1; /* no source index available for old output */ |
|
1165 } |
|
1166 } |
|
1167 |
|
1168 /* the overflow buffer is completely copied to the target */ |
|
1169 cnv->charErrorBufferLength=0; |
|
1170 *target=t; |
|
1171 if(offsets!=NULL) { |
|
1172 *pOffsets=offsets; |
|
1173 } |
|
1174 return FALSE; |
|
1175 } |
|
1176 |
|
1177 U_CAPI void U_EXPORT2 |
|
1178 ucnv_fromUnicode(UConverter *cnv, |
|
1179 char **target, const char *targetLimit, |
|
1180 const UChar **source, const UChar *sourceLimit, |
|
1181 int32_t *offsets, |
|
1182 UBool flush, |
|
1183 UErrorCode *err) { |
|
1184 UConverterFromUnicodeArgs args; |
|
1185 const UChar *s; |
|
1186 char *t; |
|
1187 |
|
1188 /* check parameters */ |
|
1189 if(err==NULL || U_FAILURE(*err)) { |
|
1190 return; |
|
1191 } |
|
1192 |
|
1193 if(cnv==NULL || target==NULL || source==NULL) { |
|
1194 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1195 return; |
|
1196 } |
|
1197 |
|
1198 s=*source; |
|
1199 t=*target; |
|
1200 |
|
1201 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { |
|
1202 /* |
|
1203 Prevent code from going into an infinite loop in case we do hit this |
|
1204 limit. The limit pointer is expected to be on a UChar * boundary. |
|
1205 This also prevents the next argument check from failing. |
|
1206 */ |
|
1207 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); |
|
1208 } |
|
1209 |
|
1210 /* |
|
1211 * All these conditions should never happen. |
|
1212 * |
|
1213 * 1) Make sure that the limits are >= to the address source or target |
|
1214 * |
|
1215 * 2) Make sure that the buffer sizes do not exceed the number range for |
|
1216 * int32_t because some functions use the size (in units or bytes) |
|
1217 * rather than comparing pointers, and because offsets are int32_t values. |
|
1218 * |
|
1219 * size_t is guaranteed to be unsigned and large enough for the job. |
|
1220 * |
|
1221 * Return with an error instead of adjusting the limits because we would |
|
1222 * not be able to maintain the semantics that either the source must be |
|
1223 * consumed or the target filled (unless an error occurs). |
|
1224 * An adjustment would be targetLimit=t+0x7fffffff; for example. |
|
1225 * |
|
1226 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer |
|
1227 * to a char * pointer and provide an incomplete UChar code unit. |
|
1228 */ |
|
1229 if (sourceLimit<s || targetLimit<t || |
|
1230 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || |
|
1231 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || |
|
1232 (((const char *)sourceLimit-(const char *)s) & 1) != 0) |
|
1233 { |
|
1234 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1235 return; |
|
1236 } |
|
1237 |
|
1238 /* output the target overflow buffer */ |
|
1239 if( cnv->charErrorBufferLength>0 && |
|
1240 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) |
|
1241 ) { |
|
1242 /* U_BUFFER_OVERFLOW_ERROR */ |
|
1243 return; |
|
1244 } |
|
1245 /* *target may have moved, therefore stop using t */ |
|
1246 |
|
1247 if(!flush && s==sourceLimit && cnv->preFromULength>=0) { |
|
1248 /* the overflow buffer is emptied and there is no new input: we are done */ |
|
1249 return; |
|
1250 } |
|
1251 |
|
1252 /* |
|
1253 * Do not simply return with a buffer overflow error if |
|
1254 * !flush && t==targetLimit |
|
1255 * because it is possible that the source will not generate any output. |
|
1256 * For example, the skip callback may be called; |
|
1257 * it does not output anything. |
|
1258 */ |
|
1259 |
|
1260 /* prepare the converter arguments */ |
|
1261 args.converter=cnv; |
|
1262 args.flush=flush; |
|
1263 args.offsets=offsets; |
|
1264 args.source=s; |
|
1265 args.sourceLimit=sourceLimit; |
|
1266 args.target=*target; |
|
1267 args.targetLimit=targetLimit; |
|
1268 args.size=sizeof(args); |
|
1269 |
|
1270 _fromUnicodeWithCallback(&args, err); |
|
1271 |
|
1272 *source=args.source; |
|
1273 *target=args.target; |
|
1274 } |
|
1275 |
|
1276 /* ucnv_toUnicode() --------------------------------------------------------- */ |
|
1277 |
|
1278 static void |
|
1279 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { |
|
1280 UConverterToUnicode toUnicode; |
|
1281 UConverter *cnv; |
|
1282 const char *s; |
|
1283 UChar *t; |
|
1284 int32_t *offsets; |
|
1285 int32_t sourceIndex; |
|
1286 int32_t errorInputLength; |
|
1287 UBool converterSawEndOfInput, calledCallback; |
|
1288 |
|
1289 /* variables for m:n conversion */ |
|
1290 char replay[UCNV_EXT_MAX_BYTES]; |
|
1291 const char *realSource, *realSourceLimit; |
|
1292 int32_t realSourceIndex; |
|
1293 UBool realFlush; |
|
1294 |
|
1295 cnv=pArgs->converter; |
|
1296 s=pArgs->source; |
|
1297 t=pArgs->target; |
|
1298 offsets=pArgs->offsets; |
|
1299 |
|
1300 /* get the converter implementation function */ |
|
1301 sourceIndex=0; |
|
1302 if(offsets==NULL) { |
|
1303 toUnicode=cnv->sharedData->impl->toUnicode; |
|
1304 } else { |
|
1305 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; |
|
1306 if(toUnicode==NULL) { |
|
1307 /* there is no WithOffsets implementation */ |
|
1308 toUnicode=cnv->sharedData->impl->toUnicode; |
|
1309 /* we will write -1 for each offset */ |
|
1310 sourceIndex=-1; |
|
1311 } |
|
1312 } |
|
1313 |
|
1314 if(cnv->preToULength>=0) { |
|
1315 /* normal mode */ |
|
1316 realSource=NULL; |
|
1317 |
|
1318 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ |
|
1319 realSourceLimit=NULL; |
|
1320 realFlush=FALSE; |
|
1321 realSourceIndex=0; |
|
1322 } else { |
|
1323 /* |
|
1324 * Previous m:n conversion stored source units from a partial match |
|
1325 * and failed to consume all of them. |
|
1326 * We need to "replay" them from a temporary buffer and convert them first. |
|
1327 */ |
|
1328 realSource=pArgs->source; |
|
1329 realSourceLimit=pArgs->sourceLimit; |
|
1330 realFlush=pArgs->flush; |
|
1331 realSourceIndex=sourceIndex; |
|
1332 |
|
1333 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); |
|
1334 pArgs->source=replay; |
|
1335 pArgs->sourceLimit=replay-cnv->preToULength; |
|
1336 pArgs->flush=FALSE; |
|
1337 sourceIndex=-1; |
|
1338 |
|
1339 cnv->preToULength=0; |
|
1340 } |
|
1341 |
|
1342 /* |
|
1343 * loop for conversion and error handling |
|
1344 * |
|
1345 * loop { |
|
1346 * convert |
|
1347 * loop { |
|
1348 * update offsets |
|
1349 * handle end of input |
|
1350 * handle errors/call callback |
|
1351 * } |
|
1352 * } |
|
1353 */ |
|
1354 for(;;) { |
|
1355 if(U_SUCCESS(*err)) { |
|
1356 /* convert */ |
|
1357 toUnicode(pArgs, err); |
|
1358 |
|
1359 /* |
|
1360 * set a flag for whether the converter |
|
1361 * successfully processed the end of the input |
|
1362 * |
|
1363 * need not check cnv->preToULength==0 because a replay (<0) will cause |
|
1364 * s<sourceLimit before converterSawEndOfInput is checked |
|
1365 */ |
|
1366 converterSawEndOfInput= |
|
1367 (UBool)(U_SUCCESS(*err) && |
|
1368 pArgs->flush && pArgs->source==pArgs->sourceLimit && |
|
1369 cnv->toULength==0); |
|
1370 } else { |
|
1371 /* handle error from getNextUChar() or ucnv_convertEx() */ |
|
1372 converterSawEndOfInput=FALSE; |
|
1373 } |
|
1374 |
|
1375 /* no callback called yet for this iteration */ |
|
1376 calledCallback=FALSE; |
|
1377 |
|
1378 /* no sourceIndex adjustment for conversion, only for callback output */ |
|
1379 errorInputLength=0; |
|
1380 |
|
1381 /* |
|
1382 * loop for offsets and error handling |
|
1383 * |
|
1384 * iterates at most 3 times: |
|
1385 * 1. to clean up after the conversion function |
|
1386 * 2. after the callback |
|
1387 * 3. after the callback again if there was truncated input |
|
1388 */ |
|
1389 for(;;) { |
|
1390 /* update offsets if we write any */ |
|
1391 if(offsets!=NULL) { |
|
1392 int32_t length=(int32_t)(pArgs->target-t); |
|
1393 if(length>0) { |
|
1394 _updateOffsets(offsets, length, sourceIndex, errorInputLength); |
|
1395 |
|
1396 /* |
|
1397 * if a converter handles offsets and updates the offsets |
|
1398 * pointer at the end, then pArgs->offset should not change |
|
1399 * here; |
|
1400 * however, some converters do not handle offsets at all |
|
1401 * (sourceIndex<0) or may not update the offsets pointer |
|
1402 */ |
|
1403 pArgs->offsets=offsets+=length; |
|
1404 } |
|
1405 |
|
1406 if(sourceIndex>=0) { |
|
1407 sourceIndex+=(int32_t)(pArgs->source-s); |
|
1408 } |
|
1409 } |
|
1410 |
|
1411 if(cnv->preToULength<0) { |
|
1412 /* |
|
1413 * switch the source to new replay units (cannot occur while replaying) |
|
1414 * after offset handling and before end-of-input and callback handling |
|
1415 */ |
|
1416 if(realSource==NULL) { |
|
1417 realSource=pArgs->source; |
|
1418 realSourceLimit=pArgs->sourceLimit; |
|
1419 realFlush=pArgs->flush; |
|
1420 realSourceIndex=sourceIndex; |
|
1421 |
|
1422 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); |
|
1423 pArgs->source=replay; |
|
1424 pArgs->sourceLimit=replay-cnv->preToULength; |
|
1425 pArgs->flush=FALSE; |
|
1426 if((sourceIndex+=cnv->preToULength)<0) { |
|
1427 sourceIndex=-1; |
|
1428 } |
|
1429 |
|
1430 cnv->preToULength=0; |
|
1431 } else { |
|
1432 /* see implementation note before _fromUnicodeWithCallback() */ |
|
1433 U_ASSERT(realSource==NULL); |
|
1434 *err=U_INTERNAL_PROGRAM_ERROR; |
|
1435 } |
|
1436 } |
|
1437 |
|
1438 /* update pointers */ |
|
1439 s=pArgs->source; |
|
1440 t=pArgs->target; |
|
1441 |
|
1442 if(U_SUCCESS(*err)) { |
|
1443 if(s<pArgs->sourceLimit) { |
|
1444 /* |
|
1445 * continue with the conversion loop while there is still input left |
|
1446 * (continue converting by breaking out of only the inner loop) |
|
1447 */ |
|
1448 break; |
|
1449 } else if(realSource!=NULL) { |
|
1450 /* switch back from replaying to the real source and continue */ |
|
1451 pArgs->source=realSource; |
|
1452 pArgs->sourceLimit=realSourceLimit; |
|
1453 pArgs->flush=realFlush; |
|
1454 sourceIndex=realSourceIndex; |
|
1455 |
|
1456 realSource=NULL; |
|
1457 break; |
|
1458 } else if(pArgs->flush && cnv->toULength>0) { |
|
1459 /* |
|
1460 * the entire input stream is consumed |
|
1461 * and there is a partial, truncated input sequence left |
|
1462 */ |
|
1463 |
|
1464 /* inject an error and continue with callback handling */ |
|
1465 *err=U_TRUNCATED_CHAR_FOUND; |
|
1466 calledCallback=FALSE; /* new error condition */ |
|
1467 } else { |
|
1468 /* input consumed */ |
|
1469 if(pArgs->flush) { |
|
1470 /* |
|
1471 * return to the conversion loop once more if the flush |
|
1472 * flag is set and the conversion function has not |
|
1473 * successfully processed the end of the input yet |
|
1474 * |
|
1475 * (continue converting by breaking out of only the inner loop) |
|
1476 */ |
|
1477 if(!converterSawEndOfInput) { |
|
1478 break; |
|
1479 } |
|
1480 |
|
1481 /* reset the converter without calling the callback function */ |
|
1482 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); |
|
1483 } |
|
1484 |
|
1485 /* done successfully */ |
|
1486 return; |
|
1487 } |
|
1488 } |
|
1489 |
|
1490 /* U_FAILURE(*err) */ |
|
1491 { |
|
1492 UErrorCode e; |
|
1493 |
|
1494 if( calledCallback || |
|
1495 (e=*err)==U_BUFFER_OVERFLOW_ERROR || |
|
1496 (e!=U_INVALID_CHAR_FOUND && |
|
1497 e!=U_ILLEGAL_CHAR_FOUND && |
|
1498 e!=U_TRUNCATED_CHAR_FOUND && |
|
1499 e!=U_ILLEGAL_ESCAPE_SEQUENCE && |
|
1500 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) |
|
1501 ) { |
|
1502 /* |
|
1503 * the callback did not or cannot resolve the error: |
|
1504 * set output pointers and return |
|
1505 * |
|
1506 * the check for buffer overflow is redundant but it is |
|
1507 * a high-runner case and hopefully documents the intent |
|
1508 * well |
|
1509 * |
|
1510 * if we were replaying, then the replay buffer must be |
|
1511 * copied back into the UConverter |
|
1512 * and the real arguments must be restored |
|
1513 */ |
|
1514 if(realSource!=NULL) { |
|
1515 int32_t length; |
|
1516 |
|
1517 U_ASSERT(cnv->preToULength==0); |
|
1518 |
|
1519 length=(int32_t)(pArgs->sourceLimit-pArgs->source); |
|
1520 if(length>0) { |
|
1521 uprv_memcpy(cnv->preToU, pArgs->source, length); |
|
1522 cnv->preToULength=(int8_t)-length; |
|
1523 } |
|
1524 |
|
1525 pArgs->source=realSource; |
|
1526 pArgs->sourceLimit=realSourceLimit; |
|
1527 pArgs->flush=realFlush; |
|
1528 } |
|
1529 |
|
1530 return; |
|
1531 } |
|
1532 } |
|
1533 |
|
1534 /* copy toUBytes[] to invalidCharBuffer[] */ |
|
1535 errorInputLength=cnv->invalidCharLength=cnv->toULength; |
|
1536 if(errorInputLength>0) { |
|
1537 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); |
|
1538 } |
|
1539 |
|
1540 /* set the converter state to deal with the next character */ |
|
1541 cnv->toULength=0; |
|
1542 |
|
1543 /* call the callback function */ |
|
1544 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { |
|
1545 cnv->toUCallbackReason = UCNV_UNASSIGNED; |
|
1546 } |
|
1547 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, |
|
1548 cnv->invalidCharBuffer, errorInputLength, |
|
1549 cnv->toUCallbackReason, |
|
1550 err); |
|
1551 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ |
|
1552 |
|
1553 /* |
|
1554 * loop back to the offset handling |
|
1555 * |
|
1556 * this flag will indicate after offset handling |
|
1557 * that a callback was called; |
|
1558 * if the callback did not resolve the error, then we return |
|
1559 */ |
|
1560 calledCallback=TRUE; |
|
1561 } |
|
1562 } |
|
1563 } |
|
1564 |
|
1565 /* |
|
1566 * Output the toUnicode overflow buffer. |
|
1567 * Call this function if(cnv->UCharErrorBufferLength>0). |
|
1568 * @return TRUE if overflow |
|
1569 */ |
|
1570 static UBool |
|
1571 ucnv_outputOverflowToUnicode(UConverter *cnv, |
|
1572 UChar **target, const UChar *targetLimit, |
|
1573 int32_t **pOffsets, |
|
1574 UErrorCode *err) { |
|
1575 int32_t *offsets; |
|
1576 UChar *overflow, *t; |
|
1577 int32_t i, length; |
|
1578 |
|
1579 t=*target; |
|
1580 if(pOffsets!=NULL) { |
|
1581 offsets=*pOffsets; |
|
1582 } else { |
|
1583 offsets=NULL; |
|
1584 } |
|
1585 |
|
1586 overflow=cnv->UCharErrorBuffer; |
|
1587 length=cnv->UCharErrorBufferLength; |
|
1588 i=0; |
|
1589 while(i<length) { |
|
1590 if(t==targetLimit) { |
|
1591 /* the overflow buffer contains too much, keep the rest */ |
|
1592 int32_t j=0; |
|
1593 |
|
1594 do { |
|
1595 overflow[j++]=overflow[i++]; |
|
1596 } while(i<length); |
|
1597 |
|
1598 cnv->UCharErrorBufferLength=(int8_t)j; |
|
1599 *target=t; |
|
1600 if(offsets!=NULL) { |
|
1601 *pOffsets=offsets; |
|
1602 } |
|
1603 *err=U_BUFFER_OVERFLOW_ERROR; |
|
1604 return TRUE; |
|
1605 } |
|
1606 |
|
1607 /* copy the overflow contents to the target */ |
|
1608 *t++=overflow[i++]; |
|
1609 if(offsets!=NULL) { |
|
1610 *offsets++=-1; /* no source index available for old output */ |
|
1611 } |
|
1612 } |
|
1613 |
|
1614 /* the overflow buffer is completely copied to the target */ |
|
1615 cnv->UCharErrorBufferLength=0; |
|
1616 *target=t; |
|
1617 if(offsets!=NULL) { |
|
1618 *pOffsets=offsets; |
|
1619 } |
|
1620 return FALSE; |
|
1621 } |
|
1622 |
|
1623 U_CAPI void U_EXPORT2 |
|
1624 ucnv_toUnicode(UConverter *cnv, |
|
1625 UChar **target, const UChar *targetLimit, |
|
1626 const char **source, const char *sourceLimit, |
|
1627 int32_t *offsets, |
|
1628 UBool flush, |
|
1629 UErrorCode *err) { |
|
1630 UConverterToUnicodeArgs args; |
|
1631 const char *s; |
|
1632 UChar *t; |
|
1633 |
|
1634 /* check parameters */ |
|
1635 if(err==NULL || U_FAILURE(*err)) { |
|
1636 return; |
|
1637 } |
|
1638 |
|
1639 if(cnv==NULL || target==NULL || source==NULL) { |
|
1640 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1641 return; |
|
1642 } |
|
1643 |
|
1644 s=*source; |
|
1645 t=*target; |
|
1646 |
|
1647 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { |
|
1648 /* |
|
1649 Prevent code from going into an infinite loop in case we do hit this |
|
1650 limit. The limit pointer is expected to be on a UChar * boundary. |
|
1651 This also prevents the next argument check from failing. |
|
1652 */ |
|
1653 targetLimit = (const UChar *)(((const char *)targetLimit) - 1); |
|
1654 } |
|
1655 |
|
1656 /* |
|
1657 * All these conditions should never happen. |
|
1658 * |
|
1659 * 1) Make sure that the limits are >= to the address source or target |
|
1660 * |
|
1661 * 2) Make sure that the buffer sizes do not exceed the number range for |
|
1662 * int32_t because some functions use the size (in units or bytes) |
|
1663 * rather than comparing pointers, and because offsets are int32_t values. |
|
1664 * |
|
1665 * size_t is guaranteed to be unsigned and large enough for the job. |
|
1666 * |
|
1667 * Return with an error instead of adjusting the limits because we would |
|
1668 * not be able to maintain the semantics that either the source must be |
|
1669 * consumed or the target filled (unless an error occurs). |
|
1670 * An adjustment would be sourceLimit=t+0x7fffffff; for example. |
|
1671 * |
|
1672 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer |
|
1673 * to a char * pointer and provide an incomplete UChar code unit. |
|
1674 */ |
|
1675 if (sourceLimit<s || targetLimit<t || |
|
1676 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || |
|
1677 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || |
|
1678 (((const char *)targetLimit-(const char *)t) & 1) != 0 |
|
1679 ) { |
|
1680 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1681 return; |
|
1682 } |
|
1683 |
|
1684 /* output the target overflow buffer */ |
|
1685 if( cnv->UCharErrorBufferLength>0 && |
|
1686 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) |
|
1687 ) { |
|
1688 /* U_BUFFER_OVERFLOW_ERROR */ |
|
1689 return; |
|
1690 } |
|
1691 /* *target may have moved, therefore stop using t */ |
|
1692 |
|
1693 if(!flush && s==sourceLimit && cnv->preToULength>=0) { |
|
1694 /* the overflow buffer is emptied and there is no new input: we are done */ |
|
1695 return; |
|
1696 } |
|
1697 |
|
1698 /* |
|
1699 * Do not simply return with a buffer overflow error if |
|
1700 * !flush && t==targetLimit |
|
1701 * because it is possible that the source will not generate any output. |
|
1702 * For example, the skip callback may be called; |
|
1703 * it does not output anything. |
|
1704 */ |
|
1705 |
|
1706 /* prepare the converter arguments */ |
|
1707 args.converter=cnv; |
|
1708 args.flush=flush; |
|
1709 args.offsets=offsets; |
|
1710 args.source=s; |
|
1711 args.sourceLimit=sourceLimit; |
|
1712 args.target=*target; |
|
1713 args.targetLimit=targetLimit; |
|
1714 args.size=sizeof(args); |
|
1715 |
|
1716 _toUnicodeWithCallback(&args, err); |
|
1717 |
|
1718 *source=args.source; |
|
1719 *target=args.target; |
|
1720 } |
|
1721 |
|
1722 /* ucnv_to/fromUChars() ----------------------------------------------------- */ |
|
1723 |
|
1724 U_CAPI int32_t U_EXPORT2 |
|
1725 ucnv_fromUChars(UConverter *cnv, |
|
1726 char *dest, int32_t destCapacity, |
|
1727 const UChar *src, int32_t srcLength, |
|
1728 UErrorCode *pErrorCode) { |
|
1729 const UChar *srcLimit; |
|
1730 char *originalDest, *destLimit; |
|
1731 int32_t destLength; |
|
1732 |
|
1733 /* check arguments */ |
|
1734 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
1735 return 0; |
|
1736 } |
|
1737 |
|
1738 if( cnv==NULL || |
|
1739 destCapacity<0 || (destCapacity>0 && dest==NULL) || |
|
1740 srcLength<-1 || (srcLength!=0 && src==NULL) |
|
1741 ) { |
|
1742 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
1743 return 0; |
|
1744 } |
|
1745 |
|
1746 /* initialize */ |
|
1747 ucnv_resetFromUnicode(cnv); |
|
1748 originalDest=dest; |
|
1749 if(srcLength==-1) { |
|
1750 srcLength=u_strlen(src); |
|
1751 } |
|
1752 if(srcLength>0) { |
|
1753 srcLimit=src+srcLength; |
|
1754 destLimit=dest+destCapacity; |
|
1755 |
|
1756 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ |
|
1757 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { |
|
1758 destLimit=(char *)U_MAX_PTR(dest); |
|
1759 } |
|
1760 |
|
1761 /* perform the conversion */ |
|
1762 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
|
1763 destLength=(int32_t)(dest-originalDest); |
|
1764 |
|
1765 /* if an overflow occurs, then get the preflighting length */ |
|
1766 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
|
1767 char buffer[1024]; |
|
1768 |
|
1769 destLimit=buffer+sizeof(buffer); |
|
1770 do { |
|
1771 dest=buffer; |
|
1772 *pErrorCode=U_ZERO_ERROR; |
|
1773 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
|
1774 destLength+=(int32_t)(dest-buffer); |
|
1775 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); |
|
1776 } |
|
1777 } else { |
|
1778 destLength=0; |
|
1779 } |
|
1780 |
|
1781 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); |
|
1782 } |
|
1783 |
|
1784 U_CAPI int32_t U_EXPORT2 |
|
1785 ucnv_toUChars(UConverter *cnv, |
|
1786 UChar *dest, int32_t destCapacity, |
|
1787 const char *src, int32_t srcLength, |
|
1788 UErrorCode *pErrorCode) { |
|
1789 const char *srcLimit; |
|
1790 UChar *originalDest, *destLimit; |
|
1791 int32_t destLength; |
|
1792 |
|
1793 /* check arguments */ |
|
1794 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
1795 return 0; |
|
1796 } |
|
1797 |
|
1798 if( cnv==NULL || |
|
1799 destCapacity<0 || (destCapacity>0 && dest==NULL) || |
|
1800 srcLength<-1 || (srcLength!=0 && src==NULL)) |
|
1801 { |
|
1802 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
1803 return 0; |
|
1804 } |
|
1805 |
|
1806 /* initialize */ |
|
1807 ucnv_resetToUnicode(cnv); |
|
1808 originalDest=dest; |
|
1809 if(srcLength==-1) { |
|
1810 srcLength=(int32_t)uprv_strlen(src); |
|
1811 } |
|
1812 if(srcLength>0) { |
|
1813 srcLimit=src+srcLength; |
|
1814 destLimit=dest+destCapacity; |
|
1815 |
|
1816 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ |
|
1817 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { |
|
1818 destLimit=(UChar *)U_MAX_PTR(dest); |
|
1819 } |
|
1820 |
|
1821 /* perform the conversion */ |
|
1822 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
|
1823 destLength=(int32_t)(dest-originalDest); |
|
1824 |
|
1825 /* if an overflow occurs, then get the preflighting length */ |
|
1826 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) |
|
1827 { |
|
1828 UChar buffer[1024]; |
|
1829 |
|
1830 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR; |
|
1831 do { |
|
1832 dest=buffer; |
|
1833 *pErrorCode=U_ZERO_ERROR; |
|
1834 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
|
1835 destLength+=(int32_t)(dest-buffer); |
|
1836 } |
|
1837 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); |
|
1838 } |
|
1839 } else { |
|
1840 destLength=0; |
|
1841 } |
|
1842 |
|
1843 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); |
|
1844 } |
|
1845 |
|
1846 /* ucnv_getNextUChar() ------------------------------------------------------ */ |
|
1847 |
|
1848 U_CAPI UChar32 U_EXPORT2 |
|
1849 ucnv_getNextUChar(UConverter *cnv, |
|
1850 const char **source, const char *sourceLimit, |
|
1851 UErrorCode *err) { |
|
1852 UConverterToUnicodeArgs args; |
|
1853 UChar buffer[U16_MAX_LENGTH]; |
|
1854 const char *s; |
|
1855 UChar32 c; |
|
1856 int32_t i, length; |
|
1857 |
|
1858 /* check parameters */ |
|
1859 if(err==NULL || U_FAILURE(*err)) { |
|
1860 return 0xffff; |
|
1861 } |
|
1862 |
|
1863 if(cnv==NULL || source==NULL) { |
|
1864 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1865 return 0xffff; |
|
1866 } |
|
1867 |
|
1868 s=*source; |
|
1869 if(sourceLimit<s) { |
|
1870 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1871 return 0xffff; |
|
1872 } |
|
1873 |
|
1874 /* |
|
1875 * Make sure that the buffer sizes do not exceed the number range for |
|
1876 * int32_t because some functions use the size (in units or bytes) |
|
1877 * rather than comparing pointers, and because offsets are int32_t values. |
|
1878 * |
|
1879 * size_t is guaranteed to be unsigned and large enough for the job. |
|
1880 * |
|
1881 * Return with an error instead of adjusting the limits because we would |
|
1882 * not be able to maintain the semantics that either the source must be |
|
1883 * consumed or the target filled (unless an error occurs). |
|
1884 * An adjustment would be sourceLimit=t+0x7fffffff; for example. |
|
1885 */ |
|
1886 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { |
|
1887 *err=U_ILLEGAL_ARGUMENT_ERROR; |
|
1888 return 0xffff; |
|
1889 } |
|
1890 |
|
1891 c=U_SENTINEL; |
|
1892 |
|
1893 /* flush the target overflow buffer */ |
|
1894 if(cnv->UCharErrorBufferLength>0) { |
|
1895 UChar *overflow; |
|
1896 |
|
1897 overflow=cnv->UCharErrorBuffer; |
|
1898 i=0; |
|
1899 length=cnv->UCharErrorBufferLength; |
|
1900 U16_NEXT(overflow, i, length, c); |
|
1901 |
|
1902 /* move the remaining overflow contents up to the beginning */ |
|
1903 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { |
|
1904 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, |
|
1905 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); |
|
1906 } |
|
1907 |
|
1908 if(!U16_IS_LEAD(c) || i<length) { |
|
1909 return c; |
|
1910 } |
|
1911 /* |
|
1912 * Continue if the overflow buffer contained only a lead surrogate, |
|
1913 * in case the converter outputs single surrogates from complete |
|
1914 * input sequences. |
|
1915 */ |
|
1916 } |
|
1917 |
|
1918 /* |
|
1919 * flush==TRUE is implied for ucnv_getNextUChar() |
|
1920 * |
|
1921 * do not simply return even if s==sourceLimit because the converter may |
|
1922 * not have seen flush==TRUE before |
|
1923 */ |
|
1924 |
|
1925 /* prepare the converter arguments */ |
|
1926 args.converter=cnv; |
|
1927 args.flush=TRUE; |
|
1928 args.offsets=NULL; |
|
1929 args.source=s; |
|
1930 args.sourceLimit=sourceLimit; |
|
1931 args.target=buffer; |
|
1932 args.targetLimit=buffer+1; |
|
1933 args.size=sizeof(args); |
|
1934 |
|
1935 if(c<0) { |
|
1936 /* |
|
1937 * call the native getNextUChar() implementation if we are |
|
1938 * at a character boundary (toULength==0) |
|
1939 * |
|
1940 * unlike with _toUnicode(), getNextUChar() implementations must set |
|
1941 * U_TRUNCATED_CHAR_FOUND for truncated input, |
|
1942 * in addition to setting toULength/toUBytes[] |
|
1943 */ |
|
1944 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { |
|
1945 c=cnv->sharedData->impl->getNextUChar(&args, err); |
|
1946 *source=s=args.source; |
|
1947 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { |
|
1948 /* reset the converter without calling the callback function */ |
|
1949 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); |
|
1950 return 0xffff; /* no output */ |
|
1951 } else if(U_SUCCESS(*err) && c>=0) { |
|
1952 return c; |
|
1953 /* |
|
1954 * else fall through to use _toUnicode() because |
|
1955 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all |
|
1956 * U_FAILURE: call _toUnicode() for callback handling (do not output c) |
|
1957 */ |
|
1958 } |
|
1959 } |
|
1960 |
|
1961 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ |
|
1962 _toUnicodeWithCallback(&args, err); |
|
1963 |
|
1964 if(*err==U_BUFFER_OVERFLOW_ERROR) { |
|
1965 *err=U_ZERO_ERROR; |
|
1966 } |
|
1967 |
|
1968 i=0; |
|
1969 length=(int32_t)(args.target-buffer); |
|
1970 } else { |
|
1971 /* write the lead surrogate from the overflow buffer */ |
|
1972 buffer[0]=(UChar)c; |
|
1973 args.target=buffer+1; |
|
1974 i=0; |
|
1975 length=1; |
|
1976 } |
|
1977 |
|
1978 /* buffer contents starts at i and ends before length */ |
|
1979 |
|
1980 if(U_FAILURE(*err)) { |
|
1981 c=0xffff; /* no output */ |
|
1982 } else if(length==0) { |
|
1983 /* no input or only state changes */ |
|
1984 *err=U_INDEX_OUTOFBOUNDS_ERROR; |
|
1985 /* no need to reset explicitly because _toUnicodeWithCallback() did it */ |
|
1986 c=0xffff; /* no output */ |
|
1987 } else { |
|
1988 c=buffer[0]; |
|
1989 i=1; |
|
1990 if(!U16_IS_LEAD(c)) { |
|
1991 /* consume c=buffer[0], done */ |
|
1992 } else { |
|
1993 /* got a lead surrogate, see if a trail surrogate follows */ |
|
1994 UChar c2; |
|
1995 |
|
1996 if(cnv->UCharErrorBufferLength>0) { |
|
1997 /* got overflow output from the conversion */ |
|
1998 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { |
|
1999 /* got a trail surrogate, too */ |
|
2000 c=U16_GET_SUPPLEMENTARY(c, c2); |
|
2001 |
|
2002 /* move the remaining overflow contents up to the beginning */ |
|
2003 if((--cnv->UCharErrorBufferLength)>0) { |
|
2004 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, |
|
2005 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); |
|
2006 } |
|
2007 } else { |
|
2008 /* c is an unpaired lead surrogate, just return it */ |
|
2009 } |
|
2010 } else if(args.source<sourceLimit) { |
|
2011 /* convert once more, to buffer[1] */ |
|
2012 args.targetLimit=buffer+2; |
|
2013 _toUnicodeWithCallback(&args, err); |
|
2014 if(*err==U_BUFFER_OVERFLOW_ERROR) { |
|
2015 *err=U_ZERO_ERROR; |
|
2016 } |
|
2017 |
|
2018 length=(int32_t)(args.target-buffer); |
|
2019 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { |
|
2020 /* got a trail surrogate, too */ |
|
2021 c=U16_GET_SUPPLEMENTARY(c, c2); |
|
2022 i=2; |
|
2023 } |
|
2024 } |
|
2025 } |
|
2026 } |
|
2027 |
|
2028 /* |
|
2029 * move leftover output from buffer[i..length[ |
|
2030 * into the beginning of the overflow buffer |
|
2031 */ |
|
2032 if(i<length) { |
|
2033 /* move further overflow back */ |
|
2034 int32_t delta=length-i; |
|
2035 if((length=cnv->UCharErrorBufferLength)>0) { |
|
2036 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, |
|
2037 length*U_SIZEOF_UCHAR); |
|
2038 } |
|
2039 cnv->UCharErrorBufferLength=(int8_t)(length+delta); |
|
2040 |
|
2041 cnv->UCharErrorBuffer[0]=buffer[i++]; |
|
2042 if(delta>1) { |
|
2043 cnv->UCharErrorBuffer[1]=buffer[i]; |
|
2044 } |
|
2045 } |
|
2046 |
|
2047 *source=args.source; |
|
2048 return c; |
|
2049 } |
|
2050 |
|
2051 /* ucnv_convert() and siblings ---------------------------------------------- */ |
|
2052 |
|
/*
 * ucnv_convertEx(): convert bytes from sourceCnv's charset into targetCnv's
 * charset, either through a UTF-16 pivot buffer or, when one side is UTF-8
 * and the other implementation supplies fromUTF8/toUTF8, through that direct
 * conversion function (with the pivot kept for error and replay handling).
 *
 * Argument handling as implemented below:
 * - Returns immediately if pErrorCode is NULL or already a failure.
 * - Sets U_ILLEGAL_ARGUMENT_ERROR for NULL converters, NULL source/target
 *   (or what they point to), NULL targetLimit, limits that lie before the
 *   current pointers, buffers longer than 0x7fffffff, or an inconsistent
 *   caller-supplied pivot description.
 * - pivotStart==NULL selects the function's own stack pivot buffer; this is
 *   accepted only together with flush==TRUE.
 * - sourceLimit==NULL means the source is a single-byte-NUL-terminated string.
 * - reset==TRUE resets both converters and empties the pivot; otherwise any
 *   pending targetCnv overflow output is written to the target first.
 *
 * On return *source and *target are advanced to where conversion stopped.
 * If flush is set and no failure occurred, the target is NUL-terminated when
 * there is room; otherwise U_STRING_NOT_TERMINATED_WARNING is set.
 */
2053 U_CAPI void U_EXPORT2 |

2054 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, |

2055 char **target, const char *targetLimit, |

2056 const char **source, const char *sourceLimit, |

2057 UChar *pivotStart, UChar **pivotSource, |

2058 UChar **pivotTarget, const UChar *pivotLimit, |

2059 UBool reset, UBool flush, |

2060 UErrorCode *pErrorCode) { |

2061 UChar pivotBuffer[CHUNK_SIZE]; |

2062 const UChar *myPivotSource; |

2063 UChar *myPivotTarget; |

2064 const char *s; |

2065 char *t; |

2066 |

2067 UConverterToUnicodeArgs toUArgs; |

2068 UConverterFromUnicodeArgs fromUArgs; |

2069 UConverterConvert convert; |

2070 |

2071 /* error checking */ |

2072 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |

2073 return; |

2074 } |

2075 |

2076 if( targetCnv==NULL || sourceCnv==NULL || |

2077 source==NULL || *source==NULL || |

2078 target==NULL || *target==NULL || targetLimit==NULL |

2079 ) { |

2080 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |

2081 return; |

2082 } |

2083 |

2084 s=*source; |

2085 t=*target; |

2086 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { |

2087 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |

2088 return; |

2089 } |

2090 |

2091 /* |

2092 * Make sure that the buffer sizes do not exceed the number range for |

2093 * int32_t. See ucnv_toUnicode() for a more detailed comment. |

2094 */ |

2095 if( |

2096 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || |

2097 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) |

2098 ) { |

2099 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |

2100 return; |

2101 } |

2102 |

2103 if(pivotStart==NULL) { |

2104 if(!flush) { |

2105 /* streaming conversion requires an explicit pivot buffer */ |

2106 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |

2107 return; |

2108 } |

2109 |

2110 /* use the stack pivot buffer */ |

2111 myPivotSource=myPivotTarget=pivotStart=pivotBuffer; |

2112 pivotSource=(UChar **)&myPivotSource; |

2113 pivotTarget=&myPivotTarget; |

2114 pivotLimit=pivotBuffer+CHUNK_SIZE; |

2115 } else if( pivotStart>=pivotLimit || |

2116 pivotSource==NULL || *pivotSource==NULL || |

2117 pivotTarget==NULL || *pivotTarget==NULL || |

2118 pivotLimit==NULL |

2119 ) { |

2120 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |

2121 return; |

2122 } |

2123 |

2124 if(sourceLimit==NULL) { |

2125 /* get limit of single-byte-NUL-terminated source string */ |

2126 sourceLimit=uprv_strchr(*source, 0); |

2127 } |

2128 |

2129 if(reset) { |

2130 ucnv_resetToUnicode(sourceCnv); |

2131 ucnv_resetFromUnicode(targetCnv); |

2132 *pivotSource=*pivotTarget=pivotStart; |

2133 } else if(targetCnv->charErrorBufferLength>0) { |

2134 /* output the targetCnv overflow buffer */ |

2135 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { |

2136 /* U_BUFFER_OVERFLOW_ERROR */ |

2137 return; |

2138 } |

2139 /* *target has moved, therefore stop using t */ |

2140 |

2141 if( !flush && |

2142 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && |

2143 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit |

2144 ) { |

2145 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ |

2146 return; |

2147 } |

2148 } |

2149 |

2150 /* Is direct-UTF-8 conversion available? */ |

2151 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && |

2152 targetCnv->sharedData->impl->fromUTF8!=NULL |

2153 ) { |

2154 convert=targetCnv->sharedData->impl->fromUTF8; |

2155 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && |

2156 sourceCnv->sharedData->impl->toUTF8!=NULL |

2157 ) { |

2158 convert=sourceCnv->sharedData->impl->toUTF8; |

2159 } else { |

2160 convert=NULL; |

2161 } |

2162 |

2163 /* |

2164 * If direct-UTF-8 conversion is available, then we use a smaller |

2165 * pivot buffer for error handling and partial matches |

2166 * so that we quickly return to direct conversion. |

2167 * |

2168 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. |

2169 * |

2170 * We could reduce the pivot buffer size further, at the cost of |

2171 * buffer overflows from callbacks. |

2172 * The pivot buffer should not be smaller than the maximum number of |

2173 * fromUnicode extension table input UChars |

2174 * (for m:n conversion, see |

2175 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) |

2176 * or 2 for surrogate pairs. |

2177 * |

2178 * Too small a buffer can cause thrashing between pivoting and direct |

2179 * conversion, with function call overhead outweighing the benefits |

2180 * of direct conversion. |

2181 */ |

2182 if(convert!=NULL && (pivotLimit-pivotStart)>32) { |

2183 pivotLimit=pivotStart+32; |

2184 } |

2185 |

2186 /* prepare the converter arguments */ |

2187 fromUArgs.converter=targetCnv; |

2188 fromUArgs.flush=FALSE; |

2189 fromUArgs.offsets=NULL; |

2190 fromUArgs.target=*target; |

2191 fromUArgs.targetLimit=targetLimit; |

2192 fromUArgs.size=sizeof(fromUArgs); |

2193 |

2194 toUArgs.converter=sourceCnv; |

2195 toUArgs.flush=flush; |

2196 toUArgs.offsets=NULL; |

2197 toUArgs.source=s; |

2198 toUArgs.sourceLimit=sourceLimit; |

2199 toUArgs.targetLimit=pivotLimit; |

2200 toUArgs.size=sizeof(toUArgs); |

2201 |

2202 /* |

2203 * TODO: Consider separating this function into two functions, |

2204 * extracting exactly the conversion loop, |

2205 * for readability and to reduce the set of visible variables. |

2206 * |

2207 * Otherwise stop using s and t from here on. |

2208 */ |

2209 s=t=NULL; |

2210 |

2211 /* |

2212 * conversion loop |

2213 * |

2214 * The sequence of steps in the loop may appear backward, |

2215 * but the principle is simple: |

2216 * In the chain of |

2217 * source - sourceCnv overflow - pivot - targetCnv overflow - target |

2218 * empty out later buffers before refilling them from earlier ones. |

2219 * |

2220 * The targetCnv overflow buffer is flushed out only once before the loop. |

2221 */ |

2222 for(;;) { |

2223 /* |

2224 * if(pivot not empty or error or replay or flush fromUnicode) { |

2225 * fromUnicode(pivot -> target); |

2226 * } |

2227 * |

2228 * For pivoting conversion; and for direct conversion for |

2229 * error callback handling and flushing the replay buffer. |

2230 */ |

2231 if( *pivotSource<*pivotTarget || |

2232 U_FAILURE(*pErrorCode) || |

2233 targetCnv->preFromULength<0 || |

2234 fromUArgs.flush |

2235 ) { |

2236 fromUArgs.source=*pivotSource; |

2237 fromUArgs.sourceLimit=*pivotTarget; |

2238 _fromUnicodeWithCallback(&fromUArgs, pErrorCode); |

2239 if(U_FAILURE(*pErrorCode)) { |

2240 /* target overflow, or conversion error */ |

2241 *pivotSource=(UChar *)fromUArgs.source; |

2242 break; |

2243 } |

2244 |

2245 /* |

2246 * _fromUnicodeWithCallback() must have consumed the pivot contents |

2247 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() |

2248 */ |

2249 } |

2250 |

2251 /* The pivot buffer is empty; reset it so we start at pivotStart. */ |

2252 *pivotSource=*pivotTarget=pivotStart; |

2253 |

2254 /* |

2255 * if(sourceCnv overflow buffer not empty) { |

2256 * move(sourceCnv overflow buffer -> pivot); |

2257 * continue; |

2258 * } |

2259 */ |

2260 /* output the sourceCnv overflow buffer */ |

2261 if(sourceCnv->UCharErrorBufferLength>0) { |

2262 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { |

2263 /* U_BUFFER_OVERFLOW_ERROR */ |

2264 *pErrorCode=U_ZERO_ERROR; |

2265 } |

2266 continue; |

2267 } |

2268 |

2269 /* |

2270 * check for end of input and break if done |

2271 * |

2272 * Checking both flush and fromUArgs.flush ensures that the converters |

2273 * have been called with the flush flag set if the ucnv_convertEx() |

2274 * caller set it. |

2275 */ |

2276 if( toUArgs.source==sourceLimit && |

2277 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && |

2278 (!flush || fromUArgs.flush) |

2279 ) { |

2280 /* done successfully */ |

2281 break; |

2282 } |

2283 |

2284 /* |

2285 * use direct conversion if available |

2286 * but not if continuing a partial match |

2287 * or flushing the toUnicode replay buffer |

2288 */ |

2289 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { |

2290 if(*pErrorCode==U_USING_DEFAULT_WARNING) { |

2291 /* remove a warning that may be set by this function */ |

2292 *pErrorCode=U_ZERO_ERROR; |

2293 } |

2294 convert(&fromUArgs, &toUArgs, pErrorCode); |

2295 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |

2296 break; |

2297 } else if(U_FAILURE(*pErrorCode)) { |

2298 if(sourceCnv->toULength>0) { |

2299 /* |

2300 * Fall through to calling _toUnicodeWithCallback() |

2301 * for callback handling. |

2302 * |

2303 * The pivot buffer will be reset with |

2304 * *pivotSource=*pivotTarget=pivotStart; |

2305 * which indicates a toUnicode error to the caller |

2306 * (*pivotSource==pivotStart shows no pivot UChars consumed). |

2307 */ |

2308 } else { |

2309 /* |

2310 * Indicate a fromUnicode error to the caller |

2311 * (*pivotSource>pivotStart shows some pivot UChars consumed). |

2312 */ |

2313 *pivotSource=*pivotTarget=pivotStart+1; |

2314 /* |

2315 * Loop around to calling _fromUnicodeWithCallbacks() |

2316 * for callback handling. |

2317 */ |

2318 continue; |

2319 } |

2320 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { |

2321 /* |

2322 * No error, but the implementation requested to temporarily |

2323 * fall back to pivoting. |

2324 */ |

2325 *pErrorCode=U_ZERO_ERROR; |

2326 /* |

2327 * The following else branches are almost identical to the end-of-input |

2328 * handling in _toUnicodeWithCallback(). |

2329 * Avoid calling it just for the end of input. |

2330 */ |

2331 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ |

2332 /* |

2333 * the entire input stream is consumed |

2334 * and there is a partial, truncated input sequence left |

2335 */ |

2336 |

2337 /* inject an error and continue with callback handling */ |

2338 *pErrorCode=U_TRUNCATED_CHAR_FOUND; |

2339 } else { |

2340 /* input consumed */ |

2341 if(flush) { |

2342 /* reset the converters without calling the callback functions */ |

2343 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); |

2344 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); |

2345 } |

2346 |

2347 /* done successfully */ |

2348 break; |

2349 } |

2350 } |

2351 |

2352 /* |

2353 * toUnicode(source -> pivot); |

2354 * |

2355 * For pivoting conversion; and for direct conversion for |

2356 * error callback handling, continuing partial matches |

2357 * and flushing the replay buffer. |

2358 * |

2359 * The pivot buffer is empty and reset. |

2360 */ |

2361 toUArgs.target=pivotStart; /* ==*pivotTarget */ |

2362 /* toUArgs.targetLimit=pivotLimit; already set before the loop */ |

2363 _toUnicodeWithCallback(&toUArgs, pErrorCode); |

2364 *pivotTarget=toUArgs.target; |

2365 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |

2366 /* pivot overflow: continue with the conversion loop */ |

2367 *pErrorCode=U_ZERO_ERROR; |

2368 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { |

2369 /* conversion error, or there was nothing left to convert */ |

2370 break; |

2371 } |

2372 /* |

2373 * else: |

2374 * _toUnicodeWithCallback() wrote into the pivot buffer, |

2375 * continue with fromUnicode conversion. |

2376 * |

2377 * Set the fromUnicode flush flag if we flush and if toUnicode has |

2378 * processed the end of the input. |

2379 */ |

2380 if( flush && toUArgs.source==sourceLimit && |

2381 sourceCnv->preToULength>=0 && |

2382 sourceCnv->UCharErrorBufferLength==0 |

2383 ) { |

2384 fromUArgs.flush=TRUE; |

2385 } |

2386 } |

2387 |

2388 /* |

2389 * The conversion loop is exited when one of the following is true: |

2390 * - the entire source text has been converted successfully to the target buffer |

2391 * - a target buffer overflow occurred |

2392 * - a conversion error occurred |

2393 */ |

2394 |

2395 *source=toUArgs.source; |

2396 *target=fromUArgs.target; |

2397 |

2398 /* terminate the target buffer if possible */ |

2399 if(flush && U_SUCCESS(*pErrorCode)) { |

2400 if(*target!=targetLimit) { |

2401 **target=0; |

2402 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { |

2403 *pErrorCode=U_ZERO_ERROR; |

2404 } |

2405 } else { |

2406 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; |

2407 } |

2408 } |

2409 } |
|
2410 |
|
2411 /* internal implementation of ucnv_convert() etc. with preflighting */ |
|
2412 static int32_t |
|
2413 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, |
|
2414 char *target, int32_t targetCapacity, |
|
2415 const char *source, int32_t sourceLength, |
|
2416 UErrorCode *pErrorCode) { |
|
2417 UChar pivotBuffer[CHUNK_SIZE]; |
|
2418 UChar *pivot, *pivot2; |
|
2419 |
|
2420 char *myTarget; |
|
2421 const char *sourceLimit; |
|
2422 const char *targetLimit; |
|
2423 int32_t targetLength=0; |
|
2424 |
|
2425 /* set up */ |
|
2426 if(sourceLength<0) { |
|
2427 sourceLimit=uprv_strchr(source, 0); |
|
2428 } else { |
|
2429 sourceLimit=source+sourceLength; |
|
2430 } |
|
2431 |
|
2432 /* if there is no input data, we're done */ |
|
2433 if(source==sourceLimit) { |
|
2434 return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
|
2435 } |
|
2436 |
|
2437 pivot=pivot2=pivotBuffer; |
|
2438 myTarget=target; |
|
2439 targetLength=0; |
|
2440 |
|
2441 if(targetCapacity>0) { |
|
2442 /* perform real conversion */ |
|
2443 targetLimit=target+targetCapacity; |
|
2444 ucnv_convertEx(outConverter, inConverter, |
|
2445 &myTarget, targetLimit, |
|
2446 &source, sourceLimit, |
|
2447 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, |
|
2448 FALSE, |
|
2449 TRUE, |
|
2450 pErrorCode); |
|
2451 targetLength=(int32_t)(myTarget-target); |
|
2452 } |
|
2453 |
|
2454 /* |
|
2455 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing |
|
2456 * to it but continue the conversion in order to store in targetCapacity |
|
2457 * the number of bytes that was required. |
|
2458 */ |
|
2459 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) |
|
2460 { |
|
2461 char targetBuffer[CHUNK_SIZE]; |
|
2462 |
|
2463 targetLimit=targetBuffer+CHUNK_SIZE; |
|
2464 do { |
|
2465 *pErrorCode=U_ZERO_ERROR; |
|
2466 myTarget=targetBuffer; |
|
2467 ucnv_convertEx(outConverter, inConverter, |
|
2468 &myTarget, targetLimit, |
|
2469 &source, sourceLimit, |
|
2470 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, |
|
2471 FALSE, |
|
2472 TRUE, |
|
2473 pErrorCode); |
|
2474 targetLength+=(int32_t)(myTarget-targetBuffer); |
|
2475 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); |
|
2476 |
|
2477 /* done with preflighting, set warnings and errors as appropriate */ |
|
2478 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); |
|
2479 } |
|
2480 |
|
2481 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ |
|
2482 return targetLength; |
|
2483 } |
|
2484 |
|
2485 U_CAPI int32_t U_EXPORT2 |
|
2486 ucnv_convert(const char *toConverterName, const char *fromConverterName, |
|
2487 char *target, int32_t targetCapacity, |
|
2488 const char *source, int32_t sourceLength, |
|
2489 UErrorCode *pErrorCode) { |
|
2490 UConverter in, out; /* stack-allocated */ |
|
2491 UConverter *inConverter, *outConverter; |
|
2492 int32_t targetLength; |
|
2493 |
|
2494 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
2495 return 0; |
|
2496 } |
|
2497 |
|
2498 if( source==NULL || sourceLength<-1 || |
|
2499 targetCapacity<0 || (targetCapacity>0 && target==NULL) |
|
2500 ) { |
|
2501 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
2502 return 0; |
|
2503 } |
|
2504 |
|
2505 /* if there is no input data, we're done */ |
|
2506 if(sourceLength==0 || (sourceLength<0 && *source==0)) { |
|
2507 return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
|
2508 } |
|
2509 |
|
2510 /* create the converters */ |
|
2511 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); |
|
2512 if(U_FAILURE(*pErrorCode)) { |
|
2513 return 0; |
|
2514 } |
|
2515 |
|
2516 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); |
|
2517 if(U_FAILURE(*pErrorCode)) { |
|
2518 ucnv_close(inConverter); |
|
2519 return 0; |
|
2520 } |
|
2521 |
|
2522 targetLength=ucnv_internalConvert(outConverter, inConverter, |
|
2523 target, targetCapacity, |
|
2524 source, sourceLength, |
|
2525 pErrorCode); |
|
2526 |
|
2527 ucnv_close(inConverter); |
|
2528 ucnv_close(outConverter); |
|
2529 |
|
2530 return targetLength; |
|
2531 } |
|
2532 |
|
2533 /* @internal */ |
|
2534 static int32_t |
|
2535 ucnv_convertAlgorithmic(UBool convertToAlgorithmic, |
|
2536 UConverterType algorithmicType, |
|
2537 UConverter *cnv, |
|
2538 char *target, int32_t targetCapacity, |
|
2539 const char *source, int32_t sourceLength, |
|
2540 UErrorCode *pErrorCode) { |
|
2541 UConverter algoConverterStatic; /* stack-allocated */ |
|
2542 UConverter *algoConverter, *to, *from; |
|
2543 int32_t targetLength; |
|
2544 |
|
2545 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
2546 return 0; |
|
2547 } |
|
2548 |
|
2549 if( cnv==NULL || source==NULL || sourceLength<-1 || |
|
2550 targetCapacity<0 || (targetCapacity>0 && target==NULL) |
|
2551 ) { |
|
2552 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
2553 return 0; |
|
2554 } |
|
2555 |
|
2556 /* if there is no input data, we're done */ |
|
2557 if(sourceLength==0 || (sourceLength<0 && *source==0)) { |
|
2558 return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
|
2559 } |
|
2560 |
|
2561 /* create the algorithmic converter */ |
|
2562 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, |
|
2563 "", 0, pErrorCode); |
|
2564 if(U_FAILURE(*pErrorCode)) { |
|
2565 return 0; |
|
2566 } |
|
2567 |
|
2568 /* reset the other converter */ |
|
2569 if(convertToAlgorithmic) { |
|
2570 /* cnv->Unicode->algo */ |
|
2571 ucnv_resetToUnicode(cnv); |
|
2572 to=algoConverter; |
|
2573 from=cnv; |
|
2574 } else { |
|
2575 /* algo->Unicode->cnv */ |
|
2576 ucnv_resetFromUnicode(cnv); |
|
2577 from=algoConverter; |
|
2578 to=cnv; |
|
2579 } |
|
2580 |
|
2581 targetLength=ucnv_internalConvert(to, from, |
|
2582 target, targetCapacity, |
|
2583 source, sourceLength, |
|
2584 pErrorCode); |
|
2585 |
|
2586 ucnv_close(algoConverter); |
|
2587 |
|
2588 return targetLength; |
|
2589 } |
|
2590 |
|
2591 U_CAPI int32_t U_EXPORT2 |
|
2592 ucnv_toAlgorithmic(UConverterType algorithmicType, |
|
2593 UConverter *cnv, |
|
2594 char *target, int32_t targetCapacity, |
|
2595 const char *source, int32_t sourceLength, |
|
2596 UErrorCode *pErrorCode) { |
|
2597 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, |
|
2598 target, targetCapacity, |
|
2599 source, sourceLength, |
|
2600 pErrorCode); |
|
2601 } |
|
2602 |
|
2603 U_CAPI int32_t U_EXPORT2 |
|
2604 ucnv_fromAlgorithmic(UConverter *cnv, |
|
2605 UConverterType algorithmicType, |
|
2606 char *target, int32_t targetCapacity, |
|
2607 const char *source, int32_t sourceLength, |
|
2608 UErrorCode *pErrorCode) { |
|
2609 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, |
|
2610 target, targetCapacity, |
|
2611 source, sourceLength, |
|
2612 pErrorCode); |
|
2613 } |
|
2614 |
|
2615 U_CAPI UConverterType U_EXPORT2 |
|
2616 ucnv_getType(const UConverter* converter) |
|
2617 { |
|
2618 int8_t type = converter->sharedData->staticData->conversionType; |
|
2619 #if !UCONFIG_NO_LEGACY_CONVERSION |
|
2620 if(type == UCNV_MBCS) { |
|
2621 return ucnv_MBCSGetType(converter); |
|
2622 } |
|
2623 #endif |
|
2624 return (UConverterType)type; |
|
2625 } |
|
2626 |
|
2627 U_CAPI void U_EXPORT2 |
|
2628 ucnv_getStarters(const UConverter* converter, |
|
2629 UBool starters[256], |
|
2630 UErrorCode* err) |
|
2631 { |
|
2632 if (err == NULL || U_FAILURE(*err)) { |
|
2633 return; |
|
2634 } |
|
2635 |
|
2636 if(converter->sharedData->impl->getStarters != NULL) { |
|
2637 converter->sharedData->impl->getStarters(converter, starters, err); |
|
2638 } else { |
|
2639 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
2640 } |
|
2641 } |
|
2642 |
|
2643 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) |
|
2644 { |
|
2645 UErrorCode errorCode; |
|
2646 const char *name; |
|
2647 int32_t i; |
|
2648 |
|
2649 if(cnv==NULL) { |
|
2650 return NULL; |
|
2651 } |
|
2652 |
|
2653 errorCode=U_ZERO_ERROR; |
|
2654 name=ucnv_getName(cnv, &errorCode); |
|
2655 if(U_FAILURE(errorCode)) { |
|
2656 return NULL; |
|
2657 } |
|
2658 |
|
2659 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i) |
|
2660 { |
|
2661 if(0==uprv_strcmp(name, ambiguousConverters[i].name)) |
|
2662 { |
|
2663 return ambiguousConverters+i; |
|
2664 } |
|
2665 } |
|
2666 |
|
2667 return NULL; |
|
2668 } |
|
2669 |
|
2670 U_CAPI void U_EXPORT2 |
|
2671 ucnv_fixFileSeparator(const UConverter *cnv, |
|
2672 UChar* source, |
|
2673 int32_t sourceLength) { |
|
2674 const UAmbiguousConverter *a; |
|
2675 int32_t i; |
|
2676 UChar variant5c; |
|
2677 |
|
2678 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) |
|
2679 { |
|
2680 return; |
|
2681 } |
|
2682 |
|
2683 variant5c=a->variant5c; |
|
2684 for(i=0; i<sourceLength; ++i) { |
|
2685 if(source[i]==variant5c) { |
|
2686 source[i]=0x5c; |
|
2687 } |
|
2688 } |
|
2689 } |
|
2690 |
|
2691 U_CAPI UBool U_EXPORT2 |
|
2692 ucnv_isAmbiguous(const UConverter *cnv) { |
|
2693 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); |
|
2694 } |
|
2695 |
|
2696 U_CAPI void U_EXPORT2 |
|
2697 ucnv_setFallback(UConverter *cnv, UBool usesFallback) |
|
2698 { |
|
2699 cnv->useFallback = usesFallback; |
|
2700 } |
|
2701 |
|
2702 U_CAPI UBool U_EXPORT2 |
|
2703 ucnv_usesFallback(const UConverter *cnv) |
|
2704 { |
|
2705 return cnv->useFallback; |
|
2706 } |
|
2707 |
|
2708 U_CAPI void U_EXPORT2 |
|
2709 ucnv_getInvalidChars (const UConverter * converter, |
|
2710 char *errBytes, |
|
2711 int8_t * len, |
|
2712 UErrorCode * err) |
|
2713 { |
|
2714 if (err == NULL || U_FAILURE(*err)) |
|
2715 { |
|
2716 return; |
|
2717 } |
|
2718 if (len == NULL || errBytes == NULL || converter == NULL) |
|
2719 { |
|
2720 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
2721 return; |
|
2722 } |
|
2723 if (*len < converter->invalidCharLength) |
|
2724 { |
|
2725 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
|
2726 return; |
|
2727 } |
|
2728 if ((*len = converter->invalidCharLength) > 0) |
|
2729 { |
|
2730 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); |
|
2731 } |
|
2732 } |
|
2733 |
|
2734 U_CAPI void U_EXPORT2 |
|
2735 ucnv_getInvalidUChars (const UConverter * converter, |
|
2736 UChar *errChars, |
|
2737 int8_t * len, |
|
2738 UErrorCode * err) |
|
2739 { |
|
2740 if (err == NULL || U_FAILURE(*err)) |
|
2741 { |
|
2742 return; |
|
2743 } |
|
2744 if (len == NULL || errChars == NULL || converter == NULL) |
|
2745 { |
|
2746 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
2747 return; |
|
2748 } |
|
2749 if (*len < converter->invalidUCharLength) |
|
2750 { |
|
2751 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
|
2752 return; |
|
2753 } |
|
2754 if ((*len = converter->invalidUCharLength) > 0) |
|
2755 { |
|
2756 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); |
|
2757 } |
|
2758 } |
|
2759 |
|
2760 #define SIG_MAX_LEN 5 |
|
2761 |
|
2762 U_CAPI const char* U_EXPORT2 |
|
2763 ucnv_detectUnicodeSignature( const char* source, |
|
2764 int32_t sourceLength, |
|
2765 int32_t* signatureLength, |
|
2766 UErrorCode* pErrorCode) { |
|
2767 int32_t dummy; |
|
2768 |
|
2769 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN |
|
2770 * bytes we don't misdetect something |
|
2771 */ |
|
2772 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; |
|
2773 int i = 0; |
|
2774 |
|
2775 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ |
|
2776 return NULL; |
|
2777 } |
|
2778 |
|
2779 if(source == NULL || sourceLength < -1){ |
|
2780 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
2781 return NULL; |
|
2782 } |
|
2783 |
|
2784 if(signatureLength == NULL) { |
|
2785 signatureLength = &dummy; |
|
2786 } |
|
2787 |
|
2788 if(sourceLength==-1){ |
|
2789 sourceLength=(int32_t)uprv_strlen(source); |
|
2790 } |
|
2791 |
|
2792 |
|
2793 while(i<sourceLength&& i<SIG_MAX_LEN){ |
|
2794 start[i]=source[i]; |
|
2795 i++; |
|
2796 } |
|
2797 |
|
2798 if(start[0] == '\xFE' && start[1] == '\xFF') { |
|
2799 *signatureLength=2; |
|
2800 return "UTF-16BE"; |
|
2801 } else if(start[0] == '\xFF' && start[1] == '\xFE') { |
|
2802 if(start[2] == '\x00' && start[3] =='\x00') { |
|
2803 *signatureLength=4; |
|
2804 return "UTF-32LE"; |
|
2805 } else { |
|
2806 *signatureLength=2; |
|
2807 return "UTF-16LE"; |
|
2808 } |
|
2809 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { |
|
2810 *signatureLength=3; |
|
2811 return "UTF-8"; |
|
2812 } else if(start[0] == '\x00' && start[1] == '\x00' && |
|
2813 start[2] == '\xFE' && start[3]=='\xFF') { |
|
2814 *signatureLength=4; |
|
2815 return "UTF-32BE"; |
|
2816 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { |
|
2817 *signatureLength=3; |
|
2818 return "SCSU"; |
|
2819 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { |
|
2820 *signatureLength=3; |
|
2821 return "BOCU-1"; |
|
2822 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { |
|
2823 /* |
|
2824 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ |
|
2825 * depending on the second UTF-16 code unit. |
|
2826 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF |
|
2827 * if it occurs. |
|
2828 * |
|
2829 * So far we have +/v |
|
2830 */ |
|
2831 if(start[3] == '\x38' && start[4] == '\x2D') { |
|
2832 /* 5 bytes +/v8- */ |
|
2833 *signatureLength=5; |
|
2834 return "UTF-7"; |
|
2835 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { |
|
2836 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ |
|
2837 *signatureLength=4; |
|
2838 return "UTF-7"; |
|
2839 } |
|
2840 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ |
|
2841 *signatureLength=4; |
|
2842 return "UTF-EBCDIC"; |
|
2843 } |
|
2844 |
|
2845 |
|
2846 /* no known Unicode signature byte sequence recognized */ |
|
2847 *signatureLength=0; |
|
2848 return NULL; |
|
2849 } |
|
2850 |
|
2851 U_CAPI int32_t U_EXPORT2 |
|
2852 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) |
|
2853 { |
|
2854 if(status == NULL || U_FAILURE(*status)){ |
|
2855 return -1; |
|
2856 } |
|
2857 if(cnv == NULL){ |
|
2858 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
2859 return -1; |
|
2860 } |
|
2861 |
|
2862 if(cnv->preFromUFirstCP >= 0){ |
|
2863 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; |
|
2864 }else if(cnv->preFromULength < 0){ |
|
2865 return -cnv->preFromULength ; |
|
2866 }else if(cnv->fromUChar32 > 0){ |
|
2867 return 1; |
|
2868 } |
|
2869 return 0; |
|
2870 |
|
2871 } |
|
2872 |
|
2873 U_CAPI int32_t U_EXPORT2 |
|
2874 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ |
|
2875 |
|
2876 if(status == NULL || U_FAILURE(*status)){ |
|
2877 return -1; |
|
2878 } |
|
2879 if(cnv == NULL){ |
|
2880 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
2881 return -1; |
|
2882 } |
|
2883 |
|
2884 if(cnv->preToULength > 0){ |
|
2885 return cnv->preToULength ; |
|
2886 }else if(cnv->preToULength < 0){ |
|
2887 return -cnv->preToULength; |
|
2888 }else if(cnv->toULength > 0){ |
|
2889 return cnv->toULength; |
|
2890 } |
|
2891 return 0; |
|
2892 } |
|
2893 |
|
2894 U_CAPI UBool U_EXPORT2 |
|
2895 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ |
|
2896 if (U_FAILURE(*status)) { |
|
2897 return FALSE; |
|
2898 } |
|
2899 |
|
2900 if (cnv == NULL) { |
|
2901 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
2902 return FALSE; |
|
2903 } |
|
2904 |
|
2905 switch (ucnv_getType(cnv)) { |
|
2906 case UCNV_SBCS: |
|
2907 case UCNV_DBCS: |
|
2908 case UCNV_UTF32_BigEndian: |
|
2909 case UCNV_UTF32_LittleEndian: |
|
2910 case UCNV_UTF32: |
|
2911 case UCNV_US_ASCII: |
|
2912 return TRUE; |
|
2913 default: |
|
2914 return FALSE; |
|
2915 } |
|
2916 } |
|
2917 #endif |
|
2918 |
|
2919 /* |
|
2920 * Hey, Emacs, please set the following: |
|
2921 * |
|
2922 * Local Variables: |
|
2923 * indent-tabs-mode: nil |
|
2924 * End: |
|
2925 * |
|
2926 */ |