1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2926 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 1998-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* 1.12 +* ucnv.c: 1.13 +* Implements APIs for the ICU's codeset conversion library; 1.14 +* mostly calls through internal functions; 1.15 +* created by Bertrand A. Damiba 1.16 +* 1.17 +* Modification History: 1.18 +* 1.19 +* Date Name Description 1.20 +* 04/04/99 helena Fixed internal header inclusion. 1.21 +* 05/09/00 helena Added implementation to handle fallback mappings. 1.22 +* 06/20/2000 helena OS/400 port changes; mostly typecast. 1.23 +*/ 1.24 + 1.25 +#include "unicode/utypes.h" 1.26 + 1.27 +#if !UCONFIG_NO_CONVERSION 1.28 + 1.29 +#include "unicode/ustring.h" 1.30 +#include "unicode/ucnv.h" 1.31 +#include "unicode/ucnv_err.h" 1.32 +#include "unicode/uset.h" 1.33 +#include "unicode/utf.h" 1.34 +#include "unicode/utf16.h" 1.35 +#include "putilimp.h" 1.36 +#include "cmemory.h" 1.37 +#include "cstring.h" 1.38 +#include "uassert.h" 1.39 +#include "utracimp.h" 1.40 +#include "ustr_imp.h" 1.41 +#include "ucnv_imp.h" 1.42 +#include "ucnv_cnv.h" 1.43 +#include "ucnv_bld.h" 1.44 + 1.45 +/* size of intermediate and preflighting buffers in ucnv_convert() */ 1.46 +#define CHUNK_SIZE 1024 1.47 + 1.48 +typedef struct UAmbiguousConverter { 1.49 + const char *name; 1.50 + const UChar variant5c; 1.51 +} UAmbiguousConverter; 1.52 + 1.53 +static const UAmbiguousConverter ambiguousConverters[]={ 1.54 + { "ibm-897_P100-1995", 0xa5 }, 1.55 + { "ibm-942_P120-1999", 0xa5 }, 1.56 + { "ibm-943_P130-1999", 0xa5 }, 1.57 + { "ibm-946_P100-1995", 0xa5 }, 1.58 + { "ibm-33722_P120-1999", 0xa5 }, 1.59 + { "ibm-1041_P100-1995", 0xa5 }, 1.60 + 
/*{ "ibm-54191_P100-2006", 0xa5 },*/ 1.61 + /*{ "ibm-62383_P100-2007", 0xa5 },*/ 1.62 + /*{ "ibm-891_P100-1995", 0x20a9 },*/ 1.63 + { "ibm-944_P100-1995", 0x20a9 }, 1.64 + { "ibm-949_P110-1999", 0x20a9 }, 1.65 + { "ibm-1363_P110-1997", 0x20a9 }, 1.66 + { "ISO_2022,locale=ko,version=0", 0x20a9 }, 1.67 + { "ibm-1088_P100-1995", 0x20a9 } 1.68 +}; 1.69 + 1.70 +/*Calls through createConverter */ 1.71 +U_CAPI UConverter* U_EXPORT2 1.72 +ucnv_open (const char *name, 1.73 + UErrorCode * err) 1.74 +{ 1.75 + UConverter *r; 1.76 + 1.77 + if (err == NULL || U_FAILURE (*err)) { 1.78 + return NULL; 1.79 + } 1.80 + 1.81 + r = ucnv_createConverter(NULL, name, err); 1.82 + return r; 1.83 +} 1.84 + 1.85 +U_CAPI UConverter* U_EXPORT2 1.86 +ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) 1.87 +{ 1.88 + return ucnv_createConverterFromPackage(packageName, converterName, err); 1.89 +} 1.90 + 1.91 +/*Extracts the UChar* to a char* and calls through createConverter */ 1.92 +U_CAPI UConverter* U_EXPORT2 1.93 +ucnv_openU (const UChar * name, 1.94 + UErrorCode * err) 1.95 +{ 1.96 + char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 1.97 + 1.98 + if (err == NULL || U_FAILURE(*err)) 1.99 + return NULL; 1.100 + if (name == NULL) 1.101 + return ucnv_open (NULL, err); 1.102 + if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) 1.103 + { 1.104 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.105 + return NULL; 1.106 + } 1.107 + return ucnv_open(u_austrcpy(asciiName, name), err); 1.108 +} 1.109 + 1.110 +/* Copy the string that is represented by the UConverterPlatform enum 1.111 + * @param platformString An output buffer 1.112 + * @param platform An enum representing a platform 1.113 + * @return the length of the copied string. 
1.114 + */ 1.115 +static int32_t 1.116 +ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) 1.117 +{ 1.118 + switch (pltfrm) 1.119 + { 1.120 + case UCNV_IBM: 1.121 + uprv_strcpy(platformString, "ibm-"); 1.122 + return 4; 1.123 + case UCNV_UNKNOWN: 1.124 + break; 1.125 + } 1.126 + 1.127 + /* default to empty string */ 1.128 + *platformString = 0; 1.129 + return 0; 1.130 +} 1.131 + 1.132 +/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls 1.133 + *through createConverter*/ 1.134 +U_CAPI UConverter* U_EXPORT2 1.135 +ucnv_openCCSID (int32_t codepage, 1.136 + UConverterPlatform platform, 1.137 + UErrorCode * err) 1.138 +{ 1.139 + char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 1.140 + int32_t myNameLen; 1.141 + 1.142 + if (err == NULL || U_FAILURE (*err)) 1.143 + return NULL; 1.144 + 1.145 + /* ucnv_copyPlatformString could return "ibm-" or "cp" */ 1.146 + myNameLen = ucnv_copyPlatformString(myName, platform); 1.147 + T_CString_integerToString(myName + myNameLen, codepage, 10); 1.148 + 1.149 + return ucnv_createConverter(NULL, myName, err); 1.150 +} 1.151 + 1.152 +/* Creating a temporary stack-based object that can be used in one thread, 1.153 +and created from a converter that is shared across threads. 
1.154 +*/ 1.155 + 1.156 +U_CAPI UConverter* U_EXPORT2 1.157 +ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) 1.158 +{ 1.159 + UConverter *localConverter, *allocatedConverter; 1.160 + int32_t stackBufferSize; 1.161 + int32_t bufferSizeNeeded; 1.162 + char *stackBufferChars = (char *)stackBuffer; 1.163 + UErrorCode cbErr; 1.164 + UConverterToUnicodeArgs toUArgs = { 1.165 + sizeof(UConverterToUnicodeArgs), 1.166 + TRUE, 1.167 + NULL, 1.168 + NULL, 1.169 + NULL, 1.170 + NULL, 1.171 + NULL, 1.172 + NULL 1.173 + }; 1.174 + UConverterFromUnicodeArgs fromUArgs = { 1.175 + sizeof(UConverterFromUnicodeArgs), 1.176 + TRUE, 1.177 + NULL, 1.178 + NULL, 1.179 + NULL, 1.180 + NULL, 1.181 + NULL, 1.182 + NULL 1.183 + }; 1.184 + 1.185 + UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); 1.186 + 1.187 + if (status == NULL || U_FAILURE(*status)){ 1.188 + UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); 1.189 + return NULL; 1.190 + } 1.191 + 1.192 + if (cnv == NULL) { 1.193 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.194 + UTRACE_EXIT_STATUS(*status); 1.195 + return NULL; 1.196 + } 1.197 + 1.198 + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", 1.199 + ucnv_getName(cnv, status), cnv, stackBuffer); 1.200 + 1.201 + if (cnv->sharedData->impl->safeClone != NULL) { 1.202 + /* call the custom safeClone function for sizing */ 1.203 + bufferSizeNeeded = 0; 1.204 + cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); 1.205 + if (U_FAILURE(*status)) { 1.206 + UTRACE_EXIT_STATUS(*status); 1.207 + return NULL; 1.208 + } 1.209 + } 1.210 + else 1.211 + { 1.212 + /* inherent sizing */ 1.213 + bufferSizeNeeded = sizeof(UConverter); 1.214 + } 1.215 + 1.216 + if (pBufferSize == NULL) { 1.217 + stackBufferSize = 1; 1.218 + pBufferSize = &stackBufferSize; 1.219 + } else { 1.220 + stackBufferSize = *pBufferSize; 1.221 + if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 
1.222 + *pBufferSize = bufferSizeNeeded; 1.223 + UTRACE_EXIT_VALUE(bufferSizeNeeded); 1.224 + return NULL; 1.225 + } 1.226 + } 1.227 + 1.228 + 1.229 + /* Pointers on 64-bit platforms need to be aligned 1.230 + * on a 64-bit boundary in memory. 1.231 + */ 1.232 + if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 1.233 + int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); 1.234 + if(stackBufferSize > offsetUp) { 1.235 + stackBufferSize -= offsetUp; 1.236 + stackBufferChars += offsetUp; 1.237 + } else { 1.238 + /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 1.239 + stackBufferSize = 1; 1.240 + } 1.241 + } 1.242 + 1.243 + stackBuffer = (void *)stackBufferChars; 1.244 + 1.245 + /* Now, see if we must allocate any memory */ 1.246 + if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) 1.247 + { 1.248 + /* allocate one here...*/ 1.249 + localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); 1.250 + 1.251 + if(localConverter == NULL) { 1.252 + *status = U_MEMORY_ALLOCATION_ERROR; 1.253 + UTRACE_EXIT_STATUS(*status); 1.254 + return NULL; 1.255 + } 1.256 + *status = U_SAFECLONE_ALLOCATED_WARNING; 1.257 + 1.258 + /* record the fact that memory was allocated */ 1.259 + *pBufferSize = bufferSizeNeeded; 1.260 + } else { 1.261 + /* just use the stack buffer */ 1.262 + localConverter = (UConverter*) stackBuffer; 1.263 + allocatedConverter = NULL; 1.264 + } 1.265 + 1.266 + uprv_memset(localConverter, 0, bufferSizeNeeded); 1.267 + 1.268 + /* Copy initial state */ 1.269 + uprv_memcpy(localConverter, cnv, sizeof(UConverter)); 1.270 + localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; 1.271 + 1.272 + /* copy the substitution string */ 1.273 + if (cnv->subChars == (uint8_t *)cnv->subUChars) { 1.274 + localConverter->subChars = (uint8_t *)localConverter->subUChars; 1.275 + } else { 1.276 + localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * 
U_SIZEOF_UCHAR); 1.277 + if (localConverter->subChars == NULL) { 1.278 + uprv_free(allocatedConverter); 1.279 + UTRACE_EXIT_STATUS(*status); 1.280 + return NULL; 1.281 + } 1.282 + uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 1.283 + } 1.284 + 1.285 + /* now either call the safeclone fcn or not */ 1.286 + if (cnv->sharedData->impl->safeClone != NULL) { 1.287 + /* call the custom safeClone function */ 1.288 + localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); 1.289 + } 1.290 + 1.291 + if(localConverter==NULL || U_FAILURE(*status)) { 1.292 + if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { 1.293 + uprv_free(allocatedConverter->subChars); 1.294 + } 1.295 + uprv_free(allocatedConverter); 1.296 + UTRACE_EXIT_STATUS(*status); 1.297 + return NULL; 1.298 + } 1.299 + 1.300 + /* increment refcount of shared data if needed */ 1.301 + /* 1.302 + Checking whether it's an algorithic converter is okay 1.303 + in multithreaded applications because the value never changes. 1.304 + Don't check referenceCounter for any other value. 
1.305 + */ 1.306 + if (cnv->sharedData->referenceCounter != ~0) { 1.307 + ucnv_incrementRefCount(cnv->sharedData); 1.308 + } 1.309 + 1.310 + if(localConverter == (UConverter*)stackBuffer) { 1.311 + /* we're using user provided data - set to not destroy */ 1.312 + localConverter->isCopyLocal = TRUE; 1.313 + } 1.314 + 1.315 + /* allow callback functions to handle any memory allocation */ 1.316 + toUArgs.converter = fromUArgs.converter = localConverter; 1.317 + cbErr = U_ZERO_ERROR; 1.318 + cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); 1.319 + cbErr = U_ZERO_ERROR; 1.320 + cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); 1.321 + 1.322 + UTRACE_EXIT_PTR_STATUS(localConverter, *status); 1.323 + return localConverter; 1.324 +} 1.325 + 1.326 + 1.327 + 1.328 +/*Decreases the reference counter in the shared immutable section of the object 1.329 + *and frees the mutable part*/ 1.330 + 1.331 +U_CAPI void U_EXPORT2 1.332 +ucnv_close (UConverter * converter) 1.333 +{ 1.334 + UErrorCode errorCode = U_ZERO_ERROR; 1.335 + 1.336 + UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); 1.337 + 1.338 + if (converter == NULL) 1.339 + { 1.340 + UTRACE_EXIT(); 1.341 + return; 1.342 + } 1.343 + 1.344 + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", 1.345 + ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); 1.346 + 1.347 + /* In order to speed up the close, only call the callbacks when they have been changed. 1.348 + This performance check will only work when the callbacks are set within a shared library 1.349 + or from user code that statically links this code. 
*/ 1.350 + /* first, notify the callback functions that the converter is closed */ 1.351 + if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 1.352 + UConverterToUnicodeArgs toUArgs = { 1.353 + sizeof(UConverterToUnicodeArgs), 1.354 + TRUE, 1.355 + NULL, 1.356 + NULL, 1.357 + NULL, 1.358 + NULL, 1.359 + NULL, 1.360 + NULL 1.361 + }; 1.362 + 1.363 + toUArgs.converter = converter; 1.364 + errorCode = U_ZERO_ERROR; 1.365 + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); 1.366 + } 1.367 + if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 1.368 + UConverterFromUnicodeArgs fromUArgs = { 1.369 + sizeof(UConverterFromUnicodeArgs), 1.370 + TRUE, 1.371 + NULL, 1.372 + NULL, 1.373 + NULL, 1.374 + NULL, 1.375 + NULL, 1.376 + NULL 1.377 + }; 1.378 + fromUArgs.converter = converter; 1.379 + errorCode = U_ZERO_ERROR; 1.380 + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); 1.381 + } 1.382 + 1.383 + if (converter->sharedData->impl->close != NULL) { 1.384 + converter->sharedData->impl->close(converter); 1.385 + } 1.386 + 1.387 + if (converter->subChars != (uint8_t *)converter->subUChars) { 1.388 + uprv_free(converter->subChars); 1.389 + } 1.390 + 1.391 + /* 1.392 + Checking whether it's an algorithic converter is okay 1.393 + in multithreaded applications because the value never changes. 1.394 + Don't check referenceCounter for any other value. 
1.395 + */ 1.396 + if (converter->sharedData->referenceCounter != ~0) { 1.397 + ucnv_unloadSharedDataIfReady(converter->sharedData); 1.398 + } 1.399 + 1.400 + if(!converter->isCopyLocal){ 1.401 + uprv_free(converter); 1.402 + } 1.403 + 1.404 + UTRACE_EXIT(); 1.405 +} 1.406 + 1.407 +/*returns a single Name from the list, will return NULL if out of bounds 1.408 + */ 1.409 +U_CAPI const char* U_EXPORT2 1.410 +ucnv_getAvailableName (int32_t n) 1.411 +{ 1.412 + if (0 <= n && n <= 0xffff) { 1.413 + UErrorCode err = U_ZERO_ERROR; 1.414 + const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); 1.415 + if (U_SUCCESS(err)) { 1.416 + return name; 1.417 + } 1.418 + } 1.419 + return NULL; 1.420 +} 1.421 + 1.422 +U_CAPI int32_t U_EXPORT2 1.423 +ucnv_countAvailable () 1.424 +{ 1.425 + UErrorCode err = U_ZERO_ERROR; 1.426 + return ucnv_bld_countAvailableConverters(&err); 1.427 +} 1.428 + 1.429 +U_CAPI void U_EXPORT2 1.430 +ucnv_getSubstChars (const UConverter * converter, 1.431 + char *mySubChar, 1.432 + int8_t * len, 1.433 + UErrorCode * err) 1.434 +{ 1.435 + if (U_FAILURE (*err)) 1.436 + return; 1.437 + 1.438 + if (converter->subCharLen <= 0) { 1.439 + /* Unicode string or empty string from ucnv_setSubstString(). 
*/ 1.440 + *len = 0; 1.441 + return; 1.442 + } 1.443 + 1.444 + if (*len < converter->subCharLen) /*not enough space in subChars */ 1.445 + { 1.446 + *err = U_INDEX_OUTOFBOUNDS_ERROR; 1.447 + return; 1.448 + } 1.449 + 1.450 + uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ 1.451 + *len = converter->subCharLen; /*store # of bytes copied to buffer */ 1.452 +} 1.453 + 1.454 +U_CAPI void U_EXPORT2 1.455 +ucnv_setSubstChars (UConverter * converter, 1.456 + const char *mySubChar, 1.457 + int8_t len, 1.458 + UErrorCode * err) 1.459 +{ 1.460 + if (U_FAILURE (*err)) 1.461 + return; 1.462 + 1.463 + /*Makes sure that the subChar is within the codepages char length boundaries */ 1.464 + if ((len > converter->sharedData->staticData->maxBytesPerChar) 1.465 + || (len < converter->sharedData->staticData->minBytesPerChar)) 1.466 + { 1.467 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.468 + return; 1.469 + } 1.470 + 1.471 + uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ 1.472 + converter->subCharLen = len; /*sets the new len */ 1.473 + 1.474 + /* 1.475 + * There is currently (2001Feb) no separate API to set/get subChar1. 1.476 + * In order to always have subChar written after it is explicitly set, 1.477 + * we set subChar1 to 0. 1.478 + */ 1.479 + converter->subChar1 = 0; 1.480 + 1.481 + return; 1.482 +} 1.483 + 1.484 +U_CAPI void U_EXPORT2 1.485 +ucnv_setSubstString(UConverter *cnv, 1.486 + const UChar *s, 1.487 + int32_t length, 1.488 + UErrorCode *err) { 1.489 + UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; 1.490 + char chars[UCNV_ERROR_BUFFER_LENGTH]; 1.491 + 1.492 + UConverter *clone; 1.493 + uint8_t *subChars; 1.494 + int32_t cloneSize, length8; 1.495 + 1.496 + /* Let the following functions check all arguments. 
*/ 1.497 + cloneSize = sizeof(cloneBuffer); 1.498 + clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); 1.499 + ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); 1.500 + length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); 1.501 + ucnv_close(clone); 1.502 + if (U_FAILURE(*err)) { 1.503 + return; 1.504 + } 1.505 + 1.506 + if (cnv->sharedData->impl->writeSub == NULL 1.507 +#if !UCONFIG_NO_LEGACY_CONVERSION 1.508 + || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && 1.509 + ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) 1.510 +#endif 1.511 + ) { 1.512 + /* The converter is not stateful. Store the charset bytes as a fixed string. */ 1.513 + subChars = (uint8_t *)chars; 1.514 + } else { 1.515 + /* 1.516 + * The converter has a non-default writeSub() function, indicating 1.517 + * that it is stateful. 1.518 + * Store the Unicode string for on-the-fly conversion for correct 1.519 + * state handling. 1.520 + */ 1.521 + if (length > UCNV_ERROR_BUFFER_LENGTH) { 1.522 + /* 1.523 + * Should not occur. The converter should output at least one byte 1.524 + * per UChar, which means that ucnv_fromUChars() should catch all 1.525 + * overflows. 1.526 + */ 1.527 + *err = U_BUFFER_OVERFLOW_ERROR; 1.528 + return; 1.529 + } 1.530 + subChars = (uint8_t *)s; 1.531 + if (length < 0) { 1.532 + length = u_strlen(s); 1.533 + } 1.534 + length8 = length * U_SIZEOF_UCHAR; 1.535 + } 1.536 + 1.537 + /* 1.538 + * For storing the substitution string, select either the small buffer inside 1.539 + * UConverter or allocate a subChars buffer. 1.540 + */ 1.541 + if (length8 > UCNV_MAX_SUBCHAR_LEN) { 1.542 + /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ 1.543 + if (cnv->subChars == (uint8_t *)cnv->subUChars) { 1.544 + /* Allocate a new buffer for the string. 
*/ 1.545 + cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 1.546 + if (cnv->subChars == NULL) { 1.547 + cnv->subChars = (uint8_t *)cnv->subUChars; 1.548 + *err = U_MEMORY_ALLOCATION_ERROR; 1.549 + return; 1.550 + } 1.551 + uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 1.552 + } 1.553 + } 1.554 + 1.555 + /* Copy the substitution string into the UConverter or its subChars buffer. */ 1.556 + if (length8 == 0) { 1.557 + cnv->subCharLen = 0; 1.558 + } else { 1.559 + uprv_memcpy(cnv->subChars, subChars, length8); 1.560 + if (subChars == (uint8_t *)chars) { 1.561 + cnv->subCharLen = (int8_t)length8; 1.562 + } else /* subChars == s */ { 1.563 + cnv->subCharLen = (int8_t)-length; 1.564 + } 1.565 + } 1.566 + 1.567 + /* See comment in ucnv_setSubstChars(). */ 1.568 + cnv->subChar1 = 0; 1.569 +} 1.570 + 1.571 +/*resets the internal states of a converter 1.572 + *goal : have the same behaviour than a freshly created converter 1.573 + */ 1.574 +static void _reset(UConverter *converter, UConverterResetChoice choice, 1.575 + UBool callCallback) { 1.576 + if(converter == NULL) { 1.577 + return; 1.578 + } 1.579 + 1.580 + if(callCallback) { 1.581 + /* first, notify the callback functions that the converter is reset */ 1.582 + UErrorCode errorCode; 1.583 + 1.584 + if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 1.585 + UConverterToUnicodeArgs toUArgs = { 1.586 + sizeof(UConverterToUnicodeArgs), 1.587 + TRUE, 1.588 + NULL, 1.589 + NULL, 1.590 + NULL, 1.591 + NULL, 1.592 + NULL, 1.593 + NULL 1.594 + }; 1.595 + toUArgs.converter = converter; 1.596 + errorCode = U_ZERO_ERROR; 1.597 + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); 1.598 + } 1.599 + if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 1.600 + UConverterFromUnicodeArgs fromUArgs = { 1.601 + 
sizeof(UConverterFromUnicodeArgs), 1.602 + TRUE, 1.603 + NULL, 1.604 + NULL, 1.605 + NULL, 1.606 + NULL, 1.607 + NULL, 1.608 + NULL 1.609 + }; 1.610 + fromUArgs.converter = converter; 1.611 + errorCode = U_ZERO_ERROR; 1.612 + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); 1.613 + } 1.614 + } 1.615 + 1.616 + /* now reset the converter itself */ 1.617 + if(choice<=UCNV_RESET_TO_UNICODE) { 1.618 + converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; 1.619 + converter->mode = 0; 1.620 + converter->toULength = 0; 1.621 + converter->invalidCharLength = converter->UCharErrorBufferLength = 0; 1.622 + converter->preToULength = 0; 1.623 + } 1.624 + if(choice!=UCNV_RESET_TO_UNICODE) { 1.625 + converter->fromUnicodeStatus = 0; 1.626 + converter->fromUChar32 = 0; 1.627 + converter->invalidUCharLength = converter->charErrorBufferLength = 0; 1.628 + converter->preFromUFirstCP = U_SENTINEL; 1.629 + converter->preFromULength = 0; 1.630 + } 1.631 + 1.632 + if (converter->sharedData->impl->reset != NULL) { 1.633 + /* call the custom reset function */ 1.634 + converter->sharedData->impl->reset(converter, choice); 1.635 + } 1.636 +} 1.637 + 1.638 +U_CAPI void U_EXPORT2 1.639 +ucnv_reset(UConverter *converter) 1.640 +{ 1.641 + _reset(converter, UCNV_RESET_BOTH, TRUE); 1.642 +} 1.643 + 1.644 +U_CAPI void U_EXPORT2 1.645 +ucnv_resetToUnicode(UConverter *converter) 1.646 +{ 1.647 + _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); 1.648 +} 1.649 + 1.650 +U_CAPI void U_EXPORT2 1.651 +ucnv_resetFromUnicode(UConverter *converter) 1.652 +{ 1.653 + _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); 1.654 +} 1.655 + 1.656 +U_CAPI int8_t U_EXPORT2 1.657 +ucnv_getMaxCharSize (const UConverter * converter) 1.658 +{ 1.659 + return converter->maxBytesPerUChar; 1.660 +} 1.661 + 1.662 + 1.663 +U_CAPI int8_t U_EXPORT2 1.664 +ucnv_getMinCharSize (const UConverter * converter) 1.665 +{ 1.666 + return 
converter->sharedData->staticData->minBytesPerChar; 1.667 +} 1.668 + 1.669 +U_CAPI const char* U_EXPORT2 1.670 +ucnv_getName (const UConverter * converter, UErrorCode * err) 1.671 + 1.672 +{ 1.673 + if (U_FAILURE (*err)) 1.674 + return NULL; 1.675 + if(converter->sharedData->impl->getName){ 1.676 + const char* temp= converter->sharedData->impl->getName(converter); 1.677 + if(temp) 1.678 + return temp; 1.679 + } 1.680 + return converter->sharedData->staticData->name; 1.681 +} 1.682 + 1.683 +U_CAPI int32_t U_EXPORT2 1.684 +ucnv_getCCSID(const UConverter * converter, 1.685 + UErrorCode * err) 1.686 +{ 1.687 + int32_t ccsid; 1.688 + if (U_FAILURE (*err)) 1.689 + return -1; 1.690 + 1.691 + ccsid = converter->sharedData->staticData->codepage; 1.692 + if (ccsid == 0) { 1.693 + /* Rare case. This is for cases like gb18030, 1.694 + which doesn't have an IBM canonical name, but does have an IBM alias. */ 1.695 + const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); 1.696 + if (U_SUCCESS(*err) && standardName) { 1.697 + const char *ccsidStr = uprv_strchr(standardName, '-'); 1.698 + if (ccsidStr) { 1.699 + ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ 1.700 + } 1.701 + } 1.702 + } 1.703 + return ccsid; 1.704 +} 1.705 + 1.706 + 1.707 +U_CAPI UConverterPlatform U_EXPORT2 1.708 +ucnv_getPlatform (const UConverter * converter, 1.709 + UErrorCode * err) 1.710 +{ 1.711 + if (U_FAILURE (*err)) 1.712 + return UCNV_UNKNOWN; 1.713 + 1.714 + return (UConverterPlatform)converter->sharedData->staticData->platform; 1.715 +} 1.716 + 1.717 +U_CAPI void U_EXPORT2 1.718 + ucnv_getToUCallBack (const UConverter * converter, 1.719 + UConverterToUCallback *action, 1.720 + const void **context) 1.721 +{ 1.722 + *action = converter->fromCharErrorBehaviour; 1.723 + *context = converter->toUContext; 1.724 +} 1.725 + 1.726 +U_CAPI void U_EXPORT2 1.727 + ucnv_getFromUCallBack (const UConverter * converter, 1.728 + UConverterFromUCallback *action, 1.729 + 
const void **context) 1.730 +{ 1.731 + *action = converter->fromUCharErrorBehaviour; 1.732 + *context = converter->fromUContext; 1.733 +} 1.734 + 1.735 +U_CAPI void U_EXPORT2 1.736 +ucnv_setToUCallBack (UConverter * converter, 1.737 + UConverterToUCallback newAction, 1.738 + const void* newContext, 1.739 + UConverterToUCallback *oldAction, 1.740 + const void** oldContext, 1.741 + UErrorCode * err) 1.742 +{ 1.743 + if (U_FAILURE (*err)) 1.744 + return; 1.745 + if (oldAction) *oldAction = converter->fromCharErrorBehaviour; 1.746 + converter->fromCharErrorBehaviour = newAction; 1.747 + if (oldContext) *oldContext = converter->toUContext; 1.748 + converter->toUContext = newContext; 1.749 +} 1.750 + 1.751 +U_CAPI void U_EXPORT2 1.752 +ucnv_setFromUCallBack (UConverter * converter, 1.753 + UConverterFromUCallback newAction, 1.754 + const void* newContext, 1.755 + UConverterFromUCallback *oldAction, 1.756 + const void** oldContext, 1.757 + UErrorCode * err) 1.758 +{ 1.759 + if (U_FAILURE (*err)) 1.760 + return; 1.761 + if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; 1.762 + converter->fromUCharErrorBehaviour = newAction; 1.763 + if (oldContext) *oldContext = converter->fromUContext; 1.764 + converter->fromUContext = newContext; 1.765 +} 1.766 + 1.767 +static void 1.768 +_updateOffsets(int32_t *offsets, int32_t length, 1.769 + int32_t sourceIndex, int32_t errorInputLength) { 1.770 + int32_t *limit; 1.771 + int32_t delta, offset; 1.772 + 1.773 + if(sourceIndex>=0) { 1.774 + /* 1.775 + * adjust each offset by adding the previous sourceIndex 1.776 + * minus the length of the input sequence that caused an 1.777 + * error, if any 1.778 + */ 1.779 + delta=sourceIndex-errorInputLength; 1.780 + } else { 1.781 + /* 1.782 + * set each offset to -1 because this conversion function 1.783 + * does not handle offsets 1.784 + */ 1.785 + delta=-1; 1.786 + } 1.787 + 1.788 + limit=offsets+length; 1.789 + if(delta==0) { 1.790 + /* most common case, nothing to do */ 1.791 + } 
else if(delta>0) { 1.792 + /* add the delta to each offset (but not if the offset is <0) */ 1.793 + while(offsets<limit) { 1.794 + offset=*offsets; 1.795 + if(offset>=0) { 1.796 + *offsets=offset+delta; 1.797 + } 1.798 + ++offsets; 1.799 + } 1.800 + } else /* delta<0 */ { 1.801 + /* 1.802 + * set each offset to -1 because this conversion function 1.803 + * does not handle offsets 1.804 + * or the error input sequence started in a previous buffer 1.805 + */ 1.806 + while(offsets<limit) { 1.807 + *offsets++=-1; 1.808 + } 1.809 + } 1.810 +} 1.811 + 1.812 +/* ucnv_fromUnicode --------------------------------------------------------- */ 1.813 + 1.814 +/* 1.815 + * Implementation note for m:n conversions 1.816 + * 1.817 + * While collecting source units to find the longest match for m:n conversion, 1.818 + * some source units may need to be stored for a partial match. 1.819 + * When a second buffer does not yield a match on all of the previously stored 1.820 + * source units, then they must be "replayed", i.e., fed back into the converter. 1.821 + * 1.822 + * The code relies on the fact that replaying will not nest - 1.823 + * converting a replay buffer will not result in a replay. 1.824 + * This is because a replay is necessary only after the _continuation_ of a 1.825 + * partial match failed, but a replay buffer is converted as a whole. 1.826 + * It may result in some of its units being stored again for a partial match, 1.827 + * but there will not be a continuation _during_ the replay which could fail. 1.828 + * 1.829 + * It is conceivable that a callback function could call the converter 1.830 + * recursively in a way that causes another replay to be stored, but that 1.831 + * would be an error in the callback function. 1.832 + * Such violations will cause assertion failures in a debug build, 1.833 + * and wrong output, but they will not cause a crash. 
 */

/*
 * Drives one from-Unicode conversion call: runs the converter
 * implementation function over pArgs, fixes up offsets, replays stored
 * m:n source units, and invokes the from-Unicode error callback for
 * invalid/illegal/truncated input until the input is consumed or an
 * unresolved error is left in *err.
 *
 * @param pArgs  conversion arguments; source/target/offsets are advanced,
 *               and source/sourceLimit/flush are temporarily redirected to a
 *               local replay buffer while replaying
 * @param err    in/out error code
 */
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterFromUnicode fromUnicode;
    UConverter *cnv;
    const UChar *s;
    char *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    UChar replay[UCNV_EXT_MAX_UCHARS];
    const UChar *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        fromUnicode=cnv->sharedData->impl->fromUnicode;
    } else {
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
        if(fromUnicode==NULL) {
            /* there is no WithOffsets implementation */
            fromUnicode=cnv->sharedData->impl->fromUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    /* a negative preFromULength means that -preFromULength units are waiting to be replayed */
    if(cnv->preFromULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preFromULength;
        pArgs->flush=FALSE;
        /* offsets written while replaying become -1 (see _updateOffsets) */
        sourceIndex=-1;

        cnv->preFromULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            fromUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->fromUChar32==0);
        } else {
            /* handle error from ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preFromULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preFromULength;
                    pArgs->flush=FALSE;
                    /* preFromULength is negative here, so this moves sourceIndex back */
                    if((sourceIndex+=cnv->preFromULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preFromULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preFromULength==0);

                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
                            /* stored negated, matching the preFromULength<0 replay checks above */
                            cnv->preFromULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* callback handling */
            {
                UChar32 codePoint;

                /* get and write the code point */
                codePoint=cnv->fromUChar32;
                errorInputLength=0;
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
                cnv->fromUChar32=0;

                /* call the callback function */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
                    err);
            }

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}

/*
 * Output the fromUnicode overflow buffer.
 * Call this function if(cnv->charErrorBufferLength>0).
1.1125 + * @return TRUE if overflow 1.1126 + */ 1.1127 +static UBool 1.1128 +ucnv_outputOverflowFromUnicode(UConverter *cnv, 1.1129 + char **target, const char *targetLimit, 1.1130 + int32_t **pOffsets, 1.1131 + UErrorCode *err) { 1.1132 + int32_t *offsets; 1.1133 + char *overflow, *t; 1.1134 + int32_t i, length; 1.1135 + 1.1136 + t=*target; 1.1137 + if(pOffsets!=NULL) { 1.1138 + offsets=*pOffsets; 1.1139 + } else { 1.1140 + offsets=NULL; 1.1141 + } 1.1142 + 1.1143 + overflow=(char *)cnv->charErrorBuffer; 1.1144 + length=cnv->charErrorBufferLength; 1.1145 + i=0; 1.1146 + while(i<length) { 1.1147 + if(t==targetLimit) { 1.1148 + /* the overflow buffer contains too much, keep the rest */ 1.1149 + int32_t j=0; 1.1150 + 1.1151 + do { 1.1152 + overflow[j++]=overflow[i++]; 1.1153 + } while(i<length); 1.1154 + 1.1155 + cnv->charErrorBufferLength=(int8_t)j; 1.1156 + *target=t; 1.1157 + if(offsets!=NULL) { 1.1158 + *pOffsets=offsets; 1.1159 + } 1.1160 + *err=U_BUFFER_OVERFLOW_ERROR; 1.1161 + return TRUE; 1.1162 + } 1.1163 + 1.1164 + /* copy the overflow contents to the target */ 1.1165 + *t++=overflow[i++]; 1.1166 + if(offsets!=NULL) { 1.1167 + *offsets++=-1; /* no source index available for old output */ 1.1168 + } 1.1169 + } 1.1170 + 1.1171 + /* the overflow buffer is completely copied to the target */ 1.1172 + cnv->charErrorBufferLength=0; 1.1173 + *target=t; 1.1174 + if(offsets!=NULL) { 1.1175 + *pOffsets=offsets; 1.1176 + } 1.1177 + return FALSE; 1.1178 +} 1.1179 + 1.1180 +U_CAPI void U_EXPORT2 1.1181 +ucnv_fromUnicode(UConverter *cnv, 1.1182 + char **target, const char *targetLimit, 1.1183 + const UChar **source, const UChar *sourceLimit, 1.1184 + int32_t *offsets, 1.1185 + UBool flush, 1.1186 + UErrorCode *err) { 1.1187 + UConverterFromUnicodeArgs args; 1.1188 + const UChar *s; 1.1189 + char *t; 1.1190 + 1.1191 + /* check parameters */ 1.1192 + if(err==NULL || U_FAILURE(*err)) { 1.1193 + return; 1.1194 + } 1.1195 + 1.1196 + if(cnv==NULL || target==NULL || 
source==NULL) { 1.1197 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1198 + return; 1.1199 + } 1.1200 + 1.1201 + s=*source; 1.1202 + t=*target; 1.1203 + 1.1204 + if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { 1.1205 + /* 1.1206 + Prevent code from going into an infinite loop in case we do hit this 1.1207 + limit. The limit pointer is expected to be on a UChar * boundary. 1.1208 + This also prevents the next argument check from failing. 1.1209 + */ 1.1210 + sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); 1.1211 + } 1.1212 + 1.1213 + /* 1.1214 + * All these conditions should never happen. 1.1215 + * 1.1216 + * 1) Make sure that the limits are >= to the address source or target 1.1217 + * 1.1218 + * 2) Make sure that the buffer sizes do not exceed the number range for 1.1219 + * int32_t because some functions use the size (in units or bytes) 1.1220 + * rather than comparing pointers, and because offsets are int32_t values. 1.1221 + * 1.1222 + * size_t is guaranteed to be unsigned and large enough for the job. 1.1223 + * 1.1224 + * Return with an error instead of adjusting the limits because we would 1.1225 + * not be able to maintain the semantics that either the source must be 1.1226 + * consumed or the target filled (unless an error occurs). 1.1227 + * An adjustment would be targetLimit=t+0x7fffffff; for example. 1.1228 + * 1.1229 + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1.1230 + * to a char * pointer and provide an incomplete UChar code unit. 
1.1231 + */ 1.1232 + if (sourceLimit<s || targetLimit<t || 1.1233 + ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || 1.1234 + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || 1.1235 + (((const char *)sourceLimit-(const char *)s) & 1) != 0) 1.1236 + { 1.1237 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1238 + return; 1.1239 + } 1.1240 + 1.1241 + /* output the target overflow buffer */ 1.1242 + if( cnv->charErrorBufferLength>0 && 1.1243 + ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) 1.1244 + ) { 1.1245 + /* U_BUFFER_OVERFLOW_ERROR */ 1.1246 + return; 1.1247 + } 1.1248 + /* *target may have moved, therefore stop using t */ 1.1249 + 1.1250 + if(!flush && s==sourceLimit && cnv->preFromULength>=0) { 1.1251 + /* the overflow buffer is emptied and there is no new input: we are done */ 1.1252 + return; 1.1253 + } 1.1254 + 1.1255 + /* 1.1256 + * Do not simply return with a buffer overflow error if 1.1257 + * !flush && t==targetLimit 1.1258 + * because it is possible that the source will not generate any output. 1.1259 + * For example, the skip callback may be called; 1.1260 + * it does not output anything. 
/* ucnv_toUnicode() --------------------------------------------------------- */

/*
 * Conversion driver for the to-Unicode direction.
 *
 * Repeatedly calls the converter's toUnicode implementation function
 * (the WithOffsets variant when pArgs->offsets!=NULL and one exists) and,
 * after each call, updates offsets, handles the end of the input and
 * calls the to-Unicode error callback (cnv->fromCharErrorBehaviour).
 *
 * If the converter has stored "replay" bytes from a previous partial m:n
 * match (cnv->preToULength<0), they are converted first from the local
 * replay[] buffer while the caller's real source/sourceLimit/flush values
 * are saved in the real* variables and restored afterwards.
 *
 * The function returns when the input is consumed successfully, or when
 * *err holds a failure that the callback did not or cannot resolve
 * (for example U_BUFFER_OVERFLOW_ERROR).
 *
 * @param pArgs in/out conversion arguments; source, target and offsets
 *              are advanced
 * @param err   in/out error code; may already indicate a failure on entry
 *              (see the "handle error from getNextUChar() or
 *              ucnv_convertEx()" branch), in which case the conversion
 *              function is skipped and only offset/callback handling runs
 */
static void
_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterToUnicode toUnicode;
    UConverter *cnv;
    const char *s;
    UChar *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    char replay[UCNV_EXT_MAX_BYTES];
    const char *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    /* s and t remember where source and target were before each step */
    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        toUnicode=cnv->sharedData->impl->toUnicode;
    } else {
        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
        if(toUnicode==NULL) {
            /* there is no WithOffsets implementation */
            toUnicode=cnv->sharedData->impl->toUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    if(cnv->preToULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preToULength is negative here: its negation is the number of stored bytes */
        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preToULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preToULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            toUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preToULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->toULength==0);
        } else {
            /* handle error from getNextUChar() or ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                /* number of UChars written since the last pointer update */
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offsets should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                /* advance the source index by the number of bytes consumed */
                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preToULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preToULength;
                    pArgs->flush=FALSE;
                    /* back up the index by the number of replay bytes; -1 if that is not possible */
                    if((sourceIndex+=cnv->preToULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preToULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->toULength>0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                /*
                 * return if a callback was already called for this error,
                 * or if the error is not one of the codes for which the
                 * callback is called
                 */
                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND &&
                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preToULength==0);

                        /* unconsumed replay bytes go back into preToU, with a negative length */
                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preToU, pArgs->source, length);
                            cnv->preToULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* copy toUBytes[] to invalidCharBuffer[] */
            errorInputLength=cnv->invalidCharLength=cnv->toULength;
            if(errorInputLength>0) {
                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
            }

            /* set the converter state to deal with the next character */
            cnv->toULength=0;

            /* call the callback function */
            /* report U_INVALID_CHAR_FOUND as UCNV_UNASSIGNED unless another reason was already set */
            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
                cnv->toUCallbackReason = UCNV_UNASSIGNED;
            }
            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
                cnv->invalidCharBuffer, errorInputLength,
                cnv->toUCallbackReason,
                err);
            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}
1.1570 + * Call this function if(cnv->UCharErrorBufferLength>0). 1.1571 + * @return TRUE if overflow 1.1572 + */ 1.1573 +static UBool 1.1574 +ucnv_outputOverflowToUnicode(UConverter *cnv, 1.1575 + UChar **target, const UChar *targetLimit, 1.1576 + int32_t **pOffsets, 1.1577 + UErrorCode *err) { 1.1578 + int32_t *offsets; 1.1579 + UChar *overflow, *t; 1.1580 + int32_t i, length; 1.1581 + 1.1582 + t=*target; 1.1583 + if(pOffsets!=NULL) { 1.1584 + offsets=*pOffsets; 1.1585 + } else { 1.1586 + offsets=NULL; 1.1587 + } 1.1588 + 1.1589 + overflow=cnv->UCharErrorBuffer; 1.1590 + length=cnv->UCharErrorBufferLength; 1.1591 + i=0; 1.1592 + while(i<length) { 1.1593 + if(t==targetLimit) { 1.1594 + /* the overflow buffer contains too much, keep the rest */ 1.1595 + int32_t j=0; 1.1596 + 1.1597 + do { 1.1598 + overflow[j++]=overflow[i++]; 1.1599 + } while(i<length); 1.1600 + 1.1601 + cnv->UCharErrorBufferLength=(int8_t)j; 1.1602 + *target=t; 1.1603 + if(offsets!=NULL) { 1.1604 + *pOffsets=offsets; 1.1605 + } 1.1606 + *err=U_BUFFER_OVERFLOW_ERROR; 1.1607 + return TRUE; 1.1608 + } 1.1609 + 1.1610 + /* copy the overflow contents to the target */ 1.1611 + *t++=overflow[i++]; 1.1612 + if(offsets!=NULL) { 1.1613 + *offsets++=-1; /* no source index available for old output */ 1.1614 + } 1.1615 + } 1.1616 + 1.1617 + /* the overflow buffer is completely copied to the target */ 1.1618 + cnv->UCharErrorBufferLength=0; 1.1619 + *target=t; 1.1620 + if(offsets!=NULL) { 1.1621 + *pOffsets=offsets; 1.1622 + } 1.1623 + return FALSE; 1.1624 +} 1.1625 + 1.1626 +U_CAPI void U_EXPORT2 1.1627 +ucnv_toUnicode(UConverter *cnv, 1.1628 + UChar **target, const UChar *targetLimit, 1.1629 + const char **source, const char *sourceLimit, 1.1630 + int32_t *offsets, 1.1631 + UBool flush, 1.1632 + UErrorCode *err) { 1.1633 + UConverterToUnicodeArgs args; 1.1634 + const char *s; 1.1635 + UChar *t; 1.1636 + 1.1637 + /* check parameters */ 1.1638 + if(err==NULL || U_FAILURE(*err)) { 1.1639 + return; 1.1640 + } 
1.1641 + 1.1642 + if(cnv==NULL || target==NULL || source==NULL) { 1.1643 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1644 + return; 1.1645 + } 1.1646 + 1.1647 + s=*source; 1.1648 + t=*target; 1.1649 + 1.1650 + if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { 1.1651 + /* 1.1652 + Prevent code from going into an infinite loop in case we do hit this 1.1653 + limit. The limit pointer is expected to be on a UChar * boundary. 1.1654 + This also prevents the next argument check from failing. 1.1655 + */ 1.1656 + targetLimit = (const UChar *)(((const char *)targetLimit) - 1); 1.1657 + } 1.1658 + 1.1659 + /* 1.1660 + * All these conditions should never happen. 1.1661 + * 1.1662 + * 1) Make sure that the limits are >= to the address source or target 1.1663 + * 1.1664 + * 2) Make sure that the buffer sizes do not exceed the number range for 1.1665 + * int32_t because some functions use the size (in units or bytes) 1.1666 + * rather than comparing pointers, and because offsets are int32_t values. 1.1667 + * 1.1668 + * size_t is guaranteed to be unsigned and large enough for the job. 1.1669 + * 1.1670 + * Return with an error instead of adjusting the limits because we would 1.1671 + * not be able to maintain the semantics that either the source must be 1.1672 + * consumed or the target filled (unless an error occurs). 1.1673 + * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1.1674 + * 1.1675 + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1.1676 + * to a char * pointer and provide an incomplete UChar code unit. 
1.1677 + */ 1.1678 + if (sourceLimit<s || targetLimit<t || 1.1679 + ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || 1.1680 + ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || 1.1681 + (((const char *)targetLimit-(const char *)t) & 1) != 0 1.1682 + ) { 1.1683 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1684 + return; 1.1685 + } 1.1686 + 1.1687 + /* output the target overflow buffer */ 1.1688 + if( cnv->UCharErrorBufferLength>0 && 1.1689 + ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) 1.1690 + ) { 1.1691 + /* U_BUFFER_OVERFLOW_ERROR */ 1.1692 + return; 1.1693 + } 1.1694 + /* *target may have moved, therefore stop using t */ 1.1695 + 1.1696 + if(!flush && s==sourceLimit && cnv->preToULength>=0) { 1.1697 + /* the overflow buffer is emptied and there is no new input: we are done */ 1.1698 + return; 1.1699 + } 1.1700 + 1.1701 + /* 1.1702 + * Do not simply return with a buffer overflow error if 1.1703 + * !flush && t==targetLimit 1.1704 + * because it is possible that the source will not generate any output. 1.1705 + * For example, the skip callback may be called; 1.1706 + * it does not output anything. 
1.1707 + */ 1.1708 + 1.1709 + /* prepare the converter arguments */ 1.1710 + args.converter=cnv; 1.1711 + args.flush=flush; 1.1712 + args.offsets=offsets; 1.1713 + args.source=s; 1.1714 + args.sourceLimit=sourceLimit; 1.1715 + args.target=*target; 1.1716 + args.targetLimit=targetLimit; 1.1717 + args.size=sizeof(args); 1.1718 + 1.1719 + _toUnicodeWithCallback(&args, err); 1.1720 + 1.1721 + *source=args.source; 1.1722 + *target=args.target; 1.1723 +} 1.1724 + 1.1725 +/* ucnv_to/fromUChars() ----------------------------------------------------- */ 1.1726 + 1.1727 +U_CAPI int32_t U_EXPORT2 1.1728 +ucnv_fromUChars(UConverter *cnv, 1.1729 + char *dest, int32_t destCapacity, 1.1730 + const UChar *src, int32_t srcLength, 1.1731 + UErrorCode *pErrorCode) { 1.1732 + const UChar *srcLimit; 1.1733 + char *originalDest, *destLimit; 1.1734 + int32_t destLength; 1.1735 + 1.1736 + /* check arguments */ 1.1737 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.1738 + return 0; 1.1739 + } 1.1740 + 1.1741 + if( cnv==NULL || 1.1742 + destCapacity<0 || (destCapacity>0 && dest==NULL) || 1.1743 + srcLength<-1 || (srcLength!=0 && src==NULL) 1.1744 + ) { 1.1745 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.1746 + return 0; 1.1747 + } 1.1748 + 1.1749 + /* initialize */ 1.1750 + ucnv_resetFromUnicode(cnv); 1.1751 + originalDest=dest; 1.1752 + if(srcLength==-1) { 1.1753 + srcLength=u_strlen(src); 1.1754 + } 1.1755 + if(srcLength>0) { 1.1756 + srcLimit=src+srcLength; 1.1757 + destLimit=dest+destCapacity; 1.1758 + 1.1759 + /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ 1.1760 + if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { 1.1761 + destLimit=(char *)U_MAX_PTR(dest); 1.1762 + } 1.1763 + 1.1764 + /* perform the conversion */ 1.1765 + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1.1766 + destLength=(int32_t)(dest-originalDest); 1.1767 + 1.1768 + /* if an overflow occurs, then get the preflighting length */ 1.1769 + 
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 1.1770 + char buffer[1024]; 1.1771 + 1.1772 + destLimit=buffer+sizeof(buffer); 1.1773 + do { 1.1774 + dest=buffer; 1.1775 + *pErrorCode=U_ZERO_ERROR; 1.1776 + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1.1777 + destLength+=(int32_t)(dest-buffer); 1.1778 + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1.1779 + } 1.1780 + } else { 1.1781 + destLength=0; 1.1782 + } 1.1783 + 1.1784 + return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); 1.1785 +} 1.1786 + 1.1787 +U_CAPI int32_t U_EXPORT2 1.1788 +ucnv_toUChars(UConverter *cnv, 1.1789 + UChar *dest, int32_t destCapacity, 1.1790 + const char *src, int32_t srcLength, 1.1791 + UErrorCode *pErrorCode) { 1.1792 + const char *srcLimit; 1.1793 + UChar *originalDest, *destLimit; 1.1794 + int32_t destLength; 1.1795 + 1.1796 + /* check arguments */ 1.1797 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.1798 + return 0; 1.1799 + } 1.1800 + 1.1801 + if( cnv==NULL || 1.1802 + destCapacity<0 || (destCapacity>0 && dest==NULL) || 1.1803 + srcLength<-1 || (srcLength!=0 && src==NULL)) 1.1804 + { 1.1805 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.1806 + return 0; 1.1807 + } 1.1808 + 1.1809 + /* initialize */ 1.1810 + ucnv_resetToUnicode(cnv); 1.1811 + originalDest=dest; 1.1812 + if(srcLength==-1) { 1.1813 + srcLength=(int32_t)uprv_strlen(src); 1.1814 + } 1.1815 + if(srcLength>0) { 1.1816 + srcLimit=src+srcLength; 1.1817 + destLimit=dest+destCapacity; 1.1818 + 1.1819 + /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ 1.1820 + if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { 1.1821 + destLimit=(UChar *)U_MAX_PTR(dest); 1.1822 + } 1.1823 + 1.1824 + /* perform the conversion */ 1.1825 + ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1.1826 + destLength=(int32_t)(dest-originalDest); 1.1827 + 1.1828 + /* if an overflow occurs, then get the preflighting length */ 1.1829 + 
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) 1.1830 + { 1.1831 + UChar buffer[1024]; 1.1832 + 1.1833 + destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR; 1.1834 + do { 1.1835 + dest=buffer; 1.1836 + *pErrorCode=U_ZERO_ERROR; 1.1837 + ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1.1838 + destLength+=(int32_t)(dest-buffer); 1.1839 + } 1.1840 + while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1.1841 + } 1.1842 + } else { 1.1843 + destLength=0; 1.1844 + } 1.1845 + 1.1846 + return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); 1.1847 +} 1.1848 + 1.1849 +/* ucnv_getNextUChar() ------------------------------------------------------ */ 1.1850 + 1.1851 +U_CAPI UChar32 U_EXPORT2 1.1852 +ucnv_getNextUChar(UConverter *cnv, 1.1853 + const char **source, const char *sourceLimit, 1.1854 + UErrorCode *err) { 1.1855 + UConverterToUnicodeArgs args; 1.1856 + UChar buffer[U16_MAX_LENGTH]; 1.1857 + const char *s; 1.1858 + UChar32 c; 1.1859 + int32_t i, length; 1.1860 + 1.1861 + /* check parameters */ 1.1862 + if(err==NULL || U_FAILURE(*err)) { 1.1863 + return 0xffff; 1.1864 + } 1.1865 + 1.1866 + if(cnv==NULL || source==NULL) { 1.1867 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1868 + return 0xffff; 1.1869 + } 1.1870 + 1.1871 + s=*source; 1.1872 + if(sourceLimit<s) { 1.1873 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1874 + return 0xffff; 1.1875 + } 1.1876 + 1.1877 + /* 1.1878 + * Make sure that the buffer sizes do not exceed the number range for 1.1879 + * int32_t because some functions use the size (in units or bytes) 1.1880 + * rather than comparing pointers, and because offsets are int32_t values. 1.1881 + * 1.1882 + * size_t is guaranteed to be unsigned and large enough for the job. 1.1883 + * 1.1884 + * Return with an error instead of adjusting the limits because we would 1.1885 + * not be able to maintain the semantics that either the source must be 1.1886 + * consumed or the target filled (unless an error occurs). 
1.1887 + * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1.1888 + */ 1.1889 + if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { 1.1890 + *err=U_ILLEGAL_ARGUMENT_ERROR; 1.1891 + return 0xffff; 1.1892 + } 1.1893 + 1.1894 + c=U_SENTINEL; 1.1895 + 1.1896 + /* flush the target overflow buffer */ 1.1897 + if(cnv->UCharErrorBufferLength>0) { 1.1898 + UChar *overflow; 1.1899 + 1.1900 + overflow=cnv->UCharErrorBuffer; 1.1901 + i=0; 1.1902 + length=cnv->UCharErrorBufferLength; 1.1903 + U16_NEXT(overflow, i, length, c); 1.1904 + 1.1905 + /* move the remaining overflow contents up to the beginning */ 1.1906 + if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { 1.1907 + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, 1.1908 + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1.1909 + } 1.1910 + 1.1911 + if(!U16_IS_LEAD(c) || i<length) { 1.1912 + return c; 1.1913 + } 1.1914 + /* 1.1915 + * Continue if the overflow buffer contained only a lead surrogate, 1.1916 + * in case the converter outputs single surrogates from complete 1.1917 + * input sequences. 
1.1918 + */ 1.1919 + } 1.1920 + 1.1921 + /* 1.1922 + * flush==TRUE is implied for ucnv_getNextUChar() 1.1923 + * 1.1924 + * do not simply return even if s==sourceLimit because the converter may 1.1925 + * not have seen flush==TRUE before 1.1926 + */ 1.1927 + 1.1928 + /* prepare the converter arguments */ 1.1929 + args.converter=cnv; 1.1930 + args.flush=TRUE; 1.1931 + args.offsets=NULL; 1.1932 + args.source=s; 1.1933 + args.sourceLimit=sourceLimit; 1.1934 + args.target=buffer; 1.1935 + args.targetLimit=buffer+1; 1.1936 + args.size=sizeof(args); 1.1937 + 1.1938 + if(c<0) { 1.1939 + /* 1.1940 + * call the native getNextUChar() implementation if we are 1.1941 + * at a character boundary (toULength==0) 1.1942 + * 1.1943 + * unlike with _toUnicode(), getNextUChar() implementations must set 1.1944 + * U_TRUNCATED_CHAR_FOUND for truncated input, 1.1945 + * in addition to setting toULength/toUBytes[] 1.1946 + */ 1.1947 + if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { 1.1948 + c=cnv->sharedData->impl->getNextUChar(&args, err); 1.1949 + *source=s=args.source; 1.1950 + if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { 1.1951 + /* reset the converter without calling the callback function */ 1.1952 + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); 1.1953 + return 0xffff; /* no output */ 1.1954 + } else if(U_SUCCESS(*err) && c>=0) { 1.1955 + return c; 1.1956 + /* 1.1957 + * else fall through to use _toUnicode() because 1.1958 + * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all 1.1959 + * U_FAILURE: call _toUnicode() for callback handling (do not output c) 1.1960 + */ 1.1961 + } 1.1962 + } 1.1963 + 1.1964 + /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ 1.1965 + _toUnicodeWithCallback(&args, err); 1.1966 + 1.1967 + if(*err==U_BUFFER_OVERFLOW_ERROR) { 1.1968 + *err=U_ZERO_ERROR; 1.1969 + } 1.1970 + 1.1971 + i=0; 1.1972 + length=(int32_t)(args.target-buffer); 1.1973 + } else { 1.1974 + /* write the lead surrogate 
from the overflow buffer */ 1.1975 + buffer[0]=(UChar)c; 1.1976 + args.target=buffer+1; 1.1977 + i=0; 1.1978 + length=1; 1.1979 + } 1.1980 + 1.1981 + /* buffer contents starts at i and ends before length */ 1.1982 + 1.1983 + if(U_FAILURE(*err)) { 1.1984 + c=0xffff; /* no output */ 1.1985 + } else if(length==0) { 1.1986 + /* no input or only state changes */ 1.1987 + *err=U_INDEX_OUTOFBOUNDS_ERROR; 1.1988 + /* no need to reset explicitly because _toUnicodeWithCallback() did it */ 1.1989 + c=0xffff; /* no output */ 1.1990 + } else { 1.1991 + c=buffer[0]; 1.1992 + i=1; 1.1993 + if(!U16_IS_LEAD(c)) { 1.1994 + /* consume c=buffer[0], done */ 1.1995 + } else { 1.1996 + /* got a lead surrogate, see if a trail surrogate follows */ 1.1997 + UChar c2; 1.1998 + 1.1999 + if(cnv->UCharErrorBufferLength>0) { 1.2000 + /* got overflow output from the conversion */ 1.2001 + if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { 1.2002 + /* got a trail surrogate, too */ 1.2003 + c=U16_GET_SUPPLEMENTARY(c, c2); 1.2004 + 1.2005 + /* move the remaining overflow contents up to the beginning */ 1.2006 + if((--cnv->UCharErrorBufferLength)>0) { 1.2007 + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, 1.2008 + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1.2009 + } 1.2010 + } else { 1.2011 + /* c is an unpaired lead surrogate, just return it */ 1.2012 + } 1.2013 + } else if(args.source<sourceLimit) { 1.2014 + /* convert once more, to buffer[1] */ 1.2015 + args.targetLimit=buffer+2; 1.2016 + _toUnicodeWithCallback(&args, err); 1.2017 + if(*err==U_BUFFER_OVERFLOW_ERROR) { 1.2018 + *err=U_ZERO_ERROR; 1.2019 + } 1.2020 + 1.2021 + length=(int32_t)(args.target-buffer); 1.2022 + if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { 1.2023 + /* got a trail surrogate, too */ 1.2024 + c=U16_GET_SUPPLEMENTARY(c, c2); 1.2025 + i=2; 1.2026 + } 1.2027 + } 1.2028 + } 1.2029 + } 1.2030 + 1.2031 + /* 1.2032 + * move leftover output from buffer[i..length[ 1.2033 + * into the beginning of the 
overflow buffer
     */
    if(i<length) {
        /* move further overflow back */
        int32_t delta=length-i;
        if((length=cnv->UCharErrorBufferLength)>0) {
            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
                         length*U_SIZEOF_UCHAR);
        }
        cnv->UCharErrorBufferLength=(int8_t)(length+delta);

        cnv->UCharErrorBuffer[0]=buffer[i++];
        if(delta>1) {
            cnv->UCharErrorBuffer[1]=buffer[i];
        }
    }

    *source=args.source;
    return c;
}

/* ucnv_convert() and siblings ---------------------------------------------- */

/*
 * Converts bytes from sourceCnv's charset to targetCnv's charset.
 * The data normally travels through a UChar pivot buffer
 * (source -> toUnicode -> pivot -> fromUnicode -> target); when one side is
 * UTF-8 and the other converter's implementation provides a
 * fromUTF8/toUTF8 function, that function is called directly and the pivot
 * is only used for error handling, partial matches and replay.
 *
 * targetCnv, sourceCnv    converters for the output and the input side;
 *                         must not be NULL
 * target, targetLimit     in/out output position and its limit; after the
 *                         conversion loop *target is set to the position
 *                         behind the last byte written
 * source, sourceLimit     in/out input position and its limit;
 *                         sourceLimit==NULL means that *source is
 *                         NUL-terminated; after the conversion loop *source
 *                         is set to the toUnicode source position
 * pivotStart, pivotLimit  caller-supplied pivot buffer; pivotStart==NULL
 *                         selects an internal stack buffer of CHUNK_SIZE
 *                         UChars, which is only accepted together with flush
 * pivotSource, pivotTarget  in/out read and write positions in the pivot
 *                         buffer (ignored and replaced by local variables
 *                         when pivotStart==NULL)
 * reset                   if set, both converters are reset and the pivot
 *                         positions are moved back to pivotStart first
 * flush                   passed to the toUnicode side as its flush flag;
 *                         with flush and a successful result the output is
 *                         NUL-terminated if there is room, otherwise
 *                         U_STRING_NOT_TERMINATED_WARNING is set
 * pErrorCode              in/out ICU error code; the function returns
 *                         immediately if it is NULL or already a failure,
 *                         and sets U_ILLEGAL_ARGUMENT_ERROR for invalid
 *                         arguments
 */
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    const UChar *myPivotSource;
    UChar *myPivotTarget;
    const char *s;
    char *t;

    UConverterToUnicodeArgs toUArgs;
    UConverterFromUnicodeArgs fromUArgs;
    UConverterConvert convert;

    /* error checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    if( targetCnv==NULL || sourceCnv==NULL ||
        source==NULL || *source==NULL ||
        target==NULL || *target==NULL || targetLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    s=*source;
    t=*target;
    /* the limits must not lie before the current positions */
    if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /*
     * Make sure that the buffer sizes do not exceed the number range for
     * int32_t. See ucnv_toUnicode() for a more detailed comment.
     */
    if(
        (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(pivotStart==NULL) {
        if(!flush) {
            /* streaming conversion requires an explicit pivot buffer */
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }

        /*
         * use the stack pivot buffer;
         * the caller's pivotSource/pivotTarget/pivotLimit are replaced
         * by pointers to local variables from here on
         */
        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
        pivotSource=(UChar **)&myPivotSource;
        pivotTarget=&myPivotTarget;
        pivotLimit=pivotBuffer+CHUNK_SIZE;
    } else if(  pivotStart>=pivotLimit ||
                pivotSource==NULL || *pivotSource==NULL ||
                pivotTarget==NULL || *pivotTarget==NULL ||
                pivotLimit==NULL
    ) {
        /* a caller-supplied pivot buffer must be non-empty and fully specified */
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(sourceLimit==NULL) {
        /* get limit of single-byte-NUL-terminated source string */
        sourceLimit=uprv_strchr(*source, 0);
    }

    if(reset) {
        ucnv_resetToUnicode(sourceCnv);
        ucnv_resetFromUnicode(targetCnv);
        *pivotSource=*pivotTarget=pivotStart;
    } else if(targetCnv->charErrorBufferLength>0) {
        /* output the targetCnv overflow buffer */
        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
            /* U_BUFFER_OVERFLOW_ERROR */
            return;
        }
        /* *target has moved, therefore stop using t */

        if( !flush &&
            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
        ) {
            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
            return;
        }
    }

    /*
     * Is direct-UTF-8 conversion available?
     * convert stays NULL if neither direction offers a direct function.
     */
    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
        targetCnv->sharedData->impl->fromUTF8!=NULL
    ) {
        convert=targetCnv->sharedData->impl->fromUTF8;
    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
               sourceCnv->sharedData->impl->toUTF8!=NULL
    ) {
        convert=sourceCnv->sharedData->impl->toUTF8;
    } else {
        convert=NULL;
    }

    /*
     * If direct-UTF-8 conversion is available, then we use a smaller
     * pivot buffer for error handling and partial matches
     * so that we quickly return to direct conversion.
     *
     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
     *
     * We could reduce the pivot buffer size further, at the cost of
     * buffer overflows from callbacks.
     * The pivot buffer should not be smaller than the maximum number of
     * fromUnicode extension table input UChars
     * (for m:n conversion, see
     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
     * or 2 for surrogate pairs.
     *
     * Too small a buffer can cause thrashing between pivoting and direct
     * conversion, with function call overhead outweighing the benefits
     * of direct conversion.
     */
    if(convert!=NULL && (pivotLimit-pivotStart)>32) {
        pivotLimit=pivotStart+32;
    }

    /* prepare the converter arguments */
    fromUArgs.converter=targetCnv;
    fromUArgs.flush=FALSE; /* set to TRUE inside the loop once toUnicode has seen the end of the input */
    fromUArgs.offsets=NULL;
    fromUArgs.target=*target;
    fromUArgs.targetLimit=targetLimit;
    fromUArgs.size=sizeof(fromUArgs);

    toUArgs.converter=sourceCnv;
    toUArgs.flush=flush;
    toUArgs.offsets=NULL;
    toUArgs.source=s;
    toUArgs.sourceLimit=sourceLimit;
    toUArgs.targetLimit=pivotLimit;
    toUArgs.size=sizeof(toUArgs);

    /*
     * TODO: Consider separating this function into two functions,
     * extracting exactly the conversion loop,
     * for readability and to reduce the set of visible variables.
     *
     * Otherwise stop using s and t from here on.
     */
    s=t=NULL;

    /*
     * conversion loop
     *
     * The sequence of steps in the loop may appear backward,
     * but the principle is simple:
     * In the chain of
     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
     * empty out later buffers before refilling them from earlier ones.
     *
     * The targetCnv overflow buffer is flushed out only once before the loop.
     */
    for(;;) {
        /*
         * if(pivot not empty or error or replay or flush fromUnicode) {
         *     fromUnicode(pivot -> target);
         * }
         *
         * For pivoting conversion; and for direct conversion for
         * error callback handling and flushing the replay buffer.
         */
        if( *pivotSource<*pivotTarget ||
            U_FAILURE(*pErrorCode) ||
            targetCnv->preFromULength<0 ||
            fromUArgs.flush
        ) {
            fromUArgs.source=*pivotSource;
            fromUArgs.sourceLimit=*pivotTarget;
            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                /* target overflow, or conversion error */
                *pivotSource=(UChar *)fromUArgs.source;
                break;
            }

            /*
             * _fromUnicodeWithCallback() must have consumed the pivot contents
             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
             */
        }

        /* The pivot buffer is empty; reset it so we start at pivotStart. */
        *pivotSource=*pivotTarget=pivotStart;

        /*
         * if(sourceCnv overflow buffer not empty) {
         *     move(sourceCnv overflow buffer -> pivot);
         *     continue;
         * }
         */
        /* output the sourceCnv overflow buffer */
        if(sourceCnv->UCharErrorBufferLength>0) {
            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
                /* U_BUFFER_OVERFLOW_ERROR */
                *pErrorCode=U_ZERO_ERROR;
            }
            continue;
        }

        /*
         * check for end of input and break if done
         *
         * Checking both flush and fromUArgs.flush ensures that the converters
         * have been called with the flush flag set if the ucnv_convertEx()
         * caller set it.
         */
        if( toUArgs.source==sourceLimit &&
            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
            (!flush || fromUArgs.flush)
        ) {
            /* done successfully */
            break;
        }

        /*
         * use direct conversion if available
         * but not if continuing a partial match
         * or flushing the toUnicode replay buffer
         */
        if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /* remove a warning that may be set by this function */
                *pErrorCode=U_ZERO_ERROR;
            }
            convert(&fromUArgs, &toUArgs, pErrorCode);
            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                break;
            } else if(U_FAILURE(*pErrorCode)) {
                if(sourceCnv->toULength>0) {
                    /*
                     * Fall through to calling _toUnicodeWithCallback()
                     * for callback handling.
                     *
                     * The pivot buffer will be reset with
                     *   *pivotSource=*pivotTarget=pivotStart;
                     * which indicates a toUnicode error to the caller
                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
                     */
                } else {
                    /*
                     * Indicate a fromUnicode error to the caller
                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
                     */
                    *pivotSource=*pivotTarget=pivotStart+1;
                    /*
                     * Loop around to calling _fromUnicodeWithCallbacks()
                     * for callback handling.
                     */
                    continue;
                }
            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /*
                 * No error, but the implementation requested to temporarily
                 * fall back to pivoting.
                 */
                *pErrorCode=U_ZERO_ERROR;
            /*
             * The following else branches are almost identical to the end-of-input
             * handling in _toUnicodeWithCallback().
             * Avoid calling it just for the end of input.
             */
            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
                /*
                 * the entire input stream is consumed
                 * and there is a partial, truncated input sequence left
                 */

                /* inject an error and continue with callback handling */
                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
            } else {
                /* input consumed */
                if(flush) {
                    /* reset the converters without calling the callback functions */
                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
                }

                /* done successfully */
                break;
            }
        }

        /*
         * toUnicode(source -> pivot);
         *
         * For pivoting conversion; and for direct conversion for
         * error callback handling, continuing partial matches
         * and flushing the replay buffer.
         *
         * The pivot buffer is empty and reset.
         */
        toUArgs.target=pivotStart; /* ==*pivotTarget */
        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
        _toUnicodeWithCallback(&toUArgs, pErrorCode);
        *pivotTarget=toUArgs.target;
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* pivot overflow: continue with the conversion loop */
            *pErrorCode=U_ZERO_ERROR;
        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
            /* conversion error, or there was nothing left to convert */
            break;
        }
        /*
         * else:
         * _toUnicodeWithCallback() wrote into the pivot buffer,
         * continue with fromUnicode conversion.
         *
         * Set the fromUnicode flush flag if we flush and if toUnicode has
         * processed the end of the input.
         */
        if( flush && toUArgs.source==sourceLimit &&
            sourceCnv->preToULength>=0 &&
            sourceCnv->UCharErrorBufferLength==0
        ) {
            fromUArgs.flush=TRUE;
        }
    }

    /*
     * The conversion loop is exited when one of the following is true:
     * - the entire source text has been converted successfully to the target buffer
     * - a target buffer overflow occurred
     * - a conversion error occurred
     */

    /* report the final input and output positions back to the caller */
    *source=toUArgs.source;
    *target=fromUArgs.target;

    /* terminate the target buffer if possible */
    if(flush && U_SUCCESS(*pErrorCode)) {
        if(*target!=targetLimit) {
            **target=0;
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
                *pErrorCode=U_ZERO_ERROR;
            }
        } else {
            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
        }
    }
}

/* internal implementation of ucnv_convert() etc.
with preflighting */ 1.2415 +static int32_t 1.2416 +ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, 1.2417 + char *target, int32_t targetCapacity, 1.2418 + const char *source, int32_t sourceLength, 1.2419 + UErrorCode *pErrorCode) { 1.2420 + UChar pivotBuffer[CHUNK_SIZE]; 1.2421 + UChar *pivot, *pivot2; 1.2422 + 1.2423 + char *myTarget; 1.2424 + const char *sourceLimit; 1.2425 + const char *targetLimit; 1.2426 + int32_t targetLength=0; 1.2427 + 1.2428 + /* set up */ 1.2429 + if(sourceLength<0) { 1.2430 + sourceLimit=uprv_strchr(source, 0); 1.2431 + } else { 1.2432 + sourceLimit=source+sourceLength; 1.2433 + } 1.2434 + 1.2435 + /* if there is no input data, we're done */ 1.2436 + if(source==sourceLimit) { 1.2437 + return u_terminateChars(target, targetCapacity, 0, pErrorCode); 1.2438 + } 1.2439 + 1.2440 + pivot=pivot2=pivotBuffer; 1.2441 + myTarget=target; 1.2442 + targetLength=0; 1.2443 + 1.2444 + if(targetCapacity>0) { 1.2445 + /* perform real conversion */ 1.2446 + targetLimit=target+targetCapacity; 1.2447 + ucnv_convertEx(outConverter, inConverter, 1.2448 + &myTarget, targetLimit, 1.2449 + &source, sourceLimit, 1.2450 + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 1.2451 + FALSE, 1.2452 + TRUE, 1.2453 + pErrorCode); 1.2454 + targetLength=(int32_t)(myTarget-target); 1.2455 + } 1.2456 + 1.2457 + /* 1.2458 + * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing 1.2459 + * to it but continue the conversion in order to store in targetCapacity 1.2460 + * the number of bytes that was required. 
1.2461 + */ 1.2462 + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) 1.2463 + { 1.2464 + char targetBuffer[CHUNK_SIZE]; 1.2465 + 1.2466 + targetLimit=targetBuffer+CHUNK_SIZE; 1.2467 + do { 1.2468 + *pErrorCode=U_ZERO_ERROR; 1.2469 + myTarget=targetBuffer; 1.2470 + ucnv_convertEx(outConverter, inConverter, 1.2471 + &myTarget, targetLimit, 1.2472 + &source, sourceLimit, 1.2473 + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 1.2474 + FALSE, 1.2475 + TRUE, 1.2476 + pErrorCode); 1.2477 + targetLength+=(int32_t)(myTarget-targetBuffer); 1.2478 + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1.2479 + 1.2480 + /* done with preflighting, set warnings and errors as appropriate */ 1.2481 + return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); 1.2482 + } 1.2483 + 1.2484 + /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ 1.2485 + return targetLength; 1.2486 +} 1.2487 + 1.2488 +U_CAPI int32_t U_EXPORT2 1.2489 +ucnv_convert(const char *toConverterName, const char *fromConverterName, 1.2490 + char *target, int32_t targetCapacity, 1.2491 + const char *source, int32_t sourceLength, 1.2492 + UErrorCode *pErrorCode) { 1.2493 + UConverter in, out; /* stack-allocated */ 1.2494 + UConverter *inConverter, *outConverter; 1.2495 + int32_t targetLength; 1.2496 + 1.2497 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.2498 + return 0; 1.2499 + } 1.2500 + 1.2501 + if( source==NULL || sourceLength<-1 || 1.2502 + targetCapacity<0 || (targetCapacity>0 && target==NULL) 1.2503 + ) { 1.2504 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.2505 + return 0; 1.2506 + } 1.2507 + 1.2508 + /* if there is no input data, we're done */ 1.2509 + if(sourceLength==0 || (sourceLength<0 && *source==0)) { 1.2510 + return u_terminateChars(target, targetCapacity, 0, pErrorCode); 1.2511 + } 1.2512 + 1.2513 + /* create the converters */ 1.2514 + inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); 1.2515 + 
if(U_FAILURE(*pErrorCode)) { 1.2516 + return 0; 1.2517 + } 1.2518 + 1.2519 + outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); 1.2520 + if(U_FAILURE(*pErrorCode)) { 1.2521 + ucnv_close(inConverter); 1.2522 + return 0; 1.2523 + } 1.2524 + 1.2525 + targetLength=ucnv_internalConvert(outConverter, inConverter, 1.2526 + target, targetCapacity, 1.2527 + source, sourceLength, 1.2528 + pErrorCode); 1.2529 + 1.2530 + ucnv_close(inConverter); 1.2531 + ucnv_close(outConverter); 1.2532 + 1.2533 + return targetLength; 1.2534 +} 1.2535 + 1.2536 +/* @internal */ 1.2537 +static int32_t 1.2538 +ucnv_convertAlgorithmic(UBool convertToAlgorithmic, 1.2539 + UConverterType algorithmicType, 1.2540 + UConverter *cnv, 1.2541 + char *target, int32_t targetCapacity, 1.2542 + const char *source, int32_t sourceLength, 1.2543 + UErrorCode *pErrorCode) { 1.2544 + UConverter algoConverterStatic; /* stack-allocated */ 1.2545 + UConverter *algoConverter, *to, *from; 1.2546 + int32_t targetLength; 1.2547 + 1.2548 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.2549 + return 0; 1.2550 + } 1.2551 + 1.2552 + if( cnv==NULL || source==NULL || sourceLength<-1 || 1.2553 + targetCapacity<0 || (targetCapacity>0 && target==NULL) 1.2554 + ) { 1.2555 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.2556 + return 0; 1.2557 + } 1.2558 + 1.2559 + /* if there is no input data, we're done */ 1.2560 + if(sourceLength==0 || (sourceLength<0 && *source==0)) { 1.2561 + return u_terminateChars(target, targetCapacity, 0, pErrorCode); 1.2562 + } 1.2563 + 1.2564 + /* create the algorithmic converter */ 1.2565 + algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, 1.2566 + "", 0, pErrorCode); 1.2567 + if(U_FAILURE(*pErrorCode)) { 1.2568 + return 0; 1.2569 + } 1.2570 + 1.2571 + /* reset the other converter */ 1.2572 + if(convertToAlgorithmic) { 1.2573 + /* cnv->Unicode->algo */ 1.2574 + ucnv_resetToUnicode(cnv); 1.2575 + to=algoConverter; 1.2576 + from=cnv; 1.2577 + } else { 
1.2578 + /* algo->Unicode->cnv */ 1.2579 + ucnv_resetFromUnicode(cnv); 1.2580 + from=algoConverter; 1.2581 + to=cnv; 1.2582 + } 1.2583 + 1.2584 + targetLength=ucnv_internalConvert(to, from, 1.2585 + target, targetCapacity, 1.2586 + source, sourceLength, 1.2587 + pErrorCode); 1.2588 + 1.2589 + ucnv_close(algoConverter); 1.2590 + 1.2591 + return targetLength; 1.2592 +} 1.2593 + 1.2594 +U_CAPI int32_t U_EXPORT2 1.2595 +ucnv_toAlgorithmic(UConverterType algorithmicType, 1.2596 + UConverter *cnv, 1.2597 + char *target, int32_t targetCapacity, 1.2598 + const char *source, int32_t sourceLength, 1.2599 + UErrorCode *pErrorCode) { 1.2600 + return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, 1.2601 + target, targetCapacity, 1.2602 + source, sourceLength, 1.2603 + pErrorCode); 1.2604 +} 1.2605 + 1.2606 +U_CAPI int32_t U_EXPORT2 1.2607 +ucnv_fromAlgorithmic(UConverter *cnv, 1.2608 + UConverterType algorithmicType, 1.2609 + char *target, int32_t targetCapacity, 1.2610 + const char *source, int32_t sourceLength, 1.2611 + UErrorCode *pErrorCode) { 1.2612 + return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, 1.2613 + target, targetCapacity, 1.2614 + source, sourceLength, 1.2615 + pErrorCode); 1.2616 +} 1.2617 + 1.2618 +U_CAPI UConverterType U_EXPORT2 1.2619 +ucnv_getType(const UConverter* converter) 1.2620 +{ 1.2621 + int8_t type = converter->sharedData->staticData->conversionType; 1.2622 +#if !UCONFIG_NO_LEGACY_CONVERSION 1.2623 + if(type == UCNV_MBCS) { 1.2624 + return ucnv_MBCSGetType(converter); 1.2625 + } 1.2626 +#endif 1.2627 + return (UConverterType)type; 1.2628 +} 1.2629 + 1.2630 +U_CAPI void U_EXPORT2 1.2631 +ucnv_getStarters(const UConverter* converter, 1.2632 + UBool starters[256], 1.2633 + UErrorCode* err) 1.2634 +{ 1.2635 + if (err == NULL || U_FAILURE(*err)) { 1.2636 + return; 1.2637 + } 1.2638 + 1.2639 + if(converter->sharedData->impl->getStarters != NULL) { 1.2640 + converter->sharedData->impl->getStarters(converter, starters, err); 1.2641 + } else { 
1.2642 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.2643 + } 1.2644 +} 1.2645 + 1.2646 +static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) 1.2647 +{ 1.2648 + UErrorCode errorCode; 1.2649 + const char *name; 1.2650 + int32_t i; 1.2651 + 1.2652 + if(cnv==NULL) { 1.2653 + return NULL; 1.2654 + } 1.2655 + 1.2656 + errorCode=U_ZERO_ERROR; 1.2657 + name=ucnv_getName(cnv, &errorCode); 1.2658 + if(U_FAILURE(errorCode)) { 1.2659 + return NULL; 1.2660 + } 1.2661 + 1.2662 + for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i) 1.2663 + { 1.2664 + if(0==uprv_strcmp(name, ambiguousConverters[i].name)) 1.2665 + { 1.2666 + return ambiguousConverters+i; 1.2667 + } 1.2668 + } 1.2669 + 1.2670 + return NULL; 1.2671 +} 1.2672 + 1.2673 +U_CAPI void U_EXPORT2 1.2674 +ucnv_fixFileSeparator(const UConverter *cnv, 1.2675 + UChar* source, 1.2676 + int32_t sourceLength) { 1.2677 + const UAmbiguousConverter *a; 1.2678 + int32_t i; 1.2679 + UChar variant5c; 1.2680 + 1.2681 + if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) 1.2682 + { 1.2683 + return; 1.2684 + } 1.2685 + 1.2686 + variant5c=a->variant5c; 1.2687 + for(i=0; i<sourceLength; ++i) { 1.2688 + if(source[i]==variant5c) { 1.2689 + source[i]=0x5c; 1.2690 + } 1.2691 + } 1.2692 +} 1.2693 + 1.2694 +U_CAPI UBool U_EXPORT2 1.2695 +ucnv_isAmbiguous(const UConverter *cnv) { 1.2696 + return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); 1.2697 +} 1.2698 + 1.2699 +U_CAPI void U_EXPORT2 1.2700 +ucnv_setFallback(UConverter *cnv, UBool usesFallback) 1.2701 +{ 1.2702 + cnv->useFallback = usesFallback; 1.2703 +} 1.2704 + 1.2705 +U_CAPI UBool U_EXPORT2 1.2706 +ucnv_usesFallback(const UConverter *cnv) 1.2707 +{ 1.2708 + return cnv->useFallback; 1.2709 +} 1.2710 + 1.2711 +U_CAPI void U_EXPORT2 1.2712 +ucnv_getInvalidChars (const UConverter * converter, 1.2713 + char *errBytes, 1.2714 + int8_t * len, 1.2715 + UErrorCode * err) 1.2716 +{ 1.2717 + if (err == NULL || U_FAILURE(*err)) 
1.2718 + { 1.2719 + return; 1.2720 + } 1.2721 + if (len == NULL || errBytes == NULL || converter == NULL) 1.2722 + { 1.2723 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.2724 + return; 1.2725 + } 1.2726 + if (*len < converter->invalidCharLength) 1.2727 + { 1.2728 + *err = U_INDEX_OUTOFBOUNDS_ERROR; 1.2729 + return; 1.2730 + } 1.2731 + if ((*len = converter->invalidCharLength) > 0) 1.2732 + { 1.2733 + uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); 1.2734 + } 1.2735 +} 1.2736 + 1.2737 +U_CAPI void U_EXPORT2 1.2738 +ucnv_getInvalidUChars (const UConverter * converter, 1.2739 + UChar *errChars, 1.2740 + int8_t * len, 1.2741 + UErrorCode * err) 1.2742 +{ 1.2743 + if (err == NULL || U_FAILURE(*err)) 1.2744 + { 1.2745 + return; 1.2746 + } 1.2747 + if (len == NULL || errChars == NULL || converter == NULL) 1.2748 + { 1.2749 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.2750 + return; 1.2751 + } 1.2752 + if (*len < converter->invalidUCharLength) 1.2753 + { 1.2754 + *err = U_INDEX_OUTOFBOUNDS_ERROR; 1.2755 + return; 1.2756 + } 1.2757 + if ((*len = converter->invalidUCharLength) > 0) 1.2758 + { 1.2759 + uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); 1.2760 + } 1.2761 +} 1.2762 + 1.2763 +#define SIG_MAX_LEN 5 1.2764 + 1.2765 +U_CAPI const char* U_EXPORT2 1.2766 +ucnv_detectUnicodeSignature( const char* source, 1.2767 + int32_t sourceLength, 1.2768 + int32_t* signatureLength, 1.2769 + UErrorCode* pErrorCode) { 1.2770 + int32_t dummy; 1.2771 + 1.2772 + /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN 1.2773 + * bytes we don't misdetect something 1.2774 + */ 1.2775 + char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; 1.2776 + int i = 0; 1.2777 + 1.2778 + if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ 1.2779 + return NULL; 1.2780 + } 1.2781 + 1.2782 + if(source == NULL || sourceLength < -1){ 1.2783 + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.2784 + return NULL; 1.2785 + } 1.2786 + 1.2787 + if(signatureLength == NULL) { 
1.2788 + signatureLength = &dummy; 1.2789 + } 1.2790 + 1.2791 + if(sourceLength==-1){ 1.2792 + sourceLength=(int32_t)uprv_strlen(source); 1.2793 + } 1.2794 + 1.2795 + 1.2796 + while(i<sourceLength&& i<SIG_MAX_LEN){ 1.2797 + start[i]=source[i]; 1.2798 + i++; 1.2799 + } 1.2800 + 1.2801 + if(start[0] == '\xFE' && start[1] == '\xFF') { 1.2802 + *signatureLength=2; 1.2803 + return "UTF-16BE"; 1.2804 + } else if(start[0] == '\xFF' && start[1] == '\xFE') { 1.2805 + if(start[2] == '\x00' && start[3] =='\x00') { 1.2806 + *signatureLength=4; 1.2807 + return "UTF-32LE"; 1.2808 + } else { 1.2809 + *signatureLength=2; 1.2810 + return "UTF-16LE"; 1.2811 + } 1.2812 + } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { 1.2813 + *signatureLength=3; 1.2814 + return "UTF-8"; 1.2815 + } else if(start[0] == '\x00' && start[1] == '\x00' && 1.2816 + start[2] == '\xFE' && start[3]=='\xFF') { 1.2817 + *signatureLength=4; 1.2818 + return "UTF-32BE"; 1.2819 + } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { 1.2820 + *signatureLength=3; 1.2821 + return "SCSU"; 1.2822 + } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { 1.2823 + *signatureLength=3; 1.2824 + return "BOCU-1"; 1.2825 + } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { 1.2826 + /* 1.2827 + * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ 1.2828 + * depending on the second UTF-16 code unit. 1.2829 + * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF 1.2830 + * if it occurs. 
1.2831 + * 1.2832 + * So far we have +/v 1.2833 + */ 1.2834 + if(start[3] == '\x38' && start[4] == '\x2D') { 1.2835 + /* 5 bytes +/v8- */ 1.2836 + *signatureLength=5; 1.2837 + return "UTF-7"; 1.2838 + } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { 1.2839 + /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ 1.2840 + *signatureLength=4; 1.2841 + return "UTF-7"; 1.2842 + } 1.2843 + }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ 1.2844 + *signatureLength=4; 1.2845 + return "UTF-EBCDIC"; 1.2846 + } 1.2847 + 1.2848 + 1.2849 + /* no known Unicode signature byte sequence recognized */ 1.2850 + *signatureLength=0; 1.2851 + return NULL; 1.2852 +} 1.2853 + 1.2854 +U_CAPI int32_t U_EXPORT2 1.2855 +ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) 1.2856 +{ 1.2857 + if(status == NULL || U_FAILURE(*status)){ 1.2858 + return -1; 1.2859 + } 1.2860 + if(cnv == NULL){ 1.2861 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.2862 + return -1; 1.2863 + } 1.2864 + 1.2865 + if(cnv->preFromUFirstCP >= 0){ 1.2866 + return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; 1.2867 + }else if(cnv->preFromULength < 0){ 1.2868 + return -cnv->preFromULength ; 1.2869 + }else if(cnv->fromUChar32 > 0){ 1.2870 + return 1; 1.2871 + } 1.2872 + return 0; 1.2873 + 1.2874 +} 1.2875 + 1.2876 +U_CAPI int32_t U_EXPORT2 1.2877 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ 1.2878 + 1.2879 + if(status == NULL || U_FAILURE(*status)){ 1.2880 + return -1; 1.2881 + } 1.2882 + if(cnv == NULL){ 1.2883 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.2884 + return -1; 1.2885 + } 1.2886 + 1.2887 + if(cnv->preToULength > 0){ 1.2888 + return cnv->preToULength ; 1.2889 + }else if(cnv->preToULength < 0){ 1.2890 + return -cnv->preToULength; 1.2891 + }else if(cnv->toULength > 0){ 1.2892 + return cnv->toULength; 1.2893 + } 1.2894 + return 0; 1.2895 +} 1.2896 + 1.2897 +U_CAPI UBool U_EXPORT2 1.2898 
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ 1.2899 + if (U_FAILURE(*status)) { 1.2900 + return FALSE; 1.2901 + } 1.2902 + 1.2903 + if (cnv == NULL) { 1.2904 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.2905 + return FALSE; 1.2906 + } 1.2907 + 1.2908 + switch (ucnv_getType(cnv)) { 1.2909 + case UCNV_SBCS: 1.2910 + case UCNV_DBCS: 1.2911 + case UCNV_UTF32_BigEndian: 1.2912 + case UCNV_UTF32_LittleEndian: 1.2913 + case UCNV_UTF32: 1.2914 + case UCNV_US_ASCII: 1.2915 + return TRUE; 1.2916 + default: 1.2917 + return FALSE; 1.2918 + } 1.2919 +} 1.2920 +#endif 1.2921 + 1.2922 +/* 1.2923 + * Hey, Emacs, please set the following: 1.2924 + * 1.2925 + * Local Variables: 1.2926 + * indent-tabs-mode: nil 1.2927 + * End: 1.2928 + * 1.2929 + */