intl/icu/source/common/ucnv.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 *
michael@0 9 * ucnv.c:
michael@0 10 * Implements APIs for the ICU's codeset conversion library;
michael@0 11 * mostly calls through internal functions;
michael@0 12 * created by Bertrand A. Damiba
michael@0 13 *
michael@0 14 * Modification History:
michael@0 15 *
michael@0 16 * Date Name Description
michael@0 17 * 04/04/99 helena Fixed internal header inclusion.
michael@0 18 * 05/09/00 helena Added implementation to handle fallback mappings.
michael@0 19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
michael@0 20 */
michael@0 21
michael@0 22 #include "unicode/utypes.h"
michael@0 23
michael@0 24 #if !UCONFIG_NO_CONVERSION
michael@0 25
michael@0 26 #include "unicode/ustring.h"
michael@0 27 #include "unicode/ucnv.h"
michael@0 28 #include "unicode/ucnv_err.h"
michael@0 29 #include "unicode/uset.h"
michael@0 30 #include "unicode/utf.h"
michael@0 31 #include "unicode/utf16.h"
michael@0 32 #include "putilimp.h"
michael@0 33 #include "cmemory.h"
michael@0 34 #include "cstring.h"
michael@0 35 #include "uassert.h"
michael@0 36 #include "utracimp.h"
michael@0 37 #include "ustr_imp.h"
michael@0 38 #include "ucnv_imp.h"
michael@0 39 #include "ucnv_cnv.h"
michael@0 40 #include "ucnv_bld.h"
michael@0 41
michael@0 42 /* size of intermediate and preflighting buffers in ucnv_convert() */
michael@0 43 #define CHUNK_SIZE 1024
michael@0 44
michael@0 45 typedef struct UAmbiguousConverter {
michael@0 46 const char *name;
michael@0 47 const UChar variant5c;
michael@0 48 } UAmbiguousConverter;
michael@0 49
michael@0 50 static const UAmbiguousConverter ambiguousConverters[]={
michael@0 51 { "ibm-897_P100-1995", 0xa5 },
michael@0 52 { "ibm-942_P120-1999", 0xa5 },
michael@0 53 { "ibm-943_P130-1999", 0xa5 },
michael@0 54 { "ibm-946_P100-1995", 0xa5 },
michael@0 55 { "ibm-33722_P120-1999", 0xa5 },
michael@0 56 { "ibm-1041_P100-1995", 0xa5 },
michael@0 57 /*{ "ibm-54191_P100-2006", 0xa5 },*/
michael@0 58 /*{ "ibm-62383_P100-2007", 0xa5 },*/
michael@0 59 /*{ "ibm-891_P100-1995", 0x20a9 },*/
michael@0 60 { "ibm-944_P100-1995", 0x20a9 },
michael@0 61 { "ibm-949_P110-1999", 0x20a9 },
michael@0 62 { "ibm-1363_P110-1997", 0x20a9 },
michael@0 63 { "ISO_2022,locale=ko,version=0", 0x20a9 },
michael@0 64 { "ibm-1088_P100-1995", 0x20a9 }
michael@0 65 };
michael@0 66
michael@0 67 /*Calls through createConverter */
michael@0 68 U_CAPI UConverter* U_EXPORT2
michael@0 69 ucnv_open (const char *name,
michael@0 70 UErrorCode * err)
michael@0 71 {
michael@0 72 UConverter *r;
michael@0 73
michael@0 74 if (err == NULL || U_FAILURE (*err)) {
michael@0 75 return NULL;
michael@0 76 }
michael@0 77
michael@0 78 r = ucnv_createConverter(NULL, name, err);
michael@0 79 return r;
michael@0 80 }
michael@0 81
michael@0 82 U_CAPI UConverter* U_EXPORT2
michael@0 83 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
michael@0 84 {
michael@0 85 return ucnv_createConverterFromPackage(packageName, converterName, err);
michael@0 86 }
michael@0 87
michael@0 88 /*Extracts the UChar* to a char* and calls through createConverter */
michael@0 89 U_CAPI UConverter* U_EXPORT2
michael@0 90 ucnv_openU (const UChar * name,
michael@0 91 UErrorCode * err)
michael@0 92 {
michael@0 93 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
michael@0 94
michael@0 95 if (err == NULL || U_FAILURE(*err))
michael@0 96 return NULL;
michael@0 97 if (name == NULL)
michael@0 98 return ucnv_open (NULL, err);
michael@0 99 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
michael@0 100 {
michael@0 101 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 102 return NULL;
michael@0 103 }
michael@0 104 return ucnv_open(u_austrcpy(asciiName, name), err);
michael@0 105 }
michael@0 106
michael@0 107 /* Copy the string that is represented by the UConverterPlatform enum
michael@0 108 * @param platformString An output buffer
michael@0 109 * @param platform An enum representing a platform
michael@0 110 * @return the length of the copied string.
michael@0 111 */
michael@0 112 static int32_t
michael@0 113 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
michael@0 114 {
michael@0 115 switch (pltfrm)
michael@0 116 {
michael@0 117 case UCNV_IBM:
michael@0 118 uprv_strcpy(platformString, "ibm-");
michael@0 119 return 4;
michael@0 120 case UCNV_UNKNOWN:
michael@0 121 break;
michael@0 122 }
michael@0 123
michael@0 124 /* default to empty string */
michael@0 125 *platformString = 0;
michael@0 126 return 0;
michael@0 127 }
michael@0 128
michael@0 129 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
michael@0 130 *through createConverter*/
michael@0 131 U_CAPI UConverter* U_EXPORT2
michael@0 132 ucnv_openCCSID (int32_t codepage,
michael@0 133 UConverterPlatform platform,
michael@0 134 UErrorCode * err)
michael@0 135 {
michael@0 136 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
michael@0 137 int32_t myNameLen;
michael@0 138
michael@0 139 if (err == NULL || U_FAILURE (*err))
michael@0 140 return NULL;
michael@0 141
michael@0 142 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
michael@0 143 myNameLen = ucnv_copyPlatformString(myName, platform);
michael@0 144 T_CString_integerToString(myName + myNameLen, codepage, 10);
michael@0 145
michael@0 146 return ucnv_createConverter(NULL, myName, err);
michael@0 147 }
michael@0 148
michael@0 149 /* Creating a temporary stack-based object that can be used in one thread,
michael@0 150 and created from a converter that is shared across threads.
michael@0 151 */
michael@0 152
michael@0 153 U_CAPI UConverter* U_EXPORT2
michael@0 154 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
michael@0 155 {
michael@0 156 UConverter *localConverter, *allocatedConverter;
michael@0 157 int32_t stackBufferSize;
michael@0 158 int32_t bufferSizeNeeded;
michael@0 159 char *stackBufferChars = (char *)stackBuffer;
michael@0 160 UErrorCode cbErr;
michael@0 161 UConverterToUnicodeArgs toUArgs = {
michael@0 162 sizeof(UConverterToUnicodeArgs),
michael@0 163 TRUE,
michael@0 164 NULL,
michael@0 165 NULL,
michael@0 166 NULL,
michael@0 167 NULL,
michael@0 168 NULL,
michael@0 169 NULL
michael@0 170 };
michael@0 171 UConverterFromUnicodeArgs fromUArgs = {
michael@0 172 sizeof(UConverterFromUnicodeArgs),
michael@0 173 TRUE,
michael@0 174 NULL,
michael@0 175 NULL,
michael@0 176 NULL,
michael@0 177 NULL,
michael@0 178 NULL,
michael@0 179 NULL
michael@0 180 };
michael@0 181
michael@0 182 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
michael@0 183
michael@0 184 if (status == NULL || U_FAILURE(*status)){
michael@0 185 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
michael@0 186 return NULL;
michael@0 187 }
michael@0 188
michael@0 189 if (cnv == NULL) {
michael@0 190 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 191 UTRACE_EXIT_STATUS(*status);
michael@0 192 return NULL;
michael@0 193 }
michael@0 194
michael@0 195 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
michael@0 196 ucnv_getName(cnv, status), cnv, stackBuffer);
michael@0 197
michael@0 198 if (cnv->sharedData->impl->safeClone != NULL) {
michael@0 199 /* call the custom safeClone function for sizing */
michael@0 200 bufferSizeNeeded = 0;
michael@0 201 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
michael@0 202 if (U_FAILURE(*status)) {
michael@0 203 UTRACE_EXIT_STATUS(*status);
michael@0 204 return NULL;
michael@0 205 }
michael@0 206 }
michael@0 207 else
michael@0 208 {
michael@0 209 /* inherent sizing */
michael@0 210 bufferSizeNeeded = sizeof(UConverter);
michael@0 211 }
michael@0 212
michael@0 213 if (pBufferSize == NULL) {
michael@0 214 stackBufferSize = 1;
michael@0 215 pBufferSize = &stackBufferSize;
michael@0 216 } else {
michael@0 217 stackBufferSize = *pBufferSize;
michael@0 218 if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
michael@0 219 *pBufferSize = bufferSizeNeeded;
michael@0 220 UTRACE_EXIT_VALUE(bufferSizeNeeded);
michael@0 221 return NULL;
michael@0 222 }
michael@0 223 }
michael@0 224
michael@0 225
michael@0 226 /* Pointers on 64-bit platforms need to be aligned
michael@0 227 * on a 64-bit boundary in memory.
michael@0 228 */
michael@0 229 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
michael@0 230 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
michael@0 231 if(stackBufferSize > offsetUp) {
michael@0 232 stackBufferSize -= offsetUp;
michael@0 233 stackBufferChars += offsetUp;
michael@0 234 } else {
michael@0 235 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
michael@0 236 stackBufferSize = 1;
michael@0 237 }
michael@0 238 }
michael@0 239
michael@0 240 stackBuffer = (void *)stackBufferChars;
michael@0 241
michael@0 242 /* Now, see if we must allocate any memory */
michael@0 243 if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
michael@0 244 {
michael@0 245 /* allocate one here...*/
michael@0 246 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
michael@0 247
michael@0 248 if(localConverter == NULL) {
michael@0 249 *status = U_MEMORY_ALLOCATION_ERROR;
michael@0 250 UTRACE_EXIT_STATUS(*status);
michael@0 251 return NULL;
michael@0 252 }
michael@0 253 *status = U_SAFECLONE_ALLOCATED_WARNING;
michael@0 254
michael@0 255 /* record the fact that memory was allocated */
michael@0 256 *pBufferSize = bufferSizeNeeded;
michael@0 257 } else {
michael@0 258 /* just use the stack buffer */
michael@0 259 localConverter = (UConverter*) stackBuffer;
michael@0 260 allocatedConverter = NULL;
michael@0 261 }
michael@0 262
michael@0 263 uprv_memset(localConverter, 0, bufferSizeNeeded);
michael@0 264
michael@0 265 /* Copy initial state */
michael@0 266 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
michael@0 267 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
michael@0 268
michael@0 269 /* copy the substitution string */
michael@0 270 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
michael@0 271 localConverter->subChars = (uint8_t *)localConverter->subUChars;
michael@0 272 } else {
michael@0 273 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
michael@0 274 if (localConverter->subChars == NULL) {
michael@0 275 uprv_free(allocatedConverter);
michael@0 276 UTRACE_EXIT_STATUS(*status);
michael@0 277 return NULL;
michael@0 278 }
michael@0 279 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
michael@0 280 }
michael@0 281
michael@0 282 /* now either call the safeclone fcn or not */
michael@0 283 if (cnv->sharedData->impl->safeClone != NULL) {
michael@0 284 /* call the custom safeClone function */
michael@0 285 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
michael@0 286 }
michael@0 287
michael@0 288 if(localConverter==NULL || U_FAILURE(*status)) {
michael@0 289 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
michael@0 290 uprv_free(allocatedConverter->subChars);
michael@0 291 }
michael@0 292 uprv_free(allocatedConverter);
michael@0 293 UTRACE_EXIT_STATUS(*status);
michael@0 294 return NULL;
michael@0 295 }
michael@0 296
michael@0 297 /* increment refcount of shared data if needed */
michael@0 298 /*
michael@0 299 Checking whether it's an algorithic converter is okay
michael@0 300 in multithreaded applications because the value never changes.
michael@0 301 Don't check referenceCounter for any other value.
michael@0 302 */
michael@0 303 if (cnv->sharedData->referenceCounter != ~0) {
michael@0 304 ucnv_incrementRefCount(cnv->sharedData);
michael@0 305 }
michael@0 306
michael@0 307 if(localConverter == (UConverter*)stackBuffer) {
michael@0 308 /* we're using user provided data - set to not destroy */
michael@0 309 localConverter->isCopyLocal = TRUE;
michael@0 310 }
michael@0 311
michael@0 312 /* allow callback functions to handle any memory allocation */
michael@0 313 toUArgs.converter = fromUArgs.converter = localConverter;
michael@0 314 cbErr = U_ZERO_ERROR;
michael@0 315 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
michael@0 316 cbErr = U_ZERO_ERROR;
michael@0 317 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
michael@0 318
michael@0 319 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
michael@0 320 return localConverter;
michael@0 321 }
michael@0 322
michael@0 323
michael@0 324
michael@0 325 /*Decreases the reference counter in the shared immutable section of the object
michael@0 326 *and frees the mutable part*/
michael@0 327
michael@0 328 U_CAPI void U_EXPORT2
michael@0 329 ucnv_close (UConverter * converter)
michael@0 330 {
michael@0 331 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 332
michael@0 333 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
michael@0 334
michael@0 335 if (converter == NULL)
michael@0 336 {
michael@0 337 UTRACE_EXIT();
michael@0 338 return;
michael@0 339 }
michael@0 340
michael@0 341 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
michael@0 342 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
michael@0 343
michael@0 344 /* In order to speed up the close, only call the callbacks when they have been changed.
michael@0 345 This performance check will only work when the callbacks are set within a shared library
michael@0 346 or from user code that statically links this code. */
michael@0 347 /* first, notify the callback functions that the converter is closed */
michael@0 348 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
michael@0 349 UConverterToUnicodeArgs toUArgs = {
michael@0 350 sizeof(UConverterToUnicodeArgs),
michael@0 351 TRUE,
michael@0 352 NULL,
michael@0 353 NULL,
michael@0 354 NULL,
michael@0 355 NULL,
michael@0 356 NULL,
michael@0 357 NULL
michael@0 358 };
michael@0 359
michael@0 360 toUArgs.converter = converter;
michael@0 361 errorCode = U_ZERO_ERROR;
michael@0 362 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
michael@0 363 }
michael@0 364 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
michael@0 365 UConverterFromUnicodeArgs fromUArgs = {
michael@0 366 sizeof(UConverterFromUnicodeArgs),
michael@0 367 TRUE,
michael@0 368 NULL,
michael@0 369 NULL,
michael@0 370 NULL,
michael@0 371 NULL,
michael@0 372 NULL,
michael@0 373 NULL
michael@0 374 };
michael@0 375 fromUArgs.converter = converter;
michael@0 376 errorCode = U_ZERO_ERROR;
michael@0 377 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
michael@0 378 }
michael@0 379
michael@0 380 if (converter->sharedData->impl->close != NULL) {
michael@0 381 converter->sharedData->impl->close(converter);
michael@0 382 }
michael@0 383
michael@0 384 if (converter->subChars != (uint8_t *)converter->subUChars) {
michael@0 385 uprv_free(converter->subChars);
michael@0 386 }
michael@0 387
michael@0 388 /*
michael@0 389 Checking whether it's an algorithic converter is okay
michael@0 390 in multithreaded applications because the value never changes.
michael@0 391 Don't check referenceCounter for any other value.
michael@0 392 */
michael@0 393 if (converter->sharedData->referenceCounter != ~0) {
michael@0 394 ucnv_unloadSharedDataIfReady(converter->sharedData);
michael@0 395 }
michael@0 396
michael@0 397 if(!converter->isCopyLocal){
michael@0 398 uprv_free(converter);
michael@0 399 }
michael@0 400
michael@0 401 UTRACE_EXIT();
michael@0 402 }
michael@0 403
michael@0 404 /*returns a single Name from the list, will return NULL if out of bounds
michael@0 405 */
michael@0 406 U_CAPI const char* U_EXPORT2
michael@0 407 ucnv_getAvailableName (int32_t n)
michael@0 408 {
michael@0 409 if (0 <= n && n <= 0xffff) {
michael@0 410 UErrorCode err = U_ZERO_ERROR;
michael@0 411 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
michael@0 412 if (U_SUCCESS(err)) {
michael@0 413 return name;
michael@0 414 }
michael@0 415 }
michael@0 416 return NULL;
michael@0 417 }
michael@0 418
michael@0 419 U_CAPI int32_t U_EXPORT2
michael@0 420 ucnv_countAvailable ()
michael@0 421 {
michael@0 422 UErrorCode err = U_ZERO_ERROR;
michael@0 423 return ucnv_bld_countAvailableConverters(&err);
michael@0 424 }
michael@0 425
michael@0 426 U_CAPI void U_EXPORT2
michael@0 427 ucnv_getSubstChars (const UConverter * converter,
michael@0 428 char *mySubChar,
michael@0 429 int8_t * len,
michael@0 430 UErrorCode * err)
michael@0 431 {
michael@0 432 if (U_FAILURE (*err))
michael@0 433 return;
michael@0 434
michael@0 435 if (converter->subCharLen <= 0) {
michael@0 436 /* Unicode string or empty string from ucnv_setSubstString(). */
michael@0 437 *len = 0;
michael@0 438 return;
michael@0 439 }
michael@0 440
michael@0 441 if (*len < converter->subCharLen) /*not enough space in subChars */
michael@0 442 {
michael@0 443 *err = U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 444 return;
michael@0 445 }
michael@0 446
michael@0 447 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
michael@0 448 *len = converter->subCharLen; /*store # of bytes copied to buffer */
michael@0 449 }
michael@0 450
michael@0 451 U_CAPI void U_EXPORT2
michael@0 452 ucnv_setSubstChars (UConverter * converter,
michael@0 453 const char *mySubChar,
michael@0 454 int8_t len,
michael@0 455 UErrorCode * err)
michael@0 456 {
michael@0 457 if (U_FAILURE (*err))
michael@0 458 return;
michael@0 459
michael@0 460 /*Makes sure that the subChar is within the codepages char length boundaries */
michael@0 461 if ((len > converter->sharedData->staticData->maxBytesPerChar)
michael@0 462 || (len < converter->sharedData->staticData->minBytesPerChar))
michael@0 463 {
michael@0 464 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 465 return;
michael@0 466 }
michael@0 467
michael@0 468 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
michael@0 469 converter->subCharLen = len; /*sets the new len */
michael@0 470
michael@0 471 /*
michael@0 472 * There is currently (2001Feb) no separate API to set/get subChar1.
michael@0 473 * In order to always have subChar written after it is explicitly set,
michael@0 474 * we set subChar1 to 0.
michael@0 475 */
michael@0 476 converter->subChar1 = 0;
michael@0 477
michael@0 478 return;
michael@0 479 }
michael@0 480
michael@0 481 U_CAPI void U_EXPORT2
michael@0 482 ucnv_setSubstString(UConverter *cnv,
michael@0 483 const UChar *s,
michael@0 484 int32_t length,
michael@0 485 UErrorCode *err) {
michael@0 486 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
michael@0 487 char chars[UCNV_ERROR_BUFFER_LENGTH];
michael@0 488
michael@0 489 UConverter *clone;
michael@0 490 uint8_t *subChars;
michael@0 491 int32_t cloneSize, length8;
michael@0 492
michael@0 493 /* Let the following functions check all arguments. */
michael@0 494 cloneSize = sizeof(cloneBuffer);
michael@0 495 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
michael@0 496 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
michael@0 497 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
michael@0 498 ucnv_close(clone);
michael@0 499 if (U_FAILURE(*err)) {
michael@0 500 return;
michael@0 501 }
michael@0 502
michael@0 503 if (cnv->sharedData->impl->writeSub == NULL
michael@0 504 #if !UCONFIG_NO_LEGACY_CONVERSION
michael@0 505 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
michael@0 506 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
michael@0 507 #endif
michael@0 508 ) {
michael@0 509 /* The converter is not stateful. Store the charset bytes as a fixed string. */
michael@0 510 subChars = (uint8_t *)chars;
michael@0 511 } else {
michael@0 512 /*
michael@0 513 * The converter has a non-default writeSub() function, indicating
michael@0 514 * that it is stateful.
michael@0 515 * Store the Unicode string for on-the-fly conversion for correct
michael@0 516 * state handling.
michael@0 517 */
michael@0 518 if (length > UCNV_ERROR_BUFFER_LENGTH) {
michael@0 519 /*
michael@0 520 * Should not occur. The converter should output at least one byte
michael@0 521 * per UChar, which means that ucnv_fromUChars() should catch all
michael@0 522 * overflows.
michael@0 523 */
michael@0 524 *err = U_BUFFER_OVERFLOW_ERROR;
michael@0 525 return;
michael@0 526 }
michael@0 527 subChars = (uint8_t *)s;
michael@0 528 if (length < 0) {
michael@0 529 length = u_strlen(s);
michael@0 530 }
michael@0 531 length8 = length * U_SIZEOF_UCHAR;
michael@0 532 }
michael@0 533
michael@0 534 /*
michael@0 535 * For storing the substitution string, select either the small buffer inside
michael@0 536 * UConverter or allocate a subChars buffer.
michael@0 537 */
michael@0 538 if (length8 > UCNV_MAX_SUBCHAR_LEN) {
michael@0 539 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
michael@0 540 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
michael@0 541 /* Allocate a new buffer for the string. */
michael@0 542 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
michael@0 543 if (cnv->subChars == NULL) {
michael@0 544 cnv->subChars = (uint8_t *)cnv->subUChars;
michael@0 545 *err = U_MEMORY_ALLOCATION_ERROR;
michael@0 546 return;
michael@0 547 }
michael@0 548 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
michael@0 549 }
michael@0 550 }
michael@0 551
michael@0 552 /* Copy the substitution string into the UConverter or its subChars buffer. */
michael@0 553 if (length8 == 0) {
michael@0 554 cnv->subCharLen = 0;
michael@0 555 } else {
michael@0 556 uprv_memcpy(cnv->subChars, subChars, length8);
michael@0 557 if (subChars == (uint8_t *)chars) {
michael@0 558 cnv->subCharLen = (int8_t)length8;
michael@0 559 } else /* subChars == s */ {
michael@0 560 cnv->subCharLen = (int8_t)-length;
michael@0 561 }
michael@0 562 }
michael@0 563
michael@0 564 /* See comment in ucnv_setSubstChars(). */
michael@0 565 cnv->subChar1 = 0;
michael@0 566 }
michael@0 567
michael@0 568 /*resets the internal states of a converter
michael@0 569 *goal : have the same behaviour than a freshly created converter
michael@0 570 */
michael@0 571 static void _reset(UConverter *converter, UConverterResetChoice choice,
michael@0 572 UBool callCallback) {
michael@0 573 if(converter == NULL) {
michael@0 574 return;
michael@0 575 }
michael@0 576
michael@0 577 if(callCallback) {
michael@0 578 /* first, notify the callback functions that the converter is reset */
michael@0 579 UErrorCode errorCode;
michael@0 580
michael@0 581 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
michael@0 582 UConverterToUnicodeArgs toUArgs = {
michael@0 583 sizeof(UConverterToUnicodeArgs),
michael@0 584 TRUE,
michael@0 585 NULL,
michael@0 586 NULL,
michael@0 587 NULL,
michael@0 588 NULL,
michael@0 589 NULL,
michael@0 590 NULL
michael@0 591 };
michael@0 592 toUArgs.converter = converter;
michael@0 593 errorCode = U_ZERO_ERROR;
michael@0 594 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
michael@0 595 }
michael@0 596 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
michael@0 597 UConverterFromUnicodeArgs fromUArgs = {
michael@0 598 sizeof(UConverterFromUnicodeArgs),
michael@0 599 TRUE,
michael@0 600 NULL,
michael@0 601 NULL,
michael@0 602 NULL,
michael@0 603 NULL,
michael@0 604 NULL,
michael@0 605 NULL
michael@0 606 };
michael@0 607 fromUArgs.converter = converter;
michael@0 608 errorCode = U_ZERO_ERROR;
michael@0 609 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
michael@0 610 }
michael@0 611 }
michael@0 612
michael@0 613 /* now reset the converter itself */
michael@0 614 if(choice<=UCNV_RESET_TO_UNICODE) {
michael@0 615 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
michael@0 616 converter->mode = 0;
michael@0 617 converter->toULength = 0;
michael@0 618 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
michael@0 619 converter->preToULength = 0;
michael@0 620 }
michael@0 621 if(choice!=UCNV_RESET_TO_UNICODE) {
michael@0 622 converter->fromUnicodeStatus = 0;
michael@0 623 converter->fromUChar32 = 0;
michael@0 624 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
michael@0 625 converter->preFromUFirstCP = U_SENTINEL;
michael@0 626 converter->preFromULength = 0;
michael@0 627 }
michael@0 628
michael@0 629 if (converter->sharedData->impl->reset != NULL) {
michael@0 630 /* call the custom reset function */
michael@0 631 converter->sharedData->impl->reset(converter, choice);
michael@0 632 }
michael@0 633 }
michael@0 634
michael@0 635 U_CAPI void U_EXPORT2
michael@0 636 ucnv_reset(UConverter *converter)
michael@0 637 {
michael@0 638 _reset(converter, UCNV_RESET_BOTH, TRUE);
michael@0 639 }
michael@0 640
michael@0 641 U_CAPI void U_EXPORT2
michael@0 642 ucnv_resetToUnicode(UConverter *converter)
michael@0 643 {
michael@0 644 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
michael@0 645 }
michael@0 646
michael@0 647 U_CAPI void U_EXPORT2
michael@0 648 ucnv_resetFromUnicode(UConverter *converter)
michael@0 649 {
michael@0 650 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
michael@0 651 }
michael@0 652
michael@0 653 U_CAPI int8_t U_EXPORT2
michael@0 654 ucnv_getMaxCharSize (const UConverter * converter)
michael@0 655 {
michael@0 656 return converter->maxBytesPerUChar;
michael@0 657 }
michael@0 658
michael@0 659
michael@0 660 U_CAPI int8_t U_EXPORT2
michael@0 661 ucnv_getMinCharSize (const UConverter * converter)
michael@0 662 {
michael@0 663 return converter->sharedData->staticData->minBytesPerChar;
michael@0 664 }
michael@0 665
michael@0 666 U_CAPI const char* U_EXPORT2
michael@0 667 ucnv_getName (const UConverter * converter, UErrorCode * err)
michael@0 668
michael@0 669 {
michael@0 670 if (U_FAILURE (*err))
michael@0 671 return NULL;
michael@0 672 if(converter->sharedData->impl->getName){
michael@0 673 const char* temp= converter->sharedData->impl->getName(converter);
michael@0 674 if(temp)
michael@0 675 return temp;
michael@0 676 }
michael@0 677 return converter->sharedData->staticData->name;
michael@0 678 }
michael@0 679
michael@0 680 U_CAPI int32_t U_EXPORT2
michael@0 681 ucnv_getCCSID(const UConverter * converter,
michael@0 682 UErrorCode * err)
michael@0 683 {
michael@0 684 int32_t ccsid;
michael@0 685 if (U_FAILURE (*err))
michael@0 686 return -1;
michael@0 687
michael@0 688 ccsid = converter->sharedData->staticData->codepage;
michael@0 689 if (ccsid == 0) {
michael@0 690 /* Rare case. This is for cases like gb18030,
michael@0 691 which doesn't have an IBM canonical name, but does have an IBM alias. */
michael@0 692 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
michael@0 693 if (U_SUCCESS(*err) && standardName) {
michael@0 694 const char *ccsidStr = uprv_strchr(standardName, '-');
michael@0 695 if (ccsidStr) {
michael@0 696 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
michael@0 697 }
michael@0 698 }
michael@0 699 }
michael@0 700 return ccsid;
michael@0 701 }
michael@0 702
michael@0 703
michael@0 704 U_CAPI UConverterPlatform U_EXPORT2
michael@0 705 ucnv_getPlatform (const UConverter * converter,
michael@0 706 UErrorCode * err)
michael@0 707 {
michael@0 708 if (U_FAILURE (*err))
michael@0 709 return UCNV_UNKNOWN;
michael@0 710
michael@0 711 return (UConverterPlatform)converter->sharedData->staticData->platform;
michael@0 712 }
michael@0 713
michael@0 714 U_CAPI void U_EXPORT2
michael@0 715 ucnv_getToUCallBack (const UConverter * converter,
michael@0 716 UConverterToUCallback *action,
michael@0 717 const void **context)
michael@0 718 {
michael@0 719 *action = converter->fromCharErrorBehaviour;
michael@0 720 *context = converter->toUContext;
michael@0 721 }
michael@0 722
michael@0 723 U_CAPI void U_EXPORT2
michael@0 724 ucnv_getFromUCallBack (const UConverter * converter,
michael@0 725 UConverterFromUCallback *action,
michael@0 726 const void **context)
michael@0 727 {
michael@0 728 *action = converter->fromUCharErrorBehaviour;
michael@0 729 *context = converter->fromUContext;
michael@0 730 }
michael@0 731
michael@0 732 U_CAPI void U_EXPORT2
michael@0 733 ucnv_setToUCallBack (UConverter * converter,
michael@0 734 UConverterToUCallback newAction,
michael@0 735 const void* newContext,
michael@0 736 UConverterToUCallback *oldAction,
michael@0 737 const void** oldContext,
michael@0 738 UErrorCode * err)
michael@0 739 {
michael@0 740 if (U_FAILURE (*err))
michael@0 741 return;
michael@0 742 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
michael@0 743 converter->fromCharErrorBehaviour = newAction;
michael@0 744 if (oldContext) *oldContext = converter->toUContext;
michael@0 745 converter->toUContext = newContext;
michael@0 746 }
michael@0 747
michael@0 748 U_CAPI void U_EXPORT2
michael@0 749 ucnv_setFromUCallBack (UConverter * converter,
michael@0 750 UConverterFromUCallback newAction,
michael@0 751 const void* newContext,
michael@0 752 UConverterFromUCallback *oldAction,
michael@0 753 const void** oldContext,
michael@0 754 UErrorCode * err)
michael@0 755 {
michael@0 756 if (U_FAILURE (*err))
michael@0 757 return;
michael@0 758 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
michael@0 759 converter->fromUCharErrorBehaviour = newAction;
michael@0 760 if (oldContext) *oldContext = converter->fromUContext;
michael@0 761 converter->fromUContext = newContext;
michael@0 762 }
michael@0 763
/*
 * Adjusts an array of offsets after a conversion call.
 * @param offsets          the offsets to adjust in place
 * @param length           number of entries in offsets
 * @param sourceIndex      index to add to each offset; a negative value means
 *                         the conversion function does not handle offsets
 * @param errorInputLength length of the input sequence that caused an error,
 *                         if any; it is subtracted from sourceIndex
 *
 * With delta = sourceIndex - errorInputLength (or -1 for a negative
 * sourceIndex): delta == 0 leaves the array alone, delta > 0 is added to every
 * non-negative offset, and delta < 0 sets every offset to -1.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t *cursor;
    int32_t *const end = offsets+length;
    const int32_t delta = sourceIndex>=0 ? sourceIndex-errorInputLength : -1;

    if(delta>0) {
        /* shift every valid offset; negative offsets stay as they are */
        for(cursor=offsets; cursor<end; ++cursor) {
            if(*cursor>=0) {
                *cursor+=delta;
            }
        }
    } else if(delta<0) {
        /*
         * offsets are not handled, or the error input sequence started in a
         * previous buffer: invalidate all of them
         */
        for(cursor=offsets; cursor<end; ++cursor) {
            *cursor=-1;
        }
    }
    /* delta==0 is the most common case and needs no work */
}
michael@0 808
michael@0 809 /* ucnv_fromUnicode --------------------------------------------------------- */
michael@0 810
michael@0 811 /*
michael@0 812 * Implementation note for m:n conversions
michael@0 813 *
michael@0 814 * While collecting source units to find the longest match for m:n conversion,
michael@0 815 * some source units may need to be stored for a partial match.
michael@0 816 * When a second buffer does not yield a match on all of the previously stored
michael@0 817 * source units, then they must be "replayed", i.e., fed back into the converter.
michael@0 818 *
michael@0 819 * The code relies on the fact that replaying will not nest -
michael@0 820 * converting a replay buffer will not result in a replay.
michael@0 821 * This is because a replay is necessary only after the _continuation_ of a
michael@0 822 * partial match failed, but a replay buffer is converted as a whole.
michael@0 823 * It may result in some of its units being stored again for a partial match,
michael@0 824 * but there will not be a continuation _during_ the replay which could fail.
michael@0 825 *
michael@0 826 * It is conceivable that a callback function could call the converter
michael@0 827 * recursively in a way that causes another replay to be stored, but that
michael@0 828 * would be an error in the callback function.
michael@0 829 * Such violations will cause assertion failures in a debug build,
michael@0 830 * and wrong output, but they will not cause a crash.
michael@0 831 */
michael@0 832
/*
 * Drive the converter's fromUnicode implementation until the input in pArgs
 * is consumed or an error remains that a callback did not or cannot resolve.
 *
 * pArgs - conversion arguments; source, sourceLimit, flush, target and
 *         offsets are read and updated in place
 * err   - in/out error code; if it is already a failure on entry, the
 *         conversion function is not called and the error is handled
 *         as if the conversion function had reported it
 *
 * Each outer iteration calls the implementation function once. The inner
 * loop then
 *   - adjusts the offsets written since the last pass (_updateOffsets),
 *   - switches pArgs->source to/from the local replay buffer when
 *     cnv->preFromULength is negative,
 *   - injects U_TRUNCATED_CHAR_FOUND when flush is set, the input is used up
 *     and cnv->fromUChar32 is still pending,
 *   - calls cnv->fromUCharErrorBehaviour for U_INVALID_CHAR_FOUND,
 *     U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND.
 *
 * When returning with an error while replaying, the unconsumed replay units
 * are copied back into cnv->preFromU (stored with a negative length) and the
 * caller's source, sourceLimit and flush are restored in pArgs.
 */
michael@0 833 static void
michael@0 834 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
michael@0 835 UConverterFromUnicode fromUnicode;
michael@0 836 UConverter *cnv;
michael@0 837 const UChar *s;
michael@0 838 char *t;
michael@0 839 int32_t *offsets;
michael@0 840 int32_t sourceIndex;
michael@0 841 int32_t errorInputLength;
michael@0 842 UBool converterSawEndOfInput, calledCallback;
michael@0 843
michael@0 844 /* variables for m:n conversion */
michael@0 845 UChar replay[UCNV_EXT_MAX_UCHARS];
michael@0 846 const UChar *realSource, *realSourceLimit;
michael@0 847 int32_t realSourceIndex;
michael@0 848 UBool realFlush;
michael@0 849
/*
 * s and t remember where source and target stood at the last pass;
 * the distances to pArgs->source/target give the units consumed/written since
 */
michael@0 850 cnv=pArgs->converter;
michael@0 851 s=pArgs->source;
michael@0 852 t=pArgs->target;
michael@0 853 offsets=pArgs->offsets;
michael@0 854
michael@0 855 /* get the converter implementation function */
michael@0 856 sourceIndex=0;
michael@0 857 if(offsets==NULL) {
michael@0 858 fromUnicode=cnv->sharedData->impl->fromUnicode;
michael@0 859 } else {
michael@0 860 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
michael@0 861 if(fromUnicode==NULL) {
michael@0 862 /* there is no WithOffsets implementation */
michael@0 863 fromUnicode=cnv->sharedData->impl->fromUnicode;
michael@0 864 /* we will write -1 for each offset */
michael@0 865 sourceIndex=-1;
michael@0 866 }
michael@0 867 }
michael@0 868
michael@0 869 if(cnv->preFromULength>=0) {
michael@0 870 /* normal mode */
michael@0 871 realSource=NULL;
michael@0 872
michael@0 873 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
michael@0 874 realSourceLimit=NULL;
michael@0 875 realFlush=FALSE;
michael@0 876 realSourceIndex=0;
michael@0 877 } else {
michael@0 878 /*
michael@0 879 * Previous m:n conversion stored source units from a partial match
michael@0 880 * and failed to consume all of them.
michael@0 881 * We need to "replay" them from a temporary buffer and convert them first.
michael@0 882 */
michael@0 883 realSource=pArgs->source;
michael@0 884 realSourceLimit=pArgs->sourceLimit;
michael@0 885 realFlush=pArgs->flush;
michael@0 886 realSourceIndex=sourceIndex;
michael@0 887
/* preFromULength is negative here, so its negation is the number of stored units */
michael@0 888 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
michael@0 889 pArgs->source=replay;
michael@0 890 pArgs->sourceLimit=replay-cnv->preFromULength;
michael@0 891 pArgs->flush=FALSE;
/* offsets for output from these replayed units are written as -1 (see _updateOffsets) */
michael@0 892 sourceIndex=-1;
michael@0 893
michael@0 894 cnv->preFromULength=0;
michael@0 895 }
michael@0 896
michael@0 897 /*
michael@0 898 * loop for conversion and error handling
michael@0 899 *
michael@0 900 * loop {
michael@0 901 * convert
michael@0 902 * loop {
michael@0 903 * update offsets
michael@0 904 * handle end of input
michael@0 905 * handle errors/call callback
michael@0 906 * }
michael@0 907 * }
michael@0 908 */
michael@0 909 for(;;) {
michael@0 910 if(U_SUCCESS(*err)) {
michael@0 911 /* convert */
michael@0 912 fromUnicode(pArgs, err);
michael@0 913
michael@0 914 /*
michael@0 915 * set a flag for whether the converter
michael@0 916 * successfully processed the end of the input
michael@0 917 *
michael@0 918 * need not check cnv->preFromULength==0 because a replay (<0) will cause
michael@0 919 * s<sourceLimit before converterSawEndOfInput is checked
michael@0 920 */
michael@0 921 converterSawEndOfInput=
michael@0 922 (UBool)(U_SUCCESS(*err) &&
michael@0 923 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
michael@0 924 cnv->fromUChar32==0);
michael@0 925 } else {
michael@0 926 /* handle error from ucnv_convertEx() */
michael@0 927 converterSawEndOfInput=FALSE;
michael@0 928 }
michael@0 929
michael@0 930 /* no callback called yet for this iteration */
michael@0 931 calledCallback=FALSE;
michael@0 932
michael@0 933 /* no sourceIndex adjustment for conversion, only for callback output */
michael@0 934 errorInputLength=0;
michael@0 935
michael@0 936 /*
michael@0 937 * loop for offsets and error handling
michael@0 938 *
michael@0 939 * iterates at most 3 times:
michael@0 940 * 1. to clean up after the conversion function
michael@0 941 * 2. after the callback
michael@0 942 * 3. after the callback again if there was truncated input
michael@0 943 */
michael@0 944 for(;;) {
michael@0 945 /* update offsets if we write any */
michael@0 946 if(offsets!=NULL) {
michael@0 947 int32_t length=(int32_t)(pArgs->target-t);
michael@0 948 if(length>0) {
michael@0 949 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
michael@0 950
michael@0 951 /*
michael@0 952 * if a converter handles offsets and updates the offsets
michael@0 953 * pointer at the end, then pArgs->offsets should not change
michael@0 954 * here;
michael@0 955 * however, some converters do not handle offsets at all
michael@0 956 * (sourceIndex<0) or may not update the offsets pointer
michael@0 957 */
michael@0 958 pArgs->offsets=offsets+=length;
michael@0 959 }
michael@0 960
michael@0 961 if(sourceIndex>=0) {
michael@0 962 sourceIndex+=(int32_t)(pArgs->source-s);
michael@0 963 }
michael@0 964 }
michael@0 965
michael@0 966 if(cnv->preFromULength<0) {
michael@0 967 /*
michael@0 968 * switch the source to new replay units (cannot occur while replaying)
michael@0 969 * after offset handling and before end-of-input and callback handling
michael@0 970 */
michael@0 971 if(realSource==NULL) {
michael@0 972 realSource=pArgs->source;
michael@0 973 realSourceLimit=pArgs->sourceLimit;
michael@0 974 realFlush=pArgs->flush;
michael@0 975 realSourceIndex=sourceIndex;
michael@0 976
michael@0 977 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
michael@0 978 pArgs->source=replay;
michael@0 979 pArgs->sourceLimit=replay-cnv->preFromULength;
michael@0 980 pArgs->flush=FALSE;
/*
 * preFromULength is negative, so this moves sourceIndex back by the
 * number of replayed units; any negative result is normalized to -1
 */
michael@0 981 if((sourceIndex+=cnv->preFromULength)<0) {
michael@0 982 sourceIndex=-1;
michael@0 983 }
michael@0 984
michael@0 985 cnv->preFromULength=0;
michael@0 986 } else {
michael@0 987 /* see implementation note before _fromUnicodeWithCallback() */
/* realSource!=NULL in this branch, so the assertion below fails by design in a debug build */
michael@0 988 U_ASSERT(realSource==NULL);
michael@0 989 *err=U_INTERNAL_PROGRAM_ERROR;
michael@0 990 }
michael@0 991 }
michael@0 992
michael@0 993 /* update pointers */
michael@0 994 s=pArgs->source;
michael@0 995 t=pArgs->target;
michael@0 996
michael@0 997 if(U_SUCCESS(*err)) {
michael@0 998 if(s<pArgs->sourceLimit) {
michael@0 999 /*
michael@0 1000 * continue with the conversion loop while there is still input left
michael@0 1001 * (continue converting by breaking out of only the inner loop)
michael@0 1002 */
michael@0 1003 break;
michael@0 1004 } else if(realSource!=NULL) {
michael@0 1005 /* switch back from replaying to the real source and continue */
michael@0 1006 pArgs->source=realSource;
michael@0 1007 pArgs->sourceLimit=realSourceLimit;
michael@0 1008 pArgs->flush=realFlush;
michael@0 1009 sourceIndex=realSourceIndex;
michael@0 1010
michael@0 1011 realSource=NULL;
michael@0 1012 break;
michael@0 1013 } else if(pArgs->flush && cnv->fromUChar32!=0) {
michael@0 1014 /*
michael@0 1015 * the entire input stream is consumed
michael@0 1016 * and there is a partial, truncated input sequence left
michael@0 1017 */
michael@0 1018
michael@0 1019 /* inject an error and continue with callback handling */
michael@0 1020 *err=U_TRUNCATED_CHAR_FOUND;
michael@0 1021 calledCallback=FALSE; /* new error condition */
michael@0 1022 } else {
michael@0 1023 /* input consumed */
michael@0 1024 if(pArgs->flush) {
michael@0 1025 /*
michael@0 1026 * return to the conversion loop once more if the flush
michael@0 1027 * flag is set and the conversion function has not
michael@0 1028 * successfully processed the end of the input yet
michael@0 1029 *
michael@0 1030 * (continue converting by breaking out of only the inner loop)
michael@0 1031 */
michael@0 1032 if(!converterSawEndOfInput) {
michael@0 1033 break;
michael@0 1034 }
michael@0 1035
michael@0 1036 /* reset the converter without calling the callback function */
michael@0 1037 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
michael@0 1038 }
michael@0 1039
michael@0 1040 /* done successfully */
michael@0 1041 return;
michael@0 1042 }
michael@0 1043 }
michael@0 1044
michael@0 1045 /* U_FAILURE(*err) */
michael@0 1046 {
michael@0 1047 UErrorCode e;
michael@0 1048
/* e is assigned inside the condition; it is only read after that assignment was evaluated */
michael@0 1049 if( calledCallback ||
michael@0 1050 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
michael@0 1051 (e!=U_INVALID_CHAR_FOUND &&
michael@0 1052 e!=U_ILLEGAL_CHAR_FOUND &&
michael@0 1053 e!=U_TRUNCATED_CHAR_FOUND)
michael@0 1054 ) {
michael@0 1055 /*
michael@0 1056 * the callback did not or cannot resolve the error:
michael@0 1057 * set output pointers and return
michael@0 1058 *
michael@0 1059 * the check for buffer overflow is redundant but it is
michael@0 1060 * a high-runner case and hopefully documents the intent
michael@0 1061 * well
michael@0 1062 *
michael@0 1063 * if we were replaying, then the replay buffer must be
michael@0 1064 * copied back into the UConverter
michael@0 1065 * and the real arguments must be restored
michael@0 1066 */
michael@0 1067 if(realSource!=NULL) {
michael@0 1068 int32_t length;
michael@0 1069
michael@0 1070 U_ASSERT(cnv->preFromULength==0);
michael@0 1071
michael@0 1072 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
michael@0 1073 if(length>0) {
michael@0 1074 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
/* a negative length marks the stored units as "to be replayed" on the next call */
michael@0 1075 cnv->preFromULength=(int8_t)-length;
michael@0 1076 }
michael@0 1077
michael@0 1078 pArgs->source=realSource;
michael@0 1079 pArgs->sourceLimit=realSourceLimit;
michael@0 1080 pArgs->flush=realFlush;
michael@0 1081 }
michael@0 1082
michael@0 1083 return;
michael@0 1084 }
michael@0 1085 }
michael@0 1086
michael@0 1087 /* callback handling */
michael@0 1088 {
michael@0 1089 UChar32 codePoint;
michael@0 1090
michael@0 1091 /* get and write the code point */
michael@0 1092 codePoint=cnv->fromUChar32;
michael@0 1093 errorInputLength=0;
/* stores codePoint as UTF-16 in invalidUCharBuffer and advances errorInputLength by the units written */
michael@0 1094 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
michael@0 1095 cnv->invalidUCharLength=(int8_t)errorInputLength;
michael@0 1096
michael@0 1097 /* set the converter state to deal with the next character */
michael@0 1098 cnv->fromUChar32=0;
michael@0 1099
michael@0 1100 /* call the callback function */
/* only U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED; illegal and truncated input as UCNV_ILLEGAL */
michael@0 1101 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
michael@0 1102 cnv->invalidUCharBuffer, errorInputLength, codePoint,
michael@0 1103 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
michael@0 1104 err);
michael@0 1105 }
michael@0 1106
michael@0 1107 /*
michael@0 1108 * loop back to the offset handling
michael@0 1109 *
michael@0 1110 * this flag will indicate after offset handling
michael@0 1111 * that a callback was called;
michael@0 1112 * if the callback did not resolve the error, then we return
michael@0 1113 */
michael@0 1114 calledCallback=TRUE;
michael@0 1115 }
michael@0 1116 }
michael@0 1117 }
michael@0 1118
michael@0 1119 /*
michael@0 1120 * Output the fromUnicode overflow buffer.
michael@0 1121 * Call this function if(cnv->charErrorBufferLength>0).
michael@0 1122 * @return TRUE if overflow
michael@0 1123 */
michael@0 1124 static UBool
michael@0 1125 ucnv_outputOverflowFromUnicode(UConverter *cnv,
michael@0 1126 char **target, const char *targetLimit,
michael@0 1127 int32_t **pOffsets,
michael@0 1128 UErrorCode *err) {
michael@0 1129 int32_t *offsets;
michael@0 1130 char *overflow, *t;
michael@0 1131 int32_t i, length;
michael@0 1132
michael@0 1133 t=*target;
michael@0 1134 if(pOffsets!=NULL) {
michael@0 1135 offsets=*pOffsets;
michael@0 1136 } else {
michael@0 1137 offsets=NULL;
michael@0 1138 }
michael@0 1139
michael@0 1140 overflow=(char *)cnv->charErrorBuffer;
michael@0 1141 length=cnv->charErrorBufferLength;
michael@0 1142 i=0;
michael@0 1143 while(i<length) {
michael@0 1144 if(t==targetLimit) {
michael@0 1145 /* the overflow buffer contains too much, keep the rest */
michael@0 1146 int32_t j=0;
michael@0 1147
michael@0 1148 do {
michael@0 1149 overflow[j++]=overflow[i++];
michael@0 1150 } while(i<length);
michael@0 1151
michael@0 1152 cnv->charErrorBufferLength=(int8_t)j;
michael@0 1153 *target=t;
michael@0 1154 if(offsets!=NULL) {
michael@0 1155 *pOffsets=offsets;
michael@0 1156 }
michael@0 1157 *err=U_BUFFER_OVERFLOW_ERROR;
michael@0 1158 return TRUE;
michael@0 1159 }
michael@0 1160
michael@0 1161 /* copy the overflow contents to the target */
michael@0 1162 *t++=overflow[i++];
michael@0 1163 if(offsets!=NULL) {
michael@0 1164 *offsets++=-1; /* no source index available for old output */
michael@0 1165 }
michael@0 1166 }
michael@0 1167
michael@0 1168 /* the overflow buffer is completely copied to the target */
michael@0 1169 cnv->charErrorBufferLength=0;
michael@0 1170 *target=t;
michael@0 1171 if(offsets!=NULL) {
michael@0 1172 *pOffsets=offsets;
michael@0 1173 }
michael@0 1174 return FALSE;
michael@0 1175 }
michael@0 1176
michael@0 1177 U_CAPI void U_EXPORT2
michael@0 1178 ucnv_fromUnicode(UConverter *cnv,
michael@0 1179 char **target, const char *targetLimit,
michael@0 1180 const UChar **source, const UChar *sourceLimit,
michael@0 1181 int32_t *offsets,
michael@0 1182 UBool flush,
michael@0 1183 UErrorCode *err) {
michael@0 1184 UConverterFromUnicodeArgs args;
michael@0 1185 const UChar *s;
michael@0 1186 char *t;
michael@0 1187
michael@0 1188 /* check parameters */
michael@0 1189 if(err==NULL || U_FAILURE(*err)) {
michael@0 1190 return;
michael@0 1191 }
michael@0 1192
michael@0 1193 if(cnv==NULL || target==NULL || source==NULL) {
michael@0 1194 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1195 return;
michael@0 1196 }
michael@0 1197
michael@0 1198 s=*source;
michael@0 1199 t=*target;
michael@0 1200
michael@0 1201 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
michael@0 1202 /*
michael@0 1203 Prevent code from going into an infinite loop in case we do hit this
michael@0 1204 limit. The limit pointer is expected to be on a UChar * boundary.
michael@0 1205 This also prevents the next argument check from failing.
michael@0 1206 */
michael@0 1207 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
michael@0 1208 }
michael@0 1209
michael@0 1210 /*
michael@0 1211 * All these conditions should never happen.
michael@0 1212 *
michael@0 1213 * 1) Make sure that the limits are >= to the address source or target
michael@0 1214 *
michael@0 1215 * 2) Make sure that the buffer sizes do not exceed the number range for
michael@0 1216 * int32_t because some functions use the size (in units or bytes)
michael@0 1217 * rather than comparing pointers, and because offsets are int32_t values.
michael@0 1218 *
michael@0 1219 * size_t is guaranteed to be unsigned and large enough for the job.
michael@0 1220 *
michael@0 1221 * Return with an error instead of adjusting the limits because we would
michael@0 1222 * not be able to maintain the semantics that either the source must be
michael@0 1223 * consumed or the target filled (unless an error occurs).
michael@0 1224 * An adjustment would be targetLimit=t+0x7fffffff; for example.
michael@0 1225 *
michael@0 1226 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
michael@0 1227 * to a char * pointer and provide an incomplete UChar code unit.
michael@0 1228 */
michael@0 1229 if (sourceLimit<s || targetLimit<t ||
michael@0 1230 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
michael@0 1231 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
michael@0 1232 (((const char *)sourceLimit-(const char *)s) & 1) != 0)
michael@0 1233 {
michael@0 1234 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1235 return;
michael@0 1236 }
michael@0 1237
michael@0 1238 /* output the target overflow buffer */
michael@0 1239 if( cnv->charErrorBufferLength>0 &&
michael@0 1240 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
michael@0 1241 ) {
michael@0 1242 /* U_BUFFER_OVERFLOW_ERROR */
michael@0 1243 return;
michael@0 1244 }
michael@0 1245 /* *target may have moved, therefore stop using t */
michael@0 1246
michael@0 1247 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
michael@0 1248 /* the overflow buffer is emptied and there is no new input: we are done */
michael@0 1249 return;
michael@0 1250 }
michael@0 1251
michael@0 1252 /*
michael@0 1253 * Do not simply return with a buffer overflow error if
michael@0 1254 * !flush && t==targetLimit
michael@0 1255 * because it is possible that the source will not generate any output.
michael@0 1256 * For example, the skip callback may be called;
michael@0 1257 * it does not output anything.
michael@0 1258 */
michael@0 1259
michael@0 1260 /* prepare the converter arguments */
michael@0 1261 args.converter=cnv;
michael@0 1262 args.flush=flush;
michael@0 1263 args.offsets=offsets;
michael@0 1264 args.source=s;
michael@0 1265 args.sourceLimit=sourceLimit;
michael@0 1266 args.target=*target;
michael@0 1267 args.targetLimit=targetLimit;
michael@0 1268 args.size=sizeof(args);
michael@0 1269
michael@0 1270 _fromUnicodeWithCallback(&args, err);
michael@0 1271
michael@0 1272 *source=args.source;
michael@0 1273 *target=args.target;
michael@0 1274 }
michael@0 1275
michael@0 1276 /* ucnv_toUnicode() --------------------------------------------------------- */
michael@0 1277
/*
 * Drive the converter's toUnicode implementation until the input in pArgs
 * is consumed or an error remains that a callback did not or cannot resolve.
 *
 * pArgs - conversion arguments; source, sourceLimit, flush, target and
 *         offsets are read and updated in place
 * err   - in/out error code; if it is already a failure on entry, the
 *         conversion function is not called and the error is handled
 *         as if the conversion function had reported it
 *
 * Each outer iteration calls the implementation function once. The inner
 * loop then
 *   - adjusts the offsets written since the last pass (_updateOffsets),
 *   - switches pArgs->source to/from the local replay buffer when
 *     cnv->preToULength is negative,
 *   - injects U_TRUNCATED_CHAR_FOUND when flush is set, the input is used up
 *     and cnv->toULength is still >0,
 *   - calls cnv->fromCharErrorBehaviour for U_INVALID_CHAR_FOUND,
 *     U_ILLEGAL_CHAR_FOUND, U_TRUNCATED_CHAR_FOUND,
 *     U_ILLEGAL_ESCAPE_SEQUENCE and U_UNSUPPORTED_ESCAPE_SEQUENCE.
 *
 * When returning with an error while replaying, the unconsumed replay bytes
 * are copied back into cnv->preToU (stored with a negative length) and the
 * caller's source, sourceLimit and flush are restored in pArgs.
 */
michael@0 1278 static void
michael@0 1279 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
michael@0 1280 UConverterToUnicode toUnicode;
michael@0 1281 UConverter *cnv;
michael@0 1282 const char *s;
michael@0 1283 UChar *t;
michael@0 1284 int32_t *offsets;
michael@0 1285 int32_t sourceIndex;
michael@0 1286 int32_t errorInputLength;
michael@0 1287 UBool converterSawEndOfInput, calledCallback;
michael@0 1288
michael@0 1289 /* variables for m:n conversion */
michael@0 1290 char replay[UCNV_EXT_MAX_BYTES];
michael@0 1291 const char *realSource, *realSourceLimit;
michael@0 1292 int32_t realSourceIndex;
michael@0 1293 UBool realFlush;
michael@0 1294
/*
 * s and t remember where source and target stood at the last pass;
 * the distances to pArgs->source/target give the units consumed/written since
 */
michael@0 1295 cnv=pArgs->converter;
michael@0 1296 s=pArgs->source;
michael@0 1297 t=pArgs->target;
michael@0 1298 offsets=pArgs->offsets;
michael@0 1299
michael@0 1300 /* get the converter implementation function */
michael@0 1301 sourceIndex=0;
michael@0 1302 if(offsets==NULL) {
michael@0 1303 toUnicode=cnv->sharedData->impl->toUnicode;
michael@0 1304 } else {
michael@0 1305 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
michael@0 1306 if(toUnicode==NULL) {
michael@0 1307 /* there is no WithOffsets implementation */
michael@0 1308 toUnicode=cnv->sharedData->impl->toUnicode;
michael@0 1309 /* we will write -1 for each offset */
michael@0 1310 sourceIndex=-1;
michael@0 1311 }
michael@0 1312 }
michael@0 1313
michael@0 1314 if(cnv->preToULength>=0) {
michael@0 1315 /* normal mode */
michael@0 1316 realSource=NULL;
michael@0 1317
michael@0 1318 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
michael@0 1319 realSourceLimit=NULL;
michael@0 1320 realFlush=FALSE;
michael@0 1321 realSourceIndex=0;
michael@0 1322 } else {
michael@0 1323 /*
michael@0 1324 * Previous m:n conversion stored source units from a partial match
michael@0 1325 * and failed to consume all of them.
michael@0 1326 * We need to "replay" them from a temporary buffer and convert them first.
michael@0 1327 */
michael@0 1328 realSource=pArgs->source;
michael@0 1329 realSourceLimit=pArgs->sourceLimit;
michael@0 1330 realFlush=pArgs->flush;
michael@0 1331 realSourceIndex=sourceIndex;
michael@0 1332
/* preToULength is negative here, so its negation is the number of stored bytes */
michael@0 1333 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
michael@0 1334 pArgs->source=replay;
michael@0 1335 pArgs->sourceLimit=replay-cnv->preToULength;
michael@0 1336 pArgs->flush=FALSE;
/* offsets for output from these replayed bytes are written as -1 (see _updateOffsets) */
michael@0 1337 sourceIndex=-1;
michael@0 1338
michael@0 1339 cnv->preToULength=0;
michael@0 1340 }
michael@0 1341
michael@0 1342 /*
michael@0 1343 * loop for conversion and error handling
michael@0 1344 *
michael@0 1345 * loop {
michael@0 1346 * convert
michael@0 1347 * loop {
michael@0 1348 * update offsets
michael@0 1349 * handle end of input
michael@0 1350 * handle errors/call callback
michael@0 1351 * }
michael@0 1352 * }
michael@0 1353 */
michael@0 1354 for(;;) {
michael@0 1355 if(U_SUCCESS(*err)) {
michael@0 1356 /* convert */
michael@0 1357 toUnicode(pArgs, err);
michael@0 1358
michael@0 1359 /*
michael@0 1360 * set a flag for whether the converter
michael@0 1361 * successfully processed the end of the input
michael@0 1362 *
michael@0 1363 * need not check cnv->preToULength==0 because a replay (<0) will cause
michael@0 1364 * s<sourceLimit before converterSawEndOfInput is checked
michael@0 1365 */
michael@0 1366 converterSawEndOfInput=
michael@0 1367 (UBool)(U_SUCCESS(*err) &&
michael@0 1368 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
michael@0 1369 cnv->toULength==0);
michael@0 1370 } else {
michael@0 1371 /* handle error from getNextUChar() or ucnv_convertEx() */
michael@0 1372 converterSawEndOfInput=FALSE;
michael@0 1373 }
michael@0 1374
michael@0 1375 /* no callback called yet for this iteration */
michael@0 1376 calledCallback=FALSE;
michael@0 1377
michael@0 1378 /* no sourceIndex adjustment for conversion, only for callback output */
michael@0 1379 errorInputLength=0;
michael@0 1380
michael@0 1381 /*
michael@0 1382 * loop for offsets and error handling
michael@0 1383 *
michael@0 1384 * iterates at most 3 times:
michael@0 1385 * 1. to clean up after the conversion function
michael@0 1386 * 2. after the callback
michael@0 1387 * 3. after the callback again if there was truncated input
michael@0 1388 */
michael@0 1389 for(;;) {
michael@0 1390 /* update offsets if we write any */
michael@0 1391 if(offsets!=NULL) {
michael@0 1392 int32_t length=(int32_t)(pArgs->target-t);
michael@0 1393 if(length>0) {
michael@0 1394 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
michael@0 1395
michael@0 1396 /*
michael@0 1397 * if a converter handles offsets and updates the offsets
michael@0 1398 * pointer at the end, then pArgs->offsets should not change
michael@0 1399 * here;
michael@0 1400 * however, some converters do not handle offsets at all
michael@0 1401 * (sourceIndex<0) or may not update the offsets pointer
michael@0 1402 */
michael@0 1403 pArgs->offsets=offsets+=length;
michael@0 1404 }
michael@0 1405
michael@0 1406 if(sourceIndex>=0) {
michael@0 1407 sourceIndex+=(int32_t)(pArgs->source-s);
michael@0 1408 }
michael@0 1409 }
michael@0 1410
michael@0 1411 if(cnv->preToULength<0) {
michael@0 1412 /*
michael@0 1413 * switch the source to new replay units (cannot occur while replaying)
michael@0 1414 * after offset handling and before end-of-input and callback handling
michael@0 1415 */
michael@0 1416 if(realSource==NULL) {
michael@0 1417 realSource=pArgs->source;
michael@0 1418 realSourceLimit=pArgs->sourceLimit;
michael@0 1419 realFlush=pArgs->flush;
michael@0 1420 realSourceIndex=sourceIndex;
michael@0 1421
michael@0 1422 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
michael@0 1423 pArgs->source=replay;
michael@0 1424 pArgs->sourceLimit=replay-cnv->preToULength;
michael@0 1425 pArgs->flush=FALSE;
/*
 * preToULength is negative, so this moves sourceIndex back by the
 * number of replayed bytes; any negative result is normalized to -1
 */
michael@0 1426 if((sourceIndex+=cnv->preToULength)<0) {
michael@0 1427 sourceIndex=-1;
michael@0 1428 }
michael@0 1429
michael@0 1430 cnv->preToULength=0;
michael@0 1431 } else {
michael@0 1432 /* see implementation note before _fromUnicodeWithCallback() */
/* realSource!=NULL in this branch, so the assertion below fails by design in a debug build */
michael@0 1433 U_ASSERT(realSource==NULL);
michael@0 1434 *err=U_INTERNAL_PROGRAM_ERROR;
michael@0 1435 }
michael@0 1436 }
michael@0 1437
michael@0 1438 /* update pointers */
michael@0 1439 s=pArgs->source;
michael@0 1440 t=pArgs->target;
michael@0 1441
michael@0 1442 if(U_SUCCESS(*err)) {
michael@0 1443 if(s<pArgs->sourceLimit) {
michael@0 1444 /*
michael@0 1445 * continue with the conversion loop while there is still input left
michael@0 1446 * (continue converting by breaking out of only the inner loop)
michael@0 1447 */
michael@0 1448 break;
michael@0 1449 } else if(realSource!=NULL) {
michael@0 1450 /* switch back from replaying to the real source and continue */
michael@0 1451 pArgs->source=realSource;
michael@0 1452 pArgs->sourceLimit=realSourceLimit;
michael@0 1453 pArgs->flush=realFlush;
michael@0 1454 sourceIndex=realSourceIndex;
michael@0 1455
michael@0 1456 realSource=NULL;
michael@0 1457 break;
michael@0 1458 } else if(pArgs->flush && cnv->toULength>0) {
michael@0 1459 /*
michael@0 1460 * the entire input stream is consumed
michael@0 1461 * and there is a partial, truncated input sequence left
michael@0 1462 */
michael@0 1463
michael@0 1464 /* inject an error and continue with callback handling */
michael@0 1465 *err=U_TRUNCATED_CHAR_FOUND;
michael@0 1466 calledCallback=FALSE; /* new error condition */
michael@0 1467 } else {
michael@0 1468 /* input consumed */
michael@0 1469 if(pArgs->flush) {
michael@0 1470 /*
michael@0 1471 * return to the conversion loop once more if the flush
michael@0 1472 * flag is set and the conversion function has not
michael@0 1473 * successfully processed the end of the input yet
michael@0 1474 *
michael@0 1475 * (continue converting by breaking out of only the inner loop)
michael@0 1476 */
michael@0 1477 if(!converterSawEndOfInput) {
michael@0 1478 break;
michael@0 1479 }
michael@0 1480
michael@0 1481 /* reset the converter without calling the callback function */
michael@0 1482 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
michael@0 1483 }
michael@0 1484
michael@0 1485 /* done successfully */
michael@0 1486 return;
michael@0 1487 }
michael@0 1488 }
michael@0 1489
michael@0 1490 /* U_FAILURE(*err) */
michael@0 1491 {
michael@0 1492 UErrorCode e;
michael@0 1493
/* e is assigned inside the condition; it is only read after that assignment was evaluated */
michael@0 1494 if( calledCallback ||
michael@0 1495 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
michael@0 1496 (e!=U_INVALID_CHAR_FOUND &&
michael@0 1497 e!=U_ILLEGAL_CHAR_FOUND &&
michael@0 1498 e!=U_TRUNCATED_CHAR_FOUND &&
michael@0 1499 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
michael@0 1500 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
michael@0 1501 ) {
michael@0 1502 /*
michael@0 1503 * the callback did not or cannot resolve the error:
michael@0 1504 * set output pointers and return
michael@0 1505 *
michael@0 1506 * the check for buffer overflow is redundant but it is
michael@0 1507 * a high-runner case and hopefully documents the intent
michael@0 1508 * well
michael@0 1509 *
michael@0 1510 * if we were replaying, then the replay buffer must be
michael@0 1511 * copied back into the UConverter
michael@0 1512 * and the real arguments must be restored
michael@0 1513 */
michael@0 1514 if(realSource!=NULL) {
michael@0 1515 int32_t length;
michael@0 1516
michael@0 1517 U_ASSERT(cnv->preToULength==0);
michael@0 1518
michael@0 1519 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
michael@0 1520 if(length>0) {
michael@0 1521 uprv_memcpy(cnv->preToU, pArgs->source, length);
/* a negative length marks the stored bytes as "to be replayed" on the next call */
michael@0 1522 cnv->preToULength=(int8_t)-length;
michael@0 1523 }
michael@0 1524
michael@0 1525 pArgs->source=realSource;
michael@0 1526 pArgs->sourceLimit=realSourceLimit;
michael@0 1527 pArgs->flush=realFlush;
michael@0 1528 }
michael@0 1529
michael@0 1530 return;
michael@0 1531 }
michael@0 1532 }
michael@0 1533
michael@0 1534 /* copy toUBytes[] to invalidCharBuffer[] */
michael@0 1535 errorInputLength=cnv->invalidCharLength=cnv->toULength;
michael@0 1536 if(errorInputLength>0) {
michael@0 1537 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
michael@0 1538 }
michael@0 1539
michael@0 1540 /* set the converter state to deal with the next character */
michael@0 1541 cnv->toULength=0;
michael@0 1542
michael@0 1543 /* call the callback function */
/* a reason still at UCNV_ILLEGAL is changed to UCNV_UNASSIGNED for U_INVALID_CHAR_FOUND; any other reason is passed through */
michael@0 1544 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
michael@0 1545 cnv->toUCallbackReason = UCNV_UNASSIGNED;
michael@0 1546 }
michael@0 1547 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
michael@0 1548 cnv->invalidCharBuffer, errorInputLength,
michael@0 1549 cnv->toUCallbackReason,
michael@0 1550 err);
michael@0 1551 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
michael@0 1552
michael@0 1553 /*
michael@0 1554 * loop back to the offset handling
michael@0 1555 *
michael@0 1556 * this flag will indicate after offset handling
michael@0 1557 * that a callback was called;
michael@0 1558 * if the callback did not resolve the error, then we return
michael@0 1559 */
michael@0 1560 calledCallback=TRUE;
michael@0 1561 }
michael@0 1562 }
michael@0 1563 }
michael@0 1564
michael@0 1565 /*
michael@0 1566 * Output the toUnicode overflow buffer.
michael@0 1567 * Call this function if(cnv->UCharErrorBufferLength>0).
michael@0 1568 * @return TRUE if overflow
michael@0 1569 */
michael@0 1570 static UBool
michael@0 1571 ucnv_outputOverflowToUnicode(UConverter *cnv,
michael@0 1572 UChar **target, const UChar *targetLimit,
michael@0 1573 int32_t **pOffsets,
michael@0 1574 UErrorCode *err) {
michael@0 1575 int32_t *offsets;
michael@0 1576 UChar *overflow, *t;
michael@0 1577 int32_t i, length;
michael@0 1578
michael@0 1579 t=*target;
michael@0 1580 if(pOffsets!=NULL) {
michael@0 1581 offsets=*pOffsets;
michael@0 1582 } else {
michael@0 1583 offsets=NULL;
michael@0 1584 }
michael@0 1585
michael@0 1586 overflow=cnv->UCharErrorBuffer;
michael@0 1587 length=cnv->UCharErrorBufferLength;
michael@0 1588 i=0;
michael@0 1589 while(i<length) {
michael@0 1590 if(t==targetLimit) {
michael@0 1591 /* the overflow buffer contains too much, keep the rest */
michael@0 1592 int32_t j=0;
michael@0 1593
michael@0 1594 do {
michael@0 1595 overflow[j++]=overflow[i++];
michael@0 1596 } while(i<length);
michael@0 1597
michael@0 1598 cnv->UCharErrorBufferLength=(int8_t)j;
michael@0 1599 *target=t;
michael@0 1600 if(offsets!=NULL) {
michael@0 1601 *pOffsets=offsets;
michael@0 1602 }
michael@0 1603 *err=U_BUFFER_OVERFLOW_ERROR;
michael@0 1604 return TRUE;
michael@0 1605 }
michael@0 1606
michael@0 1607 /* copy the overflow contents to the target */
michael@0 1608 *t++=overflow[i++];
michael@0 1609 if(offsets!=NULL) {
michael@0 1610 *offsets++=-1; /* no source index available for old output */
michael@0 1611 }
michael@0 1612 }
michael@0 1613
michael@0 1614 /* the overflow buffer is completely copied to the target */
michael@0 1615 cnv->UCharErrorBufferLength=0;
michael@0 1616 *target=t;
michael@0 1617 if(offsets!=NULL) {
michael@0 1618 *pOffsets=offsets;
michael@0 1619 }
michael@0 1620 return FALSE;
michael@0 1621 }
michael@0 1622
/*
 * ucnv_toUnicode(): streaming conversion of the bytes in [*source, sourceLimit)
 * into UChars written to [*target, targetLimit).
 *
 * Parameters:
 *   cnv          converter to use; NULL yields U_ILLEGAL_ARGUMENT_ERROR
 *   target       in/out pointer to the current output position; must not be NULL
 *   targetLimit  end of the output buffer
 *   source       in/out pointer to the current input position; must not be NULL
 *   sourceLimit  end of the input
 *   offsets      optional offsets array; handed to the overflow output helper
 *                and stored in the conversion arguments
 *   flush        stored in the conversion arguments as the flush flag
 *   err          in/out error code; the function returns immediately if it is
 *                NULL or already holds a failure
 *
 * Order of work: validate arguments, write out any UChars pending in
 * cnv->UCharErrorBuffer, then run _toUnicodeWithCallback() and copy the
 * advanced source/target positions back to the caller.
 */
michael@0 1623 U_CAPI void U_EXPORT2
michael@0 1624 ucnv_toUnicode(UConverter *cnv,
michael@0 1625 UChar **target, const UChar *targetLimit,
michael@0 1626 const char **source, const char *sourceLimit,
michael@0 1627 int32_t *offsets,
michael@0 1628 UBool flush,
michael@0 1629 UErrorCode *err) {
michael@0 1630 UConverterToUnicodeArgs args;
michael@0 1631 const char *s;
michael@0 1632 UChar *t;
michael@0 1633
michael@0 1634 /* check parameters */
michael@0 1635 if(err==NULL || U_FAILURE(*err)) {
michael@0 1636 return;
michael@0 1637 }
michael@0 1638
michael@0 1639 if(cnv==NULL || target==NULL || source==NULL) {
michael@0 1640 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1641 return;
michael@0 1642 }
michael@0 1643
/* snapshot the caller positions; they are only used for the checks below */
michael@0 1644 s=*source;
michael@0 1645 t=*target;
michael@0 1646
michael@0 1647 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
michael@0 1648 /*
michael@0 1649 Prevent code from going into an infinite loop in case we do hit this
michael@0 1650 limit. The limit pointer is expected to be on a UChar * boundary.
michael@0 1651 This also prevents the next argument check from failing.
michael@0 1652 */
michael@0 1653 targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
michael@0 1654 }
michael@0 1655
michael@0 1656 /*
michael@0 1657 * All these conditions should never happen.
michael@0 1658 *
michael@0 1659 * 1) Make sure that the limits are >= to the address source or target
michael@0 1660 *
michael@0 1661 * 2) Make sure that the buffer sizes do not exceed the number range for
michael@0 1662 * int32_t because some functions use the size (in units or bytes)
michael@0 1663 * rather than comparing pointers, and because offsets are int32_t values.
michael@0 1664 *
michael@0 1665 * size_t is guaranteed to be unsigned and large enough for the job.
michael@0 1666 *
michael@0 1667 * Return with an error instead of adjusting the limits because we would
michael@0 1668 * not be able to maintain the semantics that either the source must be
michael@0 1669 * consumed or the target filled (unless an error occurs).
michael@0 1670 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
michael@0 1671 *
michael@0 1672 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
michael@0 1673 * to a char * pointer and provide an incomplete UChar code unit.
michael@0 1674 */
/*
 * NOTE(review): the target bound is 0x3fffffff UChars while the source bound
 * is 0x7fffffff bytes; presumably UChar is two bytes wide so both limits keep
 * the byte size within int32_t range. Confirm against the UChar definition.
 */
michael@0 1675 if (sourceLimit<s || targetLimit<t ||
michael@0 1676 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
michael@0 1677 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
michael@0 1678 (((const char *)targetLimit-(const char *)t) & 1) != 0
michael@0 1679 ) {
michael@0 1680 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1681 return;
michael@0 1682 }
michael@0 1683
michael@0 1684 /* output the target overflow buffer */
/*
 * The helper is only called when the overflow buffer is non-empty. It is
 * given &offsets, so the local offsets pointer may be advanced by it.
 */
michael@0 1685 if( cnv->UCharErrorBufferLength>0 &&
michael@0 1686 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
michael@0 1687 ) {
michael@0 1688 /* U_BUFFER_OVERFLOW_ERROR */
michael@0 1689 return;
michael@0 1690 }
michael@0 1691 /* *target may have moved, therefore stop using t */
michael@0 1692
/*
 * NOTE(review): presumably a negative preToULength means there is replay
 * input still to be processed, which is why it blocks this early return.
 * Confirm against the UConverter field documentation.
 */
michael@0 1693 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
michael@0 1694 /* the overflow buffer is emptied and there is no new input: we are done */
michael@0 1695 return;
michael@0 1696 }
michael@0 1697
michael@0 1698 /*
michael@0 1699 * Do not simply return with a buffer overflow error if
michael@0 1700 * !flush && t==targetLimit
michael@0 1701 * because it is possible that the source will not generate any output.
michael@0 1702 * For example, the skip callback may be called;
michael@0 1703 * it does not output anything.
michael@0 1704 */
michael@0 1705
michael@0 1706 /* prepare the converter arguments */
michael@0 1707 args.converter=cnv;
michael@0 1708 args.flush=flush;
michael@0 1709 args.offsets=offsets;
michael@0 1710 args.source=s;
michael@0 1711 args.sourceLimit=sourceLimit;
/* use *target rather than t: the overflow output above may have advanced it */
michael@0 1712 args.target=*target;
michael@0 1713 args.targetLimit=targetLimit;
michael@0 1714 args.size=sizeof(args);
michael@0 1715
michael@0 1716 _toUnicodeWithCallback(&args, err);
michael@0 1717
/* report the new input and output positions back to the caller */
michael@0 1718 *source=args.source;
michael@0 1719 *target=args.target;
michael@0 1720 }
michael@0 1721
michael@0 1722 /* ucnv_to/fromUChars() ----------------------------------------------------- */
michael@0 1723
/*
 * ucnv_fromUChars(): one-shot conversion of a UChar string into bytes in dest.
 *
 * Parameters:
 *   cnv           converter to use; must not be NULL
 *   dest          output buffer; may be NULL only when destCapacity is 0
 *   destCapacity  capacity of dest in bytes; must be >= 0
 *   src           input UChars; may be NULL only when srcLength is 0
 *   srcLength     number of input UChars, or -1 to have the length computed
 *                 with u_strlen(); values below -1 are rejected
 *   pErrorCode    in/out error code; the function returns 0 immediately if it
 *                 is NULL or already holds a failure
 *
 * Returns 0 on an argument error (U_ILLEGAL_ARGUMENT_ERROR is set), otherwise
 * the value returned by u_terminateChars() for the total output length. When
 * dest overflows, the rest of the input is still converted into a scratch
 * buffer so that the full required length is counted (preflighting).
 *
 * The fromUnicode state of cnv is reset before converting.
 */
michael@0 1724 U_CAPI int32_t U_EXPORT2
michael@0 1725 ucnv_fromUChars(UConverter *cnv,
michael@0 1726 char *dest, int32_t destCapacity,
michael@0 1727 const UChar *src, int32_t srcLength,
michael@0 1728 UErrorCode *pErrorCode) {
michael@0 1729 const UChar *srcLimit;
michael@0 1730 char *originalDest, *destLimit;
michael@0 1731 int32_t destLength;
michael@0 1732
michael@0 1733 /* check arguments */
michael@0 1734 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1735 return 0;
michael@0 1736 }
michael@0 1737
michael@0 1738 if( cnv==NULL ||
michael@0 1739 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
michael@0 1740 srcLength<-1 || (srcLength!=0 && src==NULL)
michael@0 1741 ) {
michael@0 1742 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1743 return 0;
michael@0 1744 }
michael@0 1745
michael@0 1746 /* initialize */
michael@0 1747 ucnv_resetFromUnicode(cnv);
/* dest is advanced by the conversion; keep the start for length and termination */
michael@0 1748 originalDest=dest;
michael@0 1749 if(srcLength==-1) {
michael@0 1750 srcLength=u_strlen(src);
michael@0 1751 }
michael@0 1752 if(srcLength>0) {
michael@0 1753 srcLimit=src+srcLength;
michael@0 1754 destLimit=dest+destCapacity;
michael@0 1755
michael@0 1756 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
michael@0 1757 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
michael@0 1758 destLimit=(char *)U_MAX_PTR(dest);
michael@0 1759 }
michael@0 1760
michael@0 1761 /* perform the conversion */
michael@0 1762 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
michael@0 1763 destLength=(int32_t)(dest-originalDest);
michael@0 1764
michael@0 1765 /* if an overflow occurs, then get the preflighting length */
michael@0 1766 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
michael@0 1767 char buffer[1024];
michael@0 1768
/*
 * Keep converting from where src stopped into the scratch buffer, restarting
 * at its beginning on every pass; only the byte counts are accumulated, the
 * scratch contents are discarded.
 */
michael@0 1769 destLimit=buffer+sizeof(buffer);
michael@0 1770 do {
michael@0 1771 dest=buffer;
michael@0 1772 *pErrorCode=U_ZERO_ERROR;
michael@0 1773 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
michael@0 1774 destLength+=(int32_t)(dest-buffer);
michael@0 1775 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
michael@0 1776 }
michael@0 1777 } else {
michael@0 1778 destLength=0;
michael@0 1779 }
michael@0 1780
/*
 * NOTE(review): u_terminateChars() is defined elsewhere; presumably it
 * NUL-terminates when there is room and sets the overflow or
 * not-terminated status otherwise. Confirm in ustr_imp.h.
 */
michael@0 1781 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
michael@0 1782 }
michael@0 1783
/*
 * ucnv_toUChars(): one-shot conversion of a byte string into UChars in dest.
 *
 * Parameters:
 *   cnv           converter to use; must not be NULL
 *   dest          output buffer; may be NULL only when destCapacity is 0
 *   destCapacity  capacity of dest in UChars; must be >= 0
 *   src           input bytes; may be NULL only when srcLength is 0
 *   srcLength     number of input bytes, or -1 to have the length computed
 *                 with uprv_strlen(); values below -1 are rejected
 *   pErrorCode    in/out error code; the function returns 0 immediately if it
 *                 is NULL or already holds a failure
 *
 * Returns 0 on an argument error (U_ILLEGAL_ARGUMENT_ERROR is set), otherwise
 * the value returned by u_terminateUChars() for the total output length. When
 * dest overflows, the rest of the input is still converted into a scratch
 * buffer so that the full required length is counted (preflighting).
 *
 * The toUnicode state of cnv is reset before converting.
 */
michael@0 1784 U_CAPI int32_t U_EXPORT2
michael@0 1785 ucnv_toUChars(UConverter *cnv,
michael@0 1786 UChar *dest, int32_t destCapacity,
michael@0 1787 const char *src, int32_t srcLength,
michael@0 1788 UErrorCode *pErrorCode) {
michael@0 1789 const char *srcLimit;
michael@0 1790 UChar *originalDest, *destLimit;
michael@0 1791 int32_t destLength;
michael@0 1792
michael@0 1793 /* check arguments */
michael@0 1794 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 1795 return 0;
michael@0 1796 }
michael@0 1797
michael@0 1798 if( cnv==NULL ||
michael@0 1799 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
michael@0 1800 srcLength<-1 || (srcLength!=0 && src==NULL))
michael@0 1801 {
michael@0 1802 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1803 return 0;
michael@0 1804 }
michael@0 1805
michael@0 1806 /* initialize */
michael@0 1807 ucnv_resetToUnicode(cnv);
/* dest is advanced by the conversion; keep the start for length and termination */
michael@0 1808 originalDest=dest;
michael@0 1809 if(srcLength==-1) {
michael@0 1810 srcLength=(int32_t)uprv_strlen(src);
michael@0 1811 }
michael@0 1812 if(srcLength>0) {
michael@0 1813 srcLimit=src+srcLength;
michael@0 1814 destLimit=dest+destCapacity;
michael@0 1815
michael@0 1816 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
michael@0 1817 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
michael@0 1818 destLimit=(UChar *)U_MAX_PTR(dest);
michael@0 1819 }
michael@0 1820
michael@0 1821 /* perform the conversion */
/* the two arguments after srcLimit are offsets (none requested) and flush */
michael@0 1822 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
michael@0 1823 destLength=(int32_t)(dest-originalDest);
michael@0 1824
michael@0 1825 /* if an overflow occurs, then get the preflighting length */
michael@0 1826 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
michael@0 1827 {
michael@0 1828 UChar buffer[1024];
michael@0 1829
/*
 * The limit is computed in UChar units: byte size of the scratch buffer
 * divided by U_SIZEOF_UCHAR (presumably sizeof(UChar); confirm). Each pass
 * restarts at the beginning of the scratch buffer and only the UChar counts
 * are accumulated.
 */
michael@0 1830 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
michael@0 1831 do {
michael@0 1832 dest=buffer;
michael@0 1833 *pErrorCode=U_ZERO_ERROR;
michael@0 1834 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
michael@0 1835 destLength+=(int32_t)(dest-buffer);
michael@0 1836 }
michael@0 1837 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
michael@0 1838 }
michael@0 1839 } else {
michael@0 1840 destLength=0;
michael@0 1841 }
michael@0 1842
/*
 * NOTE(review): u_terminateUChars() is defined elsewhere; presumably it
 * NUL-terminates when there is room and sets the overflow or
 * not-terminated status otherwise. Confirm in ustr_imp.h.
 */
michael@0 1843 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
michael@0 1844 }
michael@0 1845
michael@0 1846 /* ucnv_getNextUChar() ------------------------------------------------------ */
michael@0 1847
/*
 * ucnv_getNextUChar(): converts the next character from [*source, sourceLimit)
 * and returns it as a single code point.
 *
 * Parameters:
 *   cnv          converter to use; must not be NULL
 *   source       in/out pointer to the current input position; must not be NULL
 *   sourceLimit  end of the input; must not be before *source
 *   err          in/out error code; the function returns 0xffff immediately
 *                if it is NULL or already holds a failure
 *
 * Returns the code point, or 0xffff when nothing is output (argument error,
 * conversion failure, or no input, in which case U_INDEX_OUTOFBOUNDS_ERROR
 * is set).
 *
 * Sources of output, in order: UChars pending in cnv->UCharErrorBuffer, the
 * converter implementation's own getNextUChar() if it has one and the
 * converter is at a character boundary, and finally _toUnicodeWithCallback()
 * with a one-UChar target. A lead surrogate is combined with a following
 * trail surrogate when one is available. Output that is produced but not
 * returned is stored back at the front of the overflow buffer.
 */
michael@0 1848 U_CAPI UChar32 U_EXPORT2
michael@0 1849 ucnv_getNextUChar(UConverter *cnv,
michael@0 1850 const char **source, const char *sourceLimit,
michael@0 1851 UErrorCode *err) {
michael@0 1852 UConverterToUnicodeArgs args;
michael@0 1853 UChar buffer[U16_MAX_LENGTH];
michael@0 1854 const char *s;
michael@0 1855 UChar32 c;
michael@0 1856 int32_t i, length;
michael@0 1857
michael@0 1858 /* check parameters */
michael@0 1859 if(err==NULL || U_FAILURE(*err)) {
michael@0 1860 return 0xffff;
michael@0 1861 }
michael@0 1862
michael@0 1863 if(cnv==NULL || source==NULL) {
michael@0 1864 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1865 return 0xffff;
michael@0 1866 }
michael@0 1867
michael@0 1868 s=*source;
michael@0 1869 if(sourceLimit<s) {
michael@0 1870 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1871 return 0xffff;
michael@0 1872 }
michael@0 1873
michael@0 1874 /*
michael@0 1875 * Make sure that the buffer sizes do not exceed the number range for
michael@0 1876 * int32_t because some functions use the size (in units or bytes)
michael@0 1877 * rather than comparing pointers, and because offsets are int32_t values.
michael@0 1878 *
michael@0 1879 * size_t is guaranteed to be unsigned and large enough for the job.
michael@0 1880 *
michael@0 1881 * Return with an error instead of adjusting the limits because we would
michael@0 1882 * not be able to maintain the semantics that either the source must be
michael@0 1883 * consumed or the target filled (unless an error occurs).
michael@0 1884 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
michael@0 1885 */
michael@0 1886 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
michael@0 1887 *err=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1888 return 0xffff;
michael@0 1889 }
michael@0 1890
/*
 * c keeps this sentinel unless the overflow buffer supplies a code unit;
 * the c<0 test further down tells the two cases apart (assumes U_SENTINEL
 * is negative; confirm against its definition).
 */
michael@0 1891 c=U_SENTINEL;
michael@0 1892
michael@0 1893 /* flush the target overflow buffer */
michael@0 1894 if(cnv->UCharErrorBufferLength>0) {
michael@0 1895 UChar *overflow;
michael@0 1896
michael@0 1897 overflow=cnv->UCharErrorBuffer;
michael@0 1898 i=0;
michael@0 1899 length=cnv->UCharErrorBufferLength;
michael@0 1900 U16_NEXT(overflow, i, length, c);
michael@0 1901
michael@0 1902 /* move the remaining overflow contents up to the beginning */
michael@0 1903 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
michael@0 1904 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
michael@0 1905 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
michael@0 1906 }
michael@0 1907
/*
 * Return right away unless c is a lead surrogate that was the last unit in
 * the overflow buffer (i==length); in that case new conversion output may
 * still supply the matching trail surrogate.
 */
michael@0 1908 if(!U16_IS_LEAD(c) || i<length) {
michael@0 1909 return c;
michael@0 1910 }
michael@0 1911 /*
michael@0 1912 * Continue if the overflow buffer contained only a lead surrogate,
michael@0 1913 * in case the converter outputs single surrogates from complete
michael@0 1914 * input sequences.
michael@0 1915 */
michael@0 1916 }
michael@0 1917
michael@0 1918 /*
michael@0 1919 * flush==TRUE is implied for ucnv_getNextUChar()
michael@0 1920 *
michael@0 1921 * do not simply return even if s==sourceLimit because the converter may
michael@0 1922 * not have seen flush==TRUE before
michael@0 1923 */
michael@0 1924
michael@0 1925 /* prepare the converter arguments */
michael@0 1926 args.converter=cnv;
michael@0 1927 args.flush=TRUE;
michael@0 1928 args.offsets=NULL;
michael@0 1929 args.source=s;
michael@0 1930 args.sourceLimit=sourceLimit;
michael@0 1931 args.target=buffer;
/* room for exactly one UChar on the first conversion call */
michael@0 1932 args.targetLimit=buffer+1;
michael@0 1933 args.size=sizeof(args);
michael@0 1934
michael@0 1935 if(c<0) {
michael@0 1936 /*
michael@0 1937 * call the native getNextUChar() implementation if we are
michael@0 1938 * at a character boundary (toULength==0)
michael@0 1939 *
michael@0 1940 * unlike with _toUnicode(), getNextUChar() implementations must set
michael@0 1941 * U_TRUNCATED_CHAR_FOUND for truncated input,
michael@0 1942 * in addition to setting toULength/toUBytes[]
michael@0 1943 */
michael@0 1944 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
michael@0 1945 c=cnv->sharedData->impl->getNextUChar(&args, err);
/* publish the consumed input immediately; both returns below rely on it */
michael@0 1946 *source=s=args.source;
michael@0 1947 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
michael@0 1948 /* reset the converter without calling the callback function */
michael@0 1949 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
michael@0 1950 return 0xffff; /* no output */
michael@0 1951 } else if(U_SUCCESS(*err) && c>=0) {
michael@0 1952 return c;
michael@0 1953 /*
michael@0 1954 * else fall through to use _toUnicode() because
michael@0 1955 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
michael@0 1956 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
michael@0 1957 */
michael@0 1958 }
michael@0 1959 }
michael@0 1960
michael@0 1961 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
michael@0 1962 _toUnicodeWithCallback(&args, err);
michael@0 1963
/*
 * A target overflow is expected here because the target holds one UChar.
 * NOTE(review): presumably the surplus output is kept in
 * cnv->UCharErrorBuffer by the conversion code; confirm.
 */
michael@0 1964 if(*err==U_BUFFER_OVERFLOW_ERROR) {
michael@0 1965 *err=U_ZERO_ERROR;
michael@0 1966 }
michael@0 1967
michael@0 1968 i=0;
michael@0 1969 length=(int32_t)(args.target-buffer);
michael@0 1970 } else {
michael@0 1971 /* write the lead surrogate from the overflow buffer */
michael@0 1972 buffer[0]=(UChar)c;
michael@0 1973 args.target=buffer+1;
michael@0 1974 i=0;
michael@0 1975 length=1;
michael@0 1976 }
michael@0 1977
michael@0 1978 /* buffer contents starts at i and ends before length */
michael@0 1979
michael@0 1980 if(U_FAILURE(*err)) {
michael@0 1981 c=0xffff; /* no output */
michael@0 1982 } else if(length==0) {
michael@0 1983 /* no input or only state changes */
michael@0 1984 *err=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 1985 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
michael@0 1986 c=0xffff; /* no output */
michael@0 1987 } else {
michael@0 1988 c=buffer[0];
michael@0 1989 i=1;
michael@0 1990 if(!U16_IS_LEAD(c)) {
michael@0 1991 /* consume c=buffer[0], done */
michael@0 1992 } else {
michael@0 1993 /* got a lead surrogate, see if a trail surrogate follows */
michael@0 1994 UChar c2;
michael@0 1995
michael@0 1996 if(cnv->UCharErrorBufferLength>0) {
michael@0 1997 /* got overflow output from the conversion */
michael@0 1998 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
michael@0 1999 /* got a trail surrogate, too */
michael@0 2000 c=U16_GET_SUPPLEMENTARY(c, c2);
michael@0 2001
michael@0 2002 /* move the remaining overflow contents up to the beginning */
michael@0 2003 if((--cnv->UCharErrorBufferLength)>0) {
michael@0 2004 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
michael@0 2005 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
michael@0 2006 }
michael@0 2007 } else {
michael@0 2008 /* c is an unpaired lead surrogate, just return it */
michael@0 2009 }
michael@0 2010 } else if(args.source<sourceLimit) {
michael@0 2011 /* convert once more, to buffer[1] */
michael@0 2012 args.targetLimit=buffer+2;
michael@0 2013 _toUnicodeWithCallback(&args, err);
michael@0 2014 if(*err==U_BUFFER_OVERFLOW_ERROR) {
michael@0 2015 *err=U_ZERO_ERROR;
michael@0 2016 }
michael@0 2017
michael@0 2018 length=(int32_t)(args.target-buffer);
/*
 * If buffer[1] is not a trail surrogate, i stays 1 and buffer[1] is
 * pushed back into the overflow buffer below.
 */
michael@0 2019 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
michael@0 2020 /* got a trail surrogate, too */
michael@0 2021 c=U16_GET_SUPPLEMENTARY(c, c2);
michael@0 2022 i=2;
michael@0 2023 }
michael@0 2024 }
michael@0 2025 }
michael@0 2026 }
michael@0 2027
michael@0 2028 /*
michael@0 2029 * move leftover output from buffer[i..length[
michael@0 2030 * into the beginning of the overflow buffer
michael@0 2031 */
michael@0 2032 if(i<length) {
michael@0 2033 /* move further overflow back */
/* delta is the number of unreturned UChars in buffer */
michael@0 2034 int32_t delta=length-i;
/* from here on, length is reused for the current overflow buffer length */
michael@0 2035 if((length=cnv->UCharErrorBufferLength)>0) {
michael@0 2036 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
michael@0 2037 length*U_SIZEOF_UCHAR);
michael@0 2038 }
michael@0 2039 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
michael@0 2040
michael@0 2041 cnv->UCharErrorBuffer[0]=buffer[i++];
michael@0 2042 if(delta>1) {
michael@0 2043 cnv->UCharErrorBuffer[1]=buffer[i];
michael@0 2044 }
michael@0 2045 }
michael@0 2046
michael@0 2047 *source=args.source;
michael@0 2048 return c;
michael@0 2049 }
michael@0 2050
michael@0 2051 /* ucnv_convert() and siblings ---------------------------------------------- */
michael@0 2052
/*
 * ucnv_convertEx(): converts bytes from [*source, sourceLimit) in the charset
 * of sourceCnv into bytes at [*target, targetLimit) in the charset of
 * targetCnv, going through a UChar pivot buffer, or through a direct UTF-8
 * conversion function when one of the converters provides one.
 *
 * Parameters:
 *   targetCnv, sourceCnv  converters; must not be NULL
 *   target, targetLimit   in/out output position and its limit; target,
 *                         *target and targetLimit must not be NULL
 *   source                in/out input position; source and *source must not
 *                         be NULL
 *   sourceLimit           end of the input, or NULL for NUL-terminated input
 *                         (the limit is then found with uprv_strchr)
 *   pivotStart            start of a caller-provided pivot buffer, or NULL to
 *                         use an internal stack buffer of CHUNK_SIZE UChars;
 *                         NULL is only accepted together with flush
 *   pivotSource,
 *   pivotTarget           in/out read and write positions inside the pivot
 *                         buffer; required when pivotStart is not NULL
 *   pivotLimit            end of the caller-provided pivot buffer
 *   reset                 if true, both converters are reset and the pivot
 *                         positions are set to pivotStart
 *   flush                 flush flag for the toUnicode side; the fromUnicode
 *                         side is flushed once toUnicode has processed the
 *                         end of the input
 *   pErrorCode            in/out error code; the function returns immediately
 *                         if it is NULL or already holds a failure
 *
 * On return *source and *target hold the advanced positions. With flush and
 * a successful status, the output is NUL-terminated if there is room,
 * otherwise U_STRING_NOT_TERMINATED_WARNING is set.
 */
michael@0 2053 U_CAPI void U_EXPORT2
michael@0 2054 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
michael@0 2055 char **target, const char *targetLimit,
michael@0 2056 const char **source, const char *sourceLimit,
michael@0 2057 UChar *pivotStart, UChar **pivotSource,
michael@0 2058 UChar **pivotTarget, const UChar *pivotLimit,
michael@0 2059 UBool reset, UBool flush,
michael@0 2060 UErrorCode *pErrorCode) {
michael@0 2061 UChar pivotBuffer[CHUNK_SIZE];
michael@0 2062 const UChar *myPivotSource;
michael@0 2063 UChar *myPivotTarget;
michael@0 2064 const char *s;
michael@0 2065 char *t;
michael@0 2066
michael@0 2067 UConverterToUnicodeArgs toUArgs;
michael@0 2068 UConverterFromUnicodeArgs fromUArgs;
michael@0 2069 UConverterConvert convert;
michael@0 2070
michael@0 2071 /* error checking */
michael@0 2072 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 2073 return;
michael@0 2074 }
michael@0 2075
michael@0 2076 if( targetCnv==NULL || sourceCnv==NULL ||
michael@0 2077 source==NULL || *source==NULL ||
michael@0 2078 target==NULL || *target==NULL || targetLimit==NULL
michael@0 2079 ) {
michael@0 2080 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2081 return;
michael@0 2082 }
michael@0 2083
michael@0 2084 s=*source;
michael@0 2085 t=*target;
/* a NULL sourceLimit is legal here and resolved further down */
michael@0 2086 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
michael@0 2087 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2088 return;
michael@0 2089 }
michael@0 2090
michael@0 2091 /*
michael@0 2092 * Make sure that the buffer sizes do not exceed the number range for
michael@0 2093 * int32_t. See ucnv_toUnicode() for a more detailed comment.
michael@0 2094 */
michael@0 2095 if(
michael@0 2096 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
michael@0 2097 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
michael@0 2098 ) {
michael@0 2099 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2100 return;
michael@0 2101 }
michael@0 2102
michael@0 2103 if(pivotStart==NULL) {
michael@0 2104 if(!flush) {
michael@0 2105 /* streaming conversion requires an explicit pivot buffer */
michael@0 2106 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2107 return;
michael@0 2108 }
michael@0 2109
michael@0 2110 /* use the stack pivot buffer */
/*
 * The caller-supplied pivotSource/pivotTarget values are ignored in this
 * branch; both are redirected to local variables.
 */
michael@0 2111 myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
michael@0 2112 pivotSource=(UChar **)&myPivotSource;
michael@0 2113 pivotTarget=&myPivotTarget;
michael@0 2114 pivotLimit=pivotBuffer+CHUNK_SIZE;
michael@0 2115 } else if( pivotStart>=pivotLimit ||
michael@0 2116 pivotSource==NULL || *pivotSource==NULL ||
michael@0 2117 pivotTarget==NULL || *pivotTarget==NULL ||
michael@0 2118 pivotLimit==NULL
michael@0 2119 ) {
michael@0 2120 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2121 return;
michael@0 2122 }
michael@0 2123
michael@0 2124 if(sourceLimit==NULL) {
michael@0 2125 /* get limit of single-byte-NUL-terminated source string */
michael@0 2126 sourceLimit=uprv_strchr(*source, 0);
michael@0 2127 }
michael@0 2128
michael@0 2129 if(reset) {
michael@0 2130 ucnv_resetToUnicode(sourceCnv);
michael@0 2131 ucnv_resetFromUnicode(targetCnv);
michael@0 2132 *pivotSource=*pivotTarget=pivotStart;
michael@0 2133 } else if(targetCnv->charErrorBufferLength>0) {
michael@0 2134 /* output the targetCnv overflow buffer */
michael@0 2135 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
michael@0 2136 /* U_BUFFER_OVERFLOW_ERROR */
michael@0 2137 return;
michael@0 2138 }
michael@0 2139 /* *target has moved, therefore stop using t */
michael@0 2140
michael@0 2141 if( !flush &&
michael@0 2142 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
michael@0 2143 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
michael@0 2144 ) {
michael@0 2145 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
michael@0 2146 return;
michael@0 2147 }
michael@0 2148 }
michael@0 2149
michael@0 2150 /* Is direct-UTF-8 conversion available? */
/*
 * convert ends up non-NULL when the source is UTF-8 and the target converter
 * has a fromUTF8 function, or when the target is UTF-8 and the source
 * converter has a toUTF8 function; otherwise it is NULL and only the pivot
 * path is used.
 */
michael@0 2151 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
michael@0 2152 targetCnv->sharedData->impl->fromUTF8!=NULL
michael@0 2153 ) {
michael@0 2154 convert=targetCnv->sharedData->impl->fromUTF8;
michael@0 2155 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
michael@0 2156 sourceCnv->sharedData->impl->toUTF8!=NULL
michael@0 2157 ) {
michael@0 2158 convert=sourceCnv->sharedData->impl->toUTF8;
michael@0 2159 } else {
michael@0 2160 convert=NULL;
michael@0 2161 }
michael@0 2162
michael@0 2163 /*
michael@0 2164 * If direct-UTF-8 conversion is available, then we use a smaller
michael@0 2165 * pivot buffer for error handling and partial matches
michael@0 2166 * so that we quickly return to direct conversion.
michael@0 2167 *
michael@0 2168 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
michael@0 2169 *
michael@0 2170 * We could reduce the pivot buffer size further, at the cost of
michael@0 2171 * buffer overflows from callbacks.
michael@0 2172 * The pivot buffer should not be smaller than the maximum number of
michael@0 2173 * fromUnicode extension table input UChars
michael@0 2174 * (for m:n conversion, see
michael@0 2175 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
michael@0 2176 * or 2 for surrogate pairs.
michael@0 2177 *
michael@0 2178 * Too small a buffer can cause thrashing between pivoting and direct
michael@0 2179 * conversion, with function call overhead outweighing the benefits
michael@0 2180 * of direct conversion.
michael@0 2181 */
michael@0 2182 if(convert!=NULL && (pivotLimit-pivotStart)>32) {
michael@0 2183 pivotLimit=pivotStart+32;
michael@0 2184 }
michael@0 2185
michael@0 2186 /* prepare the converter arguments */
/*
 * fromUArgs.source/sourceLimit and toUArgs.target are not set here; they are
 * assigned inside the loop before each use. fromUArgs.flush starts FALSE and
 * is raised at the bottom of the loop.
 */
michael@0 2187 fromUArgs.converter=targetCnv;
michael@0 2188 fromUArgs.flush=FALSE;
michael@0 2189 fromUArgs.offsets=NULL;
michael@0 2190 fromUArgs.target=*target;
michael@0 2191 fromUArgs.targetLimit=targetLimit;
michael@0 2192 fromUArgs.size=sizeof(fromUArgs);
michael@0 2193
michael@0 2194 toUArgs.converter=sourceCnv;
michael@0 2195 toUArgs.flush=flush;
michael@0 2196 toUArgs.offsets=NULL;
michael@0 2197 toUArgs.source=s;
michael@0 2198 toUArgs.sourceLimit=sourceLimit;
michael@0 2199 toUArgs.targetLimit=pivotLimit;
michael@0 2200 toUArgs.size=sizeof(toUArgs);
michael@0 2201
michael@0 2202 /*
michael@0 2203 * TODO: Consider separating this function into two functions,
michael@0 2204 * extracting exactly the conversion loop,
michael@0 2205 * for readability and to reduce the set of visible variables.
michael@0 2206 *
michael@0 2207 * Otherwise stop using s and t from here on.
michael@0 2208 */
michael@0 2209 s=t=NULL;
michael@0 2210
michael@0 2211 /*
michael@0 2212 * conversion loop
michael@0 2213 *
michael@0 2214 * The sequence of steps in the loop may appear backward,
michael@0 2215 * but the principle is simple:
michael@0 2216 * In the chain of
michael@0 2217 * source - sourceCnv overflow - pivot - targetCnv overflow - target
michael@0 2218 * empty out later buffers before refilling them from earlier ones.
michael@0 2219 *
michael@0 2220 * The targetCnv overflow buffer is flushed out only once before the loop.
michael@0 2221 */
michael@0 2222 for(;;) {
michael@0 2223 /*
michael@0 2224 * if(pivot not empty or error or replay or flush fromUnicode) {
michael@0 2225 * fromUnicode(pivot -> target);
michael@0 2226 * }
michael@0 2227 *
michael@0 2228 * For pivoting conversion; and for direct conversion for
michael@0 2229 * error callback handling and flushing the replay buffer.
michael@0 2230 */
michael@0 2231 if( *pivotSource<*pivotTarget ||
michael@0 2232 U_FAILURE(*pErrorCode) ||
michael@0 2233 targetCnv->preFromULength<0 ||
michael@0 2234 fromUArgs.flush
michael@0 2235 ) {
michael@0 2236 fromUArgs.source=*pivotSource;
michael@0 2237 fromUArgs.sourceLimit=*pivotTarget;
michael@0 2238 _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
michael@0 2239 if(U_FAILURE(*pErrorCode)) {
michael@0 2240 /* target overflow, or conversion error */
michael@0 2241 *pivotSource=(UChar *)fromUArgs.source;
michael@0 2242 break;
michael@0 2243 }
michael@0 2244
michael@0 2245 /*
michael@0 2246 * _fromUnicodeWithCallback() must have consumed the pivot contents
michael@0 2247 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
michael@0 2248 */
michael@0 2249 }
michael@0 2250
michael@0 2251 /* The pivot buffer is empty; reset it so we start at pivotStart. */
michael@0 2252 *pivotSource=*pivotTarget=pivotStart;
michael@0 2253
michael@0 2254 /*
michael@0 2255 * if(sourceCnv overflow buffer not empty) {
michael@0 2256 * move(sourceCnv overflow buffer -> pivot);
michael@0 2257 * continue;
michael@0 2258 * }
michael@0 2259 */
michael@0 2260 /* output the sourceCnv overflow buffer */
michael@0 2261 if(sourceCnv->UCharErrorBufferLength>0) {
/*
 * An overflow here only means the pivot is full; it is cleared because the
 * next loop iteration drains the pivot into the target.
 */
michael@0 2262 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
michael@0 2263 /* U_BUFFER_OVERFLOW_ERROR */
michael@0 2264 *pErrorCode=U_ZERO_ERROR;
michael@0 2265 }
michael@0 2266 continue;
michael@0 2267 }
michael@0 2268
michael@0 2269 /*
michael@0 2270 * check for end of input and break if done
michael@0 2271 *
michael@0 2272 * Checking both flush and fromUArgs.flush ensures that the converters
michael@0 2273 * have been called with the flush flag set if the ucnv_convertEx()
michael@0 2274 * caller set it.
michael@0 2275 */
michael@0 2276 if( toUArgs.source==sourceLimit &&
michael@0 2277 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
michael@0 2278 (!flush || fromUArgs.flush)
michael@0 2279 ) {
michael@0 2280 /* done successfully */
michael@0 2281 break;
michael@0 2282 }
michael@0 2283
michael@0 2284 /*
michael@0 2285 * use direct conversion if available
michael@0 2286 * but not if continuing a partial match
michael@0 2287 * or flushing the toUnicode replay buffer
michael@0 2288 */
michael@0 2289 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
michael@0 2290 if(*pErrorCode==U_USING_DEFAULT_WARNING) {
michael@0 2291 /* remove a warning that may be set by this function */
michael@0 2292 *pErrorCode=U_ZERO_ERROR;
michael@0 2293 }
michael@0 2294 convert(&fromUArgs, &toUArgs, pErrorCode);
michael@0 2295 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
michael@0 2296 break;
michael@0 2297 } else if(U_FAILURE(*pErrorCode)) {
michael@0 2298 if(sourceCnv->toULength>0) {
michael@0 2299 /*
michael@0 2300 * Fall through to calling _toUnicodeWithCallback()
michael@0 2301 * for callback handling.
michael@0 2302 *
michael@0 2303 * The pivot buffer will be reset with
michael@0 2304 * *pivotSource=*pivotTarget=pivotStart;
michael@0 2305 * which indicates a toUnicode error to the caller
michael@0 2306 * (*pivotSource==pivotStart shows no pivot UChars consumed).
michael@0 2307 */
michael@0 2308 } else {
michael@0 2309 /*
michael@0 2310 * Indicate a fromUnicode error to the caller
michael@0 2311 * (*pivotSource>pivotStart shows some pivot UChars consumed).
michael@0 2312 */
michael@0 2313 *pivotSource=*pivotTarget=pivotStart+1;
michael@0 2314 /*
michael@0 2315 * Loop around to calling _fromUnicodeWithCallbacks()
michael@0 2316 * for callback handling.
michael@0 2317 */
michael@0 2318 continue;
michael@0 2319 }
michael@0 2320 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
michael@0 2321 /*
michael@0 2322 * No error, but the implementation requested to temporarily
michael@0 2323 * fall back to pivoting.
michael@0 2324 */
michael@0 2325 *pErrorCode=U_ZERO_ERROR;
michael@0 2326 /*
michael@0 2327 * The following else branches are almost identical to the end-of-input
michael@0 2328 * handling in _toUnicodeWithCallback().
michael@0 2329 * Avoid calling it just for the end of input.
michael@0 2330 */
michael@0 2331 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
michael@0 2332 /*
michael@0 2333 * the entire input stream is consumed
michael@0 2334 * and there is a partial, truncated input sequence left
michael@0 2335 */
michael@0 2336
michael@0 2337 /* inject an error and continue with callback handling */
michael@0 2338 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
michael@0 2339 } else {
michael@0 2340 /* input consumed */
michael@0 2341 if(flush) {
michael@0 2342 /* reset the converters without calling the callback functions */
michael@0 2343 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
michael@0 2344 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
michael@0 2345 }
michael@0 2346
michael@0 2347 /* done successfully */
michael@0 2348 break;
michael@0 2349 }
michael@0 2350 }
michael@0 2351
michael@0 2352 /*
michael@0 2353 * toUnicode(source -> pivot);
michael@0 2354 *
michael@0 2355 * For pivoting conversion; and for direct conversion for
michael@0 2356 * error callback handling, continuing partial matches
michael@0 2357 * and flushing the replay buffer.
michael@0 2358 *
michael@0 2359 * The pivot buffer is empty and reset.
michael@0 2360 */
michael@0 2361 toUArgs.target=pivotStart; /* ==*pivotTarget */
michael@0 2362 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
michael@0 2363 _toUnicodeWithCallback(&toUArgs, pErrorCode);
michael@0 2364 *pivotTarget=toUArgs.target;
michael@0 2365 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
michael@0 2366 /* pivot overflow: continue with the conversion loop */
michael@0 2367 *pErrorCode=U_ZERO_ERROR;
michael@0 2368 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
michael@0 2369 /* conversion error, or there was nothing left to convert */
michael@0 2370 break;
michael@0 2371 }
michael@0 2372 /*
michael@0 2373 * else:
michael@0 2374 * _toUnicodeWithCallback() wrote into the pivot buffer,
michael@0 2375 * continue with fromUnicode conversion.
michael@0 2376 *
michael@0 2377 * Set the fromUnicode flush flag if we flush and if toUnicode has
michael@0 2378 * processed the end of the input.
michael@0 2379 */
michael@0 2380 if( flush && toUArgs.source==sourceLimit &&
michael@0 2381 sourceCnv->preToULength>=0 &&
michael@0 2382 sourceCnv->UCharErrorBufferLength==0
michael@0 2383 ) {
michael@0 2384 fromUArgs.flush=TRUE;
michael@0 2385 }
michael@0 2386 }
michael@0 2387
michael@0 2388 /*
michael@0 2389 * The conversion loop is exited when one of the following is true:
michael@0 2390 * - the entire source text has been converted successfully to the target buffer
michael@0 2391 * - a target buffer overflow occurred
michael@0 2392 * - a conversion error occurred
michael@0 2393 */
michael@0 2394
michael@0 2395 *source=toUArgs.source;
michael@0 2396 *target=fromUArgs.target;
michael@0 2397
michael@0 2398 /* terminate the target buffer if possible */
/*
 * The terminating NUL is written at *target without advancing it, so it is
 * not counted in the output length seen by the caller.
 */
michael@0 2399 if(flush && U_SUCCESS(*pErrorCode)) {
michael@0 2400 if(*target!=targetLimit) {
michael@0 2401 **target=0;
michael@0 2402 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
michael@0 2403 *pErrorCode=U_ZERO_ERROR;
michael@0 2404 }
michael@0 2405 } else {
michael@0 2406 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
michael@0 2407 }
michael@0 2408 }
michael@0 2409 }
michael@0 2410
michael@0 2411 /* internal implementation of ucnv_convert() etc. with preflighting */
michael@0 2412 static int32_t
michael@0 2413 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
michael@0 2414 char *target, int32_t targetCapacity,
michael@0 2415 const char *source, int32_t sourceLength,
michael@0 2416 UErrorCode *pErrorCode) {
michael@0 2417 UChar pivotBuffer[CHUNK_SIZE];
michael@0 2418 UChar *pivot, *pivot2;
michael@0 2419
michael@0 2420 char *myTarget;
michael@0 2421 const char *sourceLimit;
michael@0 2422 const char *targetLimit;
michael@0 2423 int32_t targetLength=0;
michael@0 2424
michael@0 2425 /* set up */
michael@0 2426 if(sourceLength<0) {
michael@0 2427 sourceLimit=uprv_strchr(source, 0);
michael@0 2428 } else {
michael@0 2429 sourceLimit=source+sourceLength;
michael@0 2430 }
michael@0 2431
michael@0 2432 /* if there is no input data, we're done */
michael@0 2433 if(source==sourceLimit) {
michael@0 2434 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
michael@0 2435 }
michael@0 2436
michael@0 2437 pivot=pivot2=pivotBuffer;
michael@0 2438 myTarget=target;
michael@0 2439 targetLength=0;
michael@0 2440
michael@0 2441 if(targetCapacity>0) {
michael@0 2442 /* perform real conversion */
michael@0 2443 targetLimit=target+targetCapacity;
michael@0 2444 ucnv_convertEx(outConverter, inConverter,
michael@0 2445 &myTarget, targetLimit,
michael@0 2446 &source, sourceLimit,
michael@0 2447 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
michael@0 2448 FALSE,
michael@0 2449 TRUE,
michael@0 2450 pErrorCode);
michael@0 2451 targetLength=(int32_t)(myTarget-target);
michael@0 2452 }
michael@0 2453
michael@0 2454 /*
michael@0 2455 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
michael@0 2456 * to it but continue the conversion in order to store in targetCapacity
michael@0 2457 * the number of bytes that was required.
michael@0 2458 */
michael@0 2459 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
michael@0 2460 {
michael@0 2461 char targetBuffer[CHUNK_SIZE];
michael@0 2462
michael@0 2463 targetLimit=targetBuffer+CHUNK_SIZE;
michael@0 2464 do {
michael@0 2465 *pErrorCode=U_ZERO_ERROR;
michael@0 2466 myTarget=targetBuffer;
michael@0 2467 ucnv_convertEx(outConverter, inConverter,
michael@0 2468 &myTarget, targetLimit,
michael@0 2469 &source, sourceLimit,
michael@0 2470 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
michael@0 2471 FALSE,
michael@0 2472 TRUE,
michael@0 2473 pErrorCode);
michael@0 2474 targetLength+=(int32_t)(myTarget-targetBuffer);
michael@0 2475 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
michael@0 2476
michael@0 2477 /* done with preflighting, set warnings and errors as appropriate */
michael@0 2478 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
michael@0 2479 }
michael@0 2480
michael@0 2481 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
michael@0 2482 return targetLength;
michael@0 2483 }
michael@0 2484
michael@0 2485 U_CAPI int32_t U_EXPORT2
michael@0 2486 ucnv_convert(const char *toConverterName, const char *fromConverterName,
michael@0 2487 char *target, int32_t targetCapacity,
michael@0 2488 const char *source, int32_t sourceLength,
michael@0 2489 UErrorCode *pErrorCode) {
michael@0 2490 UConverter in, out; /* stack-allocated */
michael@0 2491 UConverter *inConverter, *outConverter;
michael@0 2492 int32_t targetLength;
michael@0 2493
michael@0 2494 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 2495 return 0;
michael@0 2496 }
michael@0 2497
michael@0 2498 if( source==NULL || sourceLength<-1 ||
michael@0 2499 targetCapacity<0 || (targetCapacity>0 && target==NULL)
michael@0 2500 ) {
michael@0 2501 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2502 return 0;
michael@0 2503 }
michael@0 2504
michael@0 2505 /* if there is no input data, we're done */
michael@0 2506 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
michael@0 2507 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
michael@0 2508 }
michael@0 2509
michael@0 2510 /* create the converters */
michael@0 2511 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
michael@0 2512 if(U_FAILURE(*pErrorCode)) {
michael@0 2513 return 0;
michael@0 2514 }
michael@0 2515
michael@0 2516 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
michael@0 2517 if(U_FAILURE(*pErrorCode)) {
michael@0 2518 ucnv_close(inConverter);
michael@0 2519 return 0;
michael@0 2520 }
michael@0 2521
michael@0 2522 targetLength=ucnv_internalConvert(outConverter, inConverter,
michael@0 2523 target, targetCapacity,
michael@0 2524 source, sourceLength,
michael@0 2525 pErrorCode);
michael@0 2526
michael@0 2527 ucnv_close(inConverter);
michael@0 2528 ucnv_close(outConverter);
michael@0 2529
michael@0 2530 return targetLength;
michael@0 2531 }
michael@0 2532
michael@0 2533 /* @internal */
michael@0 2534 static int32_t
michael@0 2535 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
michael@0 2536 UConverterType algorithmicType,
michael@0 2537 UConverter *cnv,
michael@0 2538 char *target, int32_t targetCapacity,
michael@0 2539 const char *source, int32_t sourceLength,
michael@0 2540 UErrorCode *pErrorCode) {
michael@0 2541 UConverter algoConverterStatic; /* stack-allocated */
michael@0 2542 UConverter *algoConverter, *to, *from;
michael@0 2543 int32_t targetLength;
michael@0 2544
michael@0 2545 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 2546 return 0;
michael@0 2547 }
michael@0 2548
michael@0 2549 if( cnv==NULL || source==NULL || sourceLength<-1 ||
michael@0 2550 targetCapacity<0 || (targetCapacity>0 && target==NULL)
michael@0 2551 ) {
michael@0 2552 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2553 return 0;
michael@0 2554 }
michael@0 2555
michael@0 2556 /* if there is no input data, we're done */
michael@0 2557 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
michael@0 2558 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
michael@0 2559 }
michael@0 2560
michael@0 2561 /* create the algorithmic converter */
michael@0 2562 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
michael@0 2563 "", 0, pErrorCode);
michael@0 2564 if(U_FAILURE(*pErrorCode)) {
michael@0 2565 return 0;
michael@0 2566 }
michael@0 2567
michael@0 2568 /* reset the other converter */
michael@0 2569 if(convertToAlgorithmic) {
michael@0 2570 /* cnv->Unicode->algo */
michael@0 2571 ucnv_resetToUnicode(cnv);
michael@0 2572 to=algoConverter;
michael@0 2573 from=cnv;
michael@0 2574 } else {
michael@0 2575 /* algo->Unicode->cnv */
michael@0 2576 ucnv_resetFromUnicode(cnv);
michael@0 2577 from=algoConverter;
michael@0 2578 to=cnv;
michael@0 2579 }
michael@0 2580
michael@0 2581 targetLength=ucnv_internalConvert(to, from,
michael@0 2582 target, targetCapacity,
michael@0 2583 source, sourceLength,
michael@0 2584 pErrorCode);
michael@0 2585
michael@0 2586 ucnv_close(algoConverter);
michael@0 2587
michael@0 2588 return targetLength;
michael@0 2589 }
michael@0 2590
michael@0 2591 U_CAPI int32_t U_EXPORT2
michael@0 2592 ucnv_toAlgorithmic(UConverterType algorithmicType,
michael@0 2593 UConverter *cnv,
michael@0 2594 char *target, int32_t targetCapacity,
michael@0 2595 const char *source, int32_t sourceLength,
michael@0 2596 UErrorCode *pErrorCode) {
michael@0 2597 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
michael@0 2598 target, targetCapacity,
michael@0 2599 source, sourceLength,
michael@0 2600 pErrorCode);
michael@0 2601 }
michael@0 2602
michael@0 2603 U_CAPI int32_t U_EXPORT2
michael@0 2604 ucnv_fromAlgorithmic(UConverter *cnv,
michael@0 2605 UConverterType algorithmicType,
michael@0 2606 char *target, int32_t targetCapacity,
michael@0 2607 const char *source, int32_t sourceLength,
michael@0 2608 UErrorCode *pErrorCode) {
michael@0 2609 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
michael@0 2610 target, targetCapacity,
michael@0 2611 source, sourceLength,
michael@0 2612 pErrorCode);
michael@0 2613 }
michael@0 2614
michael@0 2615 U_CAPI UConverterType U_EXPORT2
michael@0 2616 ucnv_getType(const UConverter* converter)
michael@0 2617 {
michael@0 2618 int8_t type = converter->sharedData->staticData->conversionType;
michael@0 2619 #if !UCONFIG_NO_LEGACY_CONVERSION
michael@0 2620 if(type == UCNV_MBCS) {
michael@0 2621 return ucnv_MBCSGetType(converter);
michael@0 2622 }
michael@0 2623 #endif
michael@0 2624 return (UConverterType)type;
michael@0 2625 }
michael@0 2626
michael@0 2627 U_CAPI void U_EXPORT2
michael@0 2628 ucnv_getStarters(const UConverter* converter,
michael@0 2629 UBool starters[256],
michael@0 2630 UErrorCode* err)
michael@0 2631 {
michael@0 2632 if (err == NULL || U_FAILURE(*err)) {
michael@0 2633 return;
michael@0 2634 }
michael@0 2635
michael@0 2636 if(converter->sharedData->impl->getStarters != NULL) {
michael@0 2637 converter->sharedData->impl->getStarters(converter, starters, err);
michael@0 2638 } else {
michael@0 2639 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2640 }
michael@0 2641 }
michael@0 2642
michael@0 2643 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
michael@0 2644 {
michael@0 2645 UErrorCode errorCode;
michael@0 2646 const char *name;
michael@0 2647 int32_t i;
michael@0 2648
michael@0 2649 if(cnv==NULL) {
michael@0 2650 return NULL;
michael@0 2651 }
michael@0 2652
michael@0 2653 errorCode=U_ZERO_ERROR;
michael@0 2654 name=ucnv_getName(cnv, &errorCode);
michael@0 2655 if(U_FAILURE(errorCode)) {
michael@0 2656 return NULL;
michael@0 2657 }
michael@0 2658
michael@0 2659 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
michael@0 2660 {
michael@0 2661 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
michael@0 2662 {
michael@0 2663 return ambiguousConverters+i;
michael@0 2664 }
michael@0 2665 }
michael@0 2666
michael@0 2667 return NULL;
michael@0 2668 }
michael@0 2669
michael@0 2670 U_CAPI void U_EXPORT2
michael@0 2671 ucnv_fixFileSeparator(const UConverter *cnv,
michael@0 2672 UChar* source,
michael@0 2673 int32_t sourceLength) {
michael@0 2674 const UAmbiguousConverter *a;
michael@0 2675 int32_t i;
michael@0 2676 UChar variant5c;
michael@0 2677
michael@0 2678 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
michael@0 2679 {
michael@0 2680 return;
michael@0 2681 }
michael@0 2682
michael@0 2683 variant5c=a->variant5c;
michael@0 2684 for(i=0; i<sourceLength; ++i) {
michael@0 2685 if(source[i]==variant5c) {
michael@0 2686 source[i]=0x5c;
michael@0 2687 }
michael@0 2688 }
michael@0 2689 }
michael@0 2690
michael@0 2691 U_CAPI UBool U_EXPORT2
michael@0 2692 ucnv_isAmbiguous(const UConverter *cnv) {
michael@0 2693 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
michael@0 2694 }
michael@0 2695
michael@0 2696 U_CAPI void U_EXPORT2
michael@0 2697 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
michael@0 2698 {
michael@0 2699 cnv->useFallback = usesFallback;
michael@0 2700 }
michael@0 2701
michael@0 2702 U_CAPI UBool U_EXPORT2
michael@0 2703 ucnv_usesFallback(const UConverter *cnv)
michael@0 2704 {
michael@0 2705 return cnv->useFallback;
michael@0 2706 }
michael@0 2707
michael@0 2708 U_CAPI void U_EXPORT2
michael@0 2709 ucnv_getInvalidChars (const UConverter * converter,
michael@0 2710 char *errBytes,
michael@0 2711 int8_t * len,
michael@0 2712 UErrorCode * err)
michael@0 2713 {
michael@0 2714 if (err == NULL || U_FAILURE(*err))
michael@0 2715 {
michael@0 2716 return;
michael@0 2717 }
michael@0 2718 if (len == NULL || errBytes == NULL || converter == NULL)
michael@0 2719 {
michael@0 2720 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2721 return;
michael@0 2722 }
michael@0 2723 if (*len < converter->invalidCharLength)
michael@0 2724 {
michael@0 2725 *err = U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 2726 return;
michael@0 2727 }
michael@0 2728 if ((*len = converter->invalidCharLength) > 0)
michael@0 2729 {
michael@0 2730 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
michael@0 2731 }
michael@0 2732 }
michael@0 2733
michael@0 2734 U_CAPI void U_EXPORT2
michael@0 2735 ucnv_getInvalidUChars (const UConverter * converter,
michael@0 2736 UChar *errChars,
michael@0 2737 int8_t * len,
michael@0 2738 UErrorCode * err)
michael@0 2739 {
michael@0 2740 if (err == NULL || U_FAILURE(*err))
michael@0 2741 {
michael@0 2742 return;
michael@0 2743 }
michael@0 2744 if (len == NULL || errChars == NULL || converter == NULL)
michael@0 2745 {
michael@0 2746 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2747 return;
michael@0 2748 }
michael@0 2749 if (*len < converter->invalidUCharLength)
michael@0 2750 {
michael@0 2751 *err = U_INDEX_OUTOFBOUNDS_ERROR;
michael@0 2752 return;
michael@0 2753 }
michael@0 2754 if ((*len = converter->invalidUCharLength) > 0)
michael@0 2755 {
michael@0 2756 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
michael@0 2757 }
michael@0 2758 }
michael@0 2759
michael@0 2760 #define SIG_MAX_LEN 5
michael@0 2761
michael@0 2762 U_CAPI const char* U_EXPORT2
michael@0 2763 ucnv_detectUnicodeSignature( const char* source,
michael@0 2764 int32_t sourceLength,
michael@0 2765 int32_t* signatureLength,
michael@0 2766 UErrorCode* pErrorCode) {
michael@0 2767 int32_t dummy;
michael@0 2768
michael@0 2769 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
michael@0 2770 * bytes we don't misdetect something
michael@0 2771 */
michael@0 2772 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
michael@0 2773 int i = 0;
michael@0 2774
michael@0 2775 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
michael@0 2776 return NULL;
michael@0 2777 }
michael@0 2778
michael@0 2779 if(source == NULL || sourceLength < -1){
michael@0 2780 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2781 return NULL;
michael@0 2782 }
michael@0 2783
michael@0 2784 if(signatureLength == NULL) {
michael@0 2785 signatureLength = &dummy;
michael@0 2786 }
michael@0 2787
michael@0 2788 if(sourceLength==-1){
michael@0 2789 sourceLength=(int32_t)uprv_strlen(source);
michael@0 2790 }
michael@0 2791
michael@0 2792
michael@0 2793 while(i<sourceLength&& i<SIG_MAX_LEN){
michael@0 2794 start[i]=source[i];
michael@0 2795 i++;
michael@0 2796 }
michael@0 2797
michael@0 2798 if(start[0] == '\xFE' && start[1] == '\xFF') {
michael@0 2799 *signatureLength=2;
michael@0 2800 return "UTF-16BE";
michael@0 2801 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
michael@0 2802 if(start[2] == '\x00' && start[3] =='\x00') {
michael@0 2803 *signatureLength=4;
michael@0 2804 return "UTF-32LE";
michael@0 2805 } else {
michael@0 2806 *signatureLength=2;
michael@0 2807 return "UTF-16LE";
michael@0 2808 }
michael@0 2809 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
michael@0 2810 *signatureLength=3;
michael@0 2811 return "UTF-8";
michael@0 2812 } else if(start[0] == '\x00' && start[1] == '\x00' &&
michael@0 2813 start[2] == '\xFE' && start[3]=='\xFF') {
michael@0 2814 *signatureLength=4;
michael@0 2815 return "UTF-32BE";
michael@0 2816 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
michael@0 2817 *signatureLength=3;
michael@0 2818 return "SCSU";
michael@0 2819 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
michael@0 2820 *signatureLength=3;
michael@0 2821 return "BOCU-1";
michael@0 2822 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
michael@0 2823 /*
michael@0 2824 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
michael@0 2825 * depending on the second UTF-16 code unit.
michael@0 2826 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
michael@0 2827 * if it occurs.
michael@0 2828 *
michael@0 2829 * So far we have +/v
michael@0 2830 */
michael@0 2831 if(start[3] == '\x38' && start[4] == '\x2D') {
michael@0 2832 /* 5 bytes +/v8- */
michael@0 2833 *signatureLength=5;
michael@0 2834 return "UTF-7";
michael@0 2835 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
michael@0 2836 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
michael@0 2837 *signatureLength=4;
michael@0 2838 return "UTF-7";
michael@0 2839 }
michael@0 2840 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
michael@0 2841 *signatureLength=4;
michael@0 2842 return "UTF-EBCDIC";
michael@0 2843 }
michael@0 2844
michael@0 2845
michael@0 2846 /* no known Unicode signature byte sequence recognized */
michael@0 2847 *signatureLength=0;
michael@0 2848 return NULL;
michael@0 2849 }
michael@0 2850
michael@0 2851 U_CAPI int32_t U_EXPORT2
michael@0 2852 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
michael@0 2853 {
michael@0 2854 if(status == NULL || U_FAILURE(*status)){
michael@0 2855 return -1;
michael@0 2856 }
michael@0 2857 if(cnv == NULL){
michael@0 2858 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2859 return -1;
michael@0 2860 }
michael@0 2861
michael@0 2862 if(cnv->preFromUFirstCP >= 0){
michael@0 2863 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
michael@0 2864 }else if(cnv->preFromULength < 0){
michael@0 2865 return -cnv->preFromULength ;
michael@0 2866 }else if(cnv->fromUChar32 > 0){
michael@0 2867 return 1;
michael@0 2868 }
michael@0 2869 return 0;
michael@0 2870
michael@0 2871 }
michael@0 2872
michael@0 2873 U_CAPI int32_t U_EXPORT2
michael@0 2874 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
michael@0 2875
michael@0 2876 if(status == NULL || U_FAILURE(*status)){
michael@0 2877 return -1;
michael@0 2878 }
michael@0 2879 if(cnv == NULL){
michael@0 2880 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2881 return -1;
michael@0 2882 }
michael@0 2883
michael@0 2884 if(cnv->preToULength > 0){
michael@0 2885 return cnv->preToULength ;
michael@0 2886 }else if(cnv->preToULength < 0){
michael@0 2887 return -cnv->preToULength;
michael@0 2888 }else if(cnv->toULength > 0){
michael@0 2889 return cnv->toULength;
michael@0 2890 }
michael@0 2891 return 0;
michael@0 2892 }
michael@0 2893
michael@0 2894 U_CAPI UBool U_EXPORT2
michael@0 2895 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
michael@0 2896 if (U_FAILURE(*status)) {
michael@0 2897 return FALSE;
michael@0 2898 }
michael@0 2899
michael@0 2900 if (cnv == NULL) {
michael@0 2901 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2902 return FALSE;
michael@0 2903 }
michael@0 2904
michael@0 2905 switch (ucnv_getType(cnv)) {
michael@0 2906 case UCNV_SBCS:
michael@0 2907 case UCNV_DBCS:
michael@0 2908 case UCNV_UTF32_BigEndian:
michael@0 2909 case UCNV_UTF32_LittleEndian:
michael@0 2910 case UCNV_UTF32:
michael@0 2911 case UCNV_US_ASCII:
michael@0 2912 return TRUE;
michael@0 2913 default:
michael@0 2914 return FALSE;
michael@0 2915 }
michael@0 2916 }
michael@0 2917 #endif
michael@0 2918
michael@0 2919 /*
michael@0 2920 * Hey, Emacs, please set the following:
michael@0 2921 *
michael@0 2922 * Local Variables:
michael@0 2923 * indent-tabs-mode: nil
michael@0 2924 * End:
michael@0 2925 *
michael@0 2926 */

mercurial