The Tor Browser: diff intl/icu/source/common/ucnv.c

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnv.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2926 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1998-2013, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*
    1.12 +*  ucnv.c:
    1.13 +*  Implements APIs for the ICU's codeset conversion library;
    1.14 +*  mostly calls through internal functions;
    1.15 +*  created by Bertrand A. Damiba
    1.16 +*
    1.17 +* Modification History:
    1.18 +*
    1.19 +*   Date        Name        Description
    1.20 +*   04/04/99    helena      Fixed internal header inclusion.
    1.21 +*   05/09/00    helena      Added implementation to handle fallback mappings.
    1.22 +*   06/20/2000  helena      OS/400 port changes; mostly typecast.
    1.23 +*/
    1.24 +
    1.25 +#include "unicode/utypes.h"
    1.26 +
    1.27 +#if !UCONFIG_NO_CONVERSION
    1.28 +
    1.29 +#include "unicode/ustring.h"
    1.30 +#include "unicode/ucnv.h"
    1.31 +#include "unicode/ucnv_err.h"
    1.32 +#include "unicode/uset.h"
    1.33 +#include "unicode/utf.h"
    1.34 +#include "unicode/utf16.h"
    1.35 +#include "putilimp.h"
    1.36 +#include "cmemory.h"
    1.37 +#include "cstring.h"
    1.38 +#include "uassert.h"
    1.39 +#include "utracimp.h"
    1.40 +#include "ustr_imp.h"
    1.41 +#include "ucnv_imp.h"
    1.42 +#include "ucnv_cnv.h"
    1.43 +#include "ucnv_bld.h"
    1.44 +
    1.45 +/* size of intermediate and preflighting buffers in ucnv_convert() */
    1.46 +#define CHUNK_SIZE 1024
    1.47 +
    1.48 +typedef struct UAmbiguousConverter { /* One table entry pairing a converter name with a UChar value. */
    1.49 +    const char *name; /* converter name string, as listed in ambiguousConverters[] */
    1.50 +    const UChar variant5c; /* presumably the character this converter maps byte 0x5c to (inferred from the field name) -- TODO confirm */
    1.51 +} UAmbiguousConverter;
    1.52 +
    1.53 +static const UAmbiguousConverter ambiguousConverters[]={ /* Converter names paired with their variant5c value. */
    1.54 +    { "ibm-897_P100-1995", 0xa5 }, /* 0xa5 is U+00A5 YEN SIGN */
    1.55 +    { "ibm-942_P120-1999", 0xa5 },
    1.56 +    { "ibm-943_P130-1999", 0xa5 },
    1.57 +    { "ibm-946_P100-1995", 0xa5 },
    1.58 +    { "ibm-33722_P120-1999", 0xa5 },
    1.59 +    { "ibm-1041_P100-1995", 0xa5 },
    1.60 +    /*{ "ibm-54191_P100-2006", 0xa5 },*/
    1.61 +    /*{ "ibm-62383_P100-2007", 0xa5 },*/
    1.62 +    /*{ "ibm-891_P100-1995", 0x20a9 },*/
    1.63 +    { "ibm-944_P100-1995", 0x20a9 }, /* 0x20a9 is U+20A9 WON SIGN */
    1.64 +    { "ibm-949_P110-1999", 0x20a9 },
    1.65 +    { "ibm-1363_P110-1997", 0x20a9 },
    1.66 +    { "ISO_2022,locale=ko,version=0", 0x20a9 },
    1.67 +    { "ibm-1088_P100-1995", 0x20a9 }
    1.68 +};
    1.69 +
    1.70 +/* Opens a converter by name. Returns NULL when err is NULL or already holds a failure; otherwise calls through ucnv_createConverter(). */
    1.71 +U_CAPI UConverter* U_EXPORT2
    1.72 +ucnv_open (const char *name,
    1.73 +                       UErrorCode * err)
    1.74 +{
    1.75 +    UConverter *r;
    1.76 +
    1.77 +    if (err == NULL || U_FAILURE (*err)) {
    1.78 +        return NULL;
    1.79 +    }
    1.80 +
    1.81 +    r =  ucnv_createConverter(NULL, name, err); /* the first argument is always passed as NULL from here */
    1.82 +    return r;
    1.83 +}
    1.84 +
    1.85 +U_CAPI UConverter* U_EXPORT2 /* Opens a converter from a named package by forwarding all three arguments unchanged. */
    1.86 +ucnv_openPackage   (const char *packageName, const char *converterName, UErrorCode * err)
    1.87 +{
    1.88 +    return ucnv_createConverterFromPackage(packageName, converterName,  err); /* err is not inspected in this wrapper */
    1.89 +}
    1.90 +
    1.91 +/* Opens a converter from a UChar name: copies the name into a local char buffer with u_austrcpy() and calls ucnv_open(). */
    1.92 +U_CAPI UConverter*   U_EXPORT2
    1.93 +ucnv_openU (const UChar * name,
    1.94 +                         UErrorCode * err)
    1.95 +{
    1.96 +    char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    1.97 +
    1.98 +    if (err == NULL || U_FAILURE(*err))
    1.99 +        return NULL;
   1.100 +    if (name == NULL)
   1.101 +        return ucnv_open (NULL, err); /* a NULL name is forwarded unchanged to ucnv_open() */
   1.102 +    if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) /* the name plus its terminator must fit in asciiName */
   1.103 +    {
   1.104 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
   1.105 +        return NULL;
   1.106 +    }
   1.107 +    return ucnv_open(u_austrcpy(asciiName, name), err); /* the result of u_austrcpy() is used as the char* name */
   1.108 +}
   1.109 +
   1.110 +/* Copy the string that is represented by the UConverterPlatform enum
   1.111 + * @param platformString An output buffer; up to 5 bytes ("ibm-" plus its terminator) are written
   1.112 + * @param pltfrm An enum representing a platform; only UCNV_IBM produces a non-empty string
   1.113 + * @return the length of the copied string: 4 for UCNV_IBM, otherwise 0.
   1.114 + */
   1.115 +static int32_t
   1.116 +ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
   1.117 +{
   1.118 +    switch (pltfrm)
   1.119 +    {
   1.120 +    case UCNV_IBM:
   1.121 +        uprv_strcpy(platformString, "ibm-");
   1.122 +        return 4;
   1.123 +    case UCNV_UNKNOWN:
   1.124 +        break;
   1.125 +    }
   1.126 +
   1.127 +    /* default to empty string; also reached for any value not listed in the switch */
   1.128 +    *platformString = 0;
   1.129 +    return 0;
   1.130 +}
   1.131 +
   1.132 +/*Builds the converter name "<platform prefix><codepage number>" in a local buffer and calls
   1.133 + *through ucnv_createConverter(); returns NULL when err is NULL or already holds a failure.*/
   1.134 +U_CAPI UConverter*   U_EXPORT2
   1.135 +ucnv_openCCSID (int32_t codepage,
   1.136 +                UConverterPlatform platform,
   1.137 +                UErrorCode * err)
   1.138 +{
   1.139 +    char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
   1.140 +    int32_t myNameLen;
   1.141 +
   1.142 +    if (err == NULL || U_FAILURE (*err))
   1.143 +        return NULL;
   1.144 +
   1.145 +    /* ucnv_copyPlatformString writes "ibm-" (length 4) or an empty string (length 0) */
   1.146 +    myNameLen = ucnv_copyPlatformString(myName, platform);
   1.147 +    T_CString_integerToString(myName + myNameLen, codepage, 10); /* writes the number after the prefix; 10 is presumably the radix */
   1.148 +
   1.149 +    return ucnv_createConverter(NULL, myName, err);
   1.150 +}
   1.151 +
   1.152 +/* Clones cnv. The clone is placed in the caller's stackBuffer when, after alignment, *pBufferSize is large enough;
   1.153 +otherwise it is heap-allocated and *status is set to U_SAFECLONE_ALLOCATED_WARNING. A *pBufferSize <= 0 only preflights
   1.154 +the needed size and returns NULL. A NULL return caused by an allocation failure leaves U_MEMORY_ALLOCATION_ERROR in *status. */
   1.155 +
   1.156 +U_CAPI UConverter* U_EXPORT2
   1.157 +ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
   1.158 +{
   1.159 +    UConverter *localConverter, *allocatedConverter;
   1.160 +    int32_t stackBufferSize;
   1.161 +    int32_t bufferSizeNeeded;
   1.162 +    char *stackBufferChars = (char *)stackBuffer;
   1.163 +    UErrorCode cbErr;
   1.164 +    UConverterToUnicodeArgs toUArgs = {
   1.165 +        sizeof(UConverterToUnicodeArgs),
   1.166 +            TRUE,
   1.167 +            NULL,
   1.168 +            NULL,
   1.169 +            NULL,
   1.170 +            NULL,
   1.171 +            NULL,
   1.172 +            NULL
   1.173 +    };
   1.174 +    UConverterFromUnicodeArgs fromUArgs = {
   1.175 +        sizeof(UConverterFromUnicodeArgs),
   1.176 +            TRUE,
   1.177 +            NULL,
   1.178 +            NULL,
   1.179 +            NULL,
   1.180 +            NULL,
   1.181 +            NULL,
   1.182 +            NULL
   1.183 +    };
   1.184 +
   1.185 +    UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
   1.186 +
   1.187 +    if (status == NULL || U_FAILURE(*status)){
   1.188 +        UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
   1.189 +        return NULL;
   1.190 +    }
   1.191 +
   1.192 +    if (cnv == NULL) {
   1.193 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.194 +        UTRACE_EXIT_STATUS(*status);
   1.195 +        return NULL;
   1.196 +    }
   1.197 +
   1.198 +    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
   1.199 +                                    ucnv_getName(cnv, status), cnv, stackBuffer);
   1.200 +
   1.201 +    if (cnv->sharedData->impl->safeClone != NULL) {
   1.202 +        /* call the custom safeClone function for sizing: NULL buffer and a size of 0 */
   1.203 +        bufferSizeNeeded = 0;
   1.204 +        cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
   1.205 +        if (U_FAILURE(*status)) {
   1.206 +            UTRACE_EXIT_STATUS(*status);
   1.207 +            return NULL;
   1.208 +        }
   1.209 +    }
   1.210 +    else
   1.211 +    {
   1.212 +        /* inherent sizing */
   1.213 +        bufferSizeNeeded = sizeof(UConverter);
   1.214 +    }
   1.215 +
   1.216 +    if (pBufferSize == NULL) {
   1.217 +        stackBufferSize = 1; /* positive, so this is not treated as a preflight request */
   1.218 +        pBufferSize = &stackBufferSize; /* gives the later writes through pBufferSize a valid target */
   1.219 +    } else {
   1.220 +        stackBufferSize = *pBufferSize;
   1.221 +        if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
   1.222 +            *pBufferSize = bufferSizeNeeded;
   1.223 +            UTRACE_EXIT_VALUE(bufferSizeNeeded);
   1.224 +            return NULL;
   1.225 +        }
   1.226 +    }
   1.227 +
   1.228 +
   1.229 +    /* Pointers on 64-bit platforms need to be aligned
   1.230 +     * on a 64-bit boundary in memory.
   1.231 +     */
   1.232 +    if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
   1.233 +        int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
   1.234 +        if(stackBufferSize > offsetUp) {
   1.235 +            stackBufferSize -= offsetUp; /* skip the unaligned leading bytes of the caller's buffer */
   1.236 +            stackBufferChars += offsetUp;
   1.237 +        } else {
   1.238 +            /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
   1.239 +            stackBufferSize = 1;
   1.240 +        }
   1.241 +    }
   1.242 +
   1.243 +    stackBuffer = (void *)stackBufferChars;
   1.244 +    
   1.245 +    /* Now, see if we must allocate any memory */
   1.246 +    if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
   1.247 +    {
   1.248 +        /* allocate one here...*/
   1.249 +        localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
   1.250 +
   1.251 +        if(localConverter == NULL) {
   1.252 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.253 +            UTRACE_EXIT_STATUS(*status);
   1.254 +            return NULL;
   1.255 +        }
   1.256 +        *status = U_SAFECLONE_ALLOCATED_WARNING;
   1.257 +
   1.258 +        /* record the fact that memory was allocated */
   1.259 +        *pBufferSize = bufferSizeNeeded;
   1.260 +    } else {
   1.261 +        /* just use the stack buffer */
   1.262 +        localConverter = (UConverter*) stackBuffer;
   1.263 +        allocatedConverter = NULL;
   1.264 +    }
   1.265 +
   1.266 +    uprv_memset(localConverter, 0, bufferSizeNeeded);
   1.267 +
   1.268 +    /* Copy initial state */
   1.269 +    uprv_memcpy(localConverter, cnv, sizeof(UConverter));
   1.270 +    localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
   1.271 +
   1.272 +    /* copy the substitution string */
   1.273 +    if (cnv->subChars == (uint8_t *)cnv->subUChars) {
   1.274 +        localConverter->subChars = (uint8_t *)localConverter->subUChars; /* re-point at the clone's own inline buffer */
   1.275 +    } else {
   1.276 +        localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
   1.277 +        if (localConverter->subChars == NULL) {
   1.278 +            *status = U_MEMORY_ALLOCATION_ERROR; uprv_free(allocatedConverter); /* fix: report the failure so callers checking only *status do not use the NULL result */
   1.279 +            UTRACE_EXIT_STATUS(*status);
   1.280 +            return NULL;
   1.281 +        }
   1.282 +        uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
   1.283 +    }
   1.284 +
   1.285 +    /* now either call the safeclone fcn or not */
   1.286 +    if (cnv->sharedData->impl->safeClone != NULL) {
   1.287 +        /* call the custom safeClone function; its return value replaces localConverter */
   1.288 +        localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
   1.289 +    }
   1.290 +
   1.291 +    if(localConverter==NULL || U_FAILURE(*status)) {
   1.292 +        if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
   1.293 +            uprv_free(allocatedConverter->subChars); /* separately allocated substitution buffer of a heap clone */
   1.294 +        }
   1.295 +        uprv_free(allocatedConverter);
   1.296 +        UTRACE_EXIT_STATUS(*status);
   1.297 +        return NULL;
   1.298 +    }
   1.299 +
   1.300 +    /* increment refcount of shared data if needed */
   1.301 +    /*
   1.302 +    Checking whether it's an algorithmic converter is okay
   1.303 +    in multithreaded applications because the value never changes.
   1.304 +    Don't check referenceCounter for any other value.
   1.305 +    */
   1.306 +    if (cnv->sharedData->referenceCounter != ~0) {
   1.307 +        ucnv_incrementRefCount(cnv->sharedData);
   1.308 +    }
   1.309 +
   1.310 +    if(localConverter == (UConverter*)stackBuffer) {
   1.311 +        /* we're using user provided data - set to not destroy */
   1.312 +        localConverter->isCopyLocal = TRUE;
   1.313 +    }
   1.314 +
   1.315 +    /* allow callback functions to handle any memory allocation; each is invoked once with UCNV_CLONE and its error code is discarded */
   1.316 +    toUArgs.converter = fromUArgs.converter = localConverter;
   1.317 +    cbErr = U_ZERO_ERROR;
   1.318 +    cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
   1.319 +    cbErr = U_ZERO_ERROR;
   1.320 +    cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
   1.321 +
   1.322 +    UTRACE_EXIT_PTR_STATUS(localConverter, *status);
   1.323 +    return localConverter;
   1.324 +}
   1.325 +
   1.326 +
   1.327 +
   1.328 +/*Closes a converter: notifies non-default callbacks with UCNV_CLOSE, calls the implementation's close hook,
   1.329 + *frees a separately allocated subChars buffer, releases the shared data and frees the mutable part. NULL is ignored.*/
   1.330 +
   1.331 +U_CAPI void  U_EXPORT2
   1.332 +ucnv_close (UConverter * converter)
   1.333 +{
   1.334 +    UErrorCode errorCode = U_ZERO_ERROR;
   1.335 +
   1.336 +    UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
   1.337 +
   1.338 +    if (converter == NULL)
   1.339 +    {
   1.340 +        UTRACE_EXIT();
   1.341 +        return;
   1.342 +    }
   1.343 +
   1.344 +    UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
   1.345 +        ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
   1.346 +
   1.347 +    /* In order to speed up the close, only call the callbacks when they have been changed.
   1.348 +    This performance check will only work when the callbacks are set within a shared library
   1.349 +    or from user code that statically links this code. */
   1.350 +    /* first, notify the callback functions that the converter is closed */
   1.351 +    if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
   1.352 +        UConverterToUnicodeArgs toUArgs = {
   1.353 +            sizeof(UConverterToUnicodeArgs),
   1.354 +                TRUE,
   1.355 +                NULL,
   1.356 +                NULL,
   1.357 +                NULL,
   1.358 +                NULL,
   1.359 +                NULL,
   1.360 +                NULL
   1.361 +        };
   1.362 +
   1.363 +        toUArgs.converter = converter;
   1.364 +        errorCode = U_ZERO_ERROR;
   1.365 +        converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); /* errorCode is not checked afterwards */
   1.366 +    }
   1.367 +    if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
   1.368 +        UConverterFromUnicodeArgs fromUArgs = {
   1.369 +            sizeof(UConverterFromUnicodeArgs),
   1.370 +                TRUE,
   1.371 +                NULL,
   1.372 +                NULL,
   1.373 +                NULL,
   1.374 +                NULL,
   1.375 +                NULL,
   1.376 +                NULL
   1.377 +        };
   1.378 +        fromUArgs.converter = converter;
   1.379 +        errorCode = U_ZERO_ERROR;
   1.380 +        converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); /* errorCode is not checked afterwards */
   1.381 +    }
   1.382 +
   1.383 +    if (converter->sharedData->impl->close != NULL) {
   1.384 +        converter->sharedData->impl->close(converter); /* optional implementation-specific close hook */
   1.385 +    }
   1.386 +
   1.387 +    if (converter->subChars != (uint8_t *)converter->subUChars) {
   1.388 +        uprv_free(converter->subChars); /* subChars was allocated separately rather than pointing at the inline subUChars buffer */
   1.389 +    }
   1.390 +
   1.391 +    /*
   1.392 +    Checking whether it's an algorithmic converter is okay
   1.393 +    in multithreaded applications because the value never changes.
   1.394 +    Don't check referenceCounter for any other value.
   1.395 +    */
   1.396 +    if (converter->sharedData->referenceCounter != ~0) {
   1.397 +        ucnv_unloadSharedDataIfReady(converter->sharedData);
   1.398 +    }
   1.399 +
   1.400 +    if(!converter->isCopyLocal){ /* isCopyLocal marks a converter living in caller-provided memory, which must not be freed here */
   1.401 +        uprv_free(converter);
   1.402 +    }
   1.403 +
   1.404 +    UTRACE_EXIT();
   1.405 +}
   1.406 +
   1.407 +/*Returns the n-th name from the list of available converters; returns NULL if n is outside 0..0xffff
   1.408 + *or if ucnv_bld_getAvailableConverter() reports a failure. */
   1.409 +U_CAPI const char*   U_EXPORT2
   1.410 +ucnv_getAvailableName (int32_t n)
   1.411 +{
   1.412 +    if (0 <= n && n <= 0xffff) { /* the lookup takes a uint16_t index */
   1.413 +        UErrorCode err = U_ZERO_ERROR;
   1.414 +        const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
   1.415 +        if (U_SUCCESS(err)) {
   1.416 +            return name;
   1.417 +        }
   1.418 +    }
   1.419 +    return NULL;
   1.420 +}
   1.421 +
   1.422 +U_CAPI int32_t   U_EXPORT2 /* Returns the value of ucnv_bld_countAvailableConverters(). */
   1.423 +ucnv_countAvailable ()
   1.424 +{
   1.425 +    UErrorCode err = U_ZERO_ERROR; /* local error code; it is not inspected after the call */
   1.426 +    return ucnv_bld_countAvailableConverters(&err);
   1.427 +}
   1.428 +
   1.429 +U_CAPI void    U_EXPORT2 /* Copies the converter's substitution bytes into mySubChar; *len is the capacity on input and the byte count on output. */
   1.430 +ucnv_getSubstChars (const UConverter * converter,
   1.431 +                    char *mySubChar,
   1.432 +                    int8_t * len,
   1.433 +                    UErrorCode * err)
   1.434 +{
   1.435 +    if (U_FAILURE (*err)) /* err itself is dereferenced without a NULL check */
   1.436 +        return;
   1.437 +
   1.438 +    if (converter->subCharLen <= 0) {
   1.439 +        /* Unicode string or empty string from ucnv_setSubstString(): nothing is copied. */
   1.440 +        *len = 0;
   1.441 +        return;
   1.442 +    }
   1.443 +
   1.444 +    if (*len < converter->subCharLen) /*not enough space in mySubChar */
   1.445 +    {
   1.446 +        *err = U_INDEX_OUTOFBOUNDS_ERROR;
   1.447 +        return;
   1.448 +    }
   1.449 +
   1.450 +    uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);   /*fills in the subchars */
   1.451 +    *len = converter->subCharLen; /*store # of bytes copied to buffer */
   1.452 +}
   1.453 +
   1.454 +U_CAPI void    U_EXPORT2 /* Stores len bytes from mySubChar as the substitution bytes, after range-checking len. */
   1.455 +ucnv_setSubstChars (UConverter * converter,
   1.456 +                    const char *mySubChar,
   1.457 +                    int8_t len,
   1.458 +                    UErrorCode * err)
   1.459 +{
   1.460 +    if (U_FAILURE (*err)) /* err itself is dereferenced without a NULL check */
   1.461 +        return;
   1.462 +    
   1.463 +    /*Makes sure that the subChar is within the codepage's char length boundaries; otherwise U_ILLEGAL_ARGUMENT_ERROR */
   1.464 +    if ((len > converter->sharedData->staticData->maxBytesPerChar)
   1.465 +     || (len < converter->sharedData->staticData->minBytesPerChar))
   1.466 +    {
   1.467 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
   1.468 +        return;
   1.469 +    }
   1.470 +    
   1.471 +    uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
   1.472 +    converter->subCharLen = len;  /*sets the new len */
   1.473 +
   1.474 +    /*
   1.475 +    * There is currently (2001Feb) no separate API to set/get subChar1.
   1.476 +    * In order to always have subChar written after it is explicitly set,
   1.477 +    * we set subChar1 to 0.
   1.478 +    */
   1.479 +    converter->subChar1 = 0;
   1.480 +    
   1.481 +    return;
   1.482 +}
   1.483 +
   1.484 +U_CAPI void U_EXPORT2 /* Sets the substitution string from UChars s; it is stored either as converted bytes or as UChars with a negated length in subCharLen. */
   1.485 +ucnv_setSubstString(UConverter *cnv,
   1.486 +                    const UChar *s,
   1.487 +                    int32_t length,
   1.488 +                    UErrorCode *err) {
   1.489 +    UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
   1.490 +    char chars[UCNV_ERROR_BUFFER_LENGTH];
   1.491 +
   1.492 +    UConverter *clone;
   1.493 +    uint8_t *subChars;
   1.494 +    int32_t cloneSize, length8;
   1.495 +
   1.496 +    /* Let the following functions check all arguments. A trial conversion on a clone leaves cnv's own state untouched. */
   1.497 +    cloneSize = sizeof(cloneBuffer);
   1.498 +    clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
   1.499 +    ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
   1.500 +    length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
   1.501 +    ucnv_close(clone);
   1.502 +    if (U_FAILURE(*err)) {
   1.503 +        return;
   1.504 +    }
   1.505 +
   1.506 +    if (cnv->sharedData->impl->writeSub == NULL
   1.507 +#if !UCONFIG_NO_LEGACY_CONVERSION
   1.508 +        || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
   1.509 +         ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
   1.510 +#endif
   1.511 +    ) {
   1.512 +        /* The converter is not stateful. Store the charset bytes as a fixed string. */
   1.513 +        subChars = (uint8_t *)chars;
   1.514 +    } else {
   1.515 +        /*
   1.516 +         * The converter has a non-default writeSub() function, indicating
   1.517 +         * that it is stateful.
   1.518 +         * Store the Unicode string for on-the-fly conversion for correct
   1.519 +         * state handling.
   1.520 +         */
   1.521 +        if (length > UCNV_ERROR_BUFFER_LENGTH) {
   1.522 +            /*
   1.523 +             * Should not occur. The converter should output at least one byte
   1.524 +             * per UChar, which means that ucnv_fromUChars() should catch all
   1.525 +             * overflows.
   1.526 +             */
   1.527 +            *err = U_BUFFER_OVERFLOW_ERROR;
   1.528 +            return;
   1.529 +        }
   1.530 +        subChars = (uint8_t *)s;
   1.531 +        if (length < 0) {
   1.532 +            length = u_strlen(s); /* a negative length means s is measured with u_strlen() */
   1.533 +        }
   1.534 +        length8 = length * U_SIZEOF_UCHAR; /* byte count of the UChar string */
   1.535 +    }
   1.536 +
   1.537 +    /*
   1.538 +     * For storing the substitution string, select either the small buffer inside
   1.539 +     * UConverter or allocate a subChars buffer.
   1.540 +     */
   1.541 +    if (length8 > UCNV_MAX_SUBCHAR_LEN) {
   1.542 +        /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
   1.543 +        if (cnv->subChars == (uint8_t *)cnv->subUChars) {
   1.544 +            /* Allocate a new buffer for the string. */
   1.545 +            cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
   1.546 +            if (cnv->subChars == NULL) {
   1.547 +                cnv->subChars = (uint8_t *)cnv->subUChars; /* restore the inline buffer pointer on allocation failure */
   1.548 +                *err = U_MEMORY_ALLOCATION_ERROR;
   1.549 +                return;
   1.550 +            }
   1.551 +            uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
   1.552 +        }
   1.553 +    }
   1.554 +
   1.555 +    /* Copy the substitution string into the UConverter or its subChars buffer. */
   1.556 +    if (length8 == 0) {
   1.557 +        cnv->subCharLen = 0;
   1.558 +    } else {
   1.559 +        uprv_memcpy(cnv->subChars, subChars, length8);
   1.560 +        if (subChars == (uint8_t *)chars) {
   1.561 +            cnv->subCharLen = (int8_t)length8; /* positive: number of charset bytes */
   1.562 +        } else /* subChars == s */ {
   1.563 +            cnv->subCharLen = (int8_t)-length; /* negative: negated number of UChars */
   1.564 +        }
   1.565 +    }
   1.566 +
   1.567 +    /* See comment in ucnv_setSubstChars(). */
   1.568 +    cnv->subChar1 = 0;
   1.569 +}
   1.570 +
   1.571 +/*Resets the internal state of a converter for the direction(s) selected by choice.
   1.572 + *Goal: have the same behaviour as a freshly created converter.
   1.573 + *A NULL converter is ignored; callbacks are notified with UCNV_RESET only when callCallback is set. */
   1.574 +static void _reset(UConverter *converter, UConverterResetChoice choice,
   1.575 +                   UBool callCallback) {
   1.576 +    if(converter == NULL) {
   1.577 +        return;
   1.578 +    }
   1.579 +
   1.580 +    if(callCallback) {
   1.581 +        /* first, notify the callback functions that the converter is reset; default callbacks are skipped */
   1.582 +        UErrorCode errorCode;
   1.583 +
   1.584 +        if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
   1.585 +            UConverterToUnicodeArgs toUArgs = {
   1.586 +                sizeof(UConverterToUnicodeArgs),
   1.587 +                TRUE,
   1.588 +                NULL,
   1.589 +                NULL,
   1.590 +                NULL,
   1.591 +                NULL,
   1.592 +                NULL,
   1.593 +                NULL
   1.594 +            };
   1.595 +            toUArgs.converter = converter;
   1.596 +            errorCode = U_ZERO_ERROR;
   1.597 +            converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
   1.598 +        }
   1.599 +        if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
   1.600 +            UConverterFromUnicodeArgs fromUArgs = {
   1.601 +                sizeof(UConverterFromUnicodeArgs),
   1.602 +                TRUE,
   1.603 +                NULL,
   1.604 +                NULL,
   1.605 +                NULL,
   1.606 +                NULL,
   1.607 +                NULL,
   1.608 +                NULL
   1.609 +            };
   1.610 +            fromUArgs.converter = converter;
   1.611 +            errorCode = U_ZERO_ERROR;
   1.612 +            converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
   1.613 +        }
   1.614 +    }
   1.615 +
   1.616 +    /* now reset the converter itself */
   1.617 +    if(choice<=UCNV_RESET_TO_UNICODE) { /* presumably covers UCNV_RESET_BOTH and UCNV_RESET_TO_UNICODE; depends on enum order declared elsewhere -- TODO confirm */
   1.618 +        converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
   1.619 +        converter->mode = 0;
   1.620 +        converter->toULength = 0;
   1.621 +        converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
   1.622 +        converter->preToULength = 0;
   1.623 +    }
   1.624 +    if(choice!=UCNV_RESET_TO_UNICODE) { /* every choice other than "to Unicode only" resets the from-Unicode state */
   1.625 +        converter->fromUnicodeStatus = 0;
   1.626 +        converter->fromUChar32 = 0;
   1.627 +        converter->invalidUCharLength = converter->charErrorBufferLength = 0;
   1.628 +        converter->preFromUFirstCP = U_SENTINEL;
   1.629 +        converter->preFromULength = 0;
   1.630 +    }
   1.631 +
   1.632 +    if (converter->sharedData->impl->reset != NULL) {
   1.633 +        /* call the custom reset function */
   1.634 +        converter->sharedData->impl->reset(converter, choice);
   1.635 +    }
   1.636 +}
   1.637 +
   1.638 +U_CAPI void  U_EXPORT2 /* Resets both conversion directions via _reset(), with callback notification enabled. */
   1.639 +ucnv_reset(UConverter *converter)
   1.640 +{
   1.641 +    _reset(converter, UCNV_RESET_BOTH, TRUE);
   1.642 +}
   1.643 +
   1.644 +U_CAPI void  U_EXPORT2 /* Resets only the to-Unicode direction via _reset(), with callback notification enabled. */
   1.645 +ucnv_resetToUnicode(UConverter *converter)
   1.646 +{
   1.647 +    _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
   1.648 +}
   1.649 +
   1.650 +U_CAPI void  U_EXPORT2 /* Resets only the from-Unicode direction via _reset(), with callback notification enabled. */
   1.651 +ucnv_resetFromUnicode(UConverter *converter)
   1.652 +{
   1.653 +    _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
   1.654 +}
   1.655 +
   1.656 +U_CAPI int8_t   U_EXPORT2 /* Returns the converter's maxBytesPerUChar field; converter must not be NULL (no check is made). */
   1.657 +ucnv_getMaxCharSize (const UConverter * converter)
   1.658 +{
   1.659 +    return converter->maxBytesPerUChar;
   1.660 +}
   1.661 +
   1.662 +
   1.663 +U_CAPI int8_t   U_EXPORT2 /* Returns minBytesPerChar from the shared static data; converter must not be NULL (no check is made). */
   1.664 +ucnv_getMinCharSize (const UConverter * converter)
   1.665 +{
   1.666 +    return converter->sharedData->staticData->minBytesPerChar;
   1.667 +}
   1.668 +
   1.669 +U_CAPI const char*   U_EXPORT2 /* Returns the converter's name: the implementation's getName() result if non-NULL, else the static data name. */
   1.670 +ucnv_getName (const UConverter * converter, UErrorCode * err)
   1.671 +     
   1.672 +{
   1.673 +    if (U_FAILURE (*err)) /* returns NULL when *err already holds a failure */
   1.674 +        return NULL;
   1.675 +    if(converter->sharedData->impl->getName){
   1.676 +        const char* temp= converter->sharedData->impl->getName(converter);
   1.677 +        if(temp)
   1.678 +            return temp;
   1.679 +    }
   1.680 +    return converter->sharedData->staticData->name; /* fallback when there is no hook or it returned NULL */
   1.681 +}
   1.682 +
   1.683 +U_CAPI int32_t U_EXPORT2 /* Returns the codepage number from the static data, or one parsed from the "IBM" standard name when that number is 0. */
   1.684 +ucnv_getCCSID(const UConverter * converter,
   1.685 +              UErrorCode * err)
   1.686 +{
   1.687 +    int32_t ccsid;
   1.688 +    if (U_FAILURE (*err)) /* returns -1 when *err already holds a failure */
   1.689 +        return -1;
   1.690 +
   1.691 +    ccsid = converter->sharedData->staticData->codepage;
   1.692 +    if (ccsid == 0) {
   1.693 +        /* Rare case. This is for cases like gb18030,
   1.694 +        which doesn't have an IBM canonical name, but does have an IBM alias. */
   1.695 +        const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
   1.696 +        if (U_SUCCESS(*err) && standardName) {
   1.697 +            const char *ccsidStr = uprv_strchr(standardName, '-'); /* the number is taken from after the first '-' */
   1.698 +            if (ccsidStr) {
   1.699 +                ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
   1.700 +            }
   1.701 +        }
   1.702 +    }
   1.703 +    return ccsid; /* still 0 when no IBM name with a '-' was found */
   1.704 +}
   1.705 +
   1.706 +
   1.707 +U_CAPI UConverterPlatform   U_EXPORT2 /* Returns the platform value stored in the converter's shared static data. */
   1.708 +ucnv_getPlatform (const UConverter * converter,
   1.709 +                                      UErrorCode * err)
   1.710 +{
   1.711 +    if (U_FAILURE (*err)) /* returns UCNV_UNKNOWN when *err already holds a failure */
   1.712 +        return UCNV_UNKNOWN;
   1.713 +
   1.714 +    return (UConverterPlatform)converter->sharedData->staticData->platform;
   1.715 +}
   1.716 +
   1.717 +U_CAPI void U_EXPORT2 /* Stores the current to-Unicode callback and its context into *action and *context (both must be non-NULL). */
   1.718 +    ucnv_getToUCallBack (const UConverter * converter,
   1.719 +                         UConverterToUCallback *action,
   1.720 +                         const void **context)
   1.721 +{
   1.722 +    *action = converter->fromCharErrorBehaviour; /* the to-Unicode callback is kept in the field named fromCharErrorBehaviour */
   1.723 +    *context = converter->toUContext;
   1.724 +}
   1.725 +
   1.726 +U_CAPI void U_EXPORT2 /* Stores the current from-Unicode callback and its context into *action and *context (both must be non-NULL). */
   1.727 +    ucnv_getFromUCallBack (const UConverter * converter,
   1.728 +                           UConverterFromUCallback *action,
   1.729 +                           const void **context)
   1.730 +{
   1.731 +    *action = converter->fromUCharErrorBehaviour;
   1.732 +    *context = converter->fromUContext;
   1.733 +}
   1.734 +
   1.735 +U_CAPI void    U_EXPORT2 /* Installs a new to-Unicode callback and context; the previous pair is returned through oldAction/oldContext when those are non-NULL. */
   1.736 +ucnv_setToUCallBack (UConverter * converter,
   1.737 +                            UConverterToUCallback newAction,
   1.738 +                            const void* newContext,
   1.739 +                            UConverterToUCallback *oldAction,
   1.740 +                            const void** oldContext,
   1.741 +                            UErrorCode * err)
   1.742 +{
   1.743 +    if (U_FAILURE (*err)) /* nothing is changed when *err already holds a failure */
   1.744 +        return;
   1.745 +    if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
   1.746 +    converter->fromCharErrorBehaviour = newAction;
   1.747 +    if (oldContext) *oldContext = converter->toUContext;
   1.748 +    converter->toUContext = newContext;
   1.749 +}
   1.750 +
   1.751 +U_CAPI void  U_EXPORT2 /* Installs a new from-Unicode callback and context; the previous pair is returned through oldAction/oldContext when those are non-NULL. */
   1.752 +ucnv_setFromUCallBack (UConverter * converter,
   1.753 +                            UConverterFromUCallback newAction,
   1.754 +                            const void* newContext,
   1.755 +                            UConverterFromUCallback *oldAction,
   1.756 +                            const void** oldContext,
   1.757 +                            UErrorCode * err)
   1.758 +{
   1.759 +    if (U_FAILURE (*err)) /* nothing is changed when *err already holds a failure */
   1.760 +        return;
   1.761 +    if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
   1.762 +    converter->fromUCharErrorBehaviour = newAction;
   1.763 +    if (oldContext) *oldContext = converter->fromUContext;
   1.764 +    converter->fromUContext = newContext;
   1.765 +}
   1.766 +
   1.767 +static void /* Adjusts the first `length` entries of offsets[] in place by a delta derived from sourceIndex and errorInputLength. */
   1.768 +_updateOffsets(int32_t *offsets, int32_t length,
   1.769 +               int32_t sourceIndex, int32_t errorInputLength) {
   1.770 +    int32_t *limit;
   1.771 +    int32_t delta, offset;
   1.772 +
   1.773 +    if(sourceIndex>=0) {
   1.774 +        /*
   1.775 +         * adjust each offset by adding the previous sourceIndex
   1.776 +         * minus the length of the input sequence that caused an
   1.777 +         * error, if any
   1.778 +         */
   1.779 +        delta=sourceIndex-errorInputLength;
   1.780 +    } else {
   1.781 +        /*
   1.782 +         * set each offset to -1 because this conversion function
   1.783 +         * does not handle offsets
   1.784 +         */
   1.785 +        delta=-1;
   1.786 +    }
   1.787 +
   1.788 +    limit=offsets+length; /* one past the last entry to adjust */
   1.789 +    if(delta==0) {
   1.790 +        /* most common case, nothing to do */
   1.791 +    } else if(delta>0) {
   1.792 +        /* add the delta to each offset (but not if the offset is <0) */
   1.793 +        while(offsets<limit) {
   1.794 +            offset=*offsets;
   1.795 +            if(offset>=0) {
   1.796 +                *offsets=offset+delta;
   1.797 +            }
   1.798 +            ++offsets;
   1.799 +        }
   1.800 +    } else /* delta<0 */ {
   1.801 +        /*
   1.802 +         * set each offset to -1 because this conversion function
   1.803 +         * does not handle offsets
   1.804 +         * or the error input sequence started in a previous buffer
   1.805 +         */
   1.806 +        while(offsets<limit) {
   1.807 +            *offsets++=-1;
   1.808 +        }
   1.809 +    }
   1.810 +}
   1.811 +
   1.812 +/* ucnv_fromUnicode --------------------------------------------------------- */
   1.813 +
   1.814 +/*
   1.815 + * Implementation note for m:n conversions
   1.816 + *
   1.817 + * While collecting source units to find the longest match for m:n conversion,
   1.818 + * some source units may need to be stored for a partial match.
   1.819 + * When a second buffer does not yield a match on all of the previously stored
   1.820 + * source units, then they must be "replayed", i.e., fed back into the converter.
   1.821 + *
   1.822 + * The code relies on the fact that replaying will not nest -
   1.823 + * converting a replay buffer will not result in a replay.
   1.824 + * This is because a replay is necessary only after the _continuation_ of a
   1.825 + * partial match failed, but a replay buffer is converted as a whole.
   1.826 + * It may result in some of its units being stored again for a partial match,
   1.827 + * but there will not be a continuation _during_ the replay which could fail.
   1.828 + *
   1.829 + * It is conceivable that a callback function could call the converter
   1.830 + * recursively in a way that causes another replay to be stored, but that
   1.831 + * would be an error in the callback function.
   1.832 + * Such violations will cause assertion failures in a debug build,
   1.833 + * and wrong output, but they will not cause a crash.
   1.834 + */
   1.835 +
          /*
           * Drive the fromUnicode implementation function of pArgs->converter
           * until the input is consumed or an unresolved error remains.
           *
           * Around each call of the implementation function this routine
           *  - fixes up the offsets written for new output (_updateOffsets()),
           *  - switches pArgs->source to a local replay buffer when the converter
           *    has stored units of a partial m:n match (cnv->preFromULength<0),
           *    and switches back once the replay buffer is consumed,
           *  - turns leftover partial input at flush time into
           *    U_TRUNCATED_CHAR_FOUND,
           *  - calls the fromUnicode error callback for invalid, illegal and
           *    truncated input.
           *
           * pArgs  conversion arguments; source, sourceLimit, flush and offsets
           *        are rewritten here, and are restored to the real source
           *        values before returning if a replay was in progress
           * err    in/out error code; if it is already a failure on entry the
           *        implementation function is not called and control goes
           *        straight to the error handling
           */
   1.836 +static void
   1.837 +_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
   1.838 +    UConverterFromUnicode fromUnicode;
   1.839 +    UConverter *cnv;
   1.840 +    const UChar *s;
   1.841 +    char *t;
   1.842 +    int32_t *offsets;
   1.843 +    int32_t sourceIndex;
   1.844 +    int32_t errorInputLength;
   1.845 +    UBool converterSawEndOfInput, calledCallback;
   1.846 +
   1.847 +    /* variables for m:n conversion */
              /*
               * realSource!=NULL means that pArgs currently points into replay[];
               * the real* variables then hold the source, limit, flush flag and
               * source index that were in effect before the switch.
               */
   1.848 +    UChar replay[UCNV_EXT_MAX_UCHARS];
   1.849 +    const UChar *realSource, *realSourceLimit;
   1.850 +    int32_t realSourceIndex;
   1.851 +    UBool realFlush;
   1.852 +
   1.853 +    cnv=pArgs->converter;
   1.854 +    s=pArgs->source;
   1.855 +    t=pArgs->target;
   1.856 +    offsets=pArgs->offsets;
   1.857 +
   1.858 +    /* get the converter implementation function */
              /*
               * sourceIndex>=0: value handed to _updateOffsets() as the base for
               * new offsets; sourceIndex<0: every new offset is written as -1
               */
   1.859 +    sourceIndex=0;
   1.860 +    if(offsets==NULL) {
   1.861 +        fromUnicode=cnv->sharedData->impl->fromUnicode;
   1.862 +    } else {
   1.863 +        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
   1.864 +        if(fromUnicode==NULL) {
   1.865 +            /* there is no WithOffsets implementation */
   1.866 +            fromUnicode=cnv->sharedData->impl->fromUnicode;
   1.867 +            /* we will write -1 for each offset */
   1.868 +            sourceIndex=-1;
   1.869 +        }
   1.870 +    }
   1.871 +
   1.872 +    if(cnv->preFromULength>=0) {
   1.873 +        /* normal mode */
   1.874 +        realSource=NULL;
   1.875 +
   1.876 +        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
   1.877 +        realSourceLimit=NULL;
   1.878 +        realFlush=FALSE;
   1.879 +        realSourceIndex=0;
   1.880 +    } else {
   1.881 +        /*
   1.882 +         * Previous m:n conversion stored source units from a partial match
   1.883 +         * and failed to consume all of them.
   1.884 +         * We need to "replay" them from a temporary buffer and convert them first.
   1.885 +         */
   1.886 +        realSource=pArgs->source;
   1.887 +        realSourceLimit=pArgs->sourceLimit;
   1.888 +        realFlush=pArgs->flush;
   1.889 +        realSourceIndex=sourceIndex;
   1.890 +
                  /* preFromULength is negative here: its magnitude is the number of stored UChars */
   1.891 +        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
   1.892 +        pArgs->source=replay;
   1.893 +        pArgs->sourceLimit=replay-cnv->preFromULength;
   1.894 +        pArgs->flush=FALSE;
                  /* offsets for output produced from the replay buffer are written as -1 */
   1.895 +        sourceIndex=-1;
   1.896 +
   1.897 +        cnv->preFromULength=0;
   1.898 +    }
   1.899 +
   1.900 +    /*
   1.901 +     * loop for conversion and error handling
   1.902 +     *
   1.903 +     * loop {
   1.904 +     *   convert
   1.905 +     *   loop {
   1.906 +     *     update offsets
   1.907 +     *     handle end of input
   1.908 +     *     handle errors/call callback
   1.909 +     *   }
   1.910 +     * }
   1.911 +     */
   1.912 +    for(;;) {
   1.913 +        if(U_SUCCESS(*err)) {
   1.914 +            /* convert */
   1.915 +            fromUnicode(pArgs, err);
   1.916 +
   1.917 +            /*
   1.918 +             * set a flag for whether the converter
   1.919 +             * successfully processed the end of the input
   1.920 +             *
   1.921 +             * need not check cnv->preFromULength==0 because a replay (<0) will cause
   1.922 +             * s<sourceLimit before converterSawEndOfInput is checked
   1.923 +             */
   1.924 +            converterSawEndOfInput=
   1.925 +                (UBool)(U_SUCCESS(*err) &&
   1.926 +                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
   1.927 +                        cnv->fromUChar32==0);
   1.928 +        } else {
   1.929 +            /* handle error from ucnv_convertEx() */
   1.930 +            converterSawEndOfInput=FALSE;
   1.931 +        }
   1.932 +
   1.933 +        /* no callback called yet for this iteration */
   1.934 +        calledCallback=FALSE;
   1.935 +
   1.936 +        /* no sourceIndex adjustment for conversion, only for callback output */
   1.937 +        errorInputLength=0;
   1.938 +
   1.939 +        /*
   1.940 +         * loop for offsets and error handling
   1.941 +         *
   1.942 +         * iterates at most 3 times:
   1.943 +         * 1. to clean up after the conversion function
   1.944 +         * 2. after the callback
   1.945 +         * 3. after the callback again if there was truncated input
   1.946 +         */
   1.947 +        for(;;) {
   1.948 +            /* update offsets if we write any */
   1.949 +            if(offsets!=NULL) {
   1.950 +                int32_t length=(int32_t)(pArgs->target-t);
   1.951 +                if(length>0) {
   1.952 +                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
   1.953 +
   1.954 +                    /*
   1.955 +                     * if a converter handles offsets and updates the offsets
   1.956 +                     * pointer at the end, then pArgs->offset should not change
   1.957 +                     * here;
   1.958 +                     * however, some converters do not handle offsets at all
   1.959 +                     * (sourceIndex<0) or may not update the offsets pointer
   1.960 +                     */
   1.961 +                    pArgs->offsets=offsets+=length;
   1.962 +                }
   1.963 +
   1.964 +                if(sourceIndex>=0) {
   1.965 +                    sourceIndex+=(int32_t)(pArgs->source-s);
   1.966 +                }
   1.967 +            }
   1.968 +
   1.969 +            if(cnv->preFromULength<0) {
   1.970 +                /*
   1.971 +                 * switch the source to new replay units (cannot occur while replaying)
   1.972 +                 * after offset handling and before end-of-input and callback handling
   1.973 +                 */
   1.974 +                if(realSource==NULL) {
   1.975 +                    realSource=pArgs->source;
   1.976 +                    realSourceLimit=pArgs->sourceLimit;
   1.977 +                    realFlush=pArgs->flush;
   1.978 +                    realSourceIndex=sourceIndex;
   1.979 +
   1.980 +                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
   1.981 +                    pArgs->source=replay;
   1.982 +                    pArgs->sourceLimit=replay-cnv->preFromULength;
   1.983 +                    pArgs->flush=FALSE;
                              /*
                               * back sourceIndex up by the number of stored units
                               * (preFromULength is negative); if the result is negative,
                               * use -1 so that the offsets for the replay are written as -1
                               */
   1.984 +                    if((sourceIndex+=cnv->preFromULength)<0) {
   1.985 +                        sourceIndex=-1;
   1.986 +                    }
   1.987 +
   1.988 +                    cnv->preFromULength=0;
   1.989 +                } else {
   1.990 +                    /* see implementation note before _fromUnicodeWithCallback() */
   1.991 +                    U_ASSERT(realSource==NULL);
   1.992 +                    *err=U_INTERNAL_PROGRAM_ERROR;
   1.993 +                }
   1.994 +            }
   1.995 +
   1.996 +            /* update pointers */
   1.997 +            s=pArgs->source;
   1.998 +            t=pArgs->target;
  1.999 +
  1.1000 +            if(U_SUCCESS(*err)) {
  1.1001 +                if(s<pArgs->sourceLimit) {
  1.1002 +                    /*
  1.1003 +                     * continue with the conversion loop while there is still input left
  1.1004 +                     * (continue converting by breaking out of only the inner loop)
  1.1005 +                     */
  1.1006 +                    break;
  1.1007 +                } else if(realSource!=NULL) {
  1.1008 +                    /* switch back from replaying to the real source and continue */
  1.1009 +                    pArgs->source=realSource;
  1.1010 +                    pArgs->sourceLimit=realSourceLimit;
  1.1011 +                    pArgs->flush=realFlush;
  1.1012 +                    sourceIndex=realSourceIndex;
  1.1013 +
  1.1014 +                    realSource=NULL;
  1.1015 +                    break;
  1.1016 +                } else if(pArgs->flush && cnv->fromUChar32!=0) {
  1.1017 +                    /*
  1.1018 +                     * the entire input stream is consumed
  1.1019 +                     * and there is a partial, truncated input sequence left
  1.1020 +                     */
  1.1021 +
  1.1022 +                    /* inject an error and continue with callback handling */
  1.1023 +                    *err=U_TRUNCATED_CHAR_FOUND;
  1.1024 +                    calledCallback=FALSE; /* new error condition */
  1.1025 +                } else {
  1.1026 +                    /* input consumed */
  1.1027 +                    if(pArgs->flush) {
  1.1028 +                        /*
  1.1029 +                         * return to the conversion loop once more if the flush
  1.1030 +                         * flag is set and the conversion function has not
  1.1031 +                         * successfully processed the end of the input yet
  1.1032 +                         *
  1.1033 +                         * (continue converting by breaking out of only the inner loop)
  1.1034 +                         */
  1.1035 +                        if(!converterSawEndOfInput) {
  1.1036 +                            break;
  1.1037 +                        }
  1.1038 +
  1.1039 +                        /* reset the converter without calling the callback function */
  1.1040 +                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
  1.1041 +                    }
  1.1042 +
  1.1043 +                    /* done successfully */
  1.1044 +                    return;
  1.1045 +                }
  1.1046 +            }
  1.1047 +
  1.1048 +            /* U_FAILURE(*err) */
  1.1049 +            {
  1.1050 +                UErrorCode e;
  1.1051 +
                          /*
                           * give up if the callback already ran for this error and left
                           * it set, or if the error is not one of the three codes that
                           * are passed to the callback; e is assigned only when
                           * calledCallback is FALSE, and is read only in that case
                           */
  1.1052 +                if( calledCallback ||
  1.1053 +                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
  1.1054 +                    (e!=U_INVALID_CHAR_FOUND &&
  1.1055 +                     e!=U_ILLEGAL_CHAR_FOUND &&
  1.1056 +                     e!=U_TRUNCATED_CHAR_FOUND)
  1.1057 +                ) {
  1.1058 +                    /*
  1.1059 +                     * the callback did not or cannot resolve the error:
  1.1060 +                     * set output pointers and return
  1.1061 +                     *
  1.1062 +                     * the check for buffer overflow is redundant but it is
  1.1063 +                     * a high-runner case and hopefully documents the intent
  1.1064 +                     * well
  1.1065 +                     *
  1.1066 +                     * if we were replaying, then the replay buffer must be
  1.1067 +                     * copied back into the UConverter
  1.1068 +                     * and the real arguments must be restored
  1.1069 +                     */
  1.1070 +                    if(realSource!=NULL) {
  1.1071 +                        int32_t length;
  1.1072 +
  1.1073 +                        U_ASSERT(cnv->preFromULength==0);
  1.1074 +
  1.1075 +                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
  1.1076 +                        if(length>0) {
  1.1077 +                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
  1.1078 +                            cnv->preFromULength=(int8_t)-length;
  1.1079 +                        }
  1.1080 +
  1.1081 +                        pArgs->source=realSource;
  1.1082 +                        pArgs->sourceLimit=realSourceLimit;
  1.1083 +                        pArgs->flush=realFlush;
  1.1084 +                    }
  1.1085 +
  1.1086 +                    return;
  1.1087 +                }
  1.1088 +            }
  1.1089 +
  1.1090 +            /* callback handling */
  1.1091 +            {
  1.1092 +                UChar32 codePoint;
  1.1093 +
  1.1094 +                /* get and write the code point */
                          /*
                           * errorInputLength is advanced by U16_APPEND_UNSAFE and is then
                           * used as the length of invalidUCharBuffer; it is also the value
                           * that the next _updateOffsets() call subtracts from sourceIndex
                           */
  1.1095 +                codePoint=cnv->fromUChar32;
  1.1096 +                errorInputLength=0;
  1.1097 +                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
  1.1098 +                cnv->invalidUCharLength=(int8_t)errorInputLength;
  1.1099 +
  1.1100 +                /* set the converter state to deal with the next character */
  1.1101 +                cnv->fromUChar32=0;
  1.1102 +
                          /*
                           * only U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED;
                           * the other error codes that reach this point are reported
                           * as UCNV_ILLEGAL
                           */
  1.1103 +                /* call the callback function */
  1.1104 +                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
  1.1105 +                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
  1.1106 +                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
  1.1107 +                    err);
  1.1108 +            }
  1.1109 +
  1.1110 +            /*
  1.1111 +             * loop back to the offset handling
  1.1112 +             *
  1.1113 +             * this flag will indicate after offset handling
  1.1114 +             * that a callback was called;
  1.1115 +             * if the callback did not resolve the error, then we return
  1.1116 +             */
  1.1117 +            calledCallback=TRUE;
  1.1118 +        }
  1.1119 +    }
  1.1120 +}
  1.1121 +
  1.1122 +/*
  1.1123 + * Output the fromUnicode overflow buffer.
  1.1124 + * Call this function if(cnv->charErrorBufferLength>0).
  1.1125 + * @return TRUE if overflow
  1.1126 + */
  1.1127 +static UBool
  1.1128 +ucnv_outputOverflowFromUnicode(UConverter *cnv,
  1.1129 +                               char **target, const char *targetLimit,
  1.1130 +                               int32_t **pOffsets,
  1.1131 +                               UErrorCode *err) {
  1.1132 +    int32_t *offsets;
  1.1133 +    char *overflow, *t;
  1.1134 +    int32_t i, length;
  1.1135 +
  1.1136 +    t=*target;
  1.1137 +    if(pOffsets!=NULL) {
  1.1138 +        offsets=*pOffsets;
  1.1139 +    } else {
  1.1140 +        offsets=NULL;
  1.1141 +    }
  1.1142 +
  1.1143 +    overflow=(char *)cnv->charErrorBuffer;
  1.1144 +    length=cnv->charErrorBufferLength;
  1.1145 +    i=0;
  1.1146 +    while(i<length) {
  1.1147 +        if(t==targetLimit) {
  1.1148 +            /* the overflow buffer contains too much, keep the rest */
  1.1149 +            int32_t j=0;
  1.1150 +
  1.1151 +            do {
  1.1152 +                overflow[j++]=overflow[i++];
  1.1153 +            } while(i<length);
  1.1154 +
  1.1155 +            cnv->charErrorBufferLength=(int8_t)j;
  1.1156 +            *target=t;
  1.1157 +            if(offsets!=NULL) {
  1.1158 +                *pOffsets=offsets;
  1.1159 +            }
  1.1160 +            *err=U_BUFFER_OVERFLOW_ERROR;
  1.1161 +            return TRUE;
  1.1162 +        }
  1.1163 +
  1.1164 +        /* copy the overflow contents to the target */
  1.1165 +        *t++=overflow[i++];
  1.1166 +        if(offsets!=NULL) {
  1.1167 +            *offsets++=-1; /* no source index available for old output */
  1.1168 +        }
  1.1169 +    }
  1.1170 +
  1.1171 +    /* the overflow buffer is completely copied to the target */
  1.1172 +    cnv->charErrorBufferLength=0;
  1.1173 +    *target=t;
  1.1174 +    if(offsets!=NULL) {
  1.1175 +        *pOffsets=offsets;
  1.1176 +    }
  1.1177 +    return FALSE;
  1.1178 +}
  1.1179 +
  1.1180 +U_CAPI void U_EXPORT2
  1.1181 +ucnv_fromUnicode(UConverter *cnv,
  1.1182 +                 char **target, const char *targetLimit,
  1.1183 +                 const UChar **source, const UChar *sourceLimit,
  1.1184 +                 int32_t *offsets,
  1.1185 +                 UBool flush,
  1.1186 +                 UErrorCode *err) {
  1.1187 +    UConverterFromUnicodeArgs args;
  1.1188 +    const UChar *s;
  1.1189 +    char *t;
  1.1190 +
  1.1191 +    /* check parameters */
  1.1192 +    if(err==NULL || U_FAILURE(*err)) {
  1.1193 +        return;
  1.1194 +    }
  1.1195 +
  1.1196 +    if(cnv==NULL || target==NULL || source==NULL) {
  1.1197 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1198 +        return;
  1.1199 +    }
  1.1200 +
  1.1201 +    s=*source;
  1.1202 +    t=*target;
  1.1203 +
  1.1204 +    if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
  1.1205 +        /*
  1.1206 +        Prevent code from going into an infinite loop in case we do hit this
  1.1207 +        limit. The limit pointer is expected to be on a UChar * boundary.
  1.1208 +        This also prevents the next argument check from failing.
  1.1209 +        */
  1.1210 +        sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
  1.1211 +    }
  1.1212 +
  1.1213 +    /*
  1.1214 +     * All these conditions should never happen.
  1.1215 +     *
  1.1216 +     * 1) Make sure that the limits are >= to the address source or target
  1.1217 +     *
  1.1218 +     * 2) Make sure that the buffer sizes do not exceed the number range for
  1.1219 +     * int32_t because some functions use the size (in units or bytes)
  1.1220 +     * rather than comparing pointers, and because offsets are int32_t values.
  1.1221 +     *
  1.1222 +     * size_t is guaranteed to be unsigned and large enough for the job.
  1.1223 +     *
  1.1224 +     * Return with an error instead of adjusting the limits because we would
  1.1225 +     * not be able to maintain the semantics that either the source must be
  1.1226 +     * consumed or the target filled (unless an error occurs).
  1.1227 +     * An adjustment would be targetLimit=t+0x7fffffff; for example.
  1.1228 +     *
  1.1229 +     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
  1.1230 +     * to a char * pointer and provide an incomplete UChar code unit.
  1.1231 +     */
  1.1232 +    if (sourceLimit<s || targetLimit<t ||
  1.1233 +        ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
  1.1234 +        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
  1.1235 +        (((const char *)sourceLimit-(const char *)s) & 1) != 0)
  1.1236 +    {
  1.1237 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1238 +        return;
  1.1239 +    }
  1.1240 +    
  1.1241 +    /* output the target overflow buffer */
  1.1242 +    if( cnv->charErrorBufferLength>0 &&
  1.1243 +        ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
  1.1244 +    ) {
  1.1245 +        /* U_BUFFER_OVERFLOW_ERROR */
  1.1246 +        return;
  1.1247 +    }
  1.1248 +    /* *target may have moved, therefore stop using t */
  1.1249 +
  1.1250 +    if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
  1.1251 +        /* the overflow buffer is emptied and there is no new input: we are done */
  1.1252 +        return;
  1.1253 +    }
  1.1254 +
  1.1255 +    /*
  1.1256 +     * Do not simply return with a buffer overflow error if
  1.1257 +     * !flush && t==targetLimit
  1.1258 +     * because it is possible that the source will not generate any output.
  1.1259 +     * For example, the skip callback may be called;
  1.1260 +     * it does not output anything.
  1.1261 +     */
  1.1262 +
  1.1263 +    /* prepare the converter arguments */
  1.1264 +    args.converter=cnv;
  1.1265 +    args.flush=flush;
  1.1266 +    args.offsets=offsets;
  1.1267 +    args.source=s;
  1.1268 +    args.sourceLimit=sourceLimit;
  1.1269 +    args.target=*target;
  1.1270 +    args.targetLimit=targetLimit;
  1.1271 +    args.size=sizeof(args);
  1.1272 +
  1.1273 +    _fromUnicodeWithCallback(&args, err);
  1.1274 +
  1.1275 +    *source=args.source;
  1.1276 +    *target=args.target;
  1.1277 +}
  1.1278 +
  1.1279 +/* ucnv_toUnicode() --------------------------------------------------------- */
  1.1280 +
  1.1281 +static void
  1.1282 +_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
  1.1283 +    UConverterToUnicode toUnicode;
  1.1284 +    UConverter *cnv;
  1.1285 +    const char *s;
  1.1286 +    UChar *t;
  1.1287 +    int32_t *offsets;
  1.1288 +    int32_t sourceIndex;
  1.1289 +    int32_t errorInputLength;
  1.1290 +    UBool converterSawEndOfInput, calledCallback;
  1.1291 +
  1.1292 +    /* variables for m:n conversion */
  1.1293 +    char replay[UCNV_EXT_MAX_BYTES];
  1.1294 +    const char *realSource, *realSourceLimit;
  1.1295 +    int32_t realSourceIndex;
  1.1296 +    UBool realFlush;
  1.1297 +
  1.1298 +    cnv=pArgs->converter;
  1.1299 +    s=pArgs->source;
  1.1300 +    t=pArgs->target;
  1.1301 +    offsets=pArgs->offsets;
  1.1302 +
  1.1303 +    /* get the converter implementation function */
  1.1304 +    sourceIndex=0;
  1.1305 +    if(offsets==NULL) {
  1.1306 +        toUnicode=cnv->sharedData->impl->toUnicode;
  1.1307 +    } else {
  1.1308 +        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
  1.1309 +        if(toUnicode==NULL) {
  1.1310 +            /* there is no WithOffsets implementation */
  1.1311 +            toUnicode=cnv->sharedData->impl->toUnicode;
  1.1312 +            /* we will write -1 for each offset */
  1.1313 +            sourceIndex=-1;
  1.1314 +        }
  1.1315 +    }
  1.1316 +
  1.1317 +    if(cnv->preToULength>=0) {
  1.1318 +        /* normal mode */
  1.1319 +        realSource=NULL;
  1.1320 +
  1.1321 +        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
  1.1322 +        realSourceLimit=NULL;
  1.1323 +        realFlush=FALSE;
  1.1324 +        realSourceIndex=0;
  1.1325 +    } else {
  1.1326 +        /*
  1.1327 +         * Previous m:n conversion stored source units from a partial match
  1.1328 +         * and failed to consume all of them.
  1.1329 +         * We need to "replay" them from a temporary buffer and convert them first.
  1.1330 +         */
  1.1331 +        realSource=pArgs->source;
  1.1332 +        realSourceLimit=pArgs->sourceLimit;
  1.1333 +        realFlush=pArgs->flush;
  1.1334 +        realSourceIndex=sourceIndex;
  1.1335 +
  1.1336 +        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
  1.1337 +        pArgs->source=replay;
  1.1338 +        pArgs->sourceLimit=replay-cnv->preToULength;
  1.1339 +        pArgs->flush=FALSE;
  1.1340 +        sourceIndex=-1;
  1.1341 +
  1.1342 +        cnv->preToULength=0;
  1.1343 +    }
  1.1344 +
  1.1345 +    /*
  1.1346 +     * loop for conversion and error handling
  1.1347 +     *
  1.1348 +     * loop {
  1.1349 +     *   convert
  1.1350 +     *   loop {
  1.1351 +     *     update offsets
  1.1352 +     *     handle end of input
  1.1353 +     *     handle errors/call callback
  1.1354 +     *   }
  1.1355 +     * }
  1.1356 +     */
  1.1357 +    for(;;) {
  1.1358 +        if(U_SUCCESS(*err)) {
  1.1359 +            /* convert */
  1.1360 +            toUnicode(pArgs, err);
  1.1361 +
  1.1362 +            /*
  1.1363 +             * set a flag for whether the converter
  1.1364 +             * successfully processed the end of the input
  1.1365 +             *
  1.1366 +             * need not check cnv->preToULength==0 because a replay (<0) will cause
  1.1367 +             * s<sourceLimit before converterSawEndOfInput is checked
  1.1368 +             */
  1.1369 +            converterSawEndOfInput=
  1.1370 +                (UBool)(U_SUCCESS(*err) &&
  1.1371 +                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
  1.1372 +                        cnv->toULength==0);
  1.1373 +        } else {
  1.1374 +            /* handle error from getNextUChar() or ucnv_convertEx() */
  1.1375 +            converterSawEndOfInput=FALSE;
  1.1376 +        }
  1.1377 +
  1.1378 +        /* no callback called yet for this iteration */
  1.1379 +        calledCallback=FALSE;
  1.1380 +
  1.1381 +        /* no sourceIndex adjustment for conversion, only for callback output */
  1.1382 +        errorInputLength=0;
  1.1383 +
  1.1384 +        /*
  1.1385 +         * loop for offsets and error handling
  1.1386 +         *
  1.1387 +         * iterates at most 3 times:
  1.1388 +         * 1. to clean up after the conversion function
  1.1389 +         * 2. after the callback
  1.1390 +         * 3. after the callback again if there was truncated input
  1.1391 +         */
  1.1392 +        for(;;) {
  1.1393 +            /* update offsets if we write any */
  1.1394 +            if(offsets!=NULL) {
  1.1395 +                int32_t length=(int32_t)(pArgs->target-t);
  1.1396 +                if(length>0) {
  1.1397 +                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);
  1.1398 +
  1.1399 +                    /*
  1.1400 +                     * if a converter handles offsets and updates the offsets
  1.1401 +                     * pointer at the end, then pArgs->offset should not change
  1.1402 +                     * here;
  1.1403 +                     * however, some converters do not handle offsets at all
  1.1404 +                     * (sourceIndex<0) or may not update the offsets pointer
  1.1405 +                     */
  1.1406 +                    pArgs->offsets=offsets+=length;
  1.1407 +                }
  1.1408 +
  1.1409 +                if(sourceIndex>=0) {
  1.1410 +                    sourceIndex+=(int32_t)(pArgs->source-s);
  1.1411 +                }
  1.1412 +            }
  1.1413 +
  1.1414 +            if(cnv->preToULength<0) {
  1.1415 +                /*
  1.1416 +                 * switch the source to new replay units (cannot occur while replaying)
  1.1417 +                 * after offset handling and before end-of-input and callback handling
  1.1418 +                 */
  1.1419 +                if(realSource==NULL) {
  1.1420 +                    realSource=pArgs->source;
  1.1421 +                    realSourceLimit=pArgs->sourceLimit;
  1.1422 +                    realFlush=pArgs->flush;
  1.1423 +                    realSourceIndex=sourceIndex;
  1.1424 +
  1.1425 +                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
  1.1426 +                    pArgs->source=replay;
  1.1427 +                    pArgs->sourceLimit=replay-cnv->preToULength;
  1.1428 +                    pArgs->flush=FALSE;
  1.1429 +                    if((sourceIndex+=cnv->preToULength)<0) {
  1.1430 +                        sourceIndex=-1;
  1.1431 +                    }
  1.1432 +
  1.1433 +                    cnv->preToULength=0;
  1.1434 +                } else {
  1.1435 +                    /* see implementation note before _fromUnicodeWithCallback() */
  1.1436 +                    U_ASSERT(realSource==NULL);
  1.1437 +                    *err=U_INTERNAL_PROGRAM_ERROR;
  1.1438 +                }
  1.1439 +            }
  1.1440 +
  1.1441 +            /* update pointers */
  1.1442 +            s=pArgs->source;
  1.1443 +            t=pArgs->target;
  1.1444 +
  1.1445 +            if(U_SUCCESS(*err)) {
  1.1446 +                if(s<pArgs->sourceLimit) {
  1.1447 +                    /*
  1.1448 +                     * continue with the conversion loop while there is still input left
  1.1449 +                     * (continue converting by breaking out of only the inner loop)
  1.1450 +                     */
  1.1451 +                    break;
  1.1452 +                } else if(realSource!=NULL) {
  1.1453 +                    /* switch back from replaying to the real source and continue */
  1.1454 +                    pArgs->source=realSource;
  1.1455 +                    pArgs->sourceLimit=realSourceLimit;
  1.1456 +                    pArgs->flush=realFlush;
  1.1457 +                    sourceIndex=realSourceIndex;
  1.1458 +
  1.1459 +                    realSource=NULL;
  1.1460 +                    break;
  1.1461 +                } else if(pArgs->flush && cnv->toULength>0) {
  1.1462 +                    /*
  1.1463 +                     * the entire input stream is consumed
  1.1464 +                     * and there is a partial, truncated input sequence left
  1.1465 +                     */
  1.1466 +
  1.1467 +                    /* inject an error and continue with callback handling */
  1.1468 +                    *err=U_TRUNCATED_CHAR_FOUND;
  1.1469 +                    calledCallback=FALSE; /* new error condition */
  1.1470 +                } else {
  1.1471 +                    /* input consumed */
  1.1472 +                    if(pArgs->flush) {
  1.1473 +                        /*
  1.1474 +                         * return to the conversion loop once more if the flush
  1.1475 +                         * flag is set and the conversion function has not
  1.1476 +                         * successfully processed the end of the input yet
  1.1477 +                         *
  1.1478 +                         * (continue converting by breaking out of only the inner loop)
  1.1479 +                         */
  1.1480 +                        if(!converterSawEndOfInput) {
  1.1481 +                            break;
  1.1482 +                        }
  1.1483 +
  1.1484 +                        /* reset the converter without calling the callback function */
  1.1485 +                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
  1.1486 +                    }
  1.1487 +
  1.1488 +                    /* done successfully */
  1.1489 +                    return;
  1.1490 +                }
  1.1491 +            }
  1.1492 +
  1.1493 +            /* U_FAILURE(*err) */
  1.1494 +            {
  1.1495 +                UErrorCode e;
  1.1496 +
  1.1497 +                if( calledCallback ||
  1.1498 +                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
  1.1499 +                    (e!=U_INVALID_CHAR_FOUND &&
  1.1500 +                     e!=U_ILLEGAL_CHAR_FOUND &&
  1.1501 +                     e!=U_TRUNCATED_CHAR_FOUND &&
  1.1502 +                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
  1.1503 +                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
  1.1504 +                ) {
  1.1505 +                    /*
  1.1506 +                     * the callback did not or cannot resolve the error:
  1.1507 +                     * set output pointers and return
  1.1508 +                     *
  1.1509 +                     * the check for buffer overflow is redundant but it is
  1.1510 +                     * a high-runner case and hopefully documents the intent
  1.1511 +                     * well
  1.1512 +                     *
  1.1513 +                     * if we were replaying, then the replay buffer must be
  1.1514 +                     * copied back into the UConverter
  1.1515 +                     * and the real arguments must be restored
  1.1516 +                     */
  1.1517 +                    if(realSource!=NULL) {
  1.1518 +                        int32_t length;
  1.1519 +
  1.1520 +                        U_ASSERT(cnv->preToULength==0);
  1.1521 +
  1.1522 +                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
  1.1523 +                        if(length>0) {
  1.1524 +                            uprv_memcpy(cnv->preToU, pArgs->source, length);
  1.1525 +                            cnv->preToULength=(int8_t)-length;
  1.1526 +                        }
  1.1527 +
  1.1528 +                        pArgs->source=realSource;
  1.1529 +                        pArgs->sourceLimit=realSourceLimit;
  1.1530 +                        pArgs->flush=realFlush;
  1.1531 +                    }
  1.1532 +
  1.1533 +                    return;
  1.1534 +                }
  1.1535 +            }
  1.1536 +
  1.1537 +            /* copy toUBytes[] to invalidCharBuffer[] */
  1.1538 +            errorInputLength=cnv->invalidCharLength=cnv->toULength;
  1.1539 +            if(errorInputLength>0) {
  1.1540 +                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
  1.1541 +            }
  1.1542 +
  1.1543 +            /* set the converter state to deal with the next character */
  1.1544 +            cnv->toULength=0;
  1.1545 +
  1.1546 +            /* call the callback function */
  1.1547 +            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
  1.1548 +                cnv->toUCallbackReason = UCNV_UNASSIGNED;
  1.1549 +            }
  1.1550 +            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
  1.1551 +                cnv->invalidCharBuffer, errorInputLength,
  1.1552 +                cnv->toUCallbackReason,
  1.1553 +                err);
  1.1554 +            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
  1.1555 +
  1.1556 +            /*
  1.1557 +             * loop back to the offset handling
  1.1558 +             *
  1.1559 +             * this flag will indicate after offset handling
  1.1560 +             * that a callback was called;
  1.1561 +             * if the callback did not resolve the error, then we return
  1.1562 +             */
  1.1563 +            calledCallback=TRUE;
  1.1564 +        }
  1.1565 +    }
  1.1566 +}
  1.1567 +
  1.1568 +/*
  1.1569 + * Write the toUnicode overflow buffer (cnv->UCharErrorBuffer[]) to the target.
  1.1570 + * Call this function only if(cnv->UCharErrorBufferLength>0).
  1.1571 + *
  1.1572 + * @param cnv         converter whose overflow buffer is written out
  1.1573 + * @param target      in/out: current write position, advanced past the output
  1.1574 + * @param targetLimit end of the target buffer
  1.1575 + * @param pOffsets    NULL, or in/out pointer to the current offsets position
  1.1576 + *                    (which may itself be NULL); each unit written here gets
  1.1577 + *                    the offset -1
  1.1578 + * @param err         set to U_BUFFER_OVERFLOW_ERROR when the target fills up
  1.1579 + * @return TRUE if the target filled up first; the units that did not fit are
  1.1580 + *         then kept at the start of the overflow buffer.
  1.1581 + *         FALSE if the overflow buffer was emptied completely.
  1.1582 + */
  1.1583 +static UBool
  1.1584 +ucnv_outputOverflowToUnicode(UConverter *cnv,
  1.1585 +                             UChar **target, const UChar *targetLimit,
  1.1586 +                             int32_t **pOffsets,
  1.1587 +                             UErrorCode *err) {
  1.1588 +    UChar *pending=cnv->UCharErrorBuffer;
  1.1589 +    UChar *out=*target;
  1.1590 +    int32_t *offs=(pOffsets!=NULL) ? *pOffsets : NULL;
  1.1591 +    int32_t count=cnv->UCharErrorBufferLength;
  1.1592 +    int32_t next=0; /* index of the next pending unit to handle */
  1.1593 +    int32_t kept=0; /* number of units that stay in the overflow buffer */
  1.1594 +
  1.1595 +    /* copy pending units for as long as the target has room */
  1.1596 +    while(next<count && out!=targetLimit) {
  1.1597 +        *out++=pending[next++];
  1.1598 +        if(offs!=NULL) {
  1.1599 +            *offs++=-1; /* old output has no source index */
  1.1600 +        }
  1.1601 +    }
  1.1602 +
  1.1603 +    /* if the target filled up, move what did not fit to the buffer start */
  1.1604 +    while(next<count) {
  1.1605 +        pending[kept++]=pending[next++];
  1.1606 +    }
  1.1607 +
  1.1608 +    /* kept==0 here means that everything was written out */
  1.1609 +    cnv->UCharErrorBufferLength=(int8_t)kept;
  1.1610 +
  1.1611 +    /* hand the advanced positions back to the caller */
  1.1612 +    *target=out;
  1.1613 +    if(offs!=NULL) {
  1.1614 +        *pOffsets=offs;
  1.1615 +    }
  1.1616 +
  1.1617 +    if(kept==0) {
  1.1618 +        return FALSE;
  1.1619 +    }
  1.1620 +
  1.1621 +    /* some output is still pending */
  1.1622 +    *err=U_BUFFER_OVERFLOW_ERROR;
  1.1623 +    return TRUE;
  1.1624 +}
  1.1625 +
  1.1626 +/*
  1.1627 + * Streaming conversion from the converter's charset to UTF-16.
  1.1628 + * Checks the arguments, writes out output still pending in the converter's
  1.1629 + * overflow buffer, then converts via _toUnicodeWithCallback() and advances
  1.1630 + * *source and *target. Bad arguments yield U_ILLEGAL_ARGUMENT_ERROR.
  1.1631 + */
  1.1632 +U_CAPI void U_EXPORT2
  1.1633 +ucnv_toUnicode(UConverter *cnv,
  1.1634 +               UChar **target, const UChar *targetLimit,
  1.1635 +               const char **source, const char *sourceLimit,
  1.1636 +               int32_t *offsets,
  1.1637 +               UBool flush,
  1.1638 +               UErrorCode *err) {
  1.1639 +    UConverterToUnicodeArgs toUArgs;
  1.1640 +    const char *srcStart;
  1.1641 +    UChar *dstStart;
  1.1642 +
  1.1643 +    if(err==NULL || U_FAILURE(*err)) {
  1.1644 +        return;
  1.1645 +    }
  1.1646 +
  1.1647 +    /* the converter and both in/out position pointers are required */
  1.1648 +    if(source==NULL || target==NULL || cnv==NULL) {
  1.1649 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1650 +        return;
  1.1651 +    }
  1.1652 +
  1.1653 +    srcStart=*source;
  1.1654 +    dstStart=*target;
  1.1655 +
  1.1656 +    /*
  1.1657 +     * Move a target limit at U_MAX_PTR back by one byte (the limit pointer is
  1.1658 +     * expected to be on a UChar * boundary). This prevents an infinite loop
  1.1659 +     * in case the limit is hit, and keeps the checks below from failing.
  1.1660 +     */
  1.1661 +    if ((const void *)targetLimit == (const void *)U_MAX_PTR(targetLimit)) {
  1.1662 +        targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
  1.1663 +    }
  1.1664 +
  1.1665 +    /*
  1.1666 +     * None of the following should ever happen. Errors are returned instead
  1.1667 +     * of adjusting the limits so that, unless an error occurs, either the
  1.1668 +     * source is consumed or the target is filled.
  1.1669 +     */
  1.1670 +    /* 1) the limits must not be below the source and target addresses */
  1.1671 +    if(sourceLimit<srcStart || targetLimit<dstStart) {
  1.1672 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1673 +        return;
  1.1674 +    }
  1.1675 +
  1.1676 +    /*
  1.1677 +     * 2) buffer sizes must fit into int32_t: some functions use sizes (in
  1.1678 +     * units or bytes) rather than pointers, and offsets are int32_t values
  1.1679 +     */
  1.1680 +    if( (sourceLimit>srcStart && (size_t)(sourceLimit-srcStart)>(size_t)0x7fffffff) ||
  1.1681 +        (targetLimit>dstStart && (size_t)(targetLimit-dstStart)>(size_t)0x3fffffff)
  1.1682 +    ) {
  1.1683 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1684 +        return;
  1.1685 +    }
  1.1686 +
  1.1687 +    /* 3) no incomplete UChar: target-to-limit byte distance must be even */
  1.1688 +    if((((const char *)targetLimit-(const char *)dstStart) & 1) != 0) {
  1.1689 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1690 +        return;
  1.1691 +    }
  1.1692 +
  1.1693 +    /* first write out what is left in the overflow buffer */
  1.1694 +    if(cnv->UCharErrorBufferLength>0) {
  1.1695 +        if(ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)) {
  1.1696 +            return; /* U_BUFFER_OVERFLOW_ERROR */
  1.1697 +        }
  1.1698 +    }
  1.1699 +    /* *target may have moved: do not use dstStart from here on */
  1.1700 +
  1.1701 +    /* the overflow buffer is empty and there is no new input: done */
  1.1702 +    if(!flush && srcStart==sourceLimit && cnv->preToULength>=0) {
  1.1703 +        return;
  1.1704 +    }
  1.1705 +
  1.1706 +    /*
  1.1707 +     * Do not report a buffer overflow here just because the target is full:
  1.1708 +     * the source may not produce output, e.g. if the skip callback is called.
  1.1709 +     */
  1.1710 +
  1.1711 +    toUArgs.size=sizeof(toUArgs);
  1.1712 +    toUArgs.converter=cnv;
  1.1713 +    toUArgs.flush=flush;
  1.1714 +    toUArgs.offsets=offsets;
  1.1715 +    toUArgs.source=srcStart;
  1.1716 +    toUArgs.sourceLimit=sourceLimit;
  1.1717 +    toUArgs.target=*target;
  1.1718 +    toUArgs.targetLimit=targetLimit;
  1.1719 +    _toUnicodeWithCallback(&toUArgs, err);
  1.1720 +
  1.1721 +    *source=toUArgs.source;
  1.1722 +    *target=toUArgs.target;
  1.1723 +}
  1.1724 +
  1.1725 +/* ucnv_to/fromUChars() ----------------------------------------------------- */
  1.1726 +
  1.1727 +/*
  1.1728 + * Convert a complete UTF-16 string to the converter's charset, resetting the
  1.1729 + * converter first. The output length is counted even beyond destCapacity.
  1.1730 + */
  1.1731 +U_CAPI int32_t U_EXPORT2
  1.1732 +ucnv_fromUChars(UConverter *cnv,
  1.1733 +                char *dest, int32_t destCapacity,
  1.1734 +                const UChar *src, int32_t srcLength,
  1.1735 +                UErrorCode *pErrorCode) {
  1.1736 +    char *destStart=dest;
  1.1737 +    char *destEnd;
  1.1738 +    const UChar *srcEnd;
  1.1739 +    int32_t total;
  1.1740 +
  1.1741 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1.1742 +        return 0;
  1.1743 +    }
  1.1744 +
  1.1745 +    /* srcLength==-1 means NUL-terminated; buffers may be NULL only if empty */
  1.1746 +    if( cnv==NULL ||
  1.1747 +        destCapacity<0 || (dest==NULL && destCapacity>0) ||
  1.1748 +        srcLength<-1 || (src==NULL && srcLength!=0)
  1.1749 +    ) {
  1.1750 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.1751 +        return 0;
  1.1752 +    }
  1.1753 +
  1.1754 +    ucnv_resetFromUnicode(cnv);
  1.1755 +    if(srcLength==-1) {
  1.1756 +        srcLength=u_strlen(src);
  1.1757 +    }
  1.1758 +    if(srcLength<=0) {
  1.1759 +        /* nothing to convert: only terminate the output */
  1.1760 +        return u_terminateChars(destStart, destCapacity, 0, pErrorCode);
  1.1761 +    }
  1.1762 +
  1.1763 +    srcEnd=src+srcLength;
  1.1764 +    destEnd=dest+destCapacity;
  1.1765 +
  1.1766 +    /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
  1.1767 +    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
  1.1768 +        destEnd=(char *)U_MAX_PTR(dest);
  1.1769 +    }
  1.1770 +
  1.1771 +    /* convert into the caller's buffer */
  1.1772 +    ucnv_fromUnicode(cnv, &dest, destEnd, &src, srcEnd, 0, TRUE, pErrorCode);
  1.1773 +    total=(int32_t)(dest-destStart);
  1.1774 +
  1.1775 +    /* on overflow, convert the rest into a scratch buffer to get the length */
  1.1776 +    while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  1.1777 +        char scratch[1024];
  1.1778 +        dest=scratch;
  1.1779 +        *pErrorCode=U_ZERO_ERROR;
  1.1780 +        ucnv_fromUnicode(cnv, &dest, scratch+sizeof(scratch), &src, srcEnd, 0, TRUE, pErrorCode);
  1.1781 +        total+=(int32_t)(dest-scratch);
  1.1782 +    }
  1.1783 +
  1.1784 +    return u_terminateChars(destStart, destCapacity, total, pErrorCode);
  1.1785 +}
  1.1786 +
  1.1787 +/*
  1.1788 + * Convert a complete string from the converter's charset to UTF-16,
  1.1789 + * resetting the converter first. The output length is counted even beyond
  1.1790 + * destCapacity.
  1.1791 + */
  1.1792 +U_CAPI int32_t U_EXPORT2
  1.1793 +ucnv_toUChars(UConverter *cnv,
  1.1794 +              UChar *dest, int32_t destCapacity,
  1.1795 +              const char *src, int32_t srcLength,
  1.1796 +              UErrorCode *pErrorCode) {
  1.1797 +    UChar *destStart=dest;
  1.1798 +    UChar *destEnd;
  1.1799 +    const char *srcEnd;
  1.1800 +    int32_t total;
  1.1801 +
  1.1802 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1.1803 +        return 0;
  1.1804 +    }
  1.1805 +
  1.1806 +    /* srcLength==-1 means NUL-terminated; buffers may be NULL only if empty */
  1.1807 +    if( cnv==NULL ||
  1.1808 +        destCapacity<0 || (dest==NULL && destCapacity>0) ||
  1.1809 +        srcLength<-1 || (src==NULL && srcLength!=0)
  1.1810 +    ) {
  1.1811 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.1812 +        return 0;
  1.1813 +    }
  1.1814 +
  1.1815 +    ucnv_resetToUnicode(cnv);
  1.1816 +    if(srcLength==-1) {
  1.1817 +        srcLength=(int32_t)uprv_strlen(src);
  1.1818 +    }
  1.1819 +    if(srcLength<=0) {
  1.1820 +        /* nothing to convert: only terminate the output */
  1.1821 +        return u_terminateUChars(destStart, destCapacity, 0, pErrorCode);
  1.1822 +    }
  1.1823 +
  1.1824 +    srcEnd=src+srcLength;
  1.1825 +    destEnd=dest+destCapacity;
  1.1826 +
  1.1827 +    /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
  1.1828 +    if(destEnd<dest || (destEnd==NULL && dest!=NULL)) {
  1.1829 +        destEnd=(UChar *)U_MAX_PTR(dest);
  1.1830 +    }
  1.1831 +
  1.1832 +    /* convert into the caller's buffer */
  1.1833 +    ucnv_toUnicode(cnv, &dest, destEnd, &src, srcEnd, 0, TRUE, pErrorCode);
  1.1834 +    total=(int32_t)(dest-destStart);
  1.1835 +
  1.1836 +    /* on overflow, convert the rest into a scratch buffer to get the length */
  1.1837 +    while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  1.1838 +        UChar scratch[1024];
  1.1839 +        UChar *scratchEnd=scratch+sizeof(scratch)/U_SIZEOF_UCHAR;
  1.1840 +        dest=scratch;
  1.1841 +        *pErrorCode=U_ZERO_ERROR;
  1.1842 +        ucnv_toUnicode(cnv, &dest, scratchEnd, &src, srcEnd, 0, TRUE, pErrorCode);
  1.1843 +        total+=(int32_t)(dest-scratch);
  1.1844 +    }
  1.1845 +
  1.1846 +    return u_terminateUChars(destStart, destCapacity, total, pErrorCode);
  1.1847 +}
  1.1848 +
  1.1849 +/* ucnv_getNextUChar() ------------------------------------------------------ */
  1.1850 +
  1.1851 +U_CAPI UChar32 U_EXPORT2 /* the next code point; 0xffff on every error exit */
  1.1852 +ucnv_getNextUChar(UConverter *cnv,
  1.1853 +                  const char **source, const char *sourceLimit, /* *source: in/out read position */
  1.1854 +                  UErrorCode *err) {
  1.1855 +    UConverterToUnicodeArgs args;
  1.1856 +    UChar buffer[U16_MAX_LENGTH]; /* conversion target for _toUnicodeWithCallback() */
  1.1857 +    const char *s;
  1.1858 +    UChar32 c;
  1.1859 +    int32_t i, length;
  1.1860 +
  1.1861 +    /* check parameters; all error exits return 0xffff */
  1.1862 +    if(err==NULL || U_FAILURE(*err)) {
  1.1863 +        return 0xffff;
  1.1864 +    }
  1.1865 +
  1.1866 +    if(cnv==NULL || source==NULL) {
  1.1867 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1868 +        return 0xffff;
  1.1869 +    }
  1.1870 +
  1.1871 +    s=*source;
  1.1872 +    if(sourceLimit<s) {
  1.1873 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1874 +        return 0xffff;
  1.1875 +    }
  1.1876 +
  1.1877 +    /*
  1.1878 +     * Make sure that the buffer sizes do not exceed the number range for
  1.1879 +     * int32_t because some functions use the size (in units or bytes)
  1.1880 +     * rather than comparing pointers, and because offsets are int32_t values.
  1.1881 +     *
  1.1882 +     * size_t is guaranteed to be unsigned and large enough for the job.
  1.1883 +     *
  1.1884 +     * Return with an error instead of adjusting the limits because we would
  1.1885 +     * not be able to maintain the semantics that either the source must be
  1.1886 +     * consumed or the target filled (unless an error occurs).
  1.1887 +     * An adjustment would be sourceLimit=t+0x7fffffff; for example.
  1.1888 +     */
  1.1889 +    if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
  1.1890 +        *err=U_ILLEGAL_ARGUMENT_ERROR;
  1.1891 +        return 0xffff;
  1.1892 +    }
  1.1893 +
  1.1894 +    c=U_SENTINEL; /* c<0 is tested below: nothing taken from the overflow buffer */
  1.1895 +
  1.1896 +    /* flush the target overflow buffer: take the next code point from it */
  1.1897 +    if(cnv->UCharErrorBufferLength>0) {
  1.1898 +        UChar *overflow;
  1.1899 +
  1.1900 +        overflow=cnv->UCharErrorBuffer;
  1.1901 +        i=0;
  1.1902 +        length=cnv->UCharErrorBufferLength;
  1.1903 +        U16_NEXT(overflow, i, length, c);
  1.1904 +
  1.1905 +        /* move the remaining overflow contents up to the beginning */
  1.1906 +        if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
  1.1907 +            uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
  1.1908 +                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
  1.1909 +        }
  1.1910 +
  1.1911 +        if(!U16_IS_LEAD(c) || i<length) {
  1.1912 +            return c;
  1.1913 +        }
  1.1914 +        /*
  1.1915 +         * Continue if the overflow buffer contained only a lead surrogate,
  1.1916 +         * in case the converter outputs single surrogates from complete
  1.1917 +         * input sequences.
  1.1918 +         */
  1.1919 +    }
  1.1920 +
  1.1921 +    /*
  1.1922 +     * flush==TRUE is implied for ucnv_getNextUChar()
  1.1923 +     *
  1.1924 +     * do not simply return even if s==sourceLimit because the converter may
  1.1925 +     * not have seen flush==TRUE before
  1.1926 +     */
  1.1927 +
  1.1928 +    /* prepare the converter arguments */
  1.1929 +    args.converter=cnv;
  1.1930 +    args.flush=TRUE;
  1.1931 +    args.offsets=NULL;
  1.1932 +    args.source=s;
  1.1933 +    args.sourceLimit=sourceLimit;
  1.1934 +    args.target=buffer;
  1.1935 +    args.targetLimit=buffer+1; /* room for one UChar; raised to buffer+2 below for a trail surrogate */
  1.1936 +    args.size=sizeof(args);
  1.1937 +
  1.1938 +    if(c<0) { /* nothing was taken from the overflow buffer */
  1.1939 +        /*
  1.1940 +         * call the native getNextUChar() implementation if we are
  1.1941 +         * at a character boundary (toULength==0)
  1.1942 +         *
  1.1943 +         * unlike with _toUnicode(), getNextUChar() implementations must set
  1.1944 +         * U_TRUNCATED_CHAR_FOUND for truncated input,
  1.1945 +         * in addition to setting toULength/toUBytes[]
  1.1946 +         */
  1.1947 +        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
  1.1948 +            c=cnv->sharedData->impl->getNextUChar(&args, err);
  1.1949 +            *source=s=args.source;
  1.1950 +            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
  1.1951 +                /* reset the converter without calling the callback function */
  1.1952 +                _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
  1.1953 +                return 0xffff; /* no output */
  1.1954 +            } else if(U_SUCCESS(*err) && c>=0) {
  1.1955 +                return c;
  1.1956 +            /*
  1.1957 +             * else fall through to use _toUnicode() because
  1.1958 +             *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
  1.1959 +             *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
  1.1960 +             */
  1.1961 +            }
  1.1962 +        }
  1.1963 +
  1.1964 +        /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
  1.1965 +        _toUnicodeWithCallback(&args, err);
  1.1966 +
  1.1967 +        if(*err==U_BUFFER_OVERFLOW_ERROR) { /* the target holds only one UChar; not an error here */
  1.1968 +            *err=U_ZERO_ERROR;
  1.1969 +        }
  1.1970 +
  1.1971 +        i=0;
  1.1972 +        length=(int32_t)(args.target-buffer);
  1.1973 +    } else {
  1.1974 +        /* write the lead surrogate from the overflow buffer */
  1.1975 +        buffer[0]=(UChar)c;
  1.1976 +        args.target=buffer+1;
  1.1977 +        i=0;
  1.1978 +        length=1;
  1.1979 +    }
  1.1980 +
  1.1981 +    /* buffer contents starts at i and ends before length */
  1.1982 +
  1.1983 +    if(U_FAILURE(*err)) {
  1.1984 +        c=0xffff; /* no output */
  1.1985 +    } else if(length==0) {
  1.1986 +        /* no input or only state changes */
  1.1987 +        *err=U_INDEX_OUTOFBOUNDS_ERROR;
  1.1988 +        /* no need to reset explicitly because _toUnicodeWithCallback() did it */
  1.1989 +        c=0xffff; /* no output */
  1.1990 +    } else {
  1.1991 +        c=buffer[0];
  1.1992 +        i=1;
  1.1993 +        if(!U16_IS_LEAD(c)) {
  1.1994 +            /* consume c=buffer[0], done */
  1.1995 +        } else {
  1.1996 +            /* got a lead surrogate, see if a trail surrogate follows */
  1.1997 +            UChar c2;
  1.1998 +
  1.1999 +            if(cnv->UCharErrorBufferLength>0) {
  1.2000 +                /* got overflow output from the conversion */
  1.2001 +                if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
  1.2002 +                    /* got a trail surrogate, too */
  1.2003 +                    c=U16_GET_SUPPLEMENTARY(c, c2);
  1.2004 +
  1.2005 +                    /* move the remaining overflow contents up to the beginning */
  1.2006 +                    if((--cnv->UCharErrorBufferLength)>0) {
  1.2007 +                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
  1.2008 +                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
  1.2009 +                    }
  1.2010 +                } else {
  1.2011 +                    /* c is an unpaired lead surrogate, just return it */
  1.2012 +                }
  1.2013 +            } else if(args.source<sourceLimit) {
  1.2014 +                /* convert once more, to buffer[1] */
  1.2015 +                args.targetLimit=buffer+2;
  1.2016 +                _toUnicodeWithCallback(&args, err);
  1.2017 +                if(*err==U_BUFFER_OVERFLOW_ERROR) {
  1.2018 +                    *err=U_ZERO_ERROR;
  1.2019 +                }
  1.2020 +
  1.2021 +                length=(int32_t)(args.target-buffer);
  1.2022 +                if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
  1.2023 +                    /* got a trail surrogate, too */
  1.2024 +                    c=U16_GET_SUPPLEMENTARY(c, c2);
  1.2025 +                    i=2;
  1.2026 +                }
  1.2027 +            }
  1.2028 +        }
  1.2029 +    }
  1.2030 +
  1.2031 +    /*
  1.2032 +     * move leftover output from buffer[i..length[
  1.2033 +     * into the beginning of the overflow buffer
  1.2034 +     */
  1.2035 +    if(i<length) {
  1.2036 +        /* move further overflow back */
  1.2037 +        int32_t delta=length-i; /* number of leftover UChars; at most two are copied below */
  1.2038 +        if((length=cnv->UCharErrorBufferLength)>0) {
  1.2039 +            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
  1.2040 +                         length*U_SIZEOF_UCHAR);
  1.2041 +        }
  1.2042 +        cnv->UCharErrorBufferLength=(int8_t)(length+delta);
  1.2043 +
  1.2044 +        cnv->UCharErrorBuffer[0]=buffer[i++];
  1.2045 +        if(delta>1) {
  1.2046 +            cnv->UCharErrorBuffer[1]=buffer[i];
  1.2047 +        }
  1.2048 +    }
  1.2049 +
  1.2050 +    *source=args.source; /* hand the new read position back to the caller */
  1.2051 +    return c;
  1.2052 +}
  1.2053 +
  1.2054 +/* ucnv_convert() and siblings ---------------------------------------------- */
  1.2055 +
  1.2056 +U_CAPI void U_EXPORT2
  1.2057 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
  1.2058 +               char **target, const char *targetLimit,
  1.2059 +               const char **source, const char *sourceLimit,
  1.2060 +               UChar *pivotStart, UChar **pivotSource,
  1.2061 +               UChar **pivotTarget, const UChar *pivotLimit,
  1.2062 +               UBool reset, UBool flush,
  1.2063 +               UErrorCode *pErrorCode) {
  1.2064 +    UChar pivotBuffer[CHUNK_SIZE];
  1.2065 +    const UChar *myPivotSource;
  1.2066 +    UChar *myPivotTarget;
  1.2067 +    const char *s;
  1.2068 +    char *t;
  1.2069 +
  1.2070 +    UConverterToUnicodeArgs toUArgs;
  1.2071 +    UConverterFromUnicodeArgs fromUArgs;
  1.2072 +    UConverterConvert convert;
  1.2073 +
  1.2074 +    /* error checking */
  1.2075 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1.2076 +        return;
  1.2077 +    }
  1.2078 +
  1.2079 +    if( targetCnv==NULL || sourceCnv==NULL ||
  1.2080 +        source==NULL || *source==NULL ||
  1.2081 +        target==NULL || *target==NULL || targetLimit==NULL
  1.2082 +    ) {
  1.2083 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2084 +        return;
  1.2085 +    }
  1.2086 +
  1.2087 +    s=*source;
  1.2088 +    t=*target;
  1.2089 +    if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
  1.2090 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2091 +        return;
  1.2092 +    }
  1.2093 +
  1.2094 +    /*
  1.2095 +     * Make sure that the buffer sizes do not exceed the number range for
  1.2096 +     * int32_t. See ucnv_toUnicode() for a more detailed comment.
  1.2097 +     */
  1.2098 +    if(
  1.2099 +        (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
  1.2100 +        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
  1.2101 +    ) {
  1.2102 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2103 +        return;
  1.2104 +    }
  1.2105 +    
  1.2106 +    if(pivotStart==NULL) {
  1.2107 +        if(!flush) {
  1.2108 +            /* streaming conversion requires an explicit pivot buffer */
  1.2109 +            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2110 +            return;
  1.2111 +        }
  1.2112 +
  1.2113 +        /* use the stack pivot buffer */
  1.2114 +        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
  1.2115 +        pivotSource=(UChar **)&myPivotSource;
  1.2116 +        pivotTarget=&myPivotTarget;
  1.2117 +        pivotLimit=pivotBuffer+CHUNK_SIZE;
  1.2118 +    } else if(  pivotStart>=pivotLimit ||
  1.2119 +                pivotSource==NULL || *pivotSource==NULL ||
  1.2120 +                pivotTarget==NULL || *pivotTarget==NULL ||
  1.2121 +                pivotLimit==NULL
  1.2122 +    ) {
  1.2123 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2124 +        return;
  1.2125 +    }
  1.2126 +
  1.2127 +    if(sourceLimit==NULL) {
  1.2128 +        /* get limit of single-byte-NUL-terminated source string */
  1.2129 +        sourceLimit=uprv_strchr(*source, 0);
  1.2130 +    }
  1.2131 +
  1.2132 +    if(reset) {
  1.2133 +        ucnv_resetToUnicode(sourceCnv);
  1.2134 +        ucnv_resetFromUnicode(targetCnv);
  1.2135 +        *pivotSource=*pivotTarget=pivotStart;
  1.2136 +    } else if(targetCnv->charErrorBufferLength>0) {
  1.2137 +        /* output the targetCnv overflow buffer */
  1.2138 +        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
  1.2139 +            /* U_BUFFER_OVERFLOW_ERROR */
  1.2140 +            return;
  1.2141 +        }
  1.2142 +        /* *target has moved, therefore stop using t */
  1.2143 +
  1.2144 +        if( !flush &&
  1.2145 +            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
  1.2146 +            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
  1.2147 +        ) {
  1.2148 +            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
  1.2149 +            return;
  1.2150 +        }
  1.2151 +    }
  1.2152 +
  1.2153 +    /* Is direct-UTF-8 conversion available? */
  1.2154 +    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
  1.2155 +        targetCnv->sharedData->impl->fromUTF8!=NULL
  1.2156 +    ) {
  1.2157 +        convert=targetCnv->sharedData->impl->fromUTF8;
  1.2158 +    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
  1.2159 +               sourceCnv->sharedData->impl->toUTF8!=NULL
  1.2160 +    ) {
  1.2161 +        convert=sourceCnv->sharedData->impl->toUTF8;
  1.2162 +    } else {
  1.2163 +        convert=NULL;
  1.2164 +    }
  1.2165 +
  1.2166 +    /*
  1.2167 +     * If direct-UTF-8 conversion is available, then we use a smaller
  1.2168 +     * pivot buffer for error handling and partial matches
  1.2169 +     * so that we quickly return to direct conversion.
  1.2170 +     *
  1.2171 +     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
  1.2172 +     *
  1.2173 +     * We could reduce the pivot buffer size further, at the cost of
  1.2174 +     * buffer overflows from callbacks.
  1.2175 +     * The pivot buffer should not be smaller than the maximum number of
  1.2176 +     * fromUnicode extension table input UChars
  1.2177 +     * (for m:n conversion, see
  1.2178 +     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
  1.2179 +     * or 2 for surrogate pairs.
  1.2180 +     *
  1.2181 +     * Too small a buffer can cause thrashing between pivoting and direct
  1.2182 +     * conversion, with function call overhead outweighing the benefits
  1.2183 +     * of direct conversion.
  1.2184 +     */
  1.2185 +    if(convert!=NULL && (pivotLimit-pivotStart)>32) {
  1.2186 +        pivotLimit=pivotStart+32;
  1.2187 +    }
  1.2188 +
  1.2189 +    /* prepare the converter arguments */
  1.2190 +    fromUArgs.converter=targetCnv;
  1.2191 +    fromUArgs.flush=FALSE;
  1.2192 +    fromUArgs.offsets=NULL;
  1.2193 +    fromUArgs.target=*target;
  1.2194 +    fromUArgs.targetLimit=targetLimit;
  1.2195 +    fromUArgs.size=sizeof(fromUArgs);
  1.2196 +
  1.2197 +    toUArgs.converter=sourceCnv;
  1.2198 +    toUArgs.flush=flush;
  1.2199 +    toUArgs.offsets=NULL;
  1.2200 +    toUArgs.source=s;
  1.2201 +    toUArgs.sourceLimit=sourceLimit;
  1.2202 +    toUArgs.targetLimit=pivotLimit;
  1.2203 +    toUArgs.size=sizeof(toUArgs);
  1.2204 +
  1.2205 +    /*
  1.2206 +     * TODO: Consider separating this function into two functions,
  1.2207 +     * extracting exactly the conversion loop,
  1.2208 +     * for readability and to reduce the set of visible variables.
  1.2209 +     *
  1.2210 +     * Otherwise stop using s and t from here on.
  1.2211 +     */
  1.2212 +    s=t=NULL;
  1.2213 +
  1.2214 +    /*
  1.2215 +     * conversion loop
  1.2216 +     *
  1.2217 +     * The sequence of steps in the loop may appear backward,
  1.2218 +     * but the principle is simple:
  1.2219 +     * In the chain of
  1.2220 +     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
  1.2221 +     * empty out later buffers before refilling them from earlier ones.
  1.2222 +     *
  1.2223 +     * The targetCnv overflow buffer is flushed out only once before the loop.
  1.2224 +     */
  1.2225 +    for(;;) {
  1.2226 +        /*
  1.2227 +         * if(pivot not empty or error or replay or flush fromUnicode) {
  1.2228 +         *   fromUnicode(pivot -> target);
  1.2229 +         * }
  1.2230 +         *
  1.2231 +         * For pivoting conversion; and for direct conversion for
  1.2232 +         * error callback handling and flushing the replay buffer.
  1.2233 +         */
  1.2234 +        if( *pivotSource<*pivotTarget ||
  1.2235 +            U_FAILURE(*pErrorCode) ||
  1.2236 +            targetCnv->preFromULength<0 ||
  1.2237 +            fromUArgs.flush
  1.2238 +        ) {
  1.2239 +            fromUArgs.source=*pivotSource;
  1.2240 +            fromUArgs.sourceLimit=*pivotTarget;
  1.2241 +            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
  1.2242 +            if(U_FAILURE(*pErrorCode)) {
  1.2243 +                /* target overflow, or conversion error */
  1.2244 +                *pivotSource=(UChar *)fromUArgs.source;
  1.2245 +                break;
  1.2246 +            }
  1.2247 +
  1.2248 +            /*
  1.2249 +             * _fromUnicodeWithCallback() must have consumed the pivot contents
  1.2250 +             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
  1.2251 +             */
  1.2252 +        }
  1.2253 +
  1.2254 +        /* The pivot buffer is empty; reset it so we start at pivotStart. */
  1.2255 +        *pivotSource=*pivotTarget=pivotStart;
  1.2256 +
  1.2257 +        /*
  1.2258 +         * if(sourceCnv overflow buffer not empty) {
  1.2259 +         *     move(sourceCnv overflow buffer -> pivot);
  1.2260 +         *     continue;
  1.2261 +         * }
  1.2262 +         */
  1.2263 +        /* output the sourceCnv overflow buffer */
  1.2264 +        if(sourceCnv->UCharErrorBufferLength>0) {
  1.2265 +            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
  1.2266 +                /* U_BUFFER_OVERFLOW_ERROR */
  1.2267 +                *pErrorCode=U_ZERO_ERROR;
  1.2268 +            }
  1.2269 +            continue;
  1.2270 +        }
  1.2271 +
  1.2272 +        /*
  1.2273 +         * check for end of input and break if done
  1.2274 +         *
  1.2275 +         * Checking both flush and fromUArgs.flush ensures that the converters
  1.2276 +         * have been called with the flush flag set if the ucnv_convertEx()
  1.2277 +         * caller set it.
  1.2278 +         */
  1.2279 +        if( toUArgs.source==sourceLimit &&
  1.2280 +            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
  1.2281 +            (!flush || fromUArgs.flush)
  1.2282 +        ) {
  1.2283 +            /* done successfully */
  1.2284 +            break;
  1.2285 +        }
  1.2286 +
  1.2287 +        /*
  1.2288 +         * use direct conversion if available
  1.2289 +         * but not if continuing a partial match
  1.2290 +         * or flushing the toUnicode replay buffer
  1.2291 +         */
  1.2292 +        if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
  1.2293 +            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
  1.2294 +                /* remove a warning that may be set by this function */
  1.2295 +                *pErrorCode=U_ZERO_ERROR;
  1.2296 +            }
  1.2297 +            convert(&fromUArgs, &toUArgs, pErrorCode);
  1.2298 +            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  1.2299 +                break;
  1.2300 +            } else if(U_FAILURE(*pErrorCode)) {
  1.2301 +                if(sourceCnv->toULength>0) {
  1.2302 +                    /*
  1.2303 +                     * Fall through to calling _toUnicodeWithCallback()
  1.2304 +                     * for callback handling.
  1.2305 +                     *
  1.2306 +                     * The pivot buffer will be reset with
  1.2307 +                     *   *pivotSource=*pivotTarget=pivotStart;
  1.2308 +                     * which indicates a toUnicode error to the caller
  1.2309 +                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
  1.2310 +                     */
  1.2311 +                } else {
  1.2312 +                    /*
  1.2313 +                     * Indicate a fromUnicode error to the caller
  1.2314 +                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
  1.2315 +                     */
  1.2316 +                    *pivotSource=*pivotTarget=pivotStart+1;
  1.2317 +                    /*
  1.2318 +                     * Loop around to calling _fromUnicodeWithCallback()
  1.2319 +                     * for callback handling.
  1.2320 +                     */
  1.2321 +                    continue;
  1.2322 +                }
  1.2323 +            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
  1.2324 +                /*
  1.2325 +                 * No error, but the implementation requested to temporarily
  1.2326 +                 * fall back to pivoting.
  1.2327 +                 */
  1.2328 +                *pErrorCode=U_ZERO_ERROR;
  1.2329 +            /*
  1.2330 +             * The following else branches are almost identical to the end-of-input
  1.2331 +             * handling in _toUnicodeWithCallback().
  1.2332 +             * Avoid calling it just for the end of input.
  1.2333 +             */
  1.2334 +            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
  1.2335 +                /*
  1.2336 +                 * the entire input stream is consumed
  1.2337 +                 * and there is a partial, truncated input sequence left
  1.2338 +                 */
  1.2339 +
  1.2340 +                /* inject an error and continue with callback handling */
  1.2341 +                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
  1.2342 +            } else {
  1.2343 +                /* input consumed */
  1.2344 +                if(flush) {
  1.2345 +                    /* reset the converters without calling the callback functions */
  1.2346 +                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
  1.2347 +                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
  1.2348 +                }
  1.2349 +
  1.2350 +                /* done successfully */
  1.2351 +                break;
  1.2352 +            }
  1.2353 +        }
  1.2354 +        
  1.2355 +        /*
  1.2356 +         * toUnicode(source -> pivot);
  1.2357 +         *
  1.2358 +         * For pivoting conversion; and for direct conversion for
  1.2359 +         * error callback handling, continuing partial matches
  1.2360 +         * and flushing the replay buffer.
  1.2361 +         *
  1.2362 +         * The pivot buffer is empty and reset.
  1.2363 +         */
  1.2364 +        toUArgs.target=pivotStart; /* ==*pivotTarget */
  1.2365 +        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
  1.2366 +        _toUnicodeWithCallback(&toUArgs, pErrorCode);
  1.2367 +        *pivotTarget=toUArgs.target;
  1.2368 +        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  1.2369 +            /* pivot overflow: continue with the conversion loop */
  1.2370 +            *pErrorCode=U_ZERO_ERROR;
  1.2371 +        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
  1.2372 +            /* conversion error, or there was nothing left to convert */
  1.2373 +            break;
  1.2374 +        }
  1.2375 +        /*
  1.2376 +         * else:
  1.2377 +         * _toUnicodeWithCallback() wrote into the pivot buffer,
  1.2378 +         * continue with fromUnicode conversion.
  1.2379 +         *
  1.2380 +         * Set the fromUnicode flush flag if we flush and if toUnicode has
  1.2381 +         * processed the end of the input.
  1.2382 +         */
  1.2383 +        if( flush && toUArgs.source==sourceLimit &&
  1.2384 +            sourceCnv->preToULength>=0 &&
  1.2385 +            sourceCnv->UCharErrorBufferLength==0
  1.2386 +        ) {
  1.2387 +            fromUArgs.flush=TRUE;
  1.2388 +        }
  1.2389 +    }
  1.2390 +
  1.2391 +    /*
  1.2392 +     * The conversion loop is exited when one of the following is true:
  1.2393 +     * - the entire source text has been converted successfully to the target buffer
  1.2394 +     * - a target buffer overflow occurred
  1.2395 +     * - a conversion error occurred
  1.2396 +     */
  1.2397 +
  1.2398 +    *source=toUArgs.source;
  1.2399 +    *target=fromUArgs.target;
  1.2400 +
  1.2401 +    /* terminate the target buffer if possible */
  1.2402 +    if(flush && U_SUCCESS(*pErrorCode)) {
  1.2403 +        if(*target!=targetLimit) {
  1.2404 +            **target=0;
  1.2405 +            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
  1.2406 +                *pErrorCode=U_ZERO_ERROR;
  1.2407 +            }
  1.2408 +        } else {
  1.2409 +            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
  1.2410 +        }
  1.2411 +    }
  1.2412 +}
  1.2413 +
  1.2414 +/*
         + * Internal implementation of ucnv_convert() etc. with preflighting.
         + *
         + * Converts the bytes in [source, source+sourceLength) - or up to the NUL
         + * terminator when sourceLength<0 - from inConverter's charset to
         + * outConverter's charset by calling ucnv_convertEx() with a stack pivot
         + * buffer of CHUNK_SIZE UChars.
         + *
         + * When targetCapacity>0 the output is first written into target. If that
         + * overflows, or if targetCapacity==0, the conversion is continued into a
         + * scratch buffer of CHUNK_SIZE bytes that is overwritten on every pass,
         + * only so that the total output length can be counted.
         + *
         + * Returns targetLength if everything fit into target; otherwise returns
         + * the result of u_terminateChars() called with the accumulated length.
         + * Errors and warnings are reported through pErrorCode.
         + *
         + * No argument validation happens here; the callers visible in this file
         + * (ucnv_convert(), ucnv_convertAlgorithmic()) validate before calling.
         + */
  1.2415 +static int32_t
  1.2416 +ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
  1.2417 +                     char *target, int32_t targetCapacity,
  1.2418 +                     const char *source, int32_t sourceLength,
  1.2419 +                     UErrorCode *pErrorCode) {
  1.2420 +    UChar pivotBuffer[CHUNK_SIZE];
  1.2421 +    UChar *pivot, *pivot2;
  1.2422 +
  1.2423 +    char *myTarget;
  1.2424 +    const char *sourceLimit;
  1.2425 +    const char *targetLimit;
  1.2426 +    int32_t targetLength=0;
  1.2427 +
  1.2428 +    /* set up: determine where the input ends */
  1.2429 +    if(sourceLength<0) {
  1.2430 +        sourceLimit=uprv_strchr(source, 0); /* NUL-terminated input: the limit is the terminator */
  1.2431 +    } else {
  1.2432 +        sourceLimit=source+sourceLength;
  1.2433 +    }
  1.2434 +
  1.2435 +    /* if there is no input data, we're done */
  1.2436 +    if(source==sourceLimit) {
  1.2437 +        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
  1.2438 +    }
  1.2439 +
  1.2440 +    pivot=pivot2=pivotBuffer; /* the pivot buffer starts out empty */
  1.2441 +    myTarget=target;
  1.2442 +    targetLength=0;
  1.2443 +
  1.2444 +    if(targetCapacity>0) {
  1.2445 +        /* perform real conversion */
  1.2446 +        targetLimit=target+targetCapacity;
  1.2447 +        ucnv_convertEx(outConverter, inConverter,
  1.2448 +                       &myTarget, targetLimit,
  1.2449 +                       &source, sourceLimit,
  1.2450 +                       pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
  1.2451 +                       FALSE, /* reset argument, per the ucnv_convertEx() API */
  1.2452 +                       TRUE, /* flush argument: all of the input is available */
  1.2453 +                       pErrorCode);
  1.2454 +        targetLength=(int32_t)(myTarget-target); /* bytes actually written to target */
  1.2455 +    }
  1.2456 +
  1.2457 +    /*
  1.2458 +     * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
  1.2459 +     * to it but continue the conversion in order to accumulate in targetLength
  1.2460 +     * the number of bytes that was required.
  1.2461 +     */
  1.2462 +    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
  1.2463 +    {
  1.2464 +        char targetBuffer[CHUNK_SIZE]; /* scratch output, contents are discarded */
  1.2465 +
  1.2466 +        targetLimit=targetBuffer+CHUNK_SIZE;
  1.2467 +        do {
  1.2468 +            *pErrorCode=U_ZERO_ERROR; /* clear the overflow so that conversion can resume */
  1.2469 +            myTarget=targetBuffer; /* reuse the scratch buffer from its start on every pass */
  1.2470 +            ucnv_convertEx(outConverter, inConverter,
  1.2471 +                           &myTarget, targetLimit,
  1.2472 +                           &source, sourceLimit,
  1.2473 +                           pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
  1.2474 +                           FALSE,
  1.2475 +                           TRUE,
  1.2476 +                           pErrorCode);
  1.2477 +            targetLength+=(int32_t)(myTarget-targetBuffer); /* count only, nothing is kept */
  1.2478 +        } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
  1.2479 +
  1.2480 +        /* done with preflighting, set warnings and errors as appropriate */
  1.2481 +        return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
  1.2482 +    }
  1.2483 +
  1.2484 +    /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
  1.2485 +    return targetLength;
  1.2486 +}
  1.2487 +/*
         + * Converts source from the charset named fromConverterName to the charset
         + * named toConverterName, writing the result to target.
         + *
         + * Two stack-allocated converters are created for the call and closed
         + * before returning; the conversion itself is done by
         + * ucnv_internalConvert().
         + *
         + * Returns 0 without doing anything if pErrorCode is NULL or already
         + * indicates a failure, and 0 if either converter cannot be created.
         + * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if source is NULL,
         + * sourceLength<-1, targetCapacity<0, or targetCapacity>0 with a NULL
         + * target. Otherwise returns the value from ucnv_internalConvert(), or
         + * from u_terminateChars() for empty input.
         + */
  1.2488 +U_CAPI int32_t U_EXPORT2
  1.2489 +ucnv_convert(const char *toConverterName, const char *fromConverterName,
  1.2490 +             char *target, int32_t targetCapacity,
  1.2491 +             const char *source, int32_t sourceLength,
  1.2492 +             UErrorCode *pErrorCode) {
  1.2493 +    UConverter in, out; /* stack-allocated */
  1.2494 +    UConverter *inConverter, *outConverter;
  1.2495 +    int32_t targetLength;
  1.2496 +
  1.2497 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1.2498 +        return 0;
  1.2499 +    }
  1.2500 +
  1.2501 +    if( source==NULL || sourceLength<-1 ||
  1.2502 +        targetCapacity<0 || (targetCapacity>0 && target==NULL)
  1.2503 +    ) {
  1.2504 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2505 +        return 0;
  1.2506 +    }
  1.2507 +
  1.2508 +    /* if there is no input data, we're done (checked before any converter is created) */
  1.2509 +    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
  1.2510 +        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
  1.2511 +    }
  1.2512 +
  1.2513 +    /* create the converters */
  1.2514 +    inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
  1.2515 +    if(U_FAILURE(*pErrorCode)) {
  1.2516 +        return 0;
  1.2517 +    }
  1.2518 +
  1.2519 +    outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
  1.2520 +    if(U_FAILURE(*pErrorCode)) {
  1.2521 +        ucnv_close(inConverter); /* the input converter was already created: release it */
  1.2522 +        return 0;
  1.2523 +    }
  1.2524 +
  1.2525 +    targetLength=ucnv_internalConvert(outConverter, inConverter,
  1.2526 +                                      target, targetCapacity,
  1.2527 +                                      source, sourceLength,
  1.2528 +                                      pErrorCode);
  1.2529 +
  1.2530 +    ucnv_close(inConverter);
  1.2531 +    ucnv_close(outConverter);
  1.2532 +
  1.2533 +    return targetLength;
  1.2534 +}
  1.2535 +
  1.2536 +/*
         + * @internal
         + * Shared implementation of ucnv_toAlgorithmic() and ucnv_fromAlgorithmic().
         + *
         + * Creates a temporary, stack-allocated algorithmic converter of type
         + * algorithmicType and converts between it and cnv through
         + * ucnv_internalConvert():
         + *   convertToAlgorithmic!=0:  cnv -> Unicode -> algorithmic
         + *   convertToAlgorithmic==0:  algorithmic -> Unicode -> cnv
         + * Only the direction of cnv that is used gets reset beforehand.
         + *
         + * Returns 0 if pErrorCode is NULL or already a failure, if an argument
         + * is invalid (U_ILLEGAL_ARGUMENT_ERROR), or if the algorithmic converter
         + * cannot be created; otherwise the value from ucnv_internalConvert(), or
         + * from u_terminateChars() for empty input.
         + */
  1.2537 +static int32_t
  1.2538 +ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
  1.2539 +                        UConverterType algorithmicType,
  1.2540 +                        UConverter *cnv,
  1.2541 +                        char *target, int32_t targetCapacity,
  1.2542 +                        const char *source, int32_t sourceLength,
  1.2543 +                        UErrorCode *pErrorCode) {
  1.2544 +    UConverter algoConverterStatic; /* stack-allocated */
  1.2545 +    UConverter *algoConverter, *to, *from;
  1.2546 +    int32_t targetLength;
  1.2547 +
  1.2548 +    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1.2549 +        return 0;
  1.2550 +    }
  1.2551 +
  1.2552 +    if( cnv==NULL || source==NULL || sourceLength<-1 ||
  1.2553 +        targetCapacity<0 || (targetCapacity>0 && target==NULL)
  1.2554 +    ) {
  1.2555 +        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.2556 +        return 0;
  1.2557 +    }
  1.2558 +
  1.2559 +    /* if there is no input data, we're done */
  1.2560 +    if(sourceLength==0 || (sourceLength<0 && *source==0)) {
  1.2561 +        return u_terminateChars(target, targetCapacity, 0, pErrorCode);
  1.2562 +    }
  1.2563 +
  1.2564 +    /* create the algorithmic converter */
  1.2565 +    algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
  1.2566 +                                                  "", 0, pErrorCode);
  1.2567 +    if(U_FAILURE(*pErrorCode)) {
  1.2568 +        return 0;
  1.2569 +    }
  1.2570 +
  1.2571 +    /* reset the other converter, in the direction that is about to be used */
  1.2572 +    if(convertToAlgorithmic) {
  1.2573 +        /* cnv->Unicode->algo */
  1.2574 +        ucnv_resetToUnicode(cnv);
  1.2575 +        to=algoConverter;
  1.2576 +        from=cnv;
  1.2577 +    } else {
  1.2578 +        /* algo->Unicode->cnv */
  1.2579 +        ucnv_resetFromUnicode(cnv);
  1.2580 +        from=algoConverter;
  1.2581 +        to=cnv;
  1.2582 +    }
  1.2583 +
  1.2584 +    targetLength=ucnv_internalConvert(to, from,
  1.2585 +                                      target, targetCapacity,
  1.2586 +                                      source, sourceLength,
  1.2587 +                                      pErrorCode);
  1.2588 +
  1.2589 +    ucnv_close(algoConverter); /* only the temporary converter is closed; cnv stays with the caller */
  1.2590 +
  1.2591 +    return targetLength;
  1.2592 +}
  1.2593 +/*
         + * Converts source from cnv's charset to the algorithmic charset
         + * algorithmicType. Thin wrapper that calls ucnv_convertAlgorithmic() with
         + * convertToAlgorithmic=TRUE and returns its result unchanged.
         + */
  1.2594 +U_CAPI int32_t U_EXPORT2
  1.2595 +ucnv_toAlgorithmic(UConverterType algorithmicType,
  1.2596 +                   UConverter *cnv,
  1.2597 +                   char *target, int32_t targetCapacity,
  1.2598 +                   const char *source, int32_t sourceLength,
  1.2599 +                   UErrorCode *pErrorCode) {
  1.2600 +    return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
  1.2601 +                                   target, targetCapacity,
  1.2602 +                                   source, sourceLength,
  1.2603 +                                   pErrorCode);
  1.2604 +}
  1.2605 +/*
         + * Converts source from the algorithmic charset algorithmicType to cnv's
         + * charset. Thin wrapper that calls ucnv_convertAlgorithmic() with
         + * convertToAlgorithmic=FALSE and returns its result unchanged.
         + * Note that cnv comes before algorithmicType here, unlike in
         + * ucnv_toAlgorithmic().
         + */
  1.2606 +U_CAPI int32_t U_EXPORT2
  1.2607 +ucnv_fromAlgorithmic(UConverter *cnv,
  1.2608 +                     UConverterType algorithmicType,
  1.2609 +                     char *target, int32_t targetCapacity,
  1.2610 +                     const char *source, int32_t sourceLength,
  1.2611 +                     UErrorCode *pErrorCode) {
  1.2612 +    return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
  1.2613 +                                   target, targetCapacity,
  1.2614 +                                   source, sourceLength,
  1.2615 +                                   pErrorCode);
  1.2616 +}
  1.2617 +/*
         + * Returns the converter's type.
         + *
         + * The type is read from converter->sharedData->staticData->conversionType.
         + * When legacy conversion is compiled in and that type is UCNV_MBCS, the
         + * result comes from ucnv_MBCSGetType() instead.
         + *
         + * converter is dereferenced without a NULL check, so it must not be NULL.
         + */
  1.2618 +U_CAPI UConverterType  U_EXPORT2
  1.2619 +ucnv_getType(const UConverter* converter)
  1.2620 +{
  1.2621 +    int8_t type = converter->sharedData->staticData->conversionType;
  1.2622 +#if !UCONFIG_NO_LEGACY_CONVERSION
  1.2623 +    if(type == UCNV_MBCS) {
  1.2624 +        return ucnv_MBCSGetType(converter); /* let the MBCS implementation pick the type */
  1.2625 +    }
  1.2626 +#endif
  1.2627 +    return (UConverterType)type;
  1.2628 +}
  1.2629 +/*
         + * Fills starters[] by delegating to the getStarters function of the
         + * converter's implementation.
         + *
         + * Does nothing if err is NULL or already indicates a failure.
         + * Sets *err to U_ILLEGAL_ARGUMENT_ERROR if converter is NULL or if its
         + * implementation provides no getStarters function.
         + */
  1.2630 +U_CAPI void  U_EXPORT2
  1.2631 +ucnv_getStarters(const UConverter* converter, 
  1.2632 +                 UBool starters[256],
  1.2633 +                 UErrorCode* err)
  1.2634 +{
  1.2635 +    if (err == NULL || U_FAILURE(*err)) {
  1.2636 +        return;
  1.2637 +    }
  1.2638 +    /* a NULL converter takes the error branch below instead of being dereferenced */
  1.2639 +    if(converter != NULL && converter->sharedData->impl->getStarters != NULL) {
  1.2640 +        converter->sharedData->impl->getStarters(converter, starters, err);
  1.2641 +    } else {
  1.2642 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
  1.2643 +    }
  1.2644 +}
  1.2645 +/*
         + * Looks up cnv's name (as reported by ucnv_getName()) in the
         + * ambiguousConverters table using an exact string comparison.
         + *
         + * Returns a pointer to the matching table entry, or NULL if cnv is NULL,
         + * if the name cannot be obtained, or if there is no matching entry.
         + */
  1.2646 +static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
  1.2647 +{
  1.2648 +    UErrorCode errorCode;
  1.2649 +    const char *name;
  1.2650 +    int32_t i;
  1.2651 +
  1.2652 +    if(cnv==NULL) {
  1.2653 +        return NULL;
  1.2654 +    }
  1.2655 +
  1.2656 +    errorCode=U_ZERO_ERROR; /* local error code: failures are not reported to the caller */
  1.2657 +    name=ucnv_getName(cnv, &errorCode);
  1.2658 +    if(U_FAILURE(errorCode)) {
  1.2659 +        return NULL;
  1.2660 +    }
  1.2661 +
  1.2662 +    for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
  1.2663 +    {
  1.2664 +        if(0==uprv_strcmp(name, ambiguousConverters[i].name))
  1.2665 +        {
  1.2666 +            return ambiguousConverters+i;
  1.2667 +        }
  1.2668 +    }
  1.2669 +
  1.2670 +    return NULL; /* not listed in the table */
  1.2671 +}
  1.2672 +/*
         + * Replaces, in place, every UChar in source[0..sourceLength) that equals
         + * the variant5c value of cnv's ambiguousConverters entry with 0x5c.
         + *
         + * Returns without changing anything if cnv or source is NULL, if
         + * sourceLength<=0, or if cnv has no entry in the ambiguousConverters
         + * table.
         + */
  1.2673 +U_CAPI void  U_EXPORT2
  1.2674 +ucnv_fixFileSeparator(const UConverter *cnv, 
  1.2675 +                      UChar* source, 
  1.2676 +                      int32_t sourceLength) {
  1.2677 +    const UAmbiguousConverter *a;
  1.2678 +    int32_t i;
  1.2679 +    UChar variant5c;
  1.2680 +
  1.2681 +    if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
  1.2682 +    {
  1.2683 +        return;
  1.2684 +    }
  1.2685 +
  1.2686 +    variant5c=a->variant5c; /* the code unit that is mapped back to 0x5c */
  1.2687 +    for(i=0; i<sourceLength; ++i) {
  1.2688 +        if(source[i]==variant5c) {
  1.2689 +            source[i]=0x5c;
  1.2690 +        }
  1.2691 +    }
  1.2692 +}
  1.2693 +/*
         + * Returns TRUE if cnv has an entry in the ambiguousConverters table, that
         + * is if ucnv_getAmbiguous() returns non-NULL; FALSE otherwise, including
         + * for a NULL cnv.
         + */
  1.2694 +U_CAPI UBool  U_EXPORT2
  1.2695 +ucnv_isAmbiguous(const UConverter *cnv) {
  1.2696 +    return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
  1.2697 +}
  1.2698 +/*
         + * Stores usesFallback in cnv->useFallback.
         + * cnv is dereferenced without a NULL check, so it must not be NULL.
         + */
  1.2699 +U_CAPI void  U_EXPORT2
  1.2700 +ucnv_setFallback(UConverter *cnv, UBool usesFallback)
  1.2701 +{
  1.2702 +    cnv->useFallback = usesFallback;
  1.2703 +}
  1.2704 +/*
         + * Returns the current value of cnv->useFallback.
         + * cnv is dereferenced without a NULL check, so it must not be NULL.
         + */
  1.2705 +U_CAPI UBool  U_EXPORT2
  1.2706 +ucnv_usesFallback(const UConverter *cnv)
  1.2707 +{
  1.2708 +    return cnv->useFallback;
  1.2709 +}
  1.2710 +/*
         + * Copies the converter's invalidCharBuffer into errBytes.
         + *
         + * On input *len is the capacity of errBytes; on success it is set to
         + * converter->invalidCharLength, the number of bytes copied (which may
         + * be 0).
         + *
         + * Does nothing if err is NULL or already indicates a failure.
         + * Sets U_ILLEGAL_ARGUMENT_ERROR if len, errBytes or converter is NULL,
         + * and U_INDEX_OUTOFBOUNDS_ERROR if *len is smaller than the stored
         + * length; *len is left unchanged in both error cases.
         + */
  1.2711 +U_CAPI void  U_EXPORT2
  1.2712 +ucnv_getInvalidChars (const UConverter * converter,
  1.2713 +                      char *errBytes,
  1.2714 +                      int8_t * len,
  1.2715 +                      UErrorCode * err)
  1.2716 +{
  1.2717 +    if (err == NULL || U_FAILURE(*err))
  1.2718 +    {
  1.2719 +        return;
  1.2720 +    }
  1.2721 +    if (len == NULL || errBytes == NULL || converter == NULL)
  1.2722 +    {
  1.2723 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
  1.2724 +        return;
  1.2725 +    }
  1.2726 +    if (*len < converter->invalidCharLength)
  1.2727 +    {
  1.2728 +        *err = U_INDEX_OUTOFBOUNDS_ERROR;
  1.2729 +        return;
  1.2730 +    }
  1.2731 +    if ((*len = converter->invalidCharLength) > 0) /* report the length; copy only if non-empty */
  1.2732 +    {
  1.2733 +        uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
  1.2734 +    }
  1.2735 +}
  1.2736 +/*
         + * Copies the converter's invalidUCharBuffer into errChars.
         + *
         + * On input *len is the capacity of errChars in UChars; on success it is
         + * set to converter->invalidUCharLength, the number of UChars copied
         + * (which may be 0).
         + *
         + * Does nothing if err is NULL or already indicates a failure.
         + * Sets U_ILLEGAL_ARGUMENT_ERROR if len, errChars or converter is NULL,
         + * and U_INDEX_OUTOFBOUNDS_ERROR if *len is smaller than the stored
         + * length; *len is left unchanged in both error cases.
         + */
  1.2737 +U_CAPI void  U_EXPORT2
  1.2738 +ucnv_getInvalidUChars (const UConverter * converter,
  1.2739 +                       UChar *errChars,
  1.2740 +                       int8_t * len,
  1.2741 +                       UErrorCode * err)
  1.2742 +{
  1.2743 +    if (err == NULL || U_FAILURE(*err))
  1.2744 +    {
  1.2745 +        return;
  1.2746 +    }
  1.2747 +    if (len == NULL || errChars == NULL || converter == NULL)
  1.2748 +    {
  1.2749 +        *err = U_ILLEGAL_ARGUMENT_ERROR;
  1.2750 +        return;
  1.2751 +    }
  1.2752 +    if (*len < converter->invalidUCharLength)
  1.2753 +    {
  1.2754 +        *err = U_INDEX_OUTOFBOUNDS_ERROR;
  1.2755 +        return;
  1.2756 +    }
  1.2757 +    if ((*len = converter->invalidUCharLength) > 0) /* report the length; copy only if non-empty */
  1.2758 +    {
  1.2759 +        uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); /* size is in bytes */
  1.2760 +    }
  1.2761 +}
  1.2762 +
  1.2763 +#define SIG_MAX_LEN 5
  1.2764 +/*
         + * Checks whether source begins with a known Unicode signature byte
         + * sequence and returns the matching charset name.
         + *
         + * source           input bytes; must not be NULL
         + * sourceLength     number of bytes, or -1 if source is NUL-terminated
         + * signatureLength  receives the length of the recognized signature, or 0
         + *                  if none was recognized; may be NULL
         + * pErrorCode       set to U_ILLEGAL_ARGUMENT_ERROR if source is NULL or
         + *                  sourceLength<-1
         + *
         + * Returns one of the string literals "UTF-16BE", "UTF-16LE", "UTF-32BE",
         + * "UTF-32LE", "UTF-8", "SCSU", "BOCU-1", "UTF-7" or "UTF-EBCDIC", or NULL
         + * if no signature is recognized, if an argument is invalid, or if
         + * pErrorCode is NULL or already indicates a failure.
         + *
         + * At most SIG_MAX_LEN bytes of source are examined.
         + */
  1.2765 +U_CAPI const char* U_EXPORT2
  1.2766 +ucnv_detectUnicodeSignature( const char* source,
  1.2767 +                             int32_t sourceLength,
  1.2768 +                             int32_t* signatureLength,
  1.2769 +                             UErrorCode* pErrorCode) {
  1.2770 +    int32_t dummy;
  1.2771 +
  1.2772 +    /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
  1.2773 +     * bytes we don't misdetect something 
  1.2774 +     */
  1.2775 +    char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
  1.2776 +    int i = 0;
  1.2777 +
  1.2778 +    if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
  1.2779 +        return NULL;
  1.2780 +    }
  1.2781 +    
  1.2782 +    if(source == NULL || sourceLength < -1){
  1.2783 +        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  1.2784 +        return NULL;
  1.2785 +    }
  1.2786 +
  1.2787 +    if(signatureLength == NULL) {
  1.2788 +        signatureLength = &dummy; /* lets the code below store the length unconditionally */
  1.2789 +    }
  1.2790 +
  1.2791 +    if(sourceLength==-1){
  1.2792 +        sourceLength=(int32_t)uprv_strlen(source);
  1.2793 +    }
  1.2794 +
  1.2795 +    /* copy at most SIG_MAX_LEN input bytes; any remaining slots keep 0xa5 */
  1.2796 +    while(i<sourceLength&& i<SIG_MAX_LEN){
  1.2797 +        start[i]=source[i];
  1.2798 +        i++;
  1.2799 +    }
  1.2800 +
  1.2801 +    if(start[0] == '\xFE' && start[1] == '\xFF') {
  1.2802 +        *signatureLength=2;
  1.2803 +        return  "UTF-16BE";
  1.2804 +    } else if(start[0] == '\xFF' && start[1] == '\xFE') {
  1.2805 +        if(start[2] == '\x00' && start[3] =='\x00') { /* FF FE 00 00 is checked before plain FF FE */
  1.2806 +            *signatureLength=4;
  1.2807 +            return "UTF-32LE";
  1.2808 +        } else {
  1.2809 +            *signatureLength=2;
  1.2810 +            return  "UTF-16LE";
  1.2811 +        }
  1.2812 +    } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
  1.2813 +        *signatureLength=3;
  1.2814 +        return  "UTF-8";
  1.2815 +    } else if(start[0] == '\x00' && start[1] == '\x00' && 
  1.2816 +              start[2] == '\xFE' && start[3]=='\xFF') {
  1.2817 +        *signatureLength=4;
  1.2818 +        return  "UTF-32BE";
  1.2819 +    } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
  1.2820 +        *signatureLength=3;
  1.2821 +        return "SCSU";
  1.2822 +    } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
  1.2823 +        *signatureLength=3;
  1.2824 +        return "BOCU-1";
  1.2825 +    } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
  1.2826 +        /*
  1.2827 +         * UTF-7: Initial U+FEFF is encoded as +/v8  or  +/v9  or  +/v+  or  +/v/
  1.2828 +         * depending on the second UTF-16 code unit.
  1.2829 +         * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
  1.2830 +         * if it occurs.
  1.2831 +         *
  1.2832 +         * So far we have +/v
  1.2833 +         */
  1.2834 +        if(start[3] == '\x38' && start[4] == '\x2D') {
  1.2835 +            /* 5 bytes +/v8- */
  1.2836 +            *signatureLength=5;
  1.2837 +            return "UTF-7";
  1.2838 +        } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
  1.2839 +            /* 4 bytes +/v8  or  +/v9  or  +/v+  or  +/v/ */
  1.2840 +            *signatureLength=4;
  1.2841 +            return "UTF-7";
  1.2842 +        } /* otherwise "+/v" alone is not a signature: fall out to the no-match result */
  1.2843 +    }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
  1.2844 +        *signatureLength=4;
  1.2845 +        return "UTF-EBCDIC";
  1.2846 +    }
  1.2847 +
  1.2848 +
  1.2849 +    /* no known Unicode signature byte sequence recognized */
  1.2850 +    *signatureLength=0;
  1.2851 +    return NULL;
  1.2852 +}
  1.2853 +/*
         + * Returns a count derived from the converter's pending fromUnicode state.
         + * NOTE(review): the public API documentation presumably describes this
         + * as the number of UChars held in the converter - confirm there.
         + *
         + * The first matching case wins:
         + *   preFromUFirstCP>=0  ->  U16_LENGTH(preFromUFirstCP)+preFromULength
         + *   preFromULength<0    ->  -preFromULength
         + *   fromUChar32>0       ->  1
         + *   otherwise           ->  0
         + *
         + * Returns -1 if status is NULL or already indicates a failure, and -1
         + * with U_ILLEGAL_ARGUMENT_ERROR if cnv is NULL.
         + */
  1.2854 +U_CAPI int32_t U_EXPORT2
  1.2855 +ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
  1.2856 +{
  1.2857 +    if(status == NULL || U_FAILURE(*status)){
  1.2858 +        return -1;
  1.2859 +    }
  1.2860 +    if(cnv == NULL){
  1.2861 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.2862 +        return -1;
  1.2863 +    }
  1.2864 +
  1.2865 +    if(cnv->preFromUFirstCP >= 0){
  1.2866 +        return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
  1.2867 +    }else if(cnv->preFromULength < 0){
  1.2868 +        return -cnv->preFromULength ; /* a negative length is reported as its magnitude */
  1.2869 +    }else if(cnv->fromUChar32 > 0){
  1.2870 +        return 1;
  1.2871 +    }
  1.2872 +    return 0; 
  1.2873 +
  1.2874 +}
  1.2875 +/*
         + * Returns a count derived from the converter's pending toUnicode state.
         + * NOTE(review): the public API documentation presumably describes this
         + * as the number of bytes held in the converter - confirm there.
         + *
         + * The first matching case wins:
         + *   preToULength>0  ->  preToULength
         + *   preToULength<0  ->  -preToULength
         + *   toULength>0     ->  toULength
         + *   otherwise       ->  0
         + *
         + * Returns -1 if status is NULL or already indicates a failure, and -1
         + * with U_ILLEGAL_ARGUMENT_ERROR if cnv is NULL.
         + */
  1.2876 +U_CAPI int32_t U_EXPORT2
  1.2877 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
  1.2878 +
  1.2879 +    if(status == NULL || U_FAILURE(*status)){
  1.2880 +        return -1;
  1.2881 +    }
  1.2882 +    if(cnv == NULL){
  1.2883 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.2884 +        return -1;
  1.2885 +    }
  1.2886 +
  1.2887 +    if(cnv->preToULength > 0){
  1.2888 +        return cnv->preToULength ;
  1.2889 +    }else if(cnv->preToULength < 0){
  1.2890 +        return -cnv->preToULength; /* a negative length is reported as its magnitude */
  1.2891 +    }else if(cnv->toULength > 0){
  1.2892 +        return cnv->toULength;
  1.2893 +    }
  1.2894 +    return 0;
  1.2895 +}
  1.2896 +/*
         + * Reports whether cnv is treated as a fixed-width converter, judged solely
         + * by the type that ucnv_getType() returns.
         + *
         + * Returns TRUE for UCNV_SBCS, UCNV_DBCS, UCNV_UTF32_BigEndian,
         + * UCNV_UTF32_LittleEndian, UCNV_UTF32 and UCNV_US_ASCII; FALSE for every
         + * other type.
         + *
         + * Returns FALSE if status is NULL or already indicates a failure, and
         + * FALSE with U_ILLEGAL_ARGUMENT_ERROR if cnv is NULL.
         + */
  1.2897 +U_CAPI UBool U_EXPORT2
  1.2898 +ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
  1.2899 +    if (status == NULL || U_FAILURE(*status)) { /* same NULL guard as ucnv_toUCountPending() */
  1.2900 +        return FALSE;
  1.2901 +    }
  1.2902 +
  1.2903 +    if (cnv == NULL) {
  1.2904 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.2905 +        return FALSE;
  1.2906 +    }
  1.2907 +
  1.2908 +    switch (ucnv_getType(cnv)) {
  1.2909 +        case UCNV_SBCS:
  1.2910 +        case UCNV_DBCS:
  1.2911 +        case UCNV_UTF32_BigEndian:
  1.2912 +        case UCNV_UTF32_LittleEndian:
  1.2913 +        case UCNV_UTF32:
  1.2914 +        case UCNV_US_ASCII:
  1.2915 +            return TRUE;
  1.2916 +        default:
  1.2917 +            return FALSE;
  1.2918 +    }
  1.2919 +}
  1.2920 +#endif
  1.2921 +
  1.2922 +/*
  1.2923 + * Hey, Emacs, please set the following:
  1.2924 + *
  1.2925 + * Local Variables:
  1.2926 + * indent-tabs-mode: nil
  1.2927 + * End:
  1.2928 + *
  1.2929 + */
The Tor Browser / file diff

diff: intl/icu/source/common/ucnv.c

intl/icu/source/common/ucnv.c