intl/icu/source/common/ucnv.c

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic when either private browsing mode
or the privacy.thirdparty.isolate preference is active, and
implement it in GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie callers to test
the condition and pass a nullptr when double key logic is disabled.

     1 /*
     2 ******************************************************************************
     3 *
     4 *   Copyright (C) 1998-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 ******************************************************************************
     8 *
     9 *  ucnv.c:
    10 *  Implements APIs for the ICU's codeset conversion library;
    11 *  mostly calls through internal functions;
    12 *  created by Bertrand A. Damiba
    13 *
    14 * Modification History:
    15 *
    16 *   Date        Name        Description
    17 *   04/04/99    helena      Fixed internal header inclusion.
    18 *   05/09/00    helena      Added implementation to handle fallback mappings.
    19 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
    20 */
    22 #include "unicode/utypes.h"
    24 #if !UCONFIG_NO_CONVERSION
    26 #include "unicode/ustring.h"
    27 #include "unicode/ucnv.h"
    28 #include "unicode/ucnv_err.h"
    29 #include "unicode/uset.h"
    30 #include "unicode/utf.h"
    31 #include "unicode/utf16.h"
    32 #include "putilimp.h"
    33 #include "cmemory.h"
    34 #include "cstring.h"
    35 #include "uassert.h"
    36 #include "utracimp.h"
    37 #include "ustr_imp.h"
    38 #include "ucnv_imp.h"
    39 #include "ucnv_cnv.h"
    40 #include "ucnv_bld.h"
    42 /* size of intermediate and preflighting buffers in ucnv_convert() */
    43 #define CHUNK_SIZE 1024
     /*
      * Pairs a converter name with a UChar value called variant5c.
      * NOTE(review): presumably variant5c is the Unicode character that byte 0x5c
      * converts to in that charset instead of U+005C -- confirm against the code
      * that consumes this type, which is not visible in this part of the file.
      */
     45 typedef struct UAmbiguousConverter {
     46     const char *name;           /* converter name string */
     47     const UChar variant5c;      /* single UChar associated with that converter */
     48 } UAmbiguousConverter;
     /*
      * File-local table of converter names, each paired with its variant5c value.
      * The values used are 0xa5 (U+00A5 YEN SIGN) and 0x20a9 (U+20A9 WON SIGN).
      * Entries wrapped in comments below are not part of the table.
      */
     50 static const UAmbiguousConverter ambiguousConverters[]={
     51     { "ibm-897_P100-1995", 0xa5 },
     52     { "ibm-942_P120-1999", 0xa5 },
     53     { "ibm-943_P130-1999", 0xa5 },
     54     { "ibm-946_P100-1995", 0xa5 },
     55     { "ibm-33722_P120-1999", 0xa5 },
     56     { "ibm-1041_P100-1995", 0xa5 },
     57     /*{ "ibm-54191_P100-2006", 0xa5 },*/
     58     /*{ "ibm-62383_P100-2007", 0xa5 },*/
     59     /*{ "ibm-891_P100-1995", 0x20a9 },*/
     60     { "ibm-944_P100-1995", 0x20a9 },
     61     { "ibm-949_P110-1999", 0x20a9 },
     62     { "ibm-1363_P110-1997", 0x20a9 },
     63     { "ISO_2022,locale=ko,version=0", 0x20a9 },
     64     { "ibm-1088_P100-1995", 0x20a9 }
     65 };
    67 /*Calls through createConverter */
    68 U_CAPI UConverter* U_EXPORT2
    69 ucnv_open (const char *name,
    70                        UErrorCode * err)
    71 {
    72     UConverter *r;
    74     if (err == NULL || U_FAILURE (*err)) {
    75         return NULL;
    76     }
    78     r =  ucnv_createConverter(NULL, name, err);
    79     return r;
    80 }
    82 U_CAPI UConverter* U_EXPORT2 
    83 ucnv_openPackage   (const char *packageName, const char *converterName, UErrorCode * err)
    84 {
    85     return ucnv_createConverterFromPackage(packageName, converterName,  err);
    86 }
    88 /*Extracts the UChar* to a char* and calls through createConverter */
    89 U_CAPI UConverter*   U_EXPORT2
    90 ucnv_openU (const UChar * name,
    91                          UErrorCode * err)
    92 {
    93     char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    95     if (err == NULL || U_FAILURE(*err))
    96         return NULL;
    97     if (name == NULL)
    98         return ucnv_open (NULL, err);
    99     if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
   100     {
   101         *err = U_ILLEGAL_ARGUMENT_ERROR;
   102         return NULL;
   103     }
   104     return ucnv_open(u_austrcpy(asciiName, name), err);
   105 }
   107 /* Copy the string that is represented by the UConverterPlatform enum
   108  * @param platformString An output buffer
   109  * @param platform An enum representing a platform
   110  * @return the length of the copied string.
   111  */
   112 static int32_t
   113 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
   114 {
   115     switch (pltfrm)
   116     {
   117     case UCNV_IBM:
   118         uprv_strcpy(platformString, "ibm-");
   119         return 4;
   120     case UCNV_UNKNOWN:
   121         break;
   122     }
   124     /* default to empty string */
   125     *platformString = 0;
   126     return 0;
   127 }
   129 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
   130  *through createConverter*/
   131 U_CAPI UConverter*   U_EXPORT2
   132 ucnv_openCCSID (int32_t codepage,
   133                 UConverterPlatform platform,
   134                 UErrorCode * err)
   135 {
   136     char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
   137     int32_t myNameLen;
   139     if (err == NULL || U_FAILURE (*err))
   140         return NULL;
   142     /* ucnv_copyPlatformString could return "ibm-" or "cp" */
   143     myNameLen = ucnv_copyPlatformString(myName, platform);
   144     T_CString_integerToString(myName + myNameLen, codepage, 10);
   146     return ucnv_createConverter(NULL, myName, err);
   147 }
   149 /* Creating a temporary stack-based object that can be used in one thread, 
   150 and created from a converter that is shared across threads.
   151 */
   153 U_CAPI UConverter* U_EXPORT2
   154 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
   155 {
   156     UConverter *localConverter, *allocatedConverter;
   157     int32_t stackBufferSize;
   158     int32_t bufferSizeNeeded;
   159     char *stackBufferChars = (char *)stackBuffer;
   160     UErrorCode cbErr;
   161     UConverterToUnicodeArgs toUArgs = {
   162         sizeof(UConverterToUnicodeArgs),
   163             TRUE,
   164             NULL,
   165             NULL,
   166             NULL,
   167             NULL,
   168             NULL,
   169             NULL
   170     };
   171     UConverterFromUnicodeArgs fromUArgs = {
   172         sizeof(UConverterFromUnicodeArgs),
   173             TRUE,
   174             NULL,
   175             NULL,
   176             NULL,
   177             NULL,
   178             NULL,
   179             NULL
   180     };
   182     UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
   184     if (status == NULL || U_FAILURE(*status)){
   185         UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
   186         return NULL;
   187     }
   189     if (cnv == NULL) {
   190         *status = U_ILLEGAL_ARGUMENT_ERROR;
   191         UTRACE_EXIT_STATUS(*status);
   192         return NULL;
   193     }
   195     UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
   196                                     ucnv_getName(cnv, status), cnv, stackBuffer);
   198     if (cnv->sharedData->impl->safeClone != NULL) {
   199         /* call the custom safeClone function for sizing */
   200         bufferSizeNeeded = 0;
   201         cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
   202         if (U_FAILURE(*status)) {
   203             UTRACE_EXIT_STATUS(*status);
   204             return NULL;
   205         }
   206     }
   207     else
   208     {
   209         /* inherent sizing */
   210         bufferSizeNeeded = sizeof(UConverter);
   211     }
   213     if (pBufferSize == NULL) {
   214         stackBufferSize = 1;
   215         pBufferSize = &stackBufferSize;
   216     } else {
   217         stackBufferSize = *pBufferSize;
   218         if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
   219             *pBufferSize = bufferSizeNeeded;
   220             UTRACE_EXIT_VALUE(bufferSizeNeeded);
   221             return NULL;
   222         }
   223     }
   226     /* Pointers on 64-bit platforms need to be aligned
   227      * on a 64-bit boundary in memory.
   228      */
   229     if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
   230         int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
   231         if(stackBufferSize > offsetUp) {
   232             stackBufferSize -= offsetUp;
   233             stackBufferChars += offsetUp;
   234         } else {
   235             /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
   236             stackBufferSize = 1;
   237         }
   238     }
   240     stackBuffer = (void *)stackBufferChars;
   242     /* Now, see if we must allocate any memory */
   243     if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
   244     {
   245         /* allocate one here...*/
   246         localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
   248         if(localConverter == NULL) {
   249             *status = U_MEMORY_ALLOCATION_ERROR;
   250             UTRACE_EXIT_STATUS(*status);
   251             return NULL;
   252         }
   253         *status = U_SAFECLONE_ALLOCATED_WARNING;
   255         /* record the fact that memory was allocated */
   256         *pBufferSize = bufferSizeNeeded;
   257     } else {
   258         /* just use the stack buffer */
   259         localConverter = (UConverter*) stackBuffer;
   260         allocatedConverter = NULL;
   261     }
   263     uprv_memset(localConverter, 0, bufferSizeNeeded);
   265     /* Copy initial state */
   266     uprv_memcpy(localConverter, cnv, sizeof(UConverter));
   267     localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
   269     /* copy the substitution string */
   270     if (cnv->subChars == (uint8_t *)cnv->subUChars) {
   271         localConverter->subChars = (uint8_t *)localConverter->subUChars;
   272     } else {
   273         localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
   274         if (localConverter->subChars == NULL) {
   275             uprv_free(allocatedConverter);
   276             UTRACE_EXIT_STATUS(*status);
   277             return NULL;
   278         }
   279         uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
   280     }
   282     /* now either call the safeclone fcn or not */
   283     if (cnv->sharedData->impl->safeClone != NULL) {
   284         /* call the custom safeClone function */
   285         localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
   286     }
   288     if(localConverter==NULL || U_FAILURE(*status)) {
   289         if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
   290             uprv_free(allocatedConverter->subChars);
   291         }
   292         uprv_free(allocatedConverter);
   293         UTRACE_EXIT_STATUS(*status);
   294         return NULL;
   295     }
   297     /* increment refcount of shared data if needed */
   298     /*
   299     Checking whether it's an algorithic converter is okay
   300     in multithreaded applications because the value never changes.
   301     Don't check referenceCounter for any other value.
   302     */
   303     if (cnv->sharedData->referenceCounter != ~0) {
   304         ucnv_incrementRefCount(cnv->sharedData);
   305     }
   307     if(localConverter == (UConverter*)stackBuffer) {
   308         /* we're using user provided data - set to not destroy */
   309         localConverter->isCopyLocal = TRUE;
   310     }
   312     /* allow callback functions to handle any memory allocation */
   313     toUArgs.converter = fromUArgs.converter = localConverter;
   314     cbErr = U_ZERO_ERROR;
   315     cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
   316     cbErr = U_ZERO_ERROR;
   317     cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
   319     UTRACE_EXIT_PTR_STATUS(localConverter, *status);
   320     return localConverter;
   321 }
   325 /*Decreases the reference counter in the shared immutable section of the object
   326  *and frees the mutable part*/
    /*
     * Closes a converter. A NULL converter is a no-op.
     * Steps, in order: notify each non-default callback with UCNV_CLOSE, call
     * the implementation's close function if it has one, free subChars when it
     * does not point at the converter's own subUChars, release the shared data
     * unless its referenceCounter is ~0, and finally free the converter itself
     * unless isCopyLocal is set.
     */
    328 U_CAPI void  U_EXPORT2
    329 ucnv_close (UConverter * converter)
    330 {
    331     UErrorCode errorCode = U_ZERO_ERROR;
    333     UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
    335     if (converter == NULL)
    336     {
    337         UTRACE_EXIT();
    338         return;
    339     }
    341     UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
    342         ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
    344     /* In order to speed up the close, only call the callbacks when they have been changed.
    345     This performance check will only work when the callbacks are set within a shared library
    346     or from user code that statically links this code. */
    347     /* first, notify the callback functions that the converter is closed */
    348     if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
    349         UConverterToUnicodeArgs toUArgs = {
    350             sizeof(UConverterToUnicodeArgs),
    351                 TRUE,
    352                 NULL,
    353                 NULL,
    354                 NULL,
    355                 NULL,
    356                 NULL,
    357                 NULL
    358         };
    360         toUArgs.converter = converter;
                /* errorCode is reset before the call; its value afterwards is not examined */
    361         errorCode = U_ZERO_ERROR;
    362         converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
    363     }
    364     if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
    365         UConverterFromUnicodeArgs fromUArgs = {
    366             sizeof(UConverterFromUnicodeArgs),
    367                 TRUE,
    368                 NULL,
    369                 NULL,
    370                 NULL,
    371                 NULL,
    372                 NULL,
    373                 NULL
    374         };
    375         fromUArgs.converter = converter;
    376         errorCode = U_ZERO_ERROR;
    377         converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
    378     }
            /* give the converter implementation a chance to clean up */
    380     if (converter->sharedData->impl->close != NULL) {
    381         converter->sharedData->impl->close(converter);
    382     }
            /* subChars is freed only when it is not the converter's own subUChars field */
    384     if (converter->subChars != (uint8_t *)converter->subUChars) {
    385         uprv_free(converter->subChars);
    386     }
    388     /*
    389     Checking whether it's an algorithic converter is okay
    390     in multithreaded applications because the value never changes.
    391     Don't check referenceCounter for any other value.
    392     */
    393     if (converter->sharedData->referenceCounter != ~0) {
    394         ucnv_unloadSharedDataIfReady(converter->sharedData);
    395     }
            /* a converter marked isCopyLocal is not freed here */
    397     if(!converter->isCopyLocal){
    398         uprv_free(converter);
    399     }
    401     UTRACE_EXIT();
    402 }
   404 /*returns a single Name from the list, will return NULL if out of bounds
   405  */
   406 U_CAPI const char*   U_EXPORT2
   407 ucnv_getAvailableName (int32_t n)
   408 {
   409     if (0 <= n && n <= 0xffff) {
   410         UErrorCode err = U_ZERO_ERROR;
   411         const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
   412         if (U_SUCCESS(err)) {
   413             return name;
   414         }
   415     }
   416     return NULL;
   417 }
   419 U_CAPI int32_t   U_EXPORT2
   420 ucnv_countAvailable ()
   421 {
   422     UErrorCode err = U_ZERO_ERROR;
   423     return ucnv_bld_countAvailableConverters(&err);
   424 }
   426 U_CAPI void    U_EXPORT2
   427 ucnv_getSubstChars (const UConverter * converter,
   428                     char *mySubChar,
   429                     int8_t * len,
   430                     UErrorCode * err)
   431 {
   432     if (U_FAILURE (*err))
   433         return;
   435     if (converter->subCharLen <= 0) {
   436         /* Unicode string or empty string from ucnv_setSubstString(). */
   437         *len = 0;
   438         return;
   439     }
   441     if (*len < converter->subCharLen) /*not enough space in subChars */
   442     {
   443         *err = U_INDEX_OUTOFBOUNDS_ERROR;
   444         return;
   445     }
   447     uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen);   /*fills in the subchars */
   448     *len = converter->subCharLen; /*store # of bytes copied to buffer */
   449 }
   451 U_CAPI void    U_EXPORT2
   452 ucnv_setSubstChars (UConverter * converter,
   453                     const char *mySubChar,
   454                     int8_t len,
   455                     UErrorCode * err)
   456 {
   457     if (U_FAILURE (*err))
   458         return;
   460     /*Makes sure that the subChar is within the codepages char length boundaries */
   461     if ((len > converter->sharedData->staticData->maxBytesPerChar)
   462      || (len < converter->sharedData->staticData->minBytesPerChar))
   463     {
   464         *err = U_ILLEGAL_ARGUMENT_ERROR;
   465         return;
   466     }
   468     uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
   469     converter->subCharLen = len;  /*sets the new len */
   471     /*
   472     * There is currently (2001Feb) no separate API to set/get subChar1.
   473     * In order to always have subChar written after it is explicitly set,
   474     * we set subChar1 to 0.
   475     */
   476     converter->subChar1 = 0;
   478     return;
   479 }
    /*
     * Sets the converter's substitution string from a UChar string.
     * The string is first converted with a clone of cnv (using the STOP
     * from-Unicode callback) into a local byte buffer; any failure from that
     * step is returned unchanged. Depending on the converter, either the
     * converted bytes or the original UChars are then stored in cnv->subChars.
     * subCharLen is set to the byte count in the first case and to -length
     * (a negative UChar count) in the second.
     *
     * @param cnv    converter to modify
     * @param s      substitution string
     * @param length number of UChars in s; a negative value means that
     *               u_strlen(s) is used when the UChars are stored
     * @param err    in/out error code
     */
    481 U_CAPI void U_EXPORT2
    482 ucnv_setSubstString(UConverter *cnv,
    483                     const UChar *s,
    484                     int32_t length,
    485                     UErrorCode *err) {
    486     UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
    487     char chars[UCNV_ERROR_BUFFER_LENGTH];
    489     UConverter *clone;
    490     uint8_t *subChars;
    491     int32_t cloneSize, length8;
    493     /* Let the following functions check all arguments. */
    494     cloneSize = sizeof(cloneBuffer);
    495     clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
    496     ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
    497     length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
    498     ucnv_close(clone);
    499     if (U_FAILURE(*err)) {
    500         return;
    501     }
    503     if (cnv->sharedData->impl->writeSub == NULL
    504 #if !UCONFIG_NO_LEGACY_CONVERSION
    505         || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
    506          ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
    507 #endif
    508     ) {
    509         /* The converter is not stateful. Store the charset bytes as a fixed string. */
    510         subChars = (uint8_t *)chars;
    511     } else {
    512         /*
    513          * The converter has a non-default writeSub() function, indicating
    514          * that it is stateful.
    515          * Store the Unicode string for on-the-fly conversion for correct
    516          * state handling.
    517          */
    518         if (length > UCNV_ERROR_BUFFER_LENGTH) {
    519             /*
    520              * Should not occur. The converter should output at least one byte
    521              * per UChar, which means that ucnv_fromUChars() should catch all
    522              * overflows.
    523              */
    524             *err = U_BUFFER_OVERFLOW_ERROR;
    525             return;
    526         }
    527         subChars = (uint8_t *)s;
    528         if (length < 0) {
    529             length = u_strlen(s);
    530         }
                /* from here on length8 is the size of the UChar string in bytes */
    531         length8 = length * U_SIZEOF_UCHAR;
    532     }
    534     /*
    535      * For storing the substitution string, select either the small buffer inside
    536      * UConverter or allocate a subChars buffer.
    537      */
    538     if (length8 > UCNV_MAX_SUBCHAR_LEN) {
    539         /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
    540         if (cnv->subChars == (uint8_t *)cnv->subUChars) {
    541             /* Allocate a new buffer for the string. */
    542             cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    543             if (cnv->subChars == NULL) {
                        /* allocation failed: point subChars back at the internal buffer */
    544                 cnv->subChars = (uint8_t *)cnv->subUChars;
    545                 *err = U_MEMORY_ALLOCATION_ERROR;
    546                 return;
    547             }
    548             uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
    549         }
    550     }
    552     /* Copy the substitution string into the UConverter or its subChars buffer. */
    553     if (length8 == 0) {
    554         cnv->subCharLen = 0;
    555     } else {
    556         uprv_memcpy(cnv->subChars, subChars, length8);
    557         if (subChars == (uint8_t *)chars) {
    558             cnv->subCharLen = (int8_t)length8;
    559         } else /* subChars == s */ {
                    /* a negative subCharLen records that UChars, not bytes, are stored */
    560             cnv->subCharLen = (int8_t)-length;
    561         }
    562     }
    564     /* See comment in ucnv_setSubstChars(). */
    565     cnv->subChar1 = 0;
    566 }
   568 /*resets the internal states of a converter
   569  *goal : have the same behaviour than a freshly created converter
   570  */
   571 static void _reset(UConverter *converter, UConverterResetChoice choice,
   572                    UBool callCallback) {
   573     if(converter == NULL) {
   574         return;
   575     }
   577     if(callCallback) {
   578         /* first, notify the callback functions that the converter is reset */
   579         UErrorCode errorCode;
   581         if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
   582             UConverterToUnicodeArgs toUArgs = {
   583                 sizeof(UConverterToUnicodeArgs),
   584                 TRUE,
   585                 NULL,
   586                 NULL,
   587                 NULL,
   588                 NULL,
   589                 NULL,
   590                 NULL
   591             };
   592             toUArgs.converter = converter;
   593             errorCode = U_ZERO_ERROR;
   594             converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
   595         }
   596         if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
   597             UConverterFromUnicodeArgs fromUArgs = {
   598                 sizeof(UConverterFromUnicodeArgs),
   599                 TRUE,
   600                 NULL,
   601                 NULL,
   602                 NULL,
   603                 NULL,
   604                 NULL,
   605                 NULL
   606             };
   607             fromUArgs.converter = converter;
   608             errorCode = U_ZERO_ERROR;
   609             converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
   610         }
   611     }
   613     /* now reset the converter itself */
   614     if(choice<=UCNV_RESET_TO_UNICODE) {
   615         converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
   616         converter->mode = 0;
   617         converter->toULength = 0;
   618         converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
   619         converter->preToULength = 0;
   620     }
   621     if(choice!=UCNV_RESET_TO_UNICODE) {
   622         converter->fromUnicodeStatus = 0;
   623         converter->fromUChar32 = 0;
   624         converter->invalidUCharLength = converter->charErrorBufferLength = 0;
   625         converter->preFromUFirstCP = U_SENTINEL;
   626         converter->preFromULength = 0;
   627     }
   629     if (converter->sharedData->impl->reset != NULL) {
   630         /* call the custom reset function */
   631         converter->sharedData->impl->reset(converter, choice);
   632     }
   633 }
   635 U_CAPI void  U_EXPORT2
   636 ucnv_reset(UConverter *converter)
   637 {
   638     _reset(converter, UCNV_RESET_BOTH, TRUE);
   639 }
   641 U_CAPI void  U_EXPORT2
   642 ucnv_resetToUnicode(UConverter *converter)
   643 {
   644     _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
   645 }
   647 U_CAPI void  U_EXPORT2
   648 ucnv_resetFromUnicode(UConverter *converter)
   649 {
   650     _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
   651 }
   653 U_CAPI int8_t   U_EXPORT2
   654 ucnv_getMaxCharSize (const UConverter * converter)
   655 {
   656     return converter->maxBytesPerUChar;
   657 }
   660 U_CAPI int8_t   U_EXPORT2
   661 ucnv_getMinCharSize (const UConverter * converter)
   662 {
   663     return converter->sharedData->staticData->minBytesPerChar;
   664 }
   666 U_CAPI const char*   U_EXPORT2
   667 ucnv_getName (const UConverter * converter, UErrorCode * err)
   669 {
   670     if (U_FAILURE (*err))
   671         return NULL;
   672     if(converter->sharedData->impl->getName){
   673         const char* temp= converter->sharedData->impl->getName(converter);
   674         if(temp)
   675             return temp;
   676     }
   677     return converter->sharedData->staticData->name;
   678 }
   680 U_CAPI int32_t U_EXPORT2
   681 ucnv_getCCSID(const UConverter * converter,
   682               UErrorCode * err)
   683 {
   684     int32_t ccsid;
   685     if (U_FAILURE (*err))
   686         return -1;
   688     ccsid = converter->sharedData->staticData->codepage;
   689     if (ccsid == 0) {
   690         /* Rare case. This is for cases like gb18030,
   691         which doesn't have an IBM canonical name, but does have an IBM alias. */
   692         const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
   693         if (U_SUCCESS(*err) && standardName) {
   694             const char *ccsidStr = uprv_strchr(standardName, '-');
   695             if (ccsidStr) {
   696                 ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
   697             }
   698         }
   699     }
   700     return ccsid;
   701 }
   704 U_CAPI UConverterPlatform   U_EXPORT2
   705 ucnv_getPlatform (const UConverter * converter,
   706                                       UErrorCode * err)
   707 {
   708     if (U_FAILURE (*err))
   709         return UCNV_UNKNOWN;
   711     return (UConverterPlatform)converter->sharedData->staticData->platform;
   712 }
   714 U_CAPI void U_EXPORT2
   715     ucnv_getToUCallBack (const UConverter * converter,
   716                          UConverterToUCallback *action,
   717                          const void **context)
   718 {
   719     *action = converter->fromCharErrorBehaviour;
   720     *context = converter->toUContext;
   721 }
   723 U_CAPI void U_EXPORT2
   724     ucnv_getFromUCallBack (const UConverter * converter,
   725                            UConverterFromUCallback *action,
   726                            const void **context)
   727 {
   728     *action = converter->fromUCharErrorBehaviour;
   729     *context = converter->fromUContext;
   730 }
   732 U_CAPI void    U_EXPORT2
   733 ucnv_setToUCallBack (UConverter * converter,
   734                             UConverterToUCallback newAction,
   735                             const void* newContext,
   736                             UConverterToUCallback *oldAction,
   737                             const void** oldContext,
   738                             UErrorCode * err)
   739 {
   740     if (U_FAILURE (*err))
   741         return;
   742     if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
   743     converter->fromCharErrorBehaviour = newAction;
   744     if (oldContext) *oldContext = converter->toUContext;
   745     converter->toUContext = newContext;
   746 }
   748 U_CAPI void  U_EXPORT2
   749 ucnv_setFromUCallBack (UConverter * converter,
   750                             UConverterFromUCallback newAction,
   751                             const void* newContext,
   752                             UConverterFromUCallback *oldAction,
   753                             const void** oldContext,
   754                             UErrorCode * err)
   755 {
   756     if (U_FAILURE (*err))
   757         return;
   758     if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
   759     converter->fromUCharErrorBehaviour = newAction;
   760     if (oldContext) *oldContext = converter->fromUContext;
   761     converter->fromUContext = newContext;
   762 }
   764 static void
   765 _updateOffsets(int32_t *offsets, int32_t length,
   766                int32_t sourceIndex, int32_t errorInputLength) {
   767     int32_t *limit;
   768     int32_t delta, offset;
   770     if(sourceIndex>=0) {
   771         /*
   772          * adjust each offset by adding the previous sourceIndex
   773          * minus the length of the input sequence that caused an
   774          * error, if any
   775          */
   776         delta=sourceIndex-errorInputLength;
   777     } else {
   778         /*
   779          * set each offset to -1 because this conversion function
   780          * does not handle offsets
   781          */
   782         delta=-1;
   783     }
   785     limit=offsets+length;
   786     if(delta==0) {
   787         /* most common case, nothing to do */
   788     } else if(delta>0) {
   789         /* add the delta to each offset (but not if the offset is <0) */
   790         while(offsets<limit) {
   791             offset=*offsets;
   792             if(offset>=0) {
   793                 *offsets=offset+delta;
   794             }
   795             ++offsets;
   796         }
   797     } else /* delta<0 */ {
   798         /*
   799          * set each offset to -1 because this conversion function
   800          * does not handle offsets
   801          * or the error input sequence started in a previous buffer
   802          */
   803         while(offsets<limit) {
   804             *offsets++=-1;
   805         }
   806     }
   807 }
   809 /* ucnv_fromUnicode --------------------------------------------------------- */
   811 /*
   812  * Implementation note for m:n conversions
   813  *
   814  * While collecting source units to find the longest match for m:n conversion,
   815  * some source units may need to be stored for a partial match.
   816  * When a second buffer does not yield a match on all of the previously stored
   817  * source units, then they must be "replayed", i.e., fed back into the converter.
   818  *
   819  * The code relies on the fact that replaying will not nest -
   820  * converting a replay buffer will not result in a replay.
   821  * This is because a replay is necessary only after the _continuation_ of a
   822  * partial match failed, but a replay buffer is converted as a whole.
   823  * It may result in some of its units being stored again for a partial match,
   824  * but there will not be a continuation _during_ the replay which could fail.
   825  *
   826  * It is conceivable that a callback function could call the converter
   827  * recursively in a way that causes another replay to be stored, but that
   828  * would be an error in the callback function.
   829  * Such violations will cause assertion failures in a debug build,
   830  * and wrong output, but they will not cause a crash.
   831  */
       /*
        * _fromUnicodeWithCallback
        *
        * Runs the converter's fromUnicode implementation over pArgs in a loop
        * and does the bookkeeping around it: it fills in/advances the offsets
        * array, replays units that an earlier m:n partial match left in
        * cnv->preFromU, turns leftover input at flush time into
        * U_TRUNCATED_CHAR_FOUND, and calls cnv->fromUCharErrorBehaviour for
        * U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND.
        *
        * pArgs  conversion arguments; source, target and offsets are read back
        *        after every implementation/callback call, and pArgs->offsets is
        *        advanced here by the number of bytes written.
        *        While replaying, source/sourceLimit/flush temporarily point at a
        *        local replay buffer; the caller's values are restored before
        *        this function returns.
        * err    in/out error code. If it is already a failure on entry, the
        *        conversion step is skipped and only the error handling runs.
        *
        * Returns when all input is consumed without error, or when an error
        * remains that the callback did not or cannot resolve (including
        * U_BUFFER_OVERFLOW_ERROR).
        */
   833 static void
   834 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
   835     UConverterFromUnicode fromUnicode;
   836     UConverter *cnv;
   837     const UChar *s;
   838     char *t;
   839     int32_t *offsets;
   840     int32_t sourceIndex;
   841     int32_t errorInputLength;
   842     UBool converterSawEndOfInput, calledCallback;
   844     /* variables for m:n conversion */
   845     UChar replay[UCNV_EXT_MAX_UCHARS];
   846     const UChar *realSource, *realSourceLimit;
   847     int32_t realSourceIndex;
   848     UBool realFlush;
   850     cnv=pArgs->converter;
   851     s=pArgs->source;
   852     t=pArgs->target;
   853     offsets=pArgs->offsets;
   855     /* get the converter implementation function */
   856     sourceIndex=0;
   857     if(offsets==NULL) {
   858         fromUnicode=cnv->sharedData->impl->fromUnicode;
   859     } else {
   860         fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
   861         if(fromUnicode==NULL) {
   862             /* there is no WithOffsets implementation */
   863             fromUnicode=cnv->sharedData->impl->fromUnicode;
   864             /* we will write -1 for each offset */
   865             sourceIndex=-1;
   866         }
   867     }
           /*
            * preFromULength<0 means that -preFromULength UChars in cnv->preFromU
            * are waiting to be replayed; realSource!=NULL is used from here on
            * as the "currently replaying" flag.
            */
   869     if(cnv->preFromULength>=0) {
   870         /* normal mode */
   871         realSource=NULL;
   873         /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
   874         realSourceLimit=NULL;
   875         realFlush=FALSE;
   876         realSourceIndex=0;
   877     } else {
   878         /*
   879          * Previous m:n conversion stored source units from a partial match
   880          * and failed to consume all of them.
   881          * We need to "replay" them from a temporary buffer and convert them first.
   882          */
   883         realSource=pArgs->source;
   884         realSourceLimit=pArgs->sourceLimit;
   885         realFlush=pArgs->flush;
   886         realSourceIndex=sourceIndex;
   888         uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
   889         pArgs->source=replay;
   890         pArgs->sourceLimit=replay-cnv->preFromULength;
   891         pArgs->flush=FALSE;
   892         sourceIndex=-1;
   894         cnv->preFromULength=0;
   895     }
   897     /*
   898      * loop for conversion and error handling
   899      *
   900      * loop {
   901      *   convert
   902      *   loop {
   903      *     update offsets
   904      *     handle end of input
   905      *     handle errors/call callback
   906      *   }
   907      * }
   908      */
   909     for(;;) {
   910         if(U_SUCCESS(*err)) {
   911             /* convert */
   912             fromUnicode(pArgs, err);
   914             /*
   915              * set a flag for whether the converter
   916              * successfully processed the end of the input
   917              *
   918              * need not check cnv->preFromULength==0 because a replay (<0) will cause
   919              * s<sourceLimit before converterSawEndOfInput is checked
   920              */
   921             converterSawEndOfInput=
   922                 (UBool)(U_SUCCESS(*err) &&
   923                         pArgs->flush && pArgs->source==pArgs->sourceLimit &&
   924                         cnv->fromUChar32==0);
   925         } else {
   926             /* handle error from ucnv_convertEx() */
   927             converterSawEndOfInput=FALSE;
   928         }
   930         /* no callback called yet for this iteration */
   931         calledCallback=FALSE;
   933         /* no sourceIndex adjustment for conversion, only for callback output */
   934         errorInputLength=0;
   936         /*
   937          * loop for offsets and error handling
   938          *
   939          * iterates at most 3 times:
   940          * 1. to clean up after the conversion function
   941          * 2. after the callback
   942          * 3. after the callback again if there was truncated input
   943          */
   944         for(;;) {
   945             /* update offsets if we write any */
   946             if(offsets!=NULL) {
   947                 int32_t length=(int32_t)(pArgs->target-t);
   948                 if(length>0) {
   949                     _updateOffsets(offsets, length, sourceIndex, errorInputLength);
   951                     /*
   952                      * if a converter handles offsets and updates the offsets
   953                      * pointer at the end, then pArgs->offsets should not change
   954                      * here;
   955                      * however, some converters do not handle offsets at all
   956                      * (sourceIndex<0) or may not update the offsets pointer
   957                      */
   958                     pArgs->offsets=offsets+=length;
   959                 }
   961                 if(sourceIndex>=0) {
   962                     sourceIndex+=(int32_t)(pArgs->source-s);
   963                 }
   964             }
   966             if(cnv->preFromULength<0) {
   967                 /*
   968                  * switch the source to new replay units (cannot occur while replaying)
   969                  * after offset handling and before end-of-input and callback handling
   970                  */
   971                 if(realSource==NULL) {
   972                     realSource=pArgs->source;
   973                     realSourceLimit=pArgs->sourceLimit;
   974                     realFlush=pArgs->flush;
   975                     realSourceIndex=sourceIndex;
   977                     uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
   978                     pArgs->source=replay;
   979                     pArgs->sourceLimit=replay-cnv->preFromULength;
   980                     pArgs->flush=FALSE;
                           /*
                            * preFromULength is negative here, so this moves sourceIndex
                            * back by the number of replayed units; if that lands before
                            * the start of the current buffer, fall back to -1
                            */
   981                     if((sourceIndex+=cnv->preFromULength)<0) {
   982                         sourceIndex=-1;
   983                     }
   985                     cnv->preFromULength=0;
   986                 } else {
   987                     /* see implementation note before _fromUnicodeWithCallback() */
   988                     U_ASSERT(realSource==NULL);
   989                     *err=U_INTERNAL_PROGRAM_ERROR;
   990                 }
   991             }
   993             /* update pointers */
   994             s=pArgs->source;
   995             t=pArgs->target;
   997             if(U_SUCCESS(*err)) {
   998                 if(s<pArgs->sourceLimit) {
   999                     /*
  1000                      * continue with the conversion loop while there is still input left
  1001                      * (continue converting by breaking out of only the inner loop)
  1002                      */
  1003                     break;
  1004                 } else if(realSource!=NULL) {
  1005                     /* switch back from replaying to the real source and continue */
  1006                     pArgs->source=realSource;
  1007                     pArgs->sourceLimit=realSourceLimit;
  1008                     pArgs->flush=realFlush;
  1009                     sourceIndex=realSourceIndex;
  1011                     realSource=NULL;
  1012                     break;
  1013                 } else if(pArgs->flush && cnv->fromUChar32!=0) {
  1014                     /*
  1015                      * the entire input stream is consumed
  1016                      * and there is a partial, truncated input sequence left
  1017                      */
  1019                     /* inject an error and continue with callback handling */
  1020                     *err=U_TRUNCATED_CHAR_FOUND;
  1021                     calledCallback=FALSE; /* new error condition */
  1022                 } else {
  1023                     /* input consumed */
  1024                     if(pArgs->flush) {
  1025                         /*
  1026                          * return to the conversion loop once more if the flush
  1027                          * flag is set and the conversion function has not
  1028                          * successfully processed the end of the input yet
  1030                          * (continue converting by breaking out of only the inner loop)
  1031                          */
  1032                         if(!converterSawEndOfInput) {
  1033                             break;
  1036                         /* reset the converter without calling the callback function */
  1037                         _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
  1040                     /* done successfully */
  1041                     return;
  1045             /* U_FAILURE(*err) */
  1047                 UErrorCode e;
                       /*
                        * e is assigned inside the condition; because of the || short
                        * circuit it is only assigned (and only read) when
                        * calledCallback is FALSE
                        */
  1049                 if( calledCallback ||
  1050                     (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
  1051                     (e!=U_INVALID_CHAR_FOUND &&
  1052                      e!=U_ILLEGAL_CHAR_FOUND &&
  1053                      e!=U_TRUNCATED_CHAR_FOUND)
  1054                 ) {
  1055                     /*
  1056                      * the callback did not or cannot resolve the error:
  1057                      * set output pointers and return
  1059                      * the check for buffer overflow is redundant but it is
  1060                      * a high-runner case and hopefully documents the intent
  1061                      * well
  1063                      * if we were replaying, then the replay buffer must be
  1064                      * copied back into the UConverter
  1065                      * and the real arguments must be restored
  1066                      */
  1067                     if(realSource!=NULL) {
  1068                         int32_t length;
  1070                         U_ASSERT(cnv->preFromULength==0);
  1072                         length=(int32_t)(pArgs->sourceLimit-pArgs->source);
  1073                         if(length>0) {
  1074                             uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
                                   /* stored negated so that the next call sees preFromULength<0 and replays */
  1075                             cnv->preFromULength=(int8_t)-length;
  1078                         pArgs->source=realSource;
  1079                         pArgs->sourceLimit=realSourceLimit;
  1080                         pArgs->flush=realFlush;
  1083                     return;
  1087             /* callback handling */
  1089                 UChar32 codePoint;
  1091                 /* get and write the code point */
  1092                 codePoint=cnv->fromUChar32;
  1093                 errorInputLength=0;
  1094                 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
  1095                 cnv->invalidUCharLength=(int8_t)errorInputLength;
  1097                 /* set the converter state to deal with the next character */
  1098                 cnv->fromUChar32=0;
  1100                 /* call the callback function */
                       /*
                        * reason passed to the callback: UCNV_UNASSIGNED only for
                        * U_INVALID_CHAR_FOUND, UCNV_ILLEGAL for the other two
                        * callback-eligible errors (illegal and truncated)
                        */
  1101                 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
  1102                     cnv->invalidUCharBuffer, errorInputLength, codePoint,
  1103                     *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
  1104                     err);
  1107             /*
  1108              * loop back to the offset handling
  1110              * this flag will indicate after offset handling
  1111              * that a callback was called;
  1112              * if the callback did not resolve the error, then we return
  1113              */
  1114             calledCallback=TRUE;
  1119 /*
  1120  * Output the fromUnicode overflow buffer.
  1121  * Call this function if(cnv->charErrorBufferLength>0).
  1122  * @return TRUE if overflow
  1123  */
  1124 static UBool
  1125 ucnv_outputOverflowFromUnicode(UConverter *cnv,
  1126                                char **target, const char *targetLimit,
  1127                                int32_t **pOffsets,
  1128                                UErrorCode *err) {
  1129     int32_t *offsets;
  1130     char *overflow, *t;
  1131     int32_t i, length;
  1133     t=*target;
  1134     if(pOffsets!=NULL) {
  1135         offsets=*pOffsets;
  1136     } else {
  1137         offsets=NULL;
  1140     overflow=(char *)cnv->charErrorBuffer;
  1141     length=cnv->charErrorBufferLength;
  1142     i=0;
  1143     while(i<length) {
  1144         if(t==targetLimit) {
  1145             /* the overflow buffer contains too much, keep the rest */
  1146             int32_t j=0;
  1148             do {
  1149                 overflow[j++]=overflow[i++];
  1150             } while(i<length);
  1152             cnv->charErrorBufferLength=(int8_t)j;
  1153             *target=t;
  1154             if(offsets!=NULL) {
  1155                 *pOffsets=offsets;
  1157             *err=U_BUFFER_OVERFLOW_ERROR;
  1158             return TRUE;
  1161         /* copy the overflow contents to the target */
  1162         *t++=overflow[i++];
  1163         if(offsets!=NULL) {
  1164             *offsets++=-1; /* no source index available for old output */
  1168     /* the overflow buffer is completely copied to the target */
  1169     cnv->charErrorBufferLength=0;
  1170     *target=t;
  1171     if(offsets!=NULL) {
  1172         *pOffsets=offsets;
  1174     return FALSE;
  1177 U_CAPI void U_EXPORT2
  1178 ucnv_fromUnicode(UConverter *cnv,
  1179                  char **target, const char *targetLimit,
  1180                  const UChar **source, const UChar *sourceLimit,
  1181                  int32_t *offsets,
  1182                  UBool flush,
  1183                  UErrorCode *err) {
  1184     UConverterFromUnicodeArgs args;
  1185     const UChar *s;
  1186     char *t;
  1188     /* check parameters */
  1189     if(err==NULL || U_FAILURE(*err)) {
  1190         return;
  1193     if(cnv==NULL || target==NULL || source==NULL) {
  1194         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1195         return;
  1198     s=*source;
  1199     t=*target;
  1201     if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
  1202         /*
  1203         Prevent code from going into an infinite loop in case we do hit this
  1204         limit. The limit pointer is expected to be on a UChar * boundary.
  1205         This also prevents the next argument check from failing.
  1206         */
  1207         sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
  1210     /*
  1211      * All these conditions should never happen.
  1213      * 1) Make sure that the limits are >= to the address source or target
  1215      * 2) Make sure that the buffer sizes do not exceed the number range for
  1216      * int32_t because some functions use the size (in units or bytes)
  1217      * rather than comparing pointers, and because offsets are int32_t values.
  1219      * size_t is guaranteed to be unsigned and large enough for the job.
  1221      * Return with an error instead of adjusting the limits because we would
  1222      * not be able to maintain the semantics that either the source must be
  1223      * consumed or the target filled (unless an error occurs).
  1224      * An adjustment would be targetLimit=t+0x7fffffff; for example.
  1226      * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
  1227      * to a char * pointer and provide an incomplete UChar code unit.
  1228      */
  1229     if (sourceLimit<s || targetLimit<t ||
  1230         ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
  1231         ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
  1232         (((const char *)sourceLimit-(const char *)s) & 1) != 0)
  1234         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1235         return;
  1238     /* output the target overflow buffer */
  1239     if( cnv->charErrorBufferLength>0 &&
  1240         ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
  1241     ) {
  1242         /* U_BUFFER_OVERFLOW_ERROR */
  1243         return;
  1245     /* *target may have moved, therefore stop using t */
  1247     if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
  1248         /* the overflow buffer is emptied and there is no new input: we are done */
  1249         return;
  1252     /*
  1253      * Do not simply return with a buffer overflow error if
  1254      * !flush && t==targetLimit
  1255      * because it is possible that the source will not generate any output.
  1256      * For example, the skip callback may be called;
  1257      * it does not output anything.
  1258      */
  1260     /* prepare the converter arguments */
  1261     args.converter=cnv;
  1262     args.flush=flush;
  1263     args.offsets=offsets;
  1264     args.source=s;
  1265     args.sourceLimit=sourceLimit;
  1266     args.target=*target;
  1267     args.targetLimit=targetLimit;
  1268     args.size=sizeof(args);
  1270     _fromUnicodeWithCallback(&args, err);
  1272     *source=args.source;
  1273     *target=args.target;
  1276 /* ucnv_toUnicode() --------------------------------------------------------- */
       /*
        * _toUnicodeWithCallback
        *
        * Runs the converter's toUnicode implementation over pArgs in a loop and
        * does the bookkeeping around it: it fills in/advances the offsets array,
        * replays bytes that an earlier m:n partial match left in cnv->preToU,
        * turns leftover input bytes at flush time into U_TRUNCATED_CHAR_FOUND,
        * and calls cnv->fromCharErrorBehaviour for U_INVALID_CHAR_FOUND,
        * U_ILLEGAL_CHAR_FOUND, U_TRUNCATED_CHAR_FOUND,
        * U_ILLEGAL_ESCAPE_SEQUENCE and U_UNSUPPORTED_ESCAPE_SEQUENCE.
        *
        * pArgs  conversion arguments; source, target and offsets are read back
        *        after every implementation/callback call, and pArgs->offsets is
        *        advanced here by the number of UChars written.
        *        While replaying, source/sourceLimit/flush temporarily point at a
        *        local replay buffer; the caller's values are restored before
        *        this function returns.
        * err    in/out error code. If it is already a failure on entry, the
        *        conversion step is skipped and only the error handling runs.
        *
        * Returns when all input is consumed without error, or when an error
        * remains that the callback did not or cannot resolve (including
        * U_BUFFER_OVERFLOW_ERROR).
        */
  1278 static void
  1279 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
  1280     UConverterToUnicode toUnicode;
  1281     UConverter *cnv;
  1282     const char *s;
  1283     UChar *t;
  1284     int32_t *offsets;
  1285     int32_t sourceIndex;
  1286     int32_t errorInputLength;
  1287     UBool converterSawEndOfInput, calledCallback;
  1289     /* variables for m:n conversion */
  1290     char replay[UCNV_EXT_MAX_BYTES];
  1291     const char *realSource, *realSourceLimit;
  1292     int32_t realSourceIndex;
  1293     UBool realFlush;
  1295     cnv=pArgs->converter;
  1296     s=pArgs->source;
  1297     t=pArgs->target;
  1298     offsets=pArgs->offsets;
  1300     /* get the converter implementation function */
  1301     sourceIndex=0;
  1302     if(offsets==NULL) {
  1303         toUnicode=cnv->sharedData->impl->toUnicode;
  1304     } else {
  1305         toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
  1306         if(toUnicode==NULL) {
  1307             /* there is no WithOffsets implementation */
  1308             toUnicode=cnv->sharedData->impl->toUnicode;
  1309             /* we will write -1 for each offset */
  1310             sourceIndex=-1;
           /*
            * preToULength<0 means that -preToULength bytes in cnv->preToU are
            * waiting to be replayed; realSource!=NULL is used from here on as
            * the "currently replaying" flag.
            */
  1314     if(cnv->preToULength>=0) {
  1315         /* normal mode */
  1316         realSource=NULL;
  1318         /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
  1319         realSourceLimit=NULL;
  1320         realFlush=FALSE;
  1321         realSourceIndex=0;
  1322     } else {
  1323         /*
  1324          * Previous m:n conversion stored source units from a partial match
  1325          * and failed to consume all of them.
  1326          * We need to "replay" them from a temporary buffer and convert them first.
  1327          */
  1328         realSource=pArgs->source;
  1329         realSourceLimit=pArgs->sourceLimit;
  1330         realFlush=pArgs->flush;
  1331         realSourceIndex=sourceIndex;
  1333         uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
  1334         pArgs->source=replay;
  1335         pArgs->sourceLimit=replay-cnv->preToULength;
  1336         pArgs->flush=FALSE;
  1337         sourceIndex=-1;
  1339         cnv->preToULength=0;
  1342     /*
  1343      * loop for conversion and error handling
  1345      * loop {
  1346      *   convert
  1347      *   loop {
  1348      *     update offsets
  1349      *     handle end of input
  1350      *     handle errors/call callback
  1351      *   }
  1352      * }
  1353      */
  1354     for(;;) {
  1355         if(U_SUCCESS(*err)) {
  1356             /* convert */
  1357             toUnicode(pArgs, err);
  1359             /*
  1360              * set a flag for whether the converter
  1361              * successfully processed the end of the input
  1363              * need not check cnv->preToULength==0 because a replay (<0) will cause
  1364              * s<sourceLimit before converterSawEndOfInput is checked
  1365              */
  1366             converterSawEndOfInput=
  1367                 (UBool)(U_SUCCESS(*err) &&
  1368                         pArgs->flush && pArgs->source==pArgs->sourceLimit &&
  1369                         cnv->toULength==0);
  1370         } else {
  1371             /* handle error from getNextUChar() or ucnv_convertEx() */
  1372             converterSawEndOfInput=FALSE;
  1375         /* no callback called yet for this iteration */
  1376         calledCallback=FALSE;
  1378         /* no sourceIndex adjustment for conversion, only for callback output */
  1379         errorInputLength=0;
  1381         /*
  1382          * loop for offsets and error handling
  1384          * iterates at most 3 times:
  1385          * 1. to clean up after the conversion function
  1386          * 2. after the callback
  1387          * 3. after the callback again if there was truncated input
  1388          */
  1389         for(;;) {
  1390             /* update offsets if we write any */
  1391             if(offsets!=NULL) {
  1392                 int32_t length=(int32_t)(pArgs->target-t);
  1393                 if(length>0) {
  1394                     _updateOffsets(offsets, length, sourceIndex, errorInputLength);
  1396                     /*
  1397                      * if a converter handles offsets and updates the offsets
  1398                      * pointer at the end, then pArgs->offsets should not change
  1399                      * here;
  1400                      * however, some converters do not handle offsets at all
  1401                      * (sourceIndex<0) or may not update the offsets pointer
  1402                      */
  1403                     pArgs->offsets=offsets+=length;
  1406                 if(sourceIndex>=0) {
  1407                     sourceIndex+=(int32_t)(pArgs->source-s);
  1411             if(cnv->preToULength<0) {
  1412                 /*
  1413                  * switch the source to new replay units (cannot occur while replaying)
  1414                  * after offset handling and before end-of-input and callback handling
  1415                  */
  1416                 if(realSource==NULL) {
  1417                     realSource=pArgs->source;
  1418                     realSourceLimit=pArgs->sourceLimit;
  1419                     realFlush=pArgs->flush;
  1420                     realSourceIndex=sourceIndex;
  1422                     uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
  1423                     pArgs->source=replay;
  1424                     pArgs->sourceLimit=replay-cnv->preToULength;
  1425                     pArgs->flush=FALSE;
                           /*
                            * preToULength is negative here, so this moves sourceIndex
                            * back by the number of replayed bytes; if that lands before
                            * the start of the current buffer, fall back to -1
                            */
  1426                     if((sourceIndex+=cnv->preToULength)<0) {
  1427                         sourceIndex=-1;
  1430                     cnv->preToULength=0;
  1431                 } else {
  1432                     /* see implementation note before _fromUnicodeWithCallback() */
  1433                     U_ASSERT(realSource==NULL);
  1434                     *err=U_INTERNAL_PROGRAM_ERROR;
  1438             /* update pointers */
  1439             s=pArgs->source;
  1440             t=pArgs->target;
  1442             if(U_SUCCESS(*err)) {
  1443                 if(s<pArgs->sourceLimit) {
  1444                     /*
  1445                      * continue with the conversion loop while there is still input left
  1446                      * (continue converting by breaking out of only the inner loop)
  1447                      */
  1448                     break;
  1449                 } else if(realSource!=NULL) {
  1450                     /* switch back from replaying to the real source and continue */
  1451                     pArgs->source=realSource;
  1452                     pArgs->sourceLimit=realSourceLimit;
  1453                     pArgs->flush=realFlush;
  1454                     sourceIndex=realSourceIndex;
  1456                     realSource=NULL;
  1457                     break;
  1458                 } else if(pArgs->flush && cnv->toULength>0) {
  1459                     /*
  1460                      * the entire input stream is consumed
  1461                      * and there is a partial, truncated input sequence left
  1462                      */
  1464                     /* inject an error and continue with callback handling */
  1465                     *err=U_TRUNCATED_CHAR_FOUND;
  1466                     calledCallback=FALSE; /* new error condition */
  1467                 } else {
  1468                     /* input consumed */
  1469                     if(pArgs->flush) {
  1470                         /*
  1471                          * return to the conversion loop once more if the flush
  1472                          * flag is set and the conversion function has not
  1473                          * successfully processed the end of the input yet
  1475                          * (continue converting by breaking out of only the inner loop)
  1476                          */
  1477                         if(!converterSawEndOfInput) {
  1478                             break;
  1481                         /* reset the converter without calling the callback function */
  1482                         _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
  1485                     /* done successfully */
  1486                     return;
  1490             /* U_FAILURE(*err) */
  1492                 UErrorCode e;
                       /*
                        * e is assigned inside the condition; because of the || short
                        * circuit it is only assigned (and only read) when
                        * calledCallback is FALSE
                        */
  1494                 if( calledCallback ||
  1495                     (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
  1496                     (e!=U_INVALID_CHAR_FOUND &&
  1497                      e!=U_ILLEGAL_CHAR_FOUND &&
  1498                      e!=U_TRUNCATED_CHAR_FOUND &&
  1499                      e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
  1500                      e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
  1501                 ) {
  1502                     /*
  1503                      * the callback did not or cannot resolve the error:
  1504                      * set output pointers and return
  1506                      * the check for buffer overflow is redundant but it is
  1507                      * a high-runner case and hopefully documents the intent
  1508                      * well
  1510                      * if we were replaying, then the replay buffer must be
  1511                      * copied back into the UConverter
  1512                      * and the real arguments must be restored
  1513                      */
  1514                     if(realSource!=NULL) {
  1515                         int32_t length;
  1517                         U_ASSERT(cnv->preToULength==0);
  1519                         length=(int32_t)(pArgs->sourceLimit-pArgs->source);
  1520                         if(length>0) {
  1521                             uprv_memcpy(cnv->preToU, pArgs->source, length);
                                   /* stored negated so that the next call sees preToULength<0 and replays */
  1522                             cnv->preToULength=(int8_t)-length;
  1525                         pArgs->source=realSource;
  1526                         pArgs->sourceLimit=realSourceLimit;
  1527                         pArgs->flush=realFlush;
  1530                     return;
  1534             /* copy toUBytes[] to invalidCharBuffer[] */
  1535             errorInputLength=cnv->invalidCharLength=cnv->toULength;
  1536             if(errorInputLength>0) {
  1537                 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
  1540             /* set the converter state to deal with the next character */
  1541             cnv->toULength=0;
  1543             /* call the callback function */
                   /*
                    * only the default reason UCNV_ILLEGAL is changed to UCNV_UNASSIGNED
                    * for U_INVALID_CHAR_FOUND; any other value of toUCallbackReason is
                    * passed through as is (presumably set by the converter
                    * implementation - confirm against the implementations)
                    */
  1544             if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
  1545                 cnv->toUCallbackReason = UCNV_UNASSIGNED;
  1547             cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
  1548                 cnv->invalidCharBuffer, errorInputLength,
  1549                 cnv->toUCallbackReason,
  1550                 err);
  1551             cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
  1553             /*
  1554              * loop back to the offset handling
  1556              * this flag will indicate after offset handling
  1557              * that a callback was called;
  1558              * if the callback did not resolve the error, then we return
  1559              */
  1560             calledCallback=TRUE;
  1565 /*
  1566  * Output the toUnicode overflow buffer.
  1567  * Call this function if(cnv->UCharErrorBufferLength>0).
  1568  * @return TRUE if overflow
  1569  */
  1570 static UBool
  1571 ucnv_outputOverflowToUnicode(UConverter *cnv,
  1572                              UChar **target, const UChar *targetLimit,
  1573                              int32_t **pOffsets,
  1574                              UErrorCode *err) {
  1575     int32_t *offsets;
  1576     UChar *overflow, *t;
  1577     int32_t i, length;
  1579     t=*target;
  1580     if(pOffsets!=NULL) {
  1581         offsets=*pOffsets;
  1582     } else {
  1583         offsets=NULL;
  1586     overflow=cnv->UCharErrorBuffer;
  1587     length=cnv->UCharErrorBufferLength;
  1588     i=0;
  1589     while(i<length) {
  1590         if(t==targetLimit) {
  1591             /* the overflow buffer contains too much, keep the rest */
  1592             int32_t j=0;
  1594             do {
  1595                 overflow[j++]=overflow[i++];
  1596             } while(i<length);
  1598             cnv->UCharErrorBufferLength=(int8_t)j;
  1599             *target=t;
  1600             if(offsets!=NULL) {
  1601                 *pOffsets=offsets;
  1603             *err=U_BUFFER_OVERFLOW_ERROR;
  1604             return TRUE;
  1607         /* copy the overflow contents to the target */
  1608         *t++=overflow[i++];
  1609         if(offsets!=NULL) {
  1610             *offsets++=-1; /* no source index available for old output */
  1614     /* the overflow buffer is completely copied to the target */
  1615     cnv->UCharErrorBufferLength=0;
  1616     *target=t;
  1617     if(offsets!=NULL) {
  1618         *pOffsets=offsets;
  1620     return FALSE;
  1623 U_CAPI void U_EXPORT2
  1624 ucnv_toUnicode(UConverter *cnv,
  1625                UChar **target, const UChar *targetLimit,
  1626                const char **source, const char *sourceLimit,
  1627                int32_t *offsets,
  1628                UBool flush,
  1629                UErrorCode *err) {
  1630     UConverterToUnicodeArgs args;
  1631     const char *s;
  1632     UChar *t;
  1634     /* check parameters */
  1635     if(err==NULL || U_FAILURE(*err)) {
  1636         return;
  1639     if(cnv==NULL || target==NULL || source==NULL) {
  1640         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1641         return;
  1644     s=*source;
  1645     t=*target;
  1647     if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
  1648         /*
  1649         Prevent code from going into an infinite loop in case we do hit this
  1650         limit. The limit pointer is expected to be on a UChar * boundary.
  1651         This also prevents the next argument check from failing.
  1652         */
  1653         targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
  1656     /*
  1657      * All these conditions should never happen.
  1659      * 1) Make sure that the limits are >= to the address source or target
  1661      * 2) Make sure that the buffer sizes do not exceed the number range for
  1662      * int32_t because some functions use the size (in units or bytes)
  1663      * rather than comparing pointers, and because offsets are int32_t values.
  1665      * size_t is guaranteed to be unsigned and large enough for the job.
  1667      * Return with an error instead of adjusting the limits because we would
  1668      * not be able to maintain the semantics that either the source must be
  1669      * consumed or the target filled (unless an error occurs).
  1670      * An adjustment would be sourceLimit=t+0x7fffffff; for example.
  1672      * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
  1673      * to a char * pointer and provide an incomplete UChar code unit.
  1674      */
  1675     if (sourceLimit<s || targetLimit<t ||
  1676         ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
  1677         ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
  1678         (((const char *)targetLimit-(const char *)t) & 1) != 0
  1679     ) {
  1680         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1681         return;
  1684     /* output the target overflow buffer */
  1685     if( cnv->UCharErrorBufferLength>0 &&
  1686         ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
  1687     ) {
  1688         /* U_BUFFER_OVERFLOW_ERROR */
  1689         return;
  1691     /* *target may have moved, therefore stop using t */
  1693     if(!flush && s==sourceLimit && cnv->preToULength>=0) {
  1694         /* the overflow buffer is emptied and there is no new input: we are done */
  1695         return;
  1698     /*
  1699      * Do not simply return with a buffer overflow error if
  1700      * !flush && t==targetLimit
  1701      * because it is possible that the source will not generate any output.
  1702      * For example, the skip callback may be called;
  1703      * it does not output anything.
  1704      */
  1706     /* prepare the converter arguments */
  1707     args.converter=cnv;
  1708     args.flush=flush;
  1709     args.offsets=offsets;
  1710     args.source=s;
  1711     args.sourceLimit=sourceLimit;
  1712     args.target=*target;
  1713     args.targetLimit=targetLimit;
  1714     args.size=sizeof(args);
  1716     _toUnicodeWithCallback(&args, err);
  1718     *source=args.source;
  1719     *target=args.target;
  1722 /* ucnv_to/fromUChars() ----------------------------------------------------- */
  1724 U_CAPI int32_t U_EXPORT2
  1725 ucnv_fromUChars(UConverter *cnv,
  1726                 char *dest, int32_t destCapacity,
  1727                 const UChar *src, int32_t srcLength,
  1728                 UErrorCode *pErrorCode) {
  1729     const UChar *srcLimit;
  1730     char *originalDest, *destLimit;
  1731     int32_t destLength;
  1733     /* check arguments */
  1734     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1735         return 0;
  1738     if( cnv==NULL ||
  1739         destCapacity<0 || (destCapacity>0 && dest==NULL) ||
  1740         srcLength<-1 || (srcLength!=0 && src==NULL)
  1741     ) {
  1742         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1743         return 0;
  1746     /* initialize */
  1747     ucnv_resetFromUnicode(cnv);
  1748     originalDest=dest;
  1749     if(srcLength==-1) {
  1750         srcLength=u_strlen(src);
  1752     if(srcLength>0) {
  1753         srcLimit=src+srcLength;
  1754         destLimit=dest+destCapacity;
  1756         /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
  1757         if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
  1758             destLimit=(char *)U_MAX_PTR(dest);
  1761         /* perform the conversion */
  1762         ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
  1763         destLength=(int32_t)(dest-originalDest);
  1765         /* if an overflow occurs, then get the preflighting length */
  1766         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  1767             char buffer[1024];
  1769             destLimit=buffer+sizeof(buffer);
  1770             do {
  1771                 dest=buffer;
  1772                 *pErrorCode=U_ZERO_ERROR;
  1773                 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
  1774                 destLength+=(int32_t)(dest-buffer);
  1775             } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
  1777     } else {
  1778         destLength=0;
  1781     return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
  1784 U_CAPI int32_t U_EXPORT2
  1785 ucnv_toUChars(UConverter *cnv,
  1786               UChar *dest, int32_t destCapacity,
  1787               const char *src, int32_t srcLength,
  1788               UErrorCode *pErrorCode) {
  1789     const char *srcLimit;
  1790     UChar *originalDest, *destLimit;
  1791     int32_t destLength;
  1793     /* check arguments */
  1794     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  1795         return 0;
  1798     if( cnv==NULL ||
  1799         destCapacity<0 || (destCapacity>0 && dest==NULL) ||
  1800         srcLength<-1 || (srcLength!=0 && src==NULL))
  1802         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1803         return 0;
  1806     /* initialize */
  1807     ucnv_resetToUnicode(cnv);
  1808     originalDest=dest;
  1809     if(srcLength==-1) {
  1810         srcLength=(int32_t)uprv_strlen(src);
  1812     if(srcLength>0) {
  1813         srcLimit=src+srcLength;
  1814         destLimit=dest+destCapacity;
  1816         /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
  1817         if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
  1818             destLimit=(UChar *)U_MAX_PTR(dest);
  1821         /* perform the conversion */
  1822         ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
  1823         destLength=(int32_t)(dest-originalDest);
  1825         /* if an overflow occurs, then get the preflighting length */
  1826         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
  1828             UChar buffer[1024];
  1830             destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
  1831             do {
  1832                 dest=buffer;
  1833                 *pErrorCode=U_ZERO_ERROR;
  1834                 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
  1835                 destLength+=(int32_t)(dest-buffer);
  1837             while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
  1839     } else {
  1840         destLength=0;
  1843     return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
  1846 /* ucnv_getNextUChar() ------------------------------------------------------ */
  /*
   * ucnv_getNextUChar(): return the next code point converted from the source.
   *
   * cnv          converter to use; must not be NULL
   * source       in/out: next read position in the input bytes; updated on the
   *              paths that actually ran a conversion
   * sourceLimit  end of the input bytes; must not precede *source
   * err          in/out error code
   *
   * Returns 0xffff together with an error code when there is no output:
   * - immediately, if err is NULL or already a failure
   * - U_ILLEGAL_ARGUMENT_ERROR for NULL cnv/source, sourceLimit<*source,
   *   or an input longer than 0x7fffffff bytes
   * - U_INDEX_OUTOFBOUNDS_ERROR when the conversion produced no UChars
   *
   * Order of work: (1) take pending output from cnv->UCharErrorBuffer,
   * (2) try the converter's native getNextUChar() implementation,
   * (3) fall back to _toUnicodeWithCallback() with a one-UChar target,
   * (4) pair up a lead surrogate with a following trail surrogate,
   * (5) push any surplus UChars back into cnv->UCharErrorBuffer.
   */
  1848 U_CAPI UChar32 U_EXPORT2
  1849 ucnv_getNextUChar(UConverter *cnv,
  1850                   const char **source, const char *sourceLimit,
  1851                   UErrorCode *err) {
  1852     UConverterToUnicodeArgs args;
  1853     UChar buffer[U16_MAX_LENGTH];
  1854     const char *s;
  1855     UChar32 c;
  1856     int32_t i, length;
  1858     /* check parameters */
  1859     if(err==NULL || U_FAILURE(*err)) {
  1860         return 0xffff;
  1863     if(cnv==NULL || source==NULL) {
  1864         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1865         return 0xffff;
  1868     s=*source;
  1869     if(sourceLimit<s) {
  1870         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1871         return 0xffff;
  1874     /*
  1875      * Make sure that the buffer sizes do not exceed the number range for
  1876      * int32_t because some functions use the size (in units or bytes)
  1877      * rather than comparing pointers, and because offsets are int32_t values.
  1879      * size_t is guaranteed to be unsigned and large enough for the job.
  1881      * Return with an error instead of adjusting the limits because we would
  1882      * not be able to maintain the semantics that either the source must be
  1883      * consumed or the target filled (unless an error occurs).
  1884      * An adjustment would be sourceLimit=t+0x7fffffff; for example.
  1885      */
  1886     if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
  1887         *err=U_ILLEGAL_ARGUMENT_ERROR;
  1888         return 0xffff;
           /* c keeps this marker value until a code unit has been read; tested below with c<0 */
  1891     c=U_SENTINEL;
  1893     /* flush the target overflow buffer */
  1894     if(cnv->UCharErrorBufferLength>0) {
  1895         UChar *overflow;
  1897         overflow=cnv->UCharErrorBuffer;
  1898         i=0;
  1899         length=cnv->UCharErrorBufferLength;
  1900         U16_NEXT(overflow, i, length, c);
  1902         /* move the remaining overflow contents up to the beginning */
  1903         if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
  1904             uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
  1905                          cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
               /* done unless c is a lead surrogate that was the last unit in the overflow buffer */
  1908         if(!U16_IS_LEAD(c) || i<length) {
  1909             return c;
  1911         /*
  1912          * Continue if the overflow buffer contained only a lead surrogate,
  1913          * in case the converter outputs single surrogates from complete
  1914          * input sequences.
  1915          */
  1918     /*
  1919      * flush==TRUE is implied for ucnv_getNextUChar()
  1921      * do not simply return even if s==sourceLimit because the converter may
  1922      * not have seen flush==TRUE before
  1923      */
  1925     /* prepare the converter arguments */
  1926     args.converter=cnv;
  1927     args.flush=TRUE;
  1928     args.offsets=NULL;
  1929     args.source=s;
  1930     args.sourceLimit=sourceLimit;
  1931     args.target=buffer;
           /* room for one UChar at first; raised to buffer+2 below when a trail surrogate is sought */
  1932     args.targetLimit=buffer+1;
  1933     args.size=sizeof(args);
           /* c<0: nothing was taken from the overflow buffer, so convert from the source */
  1935     if(c<0) {
  1936         /*
  1937          * call the native getNextUChar() implementation if we are
  1938          * at a character boundary (toULength==0)
  1940          * unlike with _toUnicode(), getNextUChar() implementations must set
  1941          * U_TRUNCATED_CHAR_FOUND for truncated input,
  1942          * in addition to setting toULength/toUBytes[]
  1943          */
  1944         if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
  1945             c=cnv->sharedData->impl->getNextUChar(&args, err);
  1946             *source=s=args.source;
  1947             if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
  1948                 /* reset the converter without calling the callback function */
  1949                 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
  1950                 return 0xffff; /* no output */
  1951             } else if(U_SUCCESS(*err) && c>=0) {
  1952                 return c;
  1953             /*
  1954              * else fall through to use _toUnicode() because
  1955              *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
  1956              *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
  1957              */
  1961         /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
  1962         _toUnicodeWithCallback(&args, err);
               /* an overflow past the one-UChar target is expected here and is not reported */
  1964         if(*err==U_BUFFER_OVERFLOW_ERROR) {
  1965             *err=U_ZERO_ERROR;
  1968         i=0;
  1969         length=(int32_t)(args.target-buffer);
  1970     } else {
  1971         /* write the lead surrogate from the overflow buffer */
  1972         buffer[0]=(UChar)c;
  1973         args.target=buffer+1;
  1974         i=0;
  1975         length=1;
  1978     /* buffer contents starts at i and ends before length */
  1980     if(U_FAILURE(*err)) {
  1981         c=0xffff; /* no output */
  1982     } else if(length==0) {
  1983         /* no input or only state changes */
  1984         *err=U_INDEX_OUTOFBOUNDS_ERROR;
  1985         /* no need to reset explicitly because _toUnicodeWithCallback() did it */
  1986         c=0xffff; /* no output */
  1987     } else {
  1988         c=buffer[0];
  1989         i=1;
  1990         if(!U16_IS_LEAD(c)) {
  1991             /* consume c=buffer[0], done */
  1992         } else {
  1993             /* got a lead surrogate, see if a trail surrogate follows */
  1994             UChar c2;
  1996             if(cnv->UCharErrorBufferLength>0) {
  1997                 /* got overflow output from the conversion */
  1998                 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
  1999                     /* got a trail surrogate, too */
  2000                     c=U16_GET_SUPPLEMENTARY(c, c2);
  2002                     /* move the remaining overflow contents up to the beginning */
  2003                     if((--cnv->UCharErrorBufferLength)>0) {
  2004                         uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
  2005                                      cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
  2007                 } else {
  2008                     /* c is an unpaired lead surrogate, just return it */
  2010             } else if(args.source<sourceLimit) {
  2011                 /* convert once more, to buffer[1] */
  2012                 args.targetLimit=buffer+2;
  2013                 _toUnicodeWithCallback(&args, err);
  2014                 if(*err==U_BUFFER_OVERFLOW_ERROR) {
  2015                     *err=U_ZERO_ERROR;
  2018                 length=(int32_t)(args.target-buffer);
  2019                 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
  2020                     /* got a trail surrogate, too */
  2021                     c=U16_GET_SUPPLEMENTARY(c, c2);
  2022                     i=2;
  2028     /*
  2029      * move leftover output from buffer[i..length[
  2030      * into the beginning of the overflow buffer
  2031      */
  2032     if(i<length) {
  2033         /* move further overflow back */
  2034         int32_t delta=length-i;
  2035         if((length=cnv->UCharErrorBufferLength)>0) {
  2036             uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
  2037                          length*U_SIZEOF_UCHAR);
  2039         cnv->UCharErrorBufferLength=(int8_t)(length+delta);
  2041         cnv->UCharErrorBuffer[0]=buffer[i++];
  2042         if(delta>1) {
  2043             cnv->UCharErrorBuffer[1]=buffer[i];
           /* report how far the conversion read, then hand back the code point (or 0xffff) */
  2047     *source=args.source;
  2048     return c;
  2051 /* ucnv_convert() and siblings ---------------------------------------------- */
  /*
   * ucnv_convertEx(): convert bytes from one codepage to another, going
   * through a UTF-16 pivot buffer or, where available, a direct UTF-8 path.
   *
   * targetCnv    converter used for the fromUnicode (pivot -> target) step
   * sourceCnv    converter used for the toUnicode (source -> pivot) step
   * target       in/out: next write position in the output bytes
   * targetLimit  end of the output buffer; must not be NULL
   * source       in/out: next read position in the input bytes
   * sourceLimit  end of the input; NULL means the input is NUL-terminated
   * pivotStart   start of the caller's pivot buffer; NULL selects an internal
   *              stack buffer of CHUNK_SIZE UChars, which requires flush
   * pivotSource  in/out: read position in the pivot buffer
   * pivotTarget  in/out: write position in the pivot buffer
   * pivotLimit   end of the pivot buffer
   * reset        if set, both converters are reset and the pivot pointers are
   *              set back to pivotStart before converting
   * flush        passed to the toUnicode step; the fromUnicode step is flushed
   *              once the toUnicode step has processed the end of the input
   * pErrorCode   in/out error code; the function returns at once if it is
   *              NULL or already a failure
   *
   * Sets U_ILLEGAL_ARGUMENT_ERROR for NULL or inconsistent arguments and for
   * buffers longer than 0x7fffffff bytes. On exit *source and *target are
   * updated. With flush and a successful result the output is NUL-terminated
   * if there is room; otherwise U_STRING_NOT_TERMINATED_WARNING is set.
   */
  2053 U_CAPI void U_EXPORT2
  2054 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
  2055                char **target, const char *targetLimit,
  2056                const char **source, const char *sourceLimit,
  2057                UChar *pivotStart, UChar **pivotSource,
  2058                UChar **pivotTarget, const UChar *pivotLimit,
  2059                UBool reset, UBool flush,
  2060                UErrorCode *pErrorCode) {
  2061     UChar pivotBuffer[CHUNK_SIZE];
  2062     const UChar *myPivotSource;
  2063     UChar *myPivotTarget;
  2064     const char *s;
  2065     char *t;
  2067     UConverterToUnicodeArgs toUArgs;
  2068     UConverterFromUnicodeArgs fromUArgs;
  2069     UConverterConvert convert;
  2071     /* error checking */
  2072     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  2073         return;
  2076     if( targetCnv==NULL || sourceCnv==NULL ||
  2077         source==NULL || *source==NULL ||
  2078         target==NULL || *target==NULL || targetLimit==NULL
  2079     ) {
  2080         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2081         return;
  2084     s=*source;
  2085     t=*target;
           /* a NULL sourceLimit is legal here; it is resolved further below */
  2086     if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
  2087         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2088         return;
  2091     /*
  2092      * Make sure that the buffer sizes do not exceed the number range for
  2093      * int32_t. See ucnv_toUnicode() for a more detailed comment.
  2094      */
  2095     if(
  2096         (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
  2097         ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
  2098     ) {
  2099         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2100         return;
  2103     if(pivotStart==NULL) {
  2104         if(!flush) {
  2105             /* streaming conversion requires an explicit pivot buffer */
  2106             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2107             return;
  2110         /* use the stack pivot buffer */
  2111         myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
               /* the cast drops const so that the local can be used through the UChar ** parameter */
  2112         pivotSource=(UChar **)&myPivotSource;
  2113         pivotTarget=&myPivotTarget;
  2114         pivotLimit=pivotBuffer+CHUNK_SIZE;
  2115     } else if(  pivotStart>=pivotLimit ||
  2116                 pivotSource==NULL || *pivotSource==NULL ||
  2117                 pivotTarget==NULL || *pivotTarget==NULL ||
  2118                 pivotLimit==NULL
  2119     ) {
  2120         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2121         return;
  2124     if(sourceLimit==NULL) {
  2125         /* get limit of single-byte-NUL-terminated source string */
  2126         sourceLimit=uprv_strchr(*source, 0);
  2129     if(reset) {
  2130         ucnv_resetToUnicode(sourceCnv);
  2131         ucnv_resetFromUnicode(targetCnv);
  2132         *pivotSource=*pivotTarget=pivotStart;
  2133     } else if(targetCnv->charErrorBufferLength>0) {
  2134         /* output the targetCnv overflow buffer */
  2135         if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
  2136             /* U_BUFFER_OVERFLOW_ERROR */
  2137             return;
  2139         /* *target has moved, therefore stop using t */
  2141         if( !flush &&
  2142             targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
  2143             sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
  2144         ) {
  2145             /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
  2146             return;
  2150     /* Is direct-UTF-8 conversion available? */
  2151     if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
  2152         targetCnv->sharedData->impl->fromUTF8!=NULL
  2153     ) {
  2154         convert=targetCnv->sharedData->impl->fromUTF8;
  2155     } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
  2156                sourceCnv->sharedData->impl->toUTF8!=NULL
  2157     ) {
  2158         convert=sourceCnv->sharedData->impl->toUTF8;
  2159     } else {
  2160         convert=NULL;
  2163     /*
  2164      * If direct-UTF-8 conversion is available, then we use a smaller
  2165      * pivot buffer for error handling and partial matches
  2166      * so that we quickly return to direct conversion.
  2168      * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
  2170      * We could reduce the pivot buffer size further, at the cost of
  2171      * buffer overflows from callbacks.
  2172      * The pivot buffer should not be smaller than the maximum number of
  2173      * fromUnicode extension table input UChars
  2174      * (for m:n conversion, see
  2175      * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
  2176      * or 2 for surrogate pairs.
  2178      * Too small a buffer can cause thrashing between pivoting and direct
  2179      * conversion, with function call overhead outweighing the benefits
  2180      * of direct conversion.
  2181      */
  2182     if(convert!=NULL && (pivotLimit-pivotStart)>32) {
  2183         pivotLimit=pivotStart+32;
  2186     /* prepare the converter arguments */
  2187     fromUArgs.converter=targetCnv;
  2188     fromUArgs.flush=FALSE;
  2189     fromUArgs.offsets=NULL;
  2190     fromUArgs.target=*target;
  2191     fromUArgs.targetLimit=targetLimit;
  2192     fromUArgs.size=sizeof(fromUArgs);
  2194     toUArgs.converter=sourceCnv;
  2195     toUArgs.flush=flush;
  2196     toUArgs.offsets=NULL;
  2197     toUArgs.source=s;
  2198     toUArgs.sourceLimit=sourceLimit;
  2199     toUArgs.targetLimit=pivotLimit;
  2200     toUArgs.size=sizeof(toUArgs);
  2202     /*
  2203      * TODO: Consider separating this function into two functions,
  2204      * extracting exactly the conversion loop,
  2205      * for readability and to reduce the set of visible variables.
  2207      * Otherwise stop using s and t from here on.
  2208      */
  2209     s=t=NULL;
  2211     /*
  2212      * conversion loop
  2214      * The sequence of steps in the loop may appear backward,
  2215      * but the principle is simple:
  2216      * In the chain of
  2217      *   source - sourceCnv overflow - pivot - targetCnv overflow - target
  2218      * empty out later buffers before refilling them from earlier ones.
  2220      * The targetCnv overflow buffer is flushed out only once before the loop.
  2221      */
  2222     for(;;) {
  2223         /*
  2224          * if(pivot not empty or error or replay or flush fromUnicode) {
  2225          *   fromUnicode(pivot -> target);
  2226          * }
  2228          * For pivoting conversion; and for direct conversion for
  2229          * error callback handling and flushing the replay buffer.
  2230          */
  2231         if( *pivotSource<*pivotTarget ||
  2232             U_FAILURE(*pErrorCode) ||
  2233             targetCnv->preFromULength<0 ||
  2234             fromUArgs.flush
  2235         ) {
  2236             fromUArgs.source=*pivotSource;
  2237             fromUArgs.sourceLimit=*pivotTarget;
  2238             _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
  2239             if(U_FAILURE(*pErrorCode)) {
  2240                 /* target overflow, or conversion error */
  2241                 *pivotSource=(UChar *)fromUArgs.source;
  2242                 break;
  2245             /*
  2246              * _fromUnicodeWithCallback() must have consumed the pivot contents
  2247              * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
  2248              */
  2251         /* The pivot buffer is empty; reset it so we start at pivotStart. */
  2252         *pivotSource=*pivotTarget=pivotStart;
  2254         /*
  2255          * if(sourceCnv overflow buffer not empty) {
  2256          *     move(sourceCnv overflow buffer -> pivot);
  2257          *     continue;
  2258          * }
  2259          */
  2260         /* output the sourceCnv overflow buffer */
  2261         if(sourceCnv->UCharErrorBufferLength>0) {
  2262             if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
  2263                 /* U_BUFFER_OVERFLOW_ERROR */
  2264                 *pErrorCode=U_ZERO_ERROR;
  2266             continue;
  2269         /*
  2270          * check for end of input and break if done
  2272          * Checking both flush and fromUArgs.flush ensures that the converters
  2273          * have been called with the flush flag set if the ucnv_convertEx()
  2274          * caller set it.
  2275          */
  2276         if( toUArgs.source==sourceLimit &&
  2277             sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
  2278             (!flush || fromUArgs.flush)
  2279         ) {
  2280             /* done successfully */
  2281             break;
  2284         /*
  2285          * use direct conversion if available
  2286          * but not if continuing a partial match
  2287          * or flushing the toUnicode replay buffer
  2288          */
  2289         if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
  2290             if(*pErrorCode==U_USING_DEFAULT_WARNING) {
  2291                 /* remove a warning that may be set by this function */
  2292                 *pErrorCode=U_ZERO_ERROR;
  2294             convert(&fromUArgs, &toUArgs, pErrorCode);
  2295             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  2296                 break;
  2297             } else if(U_FAILURE(*pErrorCode)) {
  2298                 if(sourceCnv->toULength>0) {
  2299                     /*
  2300                      * Fall through to calling _toUnicodeWithCallback()
  2301                      * for callback handling.
  2303                      * The pivot buffer will be reset with
  2304                      *   *pivotSource=*pivotTarget=pivotStart;
  2305                      * which indicates a toUnicode error to the caller
  2306                      * (*pivotSource==pivotStart shows no pivot UChars consumed).
  2307                      */
  2308                 } else {
  2309                     /*
  2310                      * Indicate a fromUnicode error to the caller
  2311                      * (*pivotSource>pivotStart shows some pivot UChars consumed).
  2312                      */
  2313                     *pivotSource=*pivotTarget=pivotStart+1;
  2314                     /*
  2315                      * Loop around to calling _fromUnicodeWithCallbacks()
  2316                      * for callback handling.
  2317                      */
  2318                     continue;
  2320             } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
  2321                 /*
  2322                  * No error, but the implementation requested to temporarily
  2323                  * fall back to pivoting.
  2324                  */
  2325                 *pErrorCode=U_ZERO_ERROR;
  2326             /*
  2327              * The following else branches are almost identical to the end-of-input
  2328              * handling in _toUnicodeWithCallback().
  2329              * Avoid calling it just for the end of input.
  2330              */
  2331             } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
  2332                 /*
  2333                  * the entire input stream is consumed
  2334                  * and there is a partial, truncated input sequence left
  2335                  */
  2337                 /* inject an error and continue with callback handling */
  2338                 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
  2339             } else {
  2340                 /* input consumed */
  2341                 if(flush) {
  2342                     /* reset the converters without calling the callback functions */
  2343                     _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
  2344                     _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
  2347                 /* done successfully */
  2348                 break;
  2352         /*
  2353          * toUnicode(source -> pivot);
  2355          * For pivoting conversion; and for direct conversion for
  2356          * error callback handling, continuing partial matches
  2357          * and flushing the replay buffer.
  2359          * The pivot buffer is empty and reset.
  2360          */
  2361         toUArgs.target=pivotStart; /* ==*pivotTarget */
  2362         /* toUArgs.targetLimit=pivotLimit; already set before the loop */
  2363         _toUnicodeWithCallback(&toUArgs, pErrorCode);
  2364         *pivotTarget=toUArgs.target;
  2365         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
  2366             /* pivot overflow: continue with the conversion loop */
  2367             *pErrorCode=U_ZERO_ERROR;
  2368         } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
  2369             /* conversion error, or there was nothing left to convert */
  2370             break;
  2372         /*
  2373          * else:
  2374          * _toUnicodeWithCallback() wrote into the pivot buffer,
  2375          * continue with fromUnicode conversion.
  2377          * Set the fromUnicode flush flag if we flush and if toUnicode has
  2378          * processed the end of the input.
  2379          */
  2380         if( flush && toUArgs.source==sourceLimit &&
  2381             sourceCnv->preToULength>=0 &&
  2382             sourceCnv->UCharErrorBufferLength==0
  2383         ) {
  2384             fromUArgs.flush=TRUE;
  2388     /*
  2389      * The conversion loop is exited when one of the following is true:
  2390      * - the entire source text has been converted successfully to the target buffer
  2391      * - a target buffer overflow occurred
  2392      * - a conversion error occurred
  2393      */
           /* hand the final read and write positions back to the caller */
  2395     *source=toUArgs.source;
  2396     *target=fromUArgs.target;
  2398     /* terminate the target buffer if possible */
  2399     if(flush && U_SUCCESS(*pErrorCode)) {
  2400         if(*target!=targetLimit) {
  2401             **target=0;
  2402             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
  2403                 *pErrorCode=U_ZERO_ERROR;
  2405         } else {
  2406             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
  2411 /* internal implementation of ucnv_convert() etc. with preflighting */
  2412 static int32_t
  2413 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
  2414                      char *target, int32_t targetCapacity,
  2415                      const char *source, int32_t sourceLength,
  2416                      UErrorCode *pErrorCode) {
  2417     UChar pivotBuffer[CHUNK_SIZE];
  2418     UChar *pivot, *pivot2;
  2420     char *myTarget;
  2421     const char *sourceLimit;
  2422     const char *targetLimit;
  2423     int32_t targetLength=0;
  2425     /* set up */
  2426     if(sourceLength<0) {
  2427         sourceLimit=uprv_strchr(source, 0);
  2428     } else {
  2429         sourceLimit=source+sourceLength;
  2432     /* if there is no input data, we're done */
  2433     if(source==sourceLimit) {
  2434         return u_terminateChars(target, targetCapacity, 0, pErrorCode);
  2437     pivot=pivot2=pivotBuffer;
  2438     myTarget=target;
  2439     targetLength=0;
  2441     if(targetCapacity>0) {
  2442         /* perform real conversion */
  2443         targetLimit=target+targetCapacity;
  2444         ucnv_convertEx(outConverter, inConverter,
  2445                        &myTarget, targetLimit,
  2446                        &source, sourceLimit,
  2447                        pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
  2448                        FALSE,
  2449                        TRUE,
  2450                        pErrorCode);
  2451         targetLength=(int32_t)(myTarget-target);
  2454     /*
  2455      * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
  2456      * to it but continue the conversion in order to store in targetCapacity
  2457      * the number of bytes that was required.
  2458      */
  2459     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
  2461         char targetBuffer[CHUNK_SIZE];
  2463         targetLimit=targetBuffer+CHUNK_SIZE;
  2464         do {
  2465             *pErrorCode=U_ZERO_ERROR;
  2466             myTarget=targetBuffer;
  2467             ucnv_convertEx(outConverter, inConverter,
  2468                            &myTarget, targetLimit,
  2469                            &source, sourceLimit,
  2470                            pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
  2471                            FALSE,
  2472                            TRUE,
  2473                            pErrorCode);
  2474             targetLength+=(int32_t)(myTarget-targetBuffer);
  2475         } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
  2477         /* done with preflighting, set warnings and errors as appropriate */
  2478         return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
  2481     /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
  2482     return targetLength;
  2485 U_CAPI int32_t U_EXPORT2
  2486 ucnv_convert(const char *toConverterName, const char *fromConverterName,
  2487              char *target, int32_t targetCapacity,
  2488              const char *source, int32_t sourceLength,
  2489              UErrorCode *pErrorCode) {
  2490     UConverter in, out; /* stack-allocated */
  2491     UConverter *inConverter, *outConverter;
  2492     int32_t targetLength;
  2494     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  2495         return 0;
  2498     if( source==NULL || sourceLength<-1 ||
  2499         targetCapacity<0 || (targetCapacity>0 && target==NULL)
  2500     ) {
  2501         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2502         return 0;
  2505     /* if there is no input data, we're done */
  2506     if(sourceLength==0 || (sourceLength<0 && *source==0)) {
  2507         return u_terminateChars(target, targetCapacity, 0, pErrorCode);
  2510     /* create the converters */
  2511     inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
  2512     if(U_FAILURE(*pErrorCode)) {
  2513         return 0;
  2516     outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
  2517     if(U_FAILURE(*pErrorCode)) {
  2518         ucnv_close(inConverter);
  2519         return 0;
  2522     targetLength=ucnv_internalConvert(outConverter, inConverter,
  2523                                       target, targetCapacity,
  2524                                       source, sourceLength,
  2525                                       pErrorCode);
  2527     ucnv_close(inConverter);
  2528     ucnv_close(outConverter);
  2530     return targetLength;
  2533 /* @internal */
  2534 static int32_t
  2535 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
  2536                         UConverterType algorithmicType,
  2537                         UConverter *cnv,
  2538                         char *target, int32_t targetCapacity,
  2539                         const char *source, int32_t sourceLength,
  2540                         UErrorCode *pErrorCode) {
  2541     UConverter algoConverterStatic; /* stack-allocated */
  2542     UConverter *algoConverter, *to, *from;
  2543     int32_t targetLength;
  2545     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
  2546         return 0;
  2549     if( cnv==NULL || source==NULL || sourceLength<-1 ||
  2550         targetCapacity<0 || (targetCapacity>0 && target==NULL)
  2551     ) {
  2552         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  2553         return 0;
  2556     /* if there is no input data, we're done */
  2557     if(sourceLength==0 || (sourceLength<0 && *source==0)) {
  2558         return u_terminateChars(target, targetCapacity, 0, pErrorCode);
  2561     /* create the algorithmic converter */
  2562     algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
  2563                                                   "", 0, pErrorCode);
  2564     if(U_FAILURE(*pErrorCode)) {
  2565         return 0;
  2568     /* reset the other converter */
  2569     if(convertToAlgorithmic) {
  2570         /* cnv->Unicode->algo */
  2571         ucnv_resetToUnicode(cnv);
  2572         to=algoConverter;
  2573         from=cnv;
  2574     } else {
  2575         /* algo->Unicode->cnv */
  2576         ucnv_resetFromUnicode(cnv);
  2577         from=algoConverter;
  2578         to=cnv;
  2581     targetLength=ucnv_internalConvert(to, from,
  2582                                       target, targetCapacity,
  2583                                       source, sourceLength,
  2584                                       pErrorCode);
  2586     ucnv_close(algoConverter);
  2588     return targetLength;
  2591 U_CAPI int32_t U_EXPORT2
  2592 ucnv_toAlgorithmic(UConverterType algorithmicType,
  2593                    UConverter *cnv,
  2594                    char *target, int32_t targetCapacity,
  2595                    const char *source, int32_t sourceLength,
  2596                    UErrorCode *pErrorCode) {
  2597     return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
  2598                                    target, targetCapacity,
  2599                                    source, sourceLength,
  2600                                    pErrorCode);
  2603 U_CAPI int32_t U_EXPORT2
  2604 ucnv_fromAlgorithmic(UConverter *cnv,
  2605                      UConverterType algorithmicType,
  2606                      char *target, int32_t targetCapacity,
  2607                      const char *source, int32_t sourceLength,
  2608                      UErrorCode *pErrorCode) {
  2609     return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
  2610                                    target, targetCapacity,
  2611                                    source, sourceLength,
  2612                                    pErrorCode);
  2615 U_CAPI UConverterType  U_EXPORT2
  2616 ucnv_getType(const UConverter* converter)
  2618     int8_t type = converter->sharedData->staticData->conversionType;
  2619 #if !UCONFIG_NO_LEGACY_CONVERSION
  2620     if(type == UCNV_MBCS) {
  2621         return ucnv_MBCSGetType(converter);
  2623 #endif
  2624     return (UConverterType)type;
  2627 U_CAPI void  U_EXPORT2
  2628 ucnv_getStarters(const UConverter* converter, 
  2629                  UBool starters[256],
  2630                  UErrorCode* err)
  2632     if (err == NULL || U_FAILURE(*err)) {
  2633         return;
  2636     if(converter->sharedData->impl->getStarters != NULL) {
  2637         converter->sharedData->impl->getStarters(converter, starters, err);
  2638     } else {
  2639         *err = U_ILLEGAL_ARGUMENT_ERROR;
  2643 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
  2645     UErrorCode errorCode;
  2646     const char *name;
  2647     int32_t i;
  2649     if(cnv==NULL) {
  2650         return NULL;
  2653     errorCode=U_ZERO_ERROR;
  2654     name=ucnv_getName(cnv, &errorCode);
  2655     if(U_FAILURE(errorCode)) {
  2656         return NULL;
  2659     for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
  2661         if(0==uprv_strcmp(name, ambiguousConverters[i].name))
  2663             return ambiguousConverters+i;
  2667     return NULL;
  2670 U_CAPI void  U_EXPORT2
  2671 ucnv_fixFileSeparator(const UConverter *cnv, 
  2672                       UChar* source, 
  2673                       int32_t sourceLength) {
  2674     const UAmbiguousConverter *a;
  2675     int32_t i;
  2676     UChar variant5c;
  2678     if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
  2680         return;
  2683     variant5c=a->variant5c;
  2684     for(i=0; i<sourceLength; ++i) {
  2685         if(source[i]==variant5c) {
  2686             source[i]=0x5c;
  2691 U_CAPI UBool  U_EXPORT2
  2692 ucnv_isAmbiguous(const UConverter *cnv) {
  2693     return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
  2696 U_CAPI void  U_EXPORT2
  2697 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
  2699     cnv->useFallback = usesFallback;
  2702 U_CAPI UBool  U_EXPORT2
  2703 ucnv_usesFallback(const UConverter *cnv)
  2705     return cnv->useFallback;
  2708 U_CAPI void  U_EXPORT2
  2709 ucnv_getInvalidChars (const UConverter * converter,
  2710                       char *errBytes,
  2711                       int8_t * len,
  2712                       UErrorCode * err)
  2714     if (err == NULL || U_FAILURE(*err))
  2716         return;
  2718     if (len == NULL || errBytes == NULL || converter == NULL)
  2720         *err = U_ILLEGAL_ARGUMENT_ERROR;
  2721         return;
  2723     if (*len < converter->invalidCharLength)
  2725         *err = U_INDEX_OUTOFBOUNDS_ERROR;
  2726         return;
  2728     if ((*len = converter->invalidCharLength) > 0)
  2730         uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
  2734 U_CAPI void  U_EXPORT2
  2735 ucnv_getInvalidUChars (const UConverter * converter,
  2736                        UChar *errChars,
  2737                        int8_t * len,
  2738                        UErrorCode * err)
  2740     if (err == NULL || U_FAILURE(*err))
  2742         return;
  2744     if (len == NULL || errChars == NULL || converter == NULL)
  2746         *err = U_ILLEGAL_ARGUMENT_ERROR;
  2747         return;
  2749     if (*len < converter->invalidUCharLength)
  2751         *err = U_INDEX_OUTOFBOUNDS_ERROR;
  2752         return;
  2754     if ((*len = converter->invalidUCharLength) > 0)
  2756         uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
  2760 #define SIG_MAX_LEN 5
  2762 U_CAPI const char* U_EXPORT2
  2763 ucnv_detectUnicodeSignature( const char* source,
  2764                              int32_t sourceLength,
  2765                              int32_t* signatureLength,
  2766                              UErrorCode* pErrorCode) {
  2767     int32_t dummy;
  2769     /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
  2770      * bytes we don't misdetect something 
  2771      */
  2772     char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
  2773     int i = 0;
  2775     if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
  2776         return NULL;
  2779     if(source == NULL || sourceLength < -1){
  2780         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  2781         return NULL;
  2784     if(signatureLength == NULL) {
  2785         signatureLength = &dummy;
  2788     if(sourceLength==-1){
  2789         sourceLength=(int32_t)uprv_strlen(source);
  2793     while(i<sourceLength&& i<SIG_MAX_LEN){
  2794         start[i]=source[i];
  2795         i++;
  2798     if(start[0] == '\xFE' && start[1] == '\xFF') {
  2799         *signatureLength=2;
  2800         return  "UTF-16BE";
  2801     } else if(start[0] == '\xFF' && start[1] == '\xFE') {
  2802         if(start[2] == '\x00' && start[3] =='\x00') {
  2803             *signatureLength=4;
  2804             return "UTF-32LE";
  2805         } else {
  2806             *signatureLength=2;
  2807             return  "UTF-16LE";
  2809     } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
  2810         *signatureLength=3;
  2811         return  "UTF-8";
  2812     } else if(start[0] == '\x00' && start[1] == '\x00' && 
  2813               start[2] == '\xFE' && start[3]=='\xFF') {
  2814         *signatureLength=4;
  2815         return  "UTF-32BE";
  2816     } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
  2817         *signatureLength=3;
  2818         return "SCSU";
  2819     } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
  2820         *signatureLength=3;
  2821         return "BOCU-1";
  2822     } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
  2823         /*
  2824          * UTF-7: Initial U+FEFF is encoded as +/v8  or  +/v9  or  +/v+  or  +/v/
  2825          * depending on the second UTF-16 code unit.
  2826          * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
  2827          * if it occurs.
  2829          * So far we have +/v
  2830          */
  2831         if(start[3] == '\x38' && start[4] == '\x2D') {
  2832             /* 5 bytes +/v8- */
  2833             *signatureLength=5;
  2834             return "UTF-7";
  2835         } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
  2836             /* 4 bytes +/v8  or  +/v9  or  +/v+  or  +/v/ */
  2837             *signatureLength=4;
  2838             return "UTF-7";
  2840     }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
  2841         *signatureLength=4;
  2842         return "UTF-EBCDIC";
  2846     /* no known Unicode signature byte sequence recognized */
  2847     *signatureLength=0;
  2848     return NULL;
  2851 U_CAPI int32_t U_EXPORT2
  2852 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
  2854     if(status == NULL || U_FAILURE(*status)){
  2855         return -1;
  2857     if(cnv == NULL){
  2858         *status = U_ILLEGAL_ARGUMENT_ERROR;
  2859         return -1;
  2862     if(cnv->preFromUFirstCP >= 0){
  2863         return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
  2864     }else if(cnv->preFromULength < 0){
  2865         return -cnv->preFromULength ;
  2866     }else if(cnv->fromUChar32 > 0){
  2867         return 1;
  2869     return 0; 
  2873 U_CAPI int32_t U_EXPORT2
  2874 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
  2876     if(status == NULL || U_FAILURE(*status)){
  2877         return -1;
  2879     if(cnv == NULL){
  2880         *status = U_ILLEGAL_ARGUMENT_ERROR;
  2881         return -1;
  2884     if(cnv->preToULength > 0){
  2885         return cnv->preToULength ;
  2886     }else if(cnv->preToULength < 0){
  2887         return -cnv->preToULength;
  2888     }else if(cnv->toULength > 0){
  2889         return cnv->toULength;
  2891     return 0;
  2894 U_CAPI UBool U_EXPORT2
  2895 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
  2896     if (U_FAILURE(*status)) {
  2897         return FALSE;
  2900     if (cnv == NULL) {
  2901         *status = U_ILLEGAL_ARGUMENT_ERROR;
  2902         return FALSE;
  2905     switch (ucnv_getType(cnv)) {
  2906         case UCNV_SBCS:
  2907         case UCNV_DBCS:
  2908         case UCNV_UTF32_BigEndian:
  2909         case UCNV_UTF32_LittleEndian:
  2910         case UCNV_UTF32:
  2911         case UCNV_US_ASCII:
  2912             return TRUE;
  2913         default:
  2914             return FALSE;
  2917 #endif
  2919 /*
  2920  * Hey, Emacs, please set the following:
  2922  * Local Variables:
  2923  * indent-tabs-mode: nil
  2924  * End:
  2926  */

mercurial