michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 1998-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * michael@0: * ucnv.c: michael@0: * Implements APIs for the ICU's codeset conversion library; michael@0: * mostly calls through internal functions; michael@0: * created by Bertrand A. Damiba michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 04/04/99 helena Fixed internal header inclusion. michael@0: * 05/09/00 helena Added implementation to handle fallback mappings. michael@0: * 06/20/2000 helena OS/400 port changes; mostly typecast. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/ucnv.h" michael@0: #include "unicode/ucnv_err.h" michael@0: #include "unicode/uset.h" michael@0: #include "unicode/utf.h" michael@0: #include "unicode/utf16.h" michael@0: #include "putilimp.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "uassert.h" michael@0: #include "utracimp.h" michael@0: #include "ustr_imp.h" michael@0: #include "ucnv_imp.h" michael@0: #include "ucnv_cnv.h" michael@0: #include "ucnv_bld.h" michael@0: michael@0: /* size of intermediate and preflighting buffers in ucnv_convert() */ michael@0: #define CHUNK_SIZE 1024 michael@0: michael@0: typedef struct UAmbiguousConverter { michael@0: const char *name; michael@0: const UChar variant5c; michael@0: } UAmbiguousConverter; michael@0: michael@0: static const UAmbiguousConverter ambiguousConverters[]={ michael@0: { "ibm-897_P100-1995", 0xa5 }, michael@0: { "ibm-942_P120-1999", 0xa5 }, michael@0: { "ibm-943_P130-1999", 0xa5 }, michael@0: { "ibm-946_P100-1995", 0xa5 }, michael@0: { "ibm-33722_P120-1999", 0xa5 }, michael@0: { "ibm-1041_P100-1995", 0xa5 }, michael@0: /*{ "ibm-54191_P100-2006", 0xa5 },*/ michael@0: /*{ "ibm-62383_P100-2007", 0xa5 },*/ michael@0: /*{ "ibm-891_P100-1995", 0x20a9 },*/ michael@0: { "ibm-944_P100-1995", 0x20a9 }, michael@0: { "ibm-949_P110-1999", 0x20a9 }, michael@0: { "ibm-1363_P110-1997", 0x20a9 }, michael@0: { "ISO_2022,locale=ko,version=0", 0x20a9 }, michael@0: { "ibm-1088_P100-1995", 0x20a9 } michael@0: }; michael@0: michael@0: /*Calls through createConverter */ michael@0: U_CAPI UConverter* U_EXPORT2 michael@0: ucnv_open (const char *name, michael@0: UErrorCode * err) michael@0: { michael@0: UConverter *r; michael@0: michael@0: if (err == NULL || U_FAILURE (*err)) { michael@0: return NULL; michael@0: } michael@0: michael@0: r = ucnv_createConverter(NULL, name, err); michael@0: return r; michael@0: } michael@0: michael@0: U_CAPI UConverter* U_EXPORT2 michael@0: ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) michael@0: { michael@0: return ucnv_createConverterFromPackage(packageName, converterName, err); michael@0: } michael@0: michael@0: /*Extracts the UChar* to a char* and calls through createConverter */ michael@0: U_CAPI UConverter* U_EXPORT2 michael@0: ucnv_openU (const UChar * name, michael@0: UErrorCode * err) michael@0: { michael@0: char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; michael@0: michael@0: if (err == NULL || U_FAILURE(*err)) michael@0: return NULL; michael@0: if (name == NULL) michael@0: return ucnv_open (NULL, err); michael@0: if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) michael@0: { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: return ucnv_open(u_austrcpy(asciiName, name), err); michael@0: } michael@0: michael@0: /* Copy the string that is represented by the UConverterPlatform enum michael@0: * @param platformString An output buffer michael@0: * @param platform An enum representing a platform michael@0: * @return the length of the copied string. michael@0: */ michael@0: static int32_t michael@0: ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) michael@0: { michael@0: switch (pltfrm) michael@0: { michael@0: case UCNV_IBM: michael@0: uprv_strcpy(platformString, "ibm-"); michael@0: return 4; michael@0: case UCNV_UNKNOWN: michael@0: break; michael@0: } michael@0: michael@0: /* default to empty string */ michael@0: *platformString = 0; michael@0: return 0; michael@0: } michael@0: michael@0: /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls michael@0: *through createConverter*/ michael@0: U_CAPI UConverter* U_EXPORT2 michael@0: ucnv_openCCSID (int32_t codepage, michael@0: UConverterPlatform platform, michael@0: UErrorCode * err) michael@0: { michael@0: char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; michael@0: int32_t myNameLen; michael@0: michael@0: if (err == NULL || U_FAILURE (*err)) michael@0: return NULL; michael@0: michael@0: /* ucnv_copyPlatformString could return "ibm-" or "cp" */ michael@0: myNameLen = ucnv_copyPlatformString(myName, platform); michael@0: T_CString_integerToString(myName + myNameLen, codepage, 10); michael@0: michael@0: return ucnv_createConverter(NULL, myName, err); michael@0: } michael@0: michael@0: /* Creating a temporary stack-based object that can be used in one thread, michael@0: and created from a converter that is shared across threads. michael@0: */ michael@0: michael@0: U_CAPI UConverter* U_EXPORT2 michael@0: ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) michael@0: { michael@0: UConverter *localConverter, *allocatedConverter; michael@0: int32_t stackBufferSize; michael@0: int32_t bufferSizeNeeded; michael@0: char *stackBufferChars = (char *)stackBuffer; michael@0: UErrorCode cbErr; michael@0: UConverterToUnicodeArgs toUArgs = { michael@0: sizeof(UConverterToUnicodeArgs), michael@0: TRUE, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL michael@0: }; michael@0: UConverterFromUnicodeArgs fromUArgs = { michael@0: sizeof(UConverterFromUnicodeArgs), michael@0: TRUE, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL michael@0: }; michael@0: michael@0: UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); michael@0: michael@0: if (status == NULL || U_FAILURE(*status)){ michael@0: UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); michael@0: return NULL; michael@0: } michael@0: michael@0: if (cnv == NULL) { michael@0: *status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: UTRACE_EXIT_STATUS(*status); michael@0: return NULL; michael@0: } michael@0: michael@0: UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", michael@0: ucnv_getName(cnv, status), cnv, stackBuffer); michael@0: michael@0: if (cnv->sharedData->impl->safeClone != NULL) { michael@0: /* call the custom safeClone function for sizing */ michael@0: bufferSizeNeeded = 0; michael@0: cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); michael@0: if (U_FAILURE(*status)) { michael@0: UTRACE_EXIT_STATUS(*status); michael@0: return NULL; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: /* inherent sizing */ michael@0: bufferSizeNeeded = sizeof(UConverter); michael@0: } michael@0: michael@0: if (pBufferSize == NULL) { michael@0: stackBufferSize = 1; michael@0: pBufferSize = &stackBufferSize; michael@0: } else { michael@0: stackBufferSize = *pBufferSize; michael@0: if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ michael@0: *pBufferSize = bufferSizeNeeded; michael@0: UTRACE_EXIT_VALUE(bufferSizeNeeded); michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: michael@0: /* Pointers on 64-bit platforms need to be aligned michael@0: * on a 64-bit boundary in memory. michael@0: */ michael@0: if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { michael@0: int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); michael@0: if(stackBufferSize > offsetUp) { michael@0: stackBufferSize -= offsetUp; michael@0: stackBufferChars += offsetUp; michael@0: } else { michael@0: /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ michael@0: stackBufferSize = 1; michael@0: } michael@0: } michael@0: michael@0: stackBuffer = (void *)stackBufferChars; michael@0: michael@0: /* Now, see if we must allocate any memory */ michael@0: if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) michael@0: { michael@0: /* allocate one here...*/ michael@0: localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); michael@0: michael@0: if(localConverter == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: UTRACE_EXIT_STATUS(*status); michael@0: return NULL; michael@0: } michael@0: *status = U_SAFECLONE_ALLOCATED_WARNING; michael@0: michael@0: /* record the fact that memory was allocated */ michael@0: *pBufferSize = bufferSizeNeeded; michael@0: } else { michael@0: /* just use the stack buffer */ michael@0: localConverter = (UConverter*) stackBuffer; michael@0: allocatedConverter = NULL; michael@0: } michael@0: michael@0: uprv_memset(localConverter, 0, bufferSizeNeeded); michael@0: michael@0: /* Copy initial state */ michael@0: uprv_memcpy(localConverter, cnv, sizeof(UConverter)); michael@0: localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; michael@0: michael@0: /* copy the substitution string */ michael@0: if (cnv->subChars == (uint8_t *)cnv->subUChars) { michael@0: localConverter->subChars = (uint8_t *)localConverter->subUChars; michael@0: } else { michael@0: localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); michael@0: if (localConverter->subChars == NULL) { michael@0: uprv_free(allocatedConverter); michael@0: UTRACE_EXIT_STATUS(*status); michael@0: return NULL; michael@0: } michael@0: uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); michael@0: } michael@0: michael@0: /* now either call the safeclone fcn or not */ michael@0: if (cnv->sharedData->impl->safeClone != NULL) { michael@0: /* call the custom safeClone function */ michael@0: localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); michael@0: } michael@0: michael@0: if(localConverter==NULL || U_FAILURE(*status)) { michael@0: if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { michael@0: uprv_free(allocatedConverter->subChars); michael@0: } michael@0: uprv_free(allocatedConverter); michael@0: UTRACE_EXIT_STATUS(*status); michael@0: return NULL; michael@0: } michael@0: michael@0: /* increment refcount of shared data if needed */ michael@0: /* michael@0: Checking whether it's an algorithic converter is okay michael@0: in multithreaded applications because the value never changes. michael@0: Don't check referenceCounter for any other value. michael@0: */ michael@0: if (cnv->sharedData->referenceCounter != ~0) { michael@0: ucnv_incrementRefCount(cnv->sharedData); michael@0: } michael@0: michael@0: if(localConverter == (UConverter*)stackBuffer) { michael@0: /* we're using user provided data - set to not destroy */ michael@0: localConverter->isCopyLocal = TRUE; michael@0: } michael@0: michael@0: /* allow callback functions to handle any memory allocation */ michael@0: toUArgs.converter = fromUArgs.converter = localConverter; michael@0: cbErr = U_ZERO_ERROR; michael@0: cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); michael@0: cbErr = U_ZERO_ERROR; michael@0: cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); michael@0: michael@0: UTRACE_EXIT_PTR_STATUS(localConverter, *status); michael@0: return localConverter; michael@0: } michael@0: michael@0: michael@0: michael@0: /*Decreases the reference counter in the shared immutable section of the object michael@0: *and frees the mutable part*/ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_close (UConverter * converter) michael@0: { michael@0: UErrorCode errorCode = U_ZERO_ERROR; michael@0: michael@0: UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); michael@0: michael@0: if (converter == NULL) michael@0: { michael@0: UTRACE_EXIT(); michael@0: return; michael@0: } michael@0: michael@0: UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", michael@0: ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); michael@0: michael@0: /* In order to speed up the close, only call the callbacks when they have been changed. michael@0: This performance check will only work when the callbacks are set within a shared library michael@0: or from user code that statically links this code. */ michael@0: /* first, notify the callback functions that the converter is closed */ michael@0: if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { michael@0: UConverterToUnicodeArgs toUArgs = { michael@0: sizeof(UConverterToUnicodeArgs), michael@0: TRUE, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL michael@0: }; michael@0: michael@0: toUArgs.converter = converter; michael@0: errorCode = U_ZERO_ERROR; michael@0: converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); michael@0: } michael@0: if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { michael@0: UConverterFromUnicodeArgs fromUArgs = { michael@0: sizeof(UConverterFromUnicodeArgs), michael@0: TRUE, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL michael@0: }; michael@0: fromUArgs.converter = converter; michael@0: errorCode = U_ZERO_ERROR; michael@0: converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); michael@0: } michael@0: michael@0: if (converter->sharedData->impl->close != NULL) { michael@0: converter->sharedData->impl->close(converter); michael@0: } michael@0: michael@0: if (converter->subChars != (uint8_t *)converter->subUChars) { michael@0: uprv_free(converter->subChars); michael@0: } michael@0: michael@0: /* michael@0: Checking whether it's an algorithic converter is okay michael@0: in multithreaded applications because the value never changes. michael@0: Don't check referenceCounter for any other value. michael@0: */ michael@0: if (converter->sharedData->referenceCounter != ~0) { michael@0: ucnv_unloadSharedDataIfReady(converter->sharedData); michael@0: } michael@0: michael@0: if(!converter->isCopyLocal){ michael@0: uprv_free(converter); michael@0: } michael@0: michael@0: UTRACE_EXIT(); michael@0: } michael@0: michael@0: /*returns a single Name from the list, will return NULL if out of bounds michael@0: */ michael@0: U_CAPI const char* U_EXPORT2 michael@0: ucnv_getAvailableName (int32_t n) michael@0: { michael@0: if (0 <= n && n <= 0xffff) { michael@0: UErrorCode err = U_ZERO_ERROR; michael@0: const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); michael@0: if (U_SUCCESS(err)) { michael@0: return name; michael@0: } michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_countAvailable () michael@0: { michael@0: UErrorCode err = U_ZERO_ERROR; michael@0: return ucnv_bld_countAvailableConverters(&err); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getSubstChars (const UConverter * converter, michael@0: char *mySubChar, michael@0: int8_t * len, michael@0: UErrorCode * err) michael@0: { michael@0: if (U_FAILURE (*err)) michael@0: return; michael@0: michael@0: if (converter->subCharLen <= 0) { michael@0: /* Unicode string or empty string from ucnv_setSubstString(). */ michael@0: *len = 0; michael@0: return; michael@0: } michael@0: michael@0: if (*len < converter->subCharLen) /*not enough space in subChars */ michael@0: { michael@0: *err = U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return; michael@0: } michael@0: michael@0: uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ michael@0: *len = converter->subCharLen; /*store # of bytes copied to buffer */ michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_setSubstChars (UConverter * converter, michael@0: const char *mySubChar, michael@0: int8_t len, michael@0: UErrorCode * err) michael@0: { michael@0: if (U_FAILURE (*err)) michael@0: return; michael@0: michael@0: /*Makes sure that the subChar is within the codepages char length boundaries */ michael@0: if ((len > converter->sharedData->staticData->maxBytesPerChar) michael@0: || (len < converter->sharedData->staticData->minBytesPerChar)) michael@0: { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ michael@0: converter->subCharLen = len; /*sets the new len */ michael@0: michael@0: /* michael@0: * There is currently (2001Feb) no separate API to set/get subChar1. michael@0: * In order to always have subChar written after it is explicitly set, michael@0: * we set subChar1 to 0. michael@0: */ michael@0: converter->subChar1 = 0; michael@0: michael@0: return; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_setSubstString(UConverter *cnv, michael@0: const UChar *s, michael@0: int32_t length, michael@0: UErrorCode *err) { michael@0: UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; michael@0: char chars[UCNV_ERROR_BUFFER_LENGTH]; michael@0: michael@0: UConverter *clone; michael@0: uint8_t *subChars; michael@0: int32_t cloneSize, length8; michael@0: michael@0: /* Let the following functions check all arguments. */ michael@0: cloneSize = sizeof(cloneBuffer); michael@0: clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); michael@0: ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); michael@0: length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); michael@0: ucnv_close(clone); michael@0: if (U_FAILURE(*err)) { michael@0: return; michael@0: } michael@0: michael@0: if (cnv->sharedData->impl->writeSub == NULL michael@0: #if !UCONFIG_NO_LEGACY_CONVERSION michael@0: || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && michael@0: ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) michael@0: #endif michael@0: ) { michael@0: /* The converter is not stateful. Store the charset bytes as a fixed string. */ michael@0: subChars = (uint8_t *)chars; michael@0: } else { michael@0: /* michael@0: * The converter has a non-default writeSub() function, indicating michael@0: * that it is stateful. michael@0: * Store the Unicode string for on-the-fly conversion for correct michael@0: * state handling. michael@0: */ michael@0: if (length > UCNV_ERROR_BUFFER_LENGTH) { michael@0: /* michael@0: * Should not occur. The converter should output at least one byte michael@0: * per UChar, which means that ucnv_fromUChars() should catch all michael@0: * overflows. michael@0: */ michael@0: *err = U_BUFFER_OVERFLOW_ERROR; michael@0: return; michael@0: } michael@0: subChars = (uint8_t *)s; michael@0: if (length < 0) { michael@0: length = u_strlen(s); michael@0: } michael@0: length8 = length * U_SIZEOF_UCHAR; michael@0: } michael@0: michael@0: /* michael@0: * For storing the substitution string, select either the small buffer inside michael@0: * UConverter or allocate a subChars buffer. michael@0: */ michael@0: if (length8 > UCNV_MAX_SUBCHAR_LEN) { michael@0: /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ michael@0: if (cnv->subChars == (uint8_t *)cnv->subUChars) { michael@0: /* Allocate a new buffer for the string. */ michael@0: cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); michael@0: if (cnv->subChars == NULL) { michael@0: cnv->subChars = (uint8_t *)cnv->subUChars; michael@0: *err = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); michael@0: } michael@0: } michael@0: michael@0: /* Copy the substitution string into the UConverter or its subChars buffer. */ michael@0: if (length8 == 0) { michael@0: cnv->subCharLen = 0; michael@0: } else { michael@0: uprv_memcpy(cnv->subChars, subChars, length8); michael@0: if (subChars == (uint8_t *)chars) { michael@0: cnv->subCharLen = (int8_t)length8; michael@0: } else /* subChars == s */ { michael@0: cnv->subCharLen = (int8_t)-length; michael@0: } michael@0: } michael@0: michael@0: /* See comment in ucnv_setSubstChars(). */ michael@0: cnv->subChar1 = 0; michael@0: } michael@0: michael@0: /*resets the internal states of a converter michael@0: *goal : have the same behaviour than a freshly created converter michael@0: */ michael@0: static void _reset(UConverter *converter, UConverterResetChoice choice, michael@0: UBool callCallback) { michael@0: if(converter == NULL) { michael@0: return; michael@0: } michael@0: michael@0: if(callCallback) { michael@0: /* first, notify the callback functions that the converter is reset */ michael@0: UErrorCode errorCode; michael@0: michael@0: if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { michael@0: UConverterToUnicodeArgs toUArgs = { michael@0: sizeof(UConverterToUnicodeArgs), michael@0: TRUE, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL michael@0: }; michael@0: toUArgs.converter = converter; michael@0: errorCode = U_ZERO_ERROR; michael@0: converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); michael@0: } michael@0: if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { michael@0: UConverterFromUnicodeArgs fromUArgs = { michael@0: sizeof(UConverterFromUnicodeArgs), michael@0: TRUE, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL michael@0: }; michael@0: fromUArgs.converter = converter; michael@0: errorCode = U_ZERO_ERROR; michael@0: converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); michael@0: } michael@0: } michael@0: michael@0: /* now reset the converter itself */ michael@0: if(choice<=UCNV_RESET_TO_UNICODE) { michael@0: converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; michael@0: converter->mode = 0; michael@0: converter->toULength = 0; michael@0: converter->invalidCharLength = converter->UCharErrorBufferLength = 0; michael@0: converter->preToULength = 0; michael@0: } michael@0: if(choice!=UCNV_RESET_TO_UNICODE) { michael@0: converter->fromUnicodeStatus = 0; michael@0: converter->fromUChar32 = 0; michael@0: converter->invalidUCharLength = converter->charErrorBufferLength = 0; michael@0: converter->preFromUFirstCP = U_SENTINEL; michael@0: converter->preFromULength = 0; michael@0: } michael@0: michael@0: if (converter->sharedData->impl->reset != NULL) { michael@0: /* call the custom reset function */ michael@0: converter->sharedData->impl->reset(converter, choice); michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_reset(UConverter *converter) michael@0: { michael@0: _reset(converter, UCNV_RESET_BOTH, TRUE); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_resetToUnicode(UConverter *converter) michael@0: { michael@0: _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_resetFromUnicode(UConverter *converter) michael@0: { michael@0: _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); michael@0: } michael@0: michael@0: U_CAPI int8_t U_EXPORT2 michael@0: ucnv_getMaxCharSize (const UConverter * converter) michael@0: { michael@0: return converter->maxBytesPerUChar; michael@0: } michael@0: michael@0: michael@0: U_CAPI int8_t U_EXPORT2 michael@0: ucnv_getMinCharSize (const UConverter * converter) michael@0: { michael@0: return converter->sharedData->staticData->minBytesPerChar; michael@0: } michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: ucnv_getName (const UConverter * converter, UErrorCode * err) michael@0: michael@0: { michael@0: if (U_FAILURE (*err)) michael@0: return NULL; michael@0: if(converter->sharedData->impl->getName){ michael@0: const char* temp= converter->sharedData->impl->getName(converter); michael@0: if(temp) michael@0: return temp; michael@0: } michael@0: return converter->sharedData->staticData->name; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_getCCSID(const UConverter * converter, michael@0: UErrorCode * err) michael@0: { michael@0: int32_t ccsid; michael@0: if (U_FAILURE (*err)) michael@0: return -1; michael@0: michael@0: ccsid = converter->sharedData->staticData->codepage; michael@0: if (ccsid == 0) { michael@0: /* Rare case. This is for cases like gb18030, michael@0: which doesn't have an IBM canonical name, but does have an IBM alias. */ michael@0: const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); michael@0: if (U_SUCCESS(*err) && standardName) { michael@0: const char *ccsidStr = uprv_strchr(standardName, '-'); michael@0: if (ccsidStr) { michael@0: ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ michael@0: } michael@0: } michael@0: } michael@0: return ccsid; michael@0: } michael@0: michael@0: michael@0: U_CAPI UConverterPlatform U_EXPORT2 michael@0: ucnv_getPlatform (const UConverter * converter, michael@0: UErrorCode * err) michael@0: { michael@0: if (U_FAILURE (*err)) michael@0: return UCNV_UNKNOWN; michael@0: michael@0: return (UConverterPlatform)converter->sharedData->staticData->platform; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getToUCallBack (const UConverter * converter, michael@0: UConverterToUCallback *action, michael@0: const void **context) michael@0: { michael@0: *action = converter->fromCharErrorBehaviour; michael@0: *context = converter->toUContext; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getFromUCallBack (const UConverter * converter, michael@0: UConverterFromUCallback *action, michael@0: const void **context) michael@0: { michael@0: *action = converter->fromUCharErrorBehaviour; michael@0: *context = converter->fromUContext; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_setToUCallBack (UConverter * converter, michael@0: UConverterToUCallback newAction, michael@0: const void* newContext, michael@0: UConverterToUCallback *oldAction, michael@0: const void** oldContext, michael@0: UErrorCode * err) michael@0: { michael@0: if (U_FAILURE (*err)) michael@0: return; michael@0: if (oldAction) *oldAction = converter->fromCharErrorBehaviour; michael@0: converter->fromCharErrorBehaviour = newAction; michael@0: if (oldContext) *oldContext = converter->toUContext; michael@0: converter->toUContext = newContext; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_setFromUCallBack (UConverter * converter, michael@0: UConverterFromUCallback newAction, michael@0: const void* newContext, michael@0: UConverterFromUCallback *oldAction, michael@0: const void** oldContext, michael@0: UErrorCode * err) michael@0: { michael@0: if (U_FAILURE (*err)) michael@0: return; michael@0: if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; michael@0: converter->fromUCharErrorBehaviour = newAction; michael@0: if (oldContext) *oldContext = converter->fromUContext; michael@0: converter->fromUContext = newContext; michael@0: } michael@0: michael@0: static void michael@0: _updateOffsets(int32_t *offsets, int32_t length, michael@0: int32_t sourceIndex, int32_t errorInputLength) { michael@0: int32_t *limit; michael@0: int32_t delta, offset; michael@0: michael@0: if(sourceIndex>=0) { michael@0: /* michael@0: * adjust each offset by adding the previous sourceIndex michael@0: * minus the length of the input sequence that caused an michael@0: * error, if any michael@0: */ michael@0: delta=sourceIndex-errorInputLength; michael@0: } else { michael@0: /* michael@0: * set each offset to -1 because this conversion function michael@0: * does not handle offsets michael@0: */ michael@0: delta=-1; michael@0: } michael@0: michael@0: limit=offsets+length; michael@0: if(delta==0) { michael@0: /* most common case, nothing to do */ michael@0: } else if(delta>0) { michael@0: /* add the delta to each offset (but not if the offset is <0) */ michael@0: while(offsets=0) { michael@0: *offsets=offset+delta; michael@0: } michael@0: ++offsets; michael@0: } michael@0: } else /* delta<0 */ { michael@0: /* michael@0: * set each offset to -1 because this conversion function michael@0: * does not handle offsets michael@0: * or the error input sequence started in a previous buffer michael@0: */ michael@0: while(offsetsconverter; michael@0: s=pArgs->source; michael@0: t=pArgs->target; michael@0: offsets=pArgs->offsets; michael@0: michael@0: /* get the converter implementation function */ michael@0: sourceIndex=0; michael@0: if(offsets==NULL) { michael@0: fromUnicode=cnv->sharedData->impl->fromUnicode; michael@0: } else { michael@0: fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; michael@0: if(fromUnicode==NULL) { michael@0: /* there is no WithOffsets implementation */ michael@0: fromUnicode=cnv->sharedData->impl->fromUnicode; michael@0: /* we will write -1 for each offset */ michael@0: sourceIndex=-1; michael@0: } michael@0: } michael@0: michael@0: if(cnv->preFromULength>=0) { michael@0: /* normal mode */ michael@0: realSource=NULL; michael@0: michael@0: /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ michael@0: realSourceLimit=NULL; michael@0: realFlush=FALSE; michael@0: realSourceIndex=0; michael@0: } else { michael@0: /* michael@0: * Previous m:n conversion stored source units from a partial match michael@0: * and failed to consume all of them. michael@0: * We need to "replay" them from a temporary buffer and convert them first. michael@0: */ michael@0: realSource=pArgs->source; michael@0: realSourceLimit=pArgs->sourceLimit; michael@0: realFlush=pArgs->flush; michael@0: realSourceIndex=sourceIndex; michael@0: michael@0: uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); michael@0: pArgs->source=replay; michael@0: pArgs->sourceLimit=replay-cnv->preFromULength; michael@0: pArgs->flush=FALSE; michael@0: sourceIndex=-1; michael@0: michael@0: cnv->preFromULength=0; michael@0: } michael@0: michael@0: /* michael@0: * loop for conversion and error handling michael@0: * michael@0: * loop { michael@0: * convert michael@0: * loop { michael@0: * update offsets michael@0: * handle end of input michael@0: * handle errors/call callback michael@0: * } michael@0: * } michael@0: */ michael@0: for(;;) { michael@0: if(U_SUCCESS(*err)) { michael@0: /* convert */ michael@0: fromUnicode(pArgs, err); michael@0: michael@0: /* michael@0: * set a flag for whether the converter michael@0: * successfully processed the end of the input michael@0: * michael@0: * need not check cnv->preFromULength==0 because a replay (<0) will cause michael@0: * sflush && pArgs->source==pArgs->sourceLimit && michael@0: cnv->fromUChar32==0); michael@0: } else { michael@0: /* handle error from ucnv_convertEx() */ michael@0: converterSawEndOfInput=FALSE; michael@0: } michael@0: michael@0: /* no callback called yet for this iteration */ michael@0: calledCallback=FALSE; michael@0: michael@0: /* no sourceIndex adjustment for conversion, only for callback output */ michael@0: errorInputLength=0; michael@0: michael@0: /* michael@0: * loop for offsets and error handling michael@0: * michael@0: * iterates at most 3 times: michael@0: * 1. to clean up after the conversion function michael@0: * 2. after the callback michael@0: * 3. after the callback again if there was truncated input michael@0: */ michael@0: for(;;) { michael@0: /* update offsets if we write any */ michael@0: if(offsets!=NULL) { michael@0: int32_t length=(int32_t)(pArgs->target-t); michael@0: if(length>0) { michael@0: _updateOffsets(offsets, length, sourceIndex, errorInputLength); michael@0: michael@0: /* michael@0: * if a converter handles offsets and updates the offsets michael@0: * pointer at the end, then pArgs->offset should not change michael@0: * here; michael@0: * however, some converters do not handle offsets at all michael@0: * (sourceIndex<0) or may not update the offsets pointer michael@0: */ michael@0: pArgs->offsets=offsets+=length; michael@0: } michael@0: michael@0: if(sourceIndex>=0) { michael@0: sourceIndex+=(int32_t)(pArgs->source-s); michael@0: } michael@0: } michael@0: michael@0: if(cnv->preFromULength<0) { michael@0: /* michael@0: * switch the source to new replay units (cannot occur while replaying) michael@0: * after offset handling and before end-of-input and callback handling michael@0: */ michael@0: if(realSource==NULL) { michael@0: realSource=pArgs->source; michael@0: realSourceLimit=pArgs->sourceLimit; michael@0: realFlush=pArgs->flush; michael@0: realSourceIndex=sourceIndex; michael@0: michael@0: uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); michael@0: pArgs->source=replay; michael@0: pArgs->sourceLimit=replay-cnv->preFromULength; michael@0: pArgs->flush=FALSE; michael@0: if((sourceIndex+=cnv->preFromULength)<0) { michael@0: sourceIndex=-1; michael@0: } michael@0: michael@0: cnv->preFromULength=0; michael@0: } else { michael@0: /* see implementation note before _fromUnicodeWithCallback() */ michael@0: U_ASSERT(realSource==NULL); michael@0: *err=U_INTERNAL_PROGRAM_ERROR; michael@0: } michael@0: } michael@0: michael@0: /* update pointers */ michael@0: s=pArgs->source; michael@0: t=pArgs->target; michael@0: michael@0: if(U_SUCCESS(*err)) { michael@0: if(ssourceLimit) { michael@0: /* michael@0: * continue with the conversion loop while there is still input left michael@0: * (continue converting by breaking out of only the inner loop) michael@0: */ michael@0: break; michael@0: } else if(realSource!=NULL) { michael@0: /* switch back from replaying to the real source and continue */ michael@0: pArgs->source=realSource; michael@0: pArgs->sourceLimit=realSourceLimit; michael@0: pArgs->flush=realFlush; michael@0: sourceIndex=realSourceIndex; michael@0: michael@0: realSource=NULL; michael@0: break; michael@0: } else if(pArgs->flush && cnv->fromUChar32!=0) { michael@0: /* michael@0: * the entire input stream is consumed michael@0: * and there is a partial, truncated input sequence left michael@0: */ michael@0: michael@0: /* inject an error and continue with callback handling */ michael@0: *err=U_TRUNCATED_CHAR_FOUND; michael@0: calledCallback=FALSE; /* new error condition */ michael@0: } else { michael@0: /* input consumed */ michael@0: if(pArgs->flush) { michael@0: /* michael@0: * return to the conversion loop once more if the flush michael@0: * flag is set and the conversion function has not michael@0: * successfully processed the end of the input yet michael@0: * michael@0: * (continue converting by breaking out of only the inner loop) michael@0: */ michael@0: if(!converterSawEndOfInput) { michael@0: break; michael@0: } michael@0: michael@0: /* reset the converter without calling the callback function */ michael@0: _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); michael@0: } michael@0: michael@0: /* done successfully */ michael@0: return; michael@0: } michael@0: } michael@0: michael@0: /* U_FAILURE(*err) */ michael@0: { michael@0: UErrorCode e; michael@0: michael@0: if( calledCallback || michael@0: (e=*err)==U_BUFFER_OVERFLOW_ERROR || michael@0: (e!=U_INVALID_CHAR_FOUND && michael@0: e!=U_ILLEGAL_CHAR_FOUND && michael@0: e!=U_TRUNCATED_CHAR_FOUND) michael@0: ) { michael@0: /* michael@0: * the callback did not or cannot resolve the error: michael@0: * set output pointers and return michael@0: * michael@0: * the check for buffer overflow is redundant but it is michael@0: * a high-runner case and hopefully documents the intent michael@0: * well michael@0: * michael@0: * if we were replaying, then the replay buffer must be michael@0: * copied back into the UConverter michael@0: * and the real arguments must be restored michael@0: */ michael@0: if(realSource!=NULL) { michael@0: int32_t length; michael@0: michael@0: U_ASSERT(cnv->preFromULength==0); michael@0: michael@0: length=(int32_t)(pArgs->sourceLimit-pArgs->source); michael@0: if(length>0) { michael@0: uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR); michael@0: cnv->preFromULength=(int8_t)-length; michael@0: } michael@0: michael@0: pArgs->source=realSource; michael@0: pArgs->sourceLimit=realSourceLimit; michael@0: pArgs->flush=realFlush; michael@0: } michael@0: michael@0: return; michael@0: } michael@0: } michael@0: michael@0: /* callback handling */ michael@0: { michael@0: UChar32 codePoint; michael@0: michael@0: /* get and write the code point */ michael@0: codePoint=cnv->fromUChar32; michael@0: errorInputLength=0; michael@0: U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); michael@0: cnv->invalidUCharLength=(int8_t)errorInputLength; michael@0: michael@0: /* set the converter state to deal with the next character */ michael@0: cnv->fromUChar32=0; michael@0: michael@0: /* call the callback function */ michael@0: cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, michael@0: cnv->invalidUCharBuffer, errorInputLength, codePoint, michael@0: *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, michael@0: err); michael@0: } michael@0: michael@0: /* michael@0: * loop back to the offset handling michael@0: * michael@0: * this flag will indicate after offset handling michael@0: * that a callback was called; michael@0: * if the callback did not resolve the error, then we return michael@0: */ michael@0: calledCallback=TRUE; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Output the fromUnicode overflow buffer. michael@0: * Call this function if(cnv->charErrorBufferLength>0). michael@0: * @return TRUE if overflow michael@0: */ michael@0: static UBool michael@0: ucnv_outputOverflowFromUnicode(UConverter *cnv, michael@0: char **target, const char *targetLimit, michael@0: int32_t **pOffsets, michael@0: UErrorCode *err) { michael@0: int32_t *offsets; michael@0: char *overflow, *t; michael@0: int32_t i, length; michael@0: michael@0: t=*target; michael@0: if(pOffsets!=NULL) { michael@0: offsets=*pOffsets; michael@0: } else { michael@0: offsets=NULL; michael@0: } michael@0: michael@0: overflow=(char *)cnv->charErrorBuffer; michael@0: length=cnv->charErrorBufferLength; michael@0: i=0; michael@0: while(icharErrorBufferLength=(int8_t)j; michael@0: *target=t; michael@0: if(offsets!=NULL) { michael@0: *pOffsets=offsets; michael@0: } michael@0: *err=U_BUFFER_OVERFLOW_ERROR; michael@0: return TRUE; michael@0: } michael@0: michael@0: /* copy the overflow contents to the target */ michael@0: *t++=overflow[i++]; michael@0: if(offsets!=NULL) { michael@0: *offsets++=-1; /* no source index available for old output */ michael@0: } michael@0: } michael@0: michael@0: /* the overflow buffer is completely copied to the target */ michael@0: cnv->charErrorBufferLength=0; michael@0: *target=t; michael@0: if(offsets!=NULL) { michael@0: *pOffsets=offsets; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_fromUnicode(UConverter *cnv, michael@0: char **target, const char *targetLimit, michael@0: const UChar **source, const UChar *sourceLimit, michael@0: int32_t *offsets, michael@0: UBool flush, michael@0: UErrorCode *err) { michael@0: UConverterFromUnicodeArgs args; michael@0: const UChar *s; michael@0: char *t; michael@0: michael@0: /* check parameters */ michael@0: if(err==NULL || U_FAILURE(*err)) { michael@0: return; michael@0: } michael@0: michael@0: if(cnv==NULL || target==NULL || source==NULL) { michael@0: *err=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: s=*source; michael@0: t=*target; michael@0: michael@0: if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { michael@0: /* michael@0: Prevent code from going into an infinite loop in case we do hit this michael@0: limit. The limit pointer is expected to be on a UChar * boundary. michael@0: This also prevents the next argument check from failing. michael@0: */ michael@0: sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); michael@0: } michael@0: michael@0: /* michael@0: * All these conditions should never happen. michael@0: * michael@0: * 1) Make sure that the limits are >= to the address source or target michael@0: * michael@0: * 2) Make sure that the buffer sizes do not exceed the number range for michael@0: * int32_t because some functions use the size (in units or bytes) michael@0: * rather than comparing pointers, and because offsets are int32_t values. michael@0: * michael@0: * size_t is guaranteed to be unsigned and large enough for the job. michael@0: * michael@0: * Return with an error instead of adjusting the limits because we would michael@0: * not be able to maintain the semantics that either the source must be michael@0: * consumed or the target filled (unless an error occurs). michael@0: * An adjustment would be targetLimit=t+0x7fffffff; for example. michael@0: * michael@0: * 3) Make sure that the user didn't incorrectly cast a UChar * pointer michael@0: * to a char * pointer and provide an incomplete UChar code unit. michael@0: */ michael@0: if (sourceLimit(size_t)0x3fffffff && sourceLimit>s) || michael@0: ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || michael@0: (((const char *)sourceLimit-(const char *)s) & 1) != 0) michael@0: { michael@0: *err=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: /* output the target overflow buffer */ michael@0: if( cnv->charErrorBufferLength>0 && michael@0: ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) michael@0: ) { michael@0: /* U_BUFFER_OVERFLOW_ERROR */ michael@0: return; michael@0: } michael@0: /* *target may have moved, therefore stop using t */ michael@0: michael@0: if(!flush && s==sourceLimit && cnv->preFromULength>=0) { michael@0: /* the overflow buffer is emptied and there is no new input: we are done */ michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Do not simply return with a buffer overflow error if michael@0: * !flush && t==targetLimit michael@0: * because it is possible that the source will not generate any output. michael@0: * For example, the skip callback may be called; michael@0: * it does not output anything. michael@0: */ michael@0: michael@0: /* prepare the converter arguments */ michael@0: args.converter=cnv; michael@0: args.flush=flush; michael@0: args.offsets=offsets; michael@0: args.source=s; michael@0: args.sourceLimit=sourceLimit; michael@0: args.target=*target; michael@0: args.targetLimit=targetLimit; michael@0: args.size=sizeof(args); michael@0: michael@0: _fromUnicodeWithCallback(&args, err); michael@0: michael@0: *source=args.source; michael@0: *target=args.target; michael@0: } michael@0: michael@0: /* ucnv_toUnicode() --------------------------------------------------------- */ michael@0: michael@0: static void michael@0: _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { michael@0: UConverterToUnicode toUnicode; michael@0: UConverter *cnv; michael@0: const char *s; michael@0: UChar *t; michael@0: int32_t *offsets; michael@0: int32_t sourceIndex; michael@0: int32_t errorInputLength; michael@0: UBool converterSawEndOfInput, calledCallback; michael@0: michael@0: /* variables for m:n conversion */ michael@0: char replay[UCNV_EXT_MAX_BYTES]; michael@0: const char *realSource, *realSourceLimit; michael@0: int32_t realSourceIndex; michael@0: UBool realFlush; michael@0: michael@0: cnv=pArgs->converter; michael@0: s=pArgs->source; michael@0: t=pArgs->target; michael@0: offsets=pArgs->offsets; michael@0: michael@0: /* get the converter implementation function */ michael@0: sourceIndex=0; michael@0: if(offsets==NULL) { michael@0: toUnicode=cnv->sharedData->impl->toUnicode; michael@0: } else { michael@0: toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; michael@0: if(toUnicode==NULL) { michael@0: /* there is no WithOffsets implementation */ michael@0: toUnicode=cnv->sharedData->impl->toUnicode; michael@0: /* we will write -1 for each offset */ michael@0: sourceIndex=-1; michael@0: } michael@0: } michael@0: michael@0: if(cnv->preToULength>=0) { michael@0: /* normal mode */ michael@0: realSource=NULL; michael@0: michael@0: /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ michael@0: realSourceLimit=NULL; michael@0: realFlush=FALSE; michael@0: realSourceIndex=0; michael@0: } else { michael@0: /* michael@0: * Previous m:n conversion stored source units from a partial match michael@0: * and failed to consume all of them. michael@0: * We need to "replay" them from a temporary buffer and convert them first. michael@0: */ michael@0: realSource=pArgs->source; michael@0: realSourceLimit=pArgs->sourceLimit; michael@0: realFlush=pArgs->flush; michael@0: realSourceIndex=sourceIndex; michael@0: michael@0: uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); michael@0: pArgs->source=replay; michael@0: pArgs->sourceLimit=replay-cnv->preToULength; michael@0: pArgs->flush=FALSE; michael@0: sourceIndex=-1; michael@0: michael@0: cnv->preToULength=0; michael@0: } michael@0: michael@0: /* michael@0: * loop for conversion and error handling michael@0: * michael@0: * loop { michael@0: * convert michael@0: * loop { michael@0: * update offsets michael@0: * handle end of input michael@0: * handle errors/call callback michael@0: * } michael@0: * } michael@0: */ michael@0: for(;;) { michael@0: if(U_SUCCESS(*err)) { michael@0: /* convert */ michael@0: toUnicode(pArgs, err); michael@0: michael@0: /* michael@0: * set a flag for whether the converter michael@0: * successfully processed the end of the input michael@0: * michael@0: * need not check cnv->preToULength==0 because a replay (<0) will cause michael@0: * sflush && pArgs->source==pArgs->sourceLimit && michael@0: cnv->toULength==0); michael@0: } else { michael@0: /* handle error from getNextUChar() or ucnv_convertEx() */ michael@0: converterSawEndOfInput=FALSE; michael@0: } michael@0: michael@0: /* no callback called yet for this iteration */ michael@0: calledCallback=FALSE; michael@0: michael@0: /* no sourceIndex adjustment for conversion, only for callback output */ michael@0: errorInputLength=0; michael@0: michael@0: /* michael@0: * loop for offsets and error handling michael@0: * michael@0: * iterates at most 3 times: michael@0: * 1. to clean up after the conversion function michael@0: * 2. after the callback michael@0: * 3. after the callback again if there was truncated input michael@0: */ michael@0: for(;;) { michael@0: /* update offsets if we write any */ michael@0: if(offsets!=NULL) { michael@0: int32_t length=(int32_t)(pArgs->target-t); michael@0: if(length>0) { michael@0: _updateOffsets(offsets, length, sourceIndex, errorInputLength); michael@0: michael@0: /* michael@0: * if a converter handles offsets and updates the offsets michael@0: * pointer at the end, then pArgs->offset should not change michael@0: * here; michael@0: * however, some converters do not handle offsets at all michael@0: * (sourceIndex<0) or may not update the offsets pointer michael@0: */ michael@0: pArgs->offsets=offsets+=length; michael@0: } michael@0: michael@0: if(sourceIndex>=0) { michael@0: sourceIndex+=(int32_t)(pArgs->source-s); michael@0: } michael@0: } michael@0: michael@0: if(cnv->preToULength<0) { michael@0: /* michael@0: * switch the source to new replay units (cannot occur while replaying) michael@0: * after offset handling and before end-of-input and callback handling michael@0: */ michael@0: if(realSource==NULL) { michael@0: realSource=pArgs->source; michael@0: realSourceLimit=pArgs->sourceLimit; michael@0: realFlush=pArgs->flush; michael@0: realSourceIndex=sourceIndex; michael@0: michael@0: uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); michael@0: pArgs->source=replay; michael@0: pArgs->sourceLimit=replay-cnv->preToULength; michael@0: pArgs->flush=FALSE; michael@0: if((sourceIndex+=cnv->preToULength)<0) { michael@0: sourceIndex=-1; michael@0: } michael@0: michael@0: cnv->preToULength=0; michael@0: } else { michael@0: /* see implementation note before _fromUnicodeWithCallback() */ michael@0: U_ASSERT(realSource==NULL); michael@0: *err=U_INTERNAL_PROGRAM_ERROR; michael@0: } michael@0: } michael@0: michael@0: /* update pointers */ michael@0: s=pArgs->source; michael@0: t=pArgs->target; michael@0: michael@0: if(U_SUCCESS(*err)) { michael@0: if(ssourceLimit) { michael@0: /* michael@0: * continue with the conversion loop while there is still input left michael@0: * (continue converting by breaking out of only the inner loop) michael@0: */ michael@0: break; michael@0: } else if(realSource!=NULL) { michael@0: /* switch back from replaying to the real source and continue */ michael@0: pArgs->source=realSource; michael@0: pArgs->sourceLimit=realSourceLimit; michael@0: pArgs->flush=realFlush; michael@0: sourceIndex=realSourceIndex; michael@0: michael@0: realSource=NULL; michael@0: break; michael@0: } else if(pArgs->flush && cnv->toULength>0) { michael@0: /* michael@0: * the entire input stream is consumed michael@0: * and there is a partial, truncated input sequence left michael@0: */ michael@0: michael@0: /* inject an error and continue with callback handling */ michael@0: *err=U_TRUNCATED_CHAR_FOUND; michael@0: calledCallback=FALSE; /* new error condition */ michael@0: } else { michael@0: /* input consumed */ michael@0: if(pArgs->flush) { michael@0: /* michael@0: * return to the conversion loop once more if the flush michael@0: * flag is set and the conversion function has not michael@0: * successfully processed the end of the input yet michael@0: * michael@0: * (continue converting by breaking out of only the inner loop) michael@0: */ michael@0: if(!converterSawEndOfInput) { michael@0: break; michael@0: } michael@0: michael@0: /* reset the converter without calling the callback function */ michael@0: _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); michael@0: } michael@0: michael@0: /* done successfully */ michael@0: return; michael@0: } michael@0: } michael@0: michael@0: /* U_FAILURE(*err) */ michael@0: { michael@0: UErrorCode e; michael@0: michael@0: if( calledCallback || michael@0: (e=*err)==U_BUFFER_OVERFLOW_ERROR || michael@0: (e!=U_INVALID_CHAR_FOUND && michael@0: e!=U_ILLEGAL_CHAR_FOUND && michael@0: e!=U_TRUNCATED_CHAR_FOUND && michael@0: e!=U_ILLEGAL_ESCAPE_SEQUENCE && michael@0: e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) michael@0: ) { michael@0: /* michael@0: * the callback did not or cannot resolve the error: michael@0: * set output pointers and return michael@0: * michael@0: * the check for buffer overflow is redundant but it is michael@0: * a high-runner case and hopefully documents the intent michael@0: * well michael@0: * michael@0: * if we were replaying, then the replay buffer must be michael@0: * copied back into the UConverter michael@0: * and the real arguments must be restored michael@0: */ michael@0: if(realSource!=NULL) { michael@0: int32_t length; michael@0: michael@0: U_ASSERT(cnv->preToULength==0); michael@0: michael@0: length=(int32_t)(pArgs->sourceLimit-pArgs->source); michael@0: if(length>0) { michael@0: uprv_memcpy(cnv->preToU, pArgs->source, length); michael@0: cnv->preToULength=(int8_t)-length; michael@0: } michael@0: michael@0: pArgs->source=realSource; michael@0: pArgs->sourceLimit=realSourceLimit; michael@0: pArgs->flush=realFlush; michael@0: } michael@0: michael@0: return; michael@0: } michael@0: } michael@0: michael@0: /* copy toUBytes[] to invalidCharBuffer[] */ michael@0: errorInputLength=cnv->invalidCharLength=cnv->toULength; michael@0: if(errorInputLength>0) { michael@0: uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); michael@0: } michael@0: michael@0: /* set the converter state to deal with the next character */ michael@0: cnv->toULength=0; michael@0: michael@0: /* call the callback function */ michael@0: if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { michael@0: cnv->toUCallbackReason = UCNV_UNASSIGNED; michael@0: } michael@0: cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, michael@0: cnv->invalidCharBuffer, errorInputLength, michael@0: cnv->toUCallbackReason, michael@0: err); michael@0: cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ michael@0: michael@0: /* michael@0: * loop back to the offset handling michael@0: * michael@0: * this flag will indicate after offset handling michael@0: * that a callback was called; michael@0: * if the callback did not resolve the error, then we return michael@0: */ michael@0: calledCallback=TRUE; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Output the toUnicode overflow buffer. michael@0: * Call this function if(cnv->UCharErrorBufferLength>0). michael@0: * @return TRUE if overflow michael@0: */ michael@0: static UBool michael@0: ucnv_outputOverflowToUnicode(UConverter *cnv, michael@0: UChar **target, const UChar *targetLimit, michael@0: int32_t **pOffsets, michael@0: UErrorCode *err) { michael@0: int32_t *offsets; michael@0: UChar *overflow, *t; michael@0: int32_t i, length; michael@0: michael@0: t=*target; michael@0: if(pOffsets!=NULL) { michael@0: offsets=*pOffsets; michael@0: } else { michael@0: offsets=NULL; michael@0: } michael@0: michael@0: overflow=cnv->UCharErrorBuffer; michael@0: length=cnv->UCharErrorBufferLength; michael@0: i=0; michael@0: while(iUCharErrorBufferLength=(int8_t)j; michael@0: *target=t; michael@0: if(offsets!=NULL) { michael@0: *pOffsets=offsets; michael@0: } michael@0: *err=U_BUFFER_OVERFLOW_ERROR; michael@0: return TRUE; michael@0: } michael@0: michael@0: /* copy the overflow contents to the target */ michael@0: *t++=overflow[i++]; michael@0: if(offsets!=NULL) { michael@0: *offsets++=-1; /* no source index available for old output */ michael@0: } michael@0: } michael@0: michael@0: /* the overflow buffer is completely copied to the target */ michael@0: cnv->UCharErrorBufferLength=0; michael@0: *target=t; michael@0: if(offsets!=NULL) { michael@0: *pOffsets=offsets; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_toUnicode(UConverter *cnv, michael@0: UChar **target, const UChar *targetLimit, michael@0: const char **source, const char *sourceLimit, michael@0: int32_t *offsets, michael@0: UBool flush, michael@0: UErrorCode *err) { michael@0: UConverterToUnicodeArgs args; michael@0: const char *s; michael@0: UChar *t; michael@0: michael@0: /* check parameters */ michael@0: if(err==NULL || U_FAILURE(*err)) { michael@0: return; michael@0: } michael@0: michael@0: if(cnv==NULL || target==NULL || source==NULL) { michael@0: *err=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: s=*source; michael@0: t=*target; michael@0: michael@0: if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { michael@0: /* michael@0: Prevent code from going into an infinite loop in case we do hit this michael@0: limit. The limit pointer is expected to be on a UChar * boundary. michael@0: This also prevents the next argument check from failing. michael@0: */ michael@0: targetLimit = (const UChar *)(((const char *)targetLimit) - 1); michael@0: } michael@0: michael@0: /* michael@0: * All these conditions should never happen. michael@0: * michael@0: * 1) Make sure that the limits are >= to the address source or target michael@0: * michael@0: * 2) Make sure that the buffer sizes do not exceed the number range for michael@0: * int32_t because some functions use the size (in units or bytes) michael@0: * rather than comparing pointers, and because offsets are int32_t values. michael@0: * michael@0: * size_t is guaranteed to be unsigned and large enough for the job. michael@0: * michael@0: * Return with an error instead of adjusting the limits because we would michael@0: * not be able to maintain the semantics that either the source must be michael@0: * consumed or the target filled (unless an error occurs). michael@0: * An adjustment would be sourceLimit=t+0x7fffffff; for example. michael@0: * michael@0: * 3) Make sure that the user didn't incorrectly cast a UChar * pointer michael@0: * to a char * pointer and provide an incomplete UChar code unit. michael@0: */ michael@0: if (sourceLimit(size_t)0x7fffffff && sourceLimit>s) || michael@0: ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || michael@0: (((const char *)targetLimit-(const char *)t) & 1) != 0 michael@0: ) { michael@0: *err=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: /* output the target overflow buffer */ michael@0: if( cnv->UCharErrorBufferLength>0 && michael@0: ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) michael@0: ) { michael@0: /* U_BUFFER_OVERFLOW_ERROR */ michael@0: return; michael@0: } michael@0: /* *target may have moved, therefore stop using t */ michael@0: michael@0: if(!flush && s==sourceLimit && cnv->preToULength>=0) { michael@0: /* the overflow buffer is emptied and there is no new input: we are done */ michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Do not simply return with a buffer overflow error if michael@0: * !flush && t==targetLimit michael@0: * because it is possible that the source will not generate any output. michael@0: * For example, the skip callback may be called; michael@0: * it does not output anything. michael@0: */ michael@0: michael@0: /* prepare the converter arguments */ michael@0: args.converter=cnv; michael@0: args.flush=flush; michael@0: args.offsets=offsets; michael@0: args.source=s; michael@0: args.sourceLimit=sourceLimit; michael@0: args.target=*target; michael@0: args.targetLimit=targetLimit; michael@0: args.size=sizeof(args); michael@0: michael@0: _toUnicodeWithCallback(&args, err); michael@0: michael@0: *source=args.source; michael@0: *target=args.target; michael@0: } michael@0: michael@0: /* ucnv_to/fromUChars() ----------------------------------------------------- */ michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_fromUChars(UConverter *cnv, michael@0: char *dest, int32_t destCapacity, michael@0: const UChar *src, int32_t srcLength, michael@0: UErrorCode *pErrorCode) { michael@0: const UChar *srcLimit; michael@0: char *originalDest, *destLimit; michael@0: int32_t destLength; michael@0: michael@0: /* check arguments */ michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: if( cnv==NULL || michael@0: destCapacity<0 || (destCapacity>0 && dest==NULL) || michael@0: srcLength<-1 || (srcLength!=0 && src==NULL) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* initialize */ michael@0: ucnv_resetFromUnicode(cnv); michael@0: originalDest=dest; michael@0: if(srcLength==-1) { michael@0: srcLength=u_strlen(src); michael@0: } michael@0: if(srcLength>0) { michael@0: srcLimit=src+srcLength; michael@0: destLimit=dest+destCapacity; michael@0: michael@0: /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ michael@0: if(destLimit0 && dest==NULL) || michael@0: srcLength<-1 || (srcLength!=0 && src==NULL)) michael@0: { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* initialize */ michael@0: ucnv_resetToUnicode(cnv); michael@0: originalDest=dest; michael@0: if(srcLength==-1) { michael@0: srcLength=(int32_t)uprv_strlen(src); michael@0: } michael@0: if(srcLength>0) { michael@0: srcLimit=src+srcLength; michael@0: destLimit=dest+destCapacity; michael@0: michael@0: /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ michael@0: if(destLimit(size_t)0x7fffffff && sourceLimit>s)) { michael@0: *err=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0xffff; michael@0: } michael@0: michael@0: c=U_SENTINEL; michael@0: michael@0: /* flush the target overflow buffer */ michael@0: if(cnv->UCharErrorBufferLength>0) { michael@0: UChar *overflow; michael@0: michael@0: overflow=cnv->UCharErrorBuffer; michael@0: i=0; michael@0: length=cnv->UCharErrorBufferLength; michael@0: U16_NEXT(overflow, i, length, c); michael@0: michael@0: /* move the remaining overflow contents up to the beginning */ michael@0: if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { michael@0: uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, michael@0: cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); michael@0: } michael@0: michael@0: if(!U16_IS_LEAD(c) || itoULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { michael@0: c=cnv->sharedData->impl->getNextUChar(&args, err); michael@0: *source=s=args.source; michael@0: if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { michael@0: /* reset the converter without calling the callback function */ michael@0: _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); michael@0: return 0xffff; /* no output */ michael@0: } else if(U_SUCCESS(*err) && c>=0) { michael@0: return c; michael@0: /* michael@0: * else fall through to use _toUnicode() because michael@0: * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all michael@0: * U_FAILURE: call _toUnicode() for callback handling (do not output c) michael@0: */ michael@0: } michael@0: } michael@0: michael@0: /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ michael@0: _toUnicodeWithCallback(&args, err); michael@0: michael@0: if(*err==U_BUFFER_OVERFLOW_ERROR) { michael@0: *err=U_ZERO_ERROR; michael@0: } michael@0: michael@0: i=0; michael@0: length=(int32_t)(args.target-buffer); michael@0: } else { michael@0: /* write the lead surrogate from the overflow buffer */ michael@0: buffer[0]=(UChar)c; michael@0: args.target=buffer+1; michael@0: i=0; michael@0: length=1; michael@0: } michael@0: michael@0: /* buffer contents starts at i and ends before length */ michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: c=0xffff; /* no output */ michael@0: } else if(length==0) { michael@0: /* no input or only state changes */ michael@0: *err=U_INDEX_OUTOFBOUNDS_ERROR; michael@0: /* no need to reset explicitly because _toUnicodeWithCallback() did it */ michael@0: c=0xffff; /* no output */ michael@0: } else { michael@0: c=buffer[0]; michael@0: i=1; michael@0: if(!U16_IS_LEAD(c)) { michael@0: /* consume c=buffer[0], done */ michael@0: } else { michael@0: /* got a lead surrogate, see if a trail surrogate follows */ michael@0: UChar c2; michael@0: michael@0: if(cnv->UCharErrorBufferLength>0) { michael@0: /* got overflow output from the conversion */ michael@0: if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { michael@0: /* got a trail surrogate, too */ michael@0: c=U16_GET_SUPPLEMENTARY(c, c2); michael@0: michael@0: /* move the remaining overflow contents up to the beginning */ michael@0: if((--cnv->UCharErrorBufferLength)>0) { michael@0: uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, michael@0: cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); michael@0: } michael@0: } else { michael@0: /* c is an unpaired lead surrogate, just return it */ michael@0: } michael@0: } else if(args.sourceUCharErrorBufferLength)>0) { michael@0: uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, michael@0: length*U_SIZEOF_UCHAR); michael@0: } michael@0: cnv->UCharErrorBufferLength=(int8_t)(length+delta); michael@0: michael@0: cnv->UCharErrorBuffer[0]=buffer[i++]; michael@0: if(delta>1) { michael@0: cnv->UCharErrorBuffer[1]=buffer[i]; michael@0: } michael@0: } michael@0: michael@0: *source=args.source; michael@0: return c; michael@0: } michael@0: michael@0: /* ucnv_convert() and siblings ---------------------------------------------- */ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, michael@0: char **target, const char *targetLimit, michael@0: const char **source, const char *sourceLimit, michael@0: UChar *pivotStart, UChar **pivotSource, michael@0: UChar **pivotTarget, const UChar *pivotLimit, michael@0: UBool reset, UBool flush, michael@0: UErrorCode *pErrorCode) { michael@0: UChar pivotBuffer[CHUNK_SIZE]; michael@0: const UChar *myPivotSource; michael@0: UChar *myPivotTarget; michael@0: const char *s; michael@0: char *t; michael@0: michael@0: UConverterToUnicodeArgs toUArgs; michael@0: UConverterFromUnicodeArgs fromUArgs; michael@0: UConverterConvert convert; michael@0: michael@0: /* error checking */ michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } michael@0: michael@0: if( targetCnv==NULL || sourceCnv==NULL || michael@0: source==NULL || *source==NULL || michael@0: target==NULL || *target==NULL || targetLimit==NULL michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: s=*source; michael@0: t=*target; michael@0: if((sourceLimit!=NULL && sourceLimit(size_t)0x7fffffff && sourceLimit>s)) || michael@0: ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: if(pivotStart==NULL) { michael@0: if(!flush) { michael@0: /* streaming conversion requires an explicit pivot buffer */ michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: /* use the stack pivot buffer */ michael@0: myPivotSource=myPivotTarget=pivotStart=pivotBuffer; michael@0: pivotSource=(UChar **)&myPivotSource; michael@0: pivotTarget=&myPivotTarget; michael@0: pivotLimit=pivotBuffer+CHUNK_SIZE; michael@0: } else if( pivotStart>=pivotLimit || michael@0: pivotSource==NULL || *pivotSource==NULL || michael@0: pivotTarget==NULL || *pivotTarget==NULL || michael@0: pivotLimit==NULL michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: if(sourceLimit==NULL) { michael@0: /* get limit of single-byte-NUL-terminated source string */ michael@0: sourceLimit=uprv_strchr(*source, 0); michael@0: } michael@0: michael@0: if(reset) { michael@0: ucnv_resetToUnicode(sourceCnv); michael@0: ucnv_resetFromUnicode(targetCnv); michael@0: *pivotSource=*pivotTarget=pivotStart; michael@0: } else if(targetCnv->charErrorBufferLength>0) { michael@0: /* output the targetCnv overflow buffer */ michael@0: if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { michael@0: /* U_BUFFER_OVERFLOW_ERROR */ michael@0: return; michael@0: } michael@0: /* *target has moved, therefore stop using t */ michael@0: michael@0: if( !flush && michael@0: targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && michael@0: sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit michael@0: ) { michael@0: /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ michael@0: return; michael@0: } michael@0: } michael@0: michael@0: /* Is direct-UTF-8 conversion available? */ michael@0: if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && michael@0: targetCnv->sharedData->impl->fromUTF8!=NULL michael@0: ) { michael@0: convert=targetCnv->sharedData->impl->fromUTF8; michael@0: } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && michael@0: sourceCnv->sharedData->impl->toUTF8!=NULL michael@0: ) { michael@0: convert=sourceCnv->sharedData->impl->toUTF8; michael@0: } else { michael@0: convert=NULL; michael@0: } michael@0: michael@0: /* michael@0: * If direct-UTF-8 conversion is available, then we use a smaller michael@0: * pivot buffer for error handling and partial matches michael@0: * so that we quickly return to direct conversion. michael@0: * michael@0: * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. michael@0: * michael@0: * We could reduce the pivot buffer size further, at the cost of michael@0: * buffer overflows from callbacks. michael@0: * The pivot buffer should not be smaller than the maximum number of michael@0: * fromUnicode extension table input UChars michael@0: * (for m:n conversion, see michael@0: * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) michael@0: * or 2 for surrogate pairs. michael@0: * michael@0: * Too small a buffer can cause thrashing between pivoting and direct michael@0: * conversion, with function call overhead outweighing the benefits michael@0: * of direct conversion. michael@0: */ michael@0: if(convert!=NULL && (pivotLimit-pivotStart)>32) { michael@0: pivotLimit=pivotStart+32; michael@0: } michael@0: michael@0: /* prepare the converter arguments */ michael@0: fromUArgs.converter=targetCnv; michael@0: fromUArgs.flush=FALSE; michael@0: fromUArgs.offsets=NULL; michael@0: fromUArgs.target=*target; michael@0: fromUArgs.targetLimit=targetLimit; michael@0: fromUArgs.size=sizeof(fromUArgs); michael@0: michael@0: toUArgs.converter=sourceCnv; michael@0: toUArgs.flush=flush; michael@0: toUArgs.offsets=NULL; michael@0: toUArgs.source=s; michael@0: toUArgs.sourceLimit=sourceLimit; michael@0: toUArgs.targetLimit=pivotLimit; michael@0: toUArgs.size=sizeof(toUArgs); michael@0: michael@0: /* michael@0: * TODO: Consider separating this function into two functions, michael@0: * extracting exactly the conversion loop, michael@0: * for readability and to reduce the set of visible variables. michael@0: * michael@0: * Otherwise stop using s and t from here on. michael@0: */ michael@0: s=t=NULL; michael@0: michael@0: /* michael@0: * conversion loop michael@0: * michael@0: * The sequence of steps in the loop may appear backward, michael@0: * but the principle is simple: michael@0: * In the chain of michael@0: * source - sourceCnv overflow - pivot - targetCnv overflow - target michael@0: * empty out later buffers before refilling them from earlier ones. michael@0: * michael@0: * The targetCnv overflow buffer is flushed out only once before the loop. michael@0: */ michael@0: for(;;) { michael@0: /* michael@0: * if(pivot not empty or error or replay or flush fromUnicode) { michael@0: * fromUnicode(pivot -> target); michael@0: * } michael@0: * michael@0: * For pivoting conversion; and for direct conversion for michael@0: * error callback handling and flushing the replay buffer. michael@0: */ michael@0: if( *pivotSource<*pivotTarget || michael@0: U_FAILURE(*pErrorCode) || michael@0: targetCnv->preFromULength<0 || michael@0: fromUArgs.flush michael@0: ) { michael@0: fromUArgs.source=*pivotSource; michael@0: fromUArgs.sourceLimit=*pivotTarget; michael@0: _fromUnicodeWithCallback(&fromUArgs, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: /* target overflow, or conversion error */ michael@0: *pivotSource=(UChar *)fromUArgs.source; michael@0: break; michael@0: } michael@0: michael@0: /* michael@0: * _fromUnicodeWithCallback() must have consumed the pivot contents michael@0: * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() michael@0: */ michael@0: } michael@0: michael@0: /* The pivot buffer is empty; reset it so we start at pivotStart. */ michael@0: *pivotSource=*pivotTarget=pivotStart; michael@0: michael@0: /* michael@0: * if(sourceCnv overflow buffer not empty) { michael@0: * move(sourceCnv overflow buffer -> pivot); michael@0: * continue; michael@0: * } michael@0: */ michael@0: /* output the sourceCnv overflow buffer */ michael@0: if(sourceCnv->UCharErrorBufferLength>0) { michael@0: if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { michael@0: /* U_BUFFER_OVERFLOW_ERROR */ michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: } michael@0: continue; michael@0: } michael@0: michael@0: /* michael@0: * check for end of input and break if done michael@0: * michael@0: * Checking both flush and fromUArgs.flush ensures that the converters michael@0: * have been called with the flush flag set if the ucnv_convertEx() michael@0: * caller set it. michael@0: */ michael@0: if( toUArgs.source==sourceLimit && michael@0: sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && michael@0: (!flush || fromUArgs.flush) michael@0: ) { michael@0: /* done successfully */ michael@0: break; michael@0: } michael@0: michael@0: /* michael@0: * use direct conversion if available michael@0: * but not if continuing a partial match michael@0: * or flushing the toUnicode replay buffer michael@0: */ michael@0: if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { michael@0: if(*pErrorCode==U_USING_DEFAULT_WARNING) { michael@0: /* remove a warning that may be set by this function */ michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: } michael@0: convert(&fromUArgs, &toUArgs, pErrorCode); michael@0: if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { michael@0: break; michael@0: } else if(U_FAILURE(*pErrorCode)) { michael@0: if(sourceCnv->toULength>0) { michael@0: /* michael@0: * Fall through to calling _toUnicodeWithCallback() michael@0: * for callback handling. michael@0: * michael@0: * The pivot buffer will be reset with michael@0: * *pivotSource=*pivotTarget=pivotStart; michael@0: * which indicates a toUnicode error to the caller michael@0: * (*pivotSource==pivotStart shows no pivot UChars consumed). michael@0: */ michael@0: } else { michael@0: /* michael@0: * Indicate a fromUnicode error to the caller michael@0: * (*pivotSource>pivotStart shows some pivot UChars consumed). michael@0: */ michael@0: *pivotSource=*pivotTarget=pivotStart+1; michael@0: /* michael@0: * Loop around to calling _fromUnicodeWithCallbacks() michael@0: * for callback handling. michael@0: */ michael@0: continue; michael@0: } michael@0: } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { michael@0: /* michael@0: * No error, but the implementation requested to temporarily michael@0: * fall back to pivoting. michael@0: */ michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: /* michael@0: * The following else branches are almost identical to the end-of-input michael@0: * handling in _toUnicodeWithCallback(). michael@0: * Avoid calling it just for the end of input. michael@0: */ michael@0: } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ michael@0: /* michael@0: * the entire input stream is consumed michael@0: * and there is a partial, truncated input sequence left michael@0: */ michael@0: michael@0: /* inject an error and continue with callback handling */ michael@0: *pErrorCode=U_TRUNCATED_CHAR_FOUND; michael@0: } else { michael@0: /* input consumed */ michael@0: if(flush) { michael@0: /* reset the converters without calling the callback functions */ michael@0: _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); michael@0: _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); michael@0: } michael@0: michael@0: /* done successfully */ michael@0: break; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * toUnicode(source -> pivot); michael@0: * michael@0: * For pivoting conversion; and for direct conversion for michael@0: * error callback handling, continuing partial matches michael@0: * and flushing the replay buffer. michael@0: * michael@0: * The pivot buffer is empty and reset. michael@0: */ michael@0: toUArgs.target=pivotStart; /* ==*pivotTarget */ michael@0: /* toUArgs.targetLimit=pivotLimit; already set before the loop */ michael@0: _toUnicodeWithCallback(&toUArgs, pErrorCode); michael@0: *pivotTarget=toUArgs.target; michael@0: if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { michael@0: /* pivot overflow: continue with the conversion loop */ michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { michael@0: /* conversion error, or there was nothing left to convert */ michael@0: break; michael@0: } michael@0: /* michael@0: * else: michael@0: * _toUnicodeWithCallback() wrote into the pivot buffer, michael@0: * continue with fromUnicode conversion. michael@0: * michael@0: * Set the fromUnicode flush flag if we flush and if toUnicode has michael@0: * processed the end of the input. michael@0: */ michael@0: if( flush && toUArgs.source==sourceLimit && michael@0: sourceCnv->preToULength>=0 && michael@0: sourceCnv->UCharErrorBufferLength==0 michael@0: ) { michael@0: fromUArgs.flush=TRUE; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * The conversion loop is exited when one of the following is true: michael@0: * - the entire source text has been converted successfully to the target buffer michael@0: * - a target buffer overflow occurred michael@0: * - a conversion error occurred michael@0: */ michael@0: michael@0: *source=toUArgs.source; michael@0: *target=fromUArgs.target; michael@0: michael@0: /* terminate the target buffer if possible */ michael@0: if(flush && U_SUCCESS(*pErrorCode)) { michael@0: if(*target!=targetLimit) { michael@0: **target=0; michael@0: if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: } michael@0: } else { michael@0: *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* internal implementation of ucnv_convert() etc. with preflighting */ michael@0: static int32_t michael@0: ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, michael@0: char *target, int32_t targetCapacity, michael@0: const char *source, int32_t sourceLength, michael@0: UErrorCode *pErrorCode) { michael@0: UChar pivotBuffer[CHUNK_SIZE]; michael@0: UChar *pivot, *pivot2; michael@0: michael@0: char *myTarget; michael@0: const char *sourceLimit; michael@0: const char *targetLimit; michael@0: int32_t targetLength=0; michael@0: michael@0: /* set up */ michael@0: if(sourceLength<0) { michael@0: sourceLimit=uprv_strchr(source, 0); michael@0: } else { michael@0: sourceLimit=source+sourceLength; michael@0: } michael@0: michael@0: /* if there is no input data, we're done */ michael@0: if(source==sourceLimit) { michael@0: return u_terminateChars(target, targetCapacity, 0, pErrorCode); michael@0: } michael@0: michael@0: pivot=pivot2=pivotBuffer; michael@0: myTarget=target; michael@0: targetLength=0; michael@0: michael@0: if(targetCapacity>0) { michael@0: /* perform real conversion */ michael@0: targetLimit=target+targetCapacity; michael@0: ucnv_convertEx(outConverter, inConverter, michael@0: &myTarget, targetLimit, michael@0: &source, sourceLimit, michael@0: pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, michael@0: FALSE, michael@0: TRUE, michael@0: pErrorCode); michael@0: targetLength=(int32_t)(myTarget-target); michael@0: } michael@0: michael@0: /* michael@0: * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing michael@0: * to it but continue the conversion in order to store in targetCapacity michael@0: * the number of bytes that was required. michael@0: */ michael@0: if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) michael@0: { michael@0: char targetBuffer[CHUNK_SIZE]; michael@0: michael@0: targetLimit=targetBuffer+CHUNK_SIZE; michael@0: do { michael@0: *pErrorCode=U_ZERO_ERROR; michael@0: myTarget=targetBuffer; michael@0: ucnv_convertEx(outConverter, inConverter, michael@0: &myTarget, targetLimit, michael@0: &source, sourceLimit, michael@0: pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, michael@0: FALSE, michael@0: TRUE, michael@0: pErrorCode); michael@0: targetLength+=(int32_t)(myTarget-targetBuffer); michael@0: } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); michael@0: michael@0: /* done with preflighting, set warnings and errors as appropriate */ michael@0: return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); michael@0: } michael@0: michael@0: /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ michael@0: return targetLength; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_convert(const char *toConverterName, const char *fromConverterName, michael@0: char *target, int32_t targetCapacity, michael@0: const char *source, int32_t sourceLength, michael@0: UErrorCode *pErrorCode) { michael@0: UConverter in, out; /* stack-allocated */ michael@0: UConverter *inConverter, *outConverter; michael@0: int32_t targetLength; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: if( source==NULL || sourceLength<-1 || michael@0: targetCapacity<0 || (targetCapacity>0 && target==NULL) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* if there is no input data, we're done */ michael@0: if(sourceLength==0 || (sourceLength<0 && *source==0)) { michael@0: return u_terminateChars(target, targetCapacity, 0, pErrorCode); michael@0: } michael@0: michael@0: /* create the converters */ michael@0: inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: ucnv_close(inConverter); michael@0: return 0; michael@0: } michael@0: michael@0: targetLength=ucnv_internalConvert(outConverter, inConverter, michael@0: target, targetCapacity, michael@0: source, sourceLength, michael@0: pErrorCode); michael@0: michael@0: ucnv_close(inConverter); michael@0: ucnv_close(outConverter); michael@0: michael@0: return targetLength; michael@0: } michael@0: michael@0: /* @internal */ michael@0: static int32_t michael@0: ucnv_convertAlgorithmic(UBool convertToAlgorithmic, michael@0: UConverterType algorithmicType, michael@0: UConverter *cnv, michael@0: char *target, int32_t targetCapacity, michael@0: const char *source, int32_t sourceLength, michael@0: UErrorCode *pErrorCode) { michael@0: UConverter algoConverterStatic; /* stack-allocated */ michael@0: UConverter *algoConverter, *to, *from; michael@0: int32_t targetLength; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: if( cnv==NULL || source==NULL || sourceLength<-1 || michael@0: targetCapacity<0 || (targetCapacity>0 && target==NULL) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* if there is no input data, we're done */ michael@0: if(sourceLength==0 || (sourceLength<0 && *source==0)) { michael@0: return u_terminateChars(target, targetCapacity, 0, pErrorCode); michael@0: } michael@0: michael@0: /* create the algorithmic converter */ michael@0: algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, michael@0: "", 0, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* reset the other converter */ michael@0: if(convertToAlgorithmic) { michael@0: /* cnv->Unicode->algo */ michael@0: ucnv_resetToUnicode(cnv); michael@0: to=algoConverter; michael@0: from=cnv; michael@0: } else { michael@0: /* algo->Unicode->cnv */ michael@0: ucnv_resetFromUnicode(cnv); michael@0: from=algoConverter; michael@0: to=cnv; michael@0: } michael@0: michael@0: targetLength=ucnv_internalConvert(to, from, michael@0: target, targetCapacity, michael@0: source, sourceLength, michael@0: pErrorCode); michael@0: michael@0: ucnv_close(algoConverter); michael@0: michael@0: return targetLength; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_toAlgorithmic(UConverterType algorithmicType, michael@0: UConverter *cnv, michael@0: char *target, int32_t targetCapacity, michael@0: const char *source, int32_t sourceLength, michael@0: UErrorCode *pErrorCode) { michael@0: return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, michael@0: target, targetCapacity, michael@0: source, sourceLength, michael@0: pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_fromAlgorithmic(UConverter *cnv, michael@0: UConverterType algorithmicType, michael@0: char *target, int32_t targetCapacity, michael@0: const char *source, int32_t sourceLength, michael@0: UErrorCode *pErrorCode) { michael@0: return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, michael@0: target, targetCapacity, michael@0: source, sourceLength, michael@0: pErrorCode); michael@0: } michael@0: michael@0: U_CAPI UConverterType U_EXPORT2 michael@0: ucnv_getType(const UConverter* converter) michael@0: { michael@0: int8_t type = converter->sharedData->staticData->conversionType; michael@0: #if !UCONFIG_NO_LEGACY_CONVERSION michael@0: if(type == UCNV_MBCS) { michael@0: return ucnv_MBCSGetType(converter); michael@0: } michael@0: #endif michael@0: return (UConverterType)type; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getStarters(const UConverter* converter, michael@0: UBool starters[256], michael@0: UErrorCode* err) michael@0: { michael@0: if (err == NULL || U_FAILURE(*err)) { michael@0: return; michael@0: } michael@0: michael@0: if(converter->sharedData->impl->getStarters != NULL) { michael@0: converter->sharedData->impl->getStarters(converter, starters, err); michael@0: } else { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: michael@0: static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) michael@0: { michael@0: UErrorCode errorCode; michael@0: const char *name; michael@0: int32_t i; michael@0: michael@0: if(cnv==NULL) { michael@0: return NULL; michael@0: } michael@0: michael@0: errorCode=U_ZERO_ERROR; michael@0: name=ucnv_getName(cnv, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return NULL; michael@0: } michael@0: michael@0: for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i) michael@0: { michael@0: if(0==uprv_strcmp(name, ambiguousConverters[i].name)) michael@0: { michael@0: return ambiguousConverters+i; michael@0: } michael@0: } michael@0: michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_fixFileSeparator(const UConverter *cnv, michael@0: UChar* source, michael@0: int32_t sourceLength) { michael@0: const UAmbiguousConverter *a; michael@0: int32_t i; michael@0: UChar variant5c; michael@0: michael@0: if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) michael@0: { michael@0: return; michael@0: } michael@0: michael@0: variant5c=a->variant5c; michael@0: for(i=0; iuseFallback = usesFallback; michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: ucnv_usesFallback(const UConverter *cnv) michael@0: { michael@0: return cnv->useFallback; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getInvalidChars (const UConverter * converter, michael@0: char *errBytes, michael@0: int8_t * len, michael@0: UErrorCode * err) michael@0: { michael@0: if (err == NULL || U_FAILURE(*err)) michael@0: { michael@0: return; michael@0: } michael@0: if (len == NULL || errBytes == NULL || converter == NULL) michael@0: { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: if (*len < converter->invalidCharLength) michael@0: { michael@0: *err = U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return; michael@0: } michael@0: if ((*len = converter->invalidCharLength) > 0) michael@0: { michael@0: uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ucnv_getInvalidUChars (const UConverter * converter, michael@0: UChar *errChars, michael@0: int8_t * len, michael@0: UErrorCode * err) michael@0: { michael@0: if (err == NULL || U_FAILURE(*err)) michael@0: { michael@0: return; michael@0: } michael@0: if (len == NULL || errChars == NULL || converter == NULL) michael@0: { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: if (*len < converter->invalidUCharLength) michael@0: { michael@0: *err = U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return; michael@0: } michael@0: if ((*len = converter->invalidUCharLength) > 0) michael@0: { michael@0: uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); michael@0: } michael@0: } michael@0: michael@0: #define SIG_MAX_LEN 5 michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: ucnv_detectUnicodeSignature( const char* source, michael@0: int32_t sourceLength, michael@0: int32_t* signatureLength, michael@0: UErrorCode* pErrorCode) { michael@0: int32_t dummy; michael@0: michael@0: /* initial 0xa5 bytes: make sure that if we read preFromUFirstCP >= 0){ michael@0: return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; michael@0: }else if(cnv->preFromULength < 0){ michael@0: return -cnv->preFromULength ; michael@0: }else if(cnv->fromUChar32 > 0){ michael@0: return 1; michael@0: } michael@0: return 0; michael@0: michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ michael@0: michael@0: if(status == NULL || U_FAILURE(*status)){ michael@0: return -1; michael@0: } michael@0: if(cnv == NULL){ michael@0: *status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return -1; michael@0: } michael@0: michael@0: if(cnv->preToULength > 0){ michael@0: return cnv->preToULength ; michael@0: }else if(cnv->preToULength < 0){ michael@0: return -cnv->preToULength; michael@0: }else if(cnv->toULength > 0){ michael@0: return cnv->toULength; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ michael@0: if (U_FAILURE(*status)) { michael@0: return FALSE; michael@0: } michael@0: michael@0: if (cnv == NULL) { michael@0: *status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: michael@0: switch (ucnv_getType(cnv)) { michael@0: case UCNV_SBCS: michael@0: case UCNV_DBCS: michael@0: case UCNV_UTF32_BigEndian: michael@0: case UCNV_UTF32_LittleEndian: michael@0: case UCNV_UTF32: michael@0: case UCNV_US_ASCII: michael@0: return TRUE; michael@0: default: michael@0: return FALSE; michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: /* michael@0: * Hey, Emacs, please set the following: michael@0: * michael@0: * Local Variables: michael@0: * indent-tabs-mode: nil michael@0: * End: michael@0: * michael@0: */