Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | ****************************************************************************** |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 1998-2013, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ****************************************************************************** |
michael@0 | 8 | * |
michael@0 | 9 | * ucnv.c: |
michael@0 | 10 | * Implements APIs for the ICU's codeset conversion library; |
michael@0 | 11 | * mostly calls through internal functions; |
michael@0 | 12 | * created by Bertrand A. Damiba |
michael@0 | 13 | * |
michael@0 | 14 | * Modification History: |
michael@0 | 15 | * |
michael@0 | 16 | * Date Name Description |
michael@0 | 17 | * 04/04/99 helena Fixed internal header inclusion. |
michael@0 | 18 | * 05/09/00 helena Added implementation to handle fallback mappings. |
michael@0 | 19 | * 06/20/2000 helena OS/400 port changes; mostly typecast. |
michael@0 | 20 | */ |
michael@0 | 21 | |
michael@0 | 22 | #include "unicode/utypes.h" |
michael@0 | 23 | |
michael@0 | 24 | #if !UCONFIG_NO_CONVERSION |
michael@0 | 25 | |
michael@0 | 26 | #include "unicode/ustring.h" |
michael@0 | 27 | #include "unicode/ucnv.h" |
michael@0 | 28 | #include "unicode/ucnv_err.h" |
michael@0 | 29 | #include "unicode/uset.h" |
michael@0 | 30 | #include "unicode/utf.h" |
michael@0 | 31 | #include "unicode/utf16.h" |
michael@0 | 32 | #include "putilimp.h" |
michael@0 | 33 | #include "cmemory.h" |
michael@0 | 34 | #include "cstring.h" |
michael@0 | 35 | #include "uassert.h" |
michael@0 | 36 | #include "utracimp.h" |
michael@0 | 37 | #include "ustr_imp.h" |
michael@0 | 38 | #include "ucnv_imp.h" |
michael@0 | 39 | #include "ucnv_cnv.h" |
michael@0 | 40 | #include "ucnv_bld.h" |
michael@0 | 41 | |
/*
 * Size of the intermediate and preflighting buffers used by ucnv_convert().
 * NOTE(review): the unit (bytes vs. UChars) depends on how ucnv_convert()
 * declares its buffers, which is not visible here -- confirm at the use site.
 */
#define CHUNK_SIZE 1024
michael@0 | 44 | |
/*
 * One entry of the ambiguousConverters table: a converter name paired with
 * a UChar value.
 * NOTE(review): presumably variant5c is the Unicode character that this
 * charset assigns to byte 0x5c in place of U+005C -- confirm against the
 * code that reads the table.
 */
typedef struct UAmbiguousConverter {
    const char *name;       /* converter name the entry applies to */
    const UChar variant5c;  /* replacement character, see the note above */
} UAmbiguousConverter;
michael@0 | 49 | |
/*
 * Table of converters that are treated as ambiguous, each with its
 * variant5c value. Only two values occur: 0xa5 (U+00A5) and 0x20a9
 * (U+20A9). Entries that are commented out are kept for reference.
 */
static const UAmbiguousConverter ambiguousConverters[]={
    { "ibm-897_P100-1995", 0xa5 },
    { "ibm-942_P120-1999", 0xa5 },
    { "ibm-943_P130-1999", 0xa5 },
    { "ibm-946_P100-1995", 0xa5 },
    { "ibm-33722_P120-1999", 0xa5 },
    { "ibm-1041_P100-1995", 0xa5 },
    /*{ "ibm-54191_P100-2006", 0xa5 },*/
    /*{ "ibm-62383_P100-2007", 0xa5 },*/
    /*{ "ibm-891_P100-1995", 0x20a9 },*/
    { "ibm-944_P100-1995", 0x20a9 },
    { "ibm-949_P110-1999", 0x20a9 },
    { "ibm-1363_P110-1997", 0x20a9 },
    { "ISO_2022,locale=ko,version=0", 0x20a9 },
    { "ibm-1088_P100-1995", 0x20a9 }
};
michael@0 | 66 | |
michael@0 | 67 | /*Calls through createConverter */ |
michael@0 | 68 | U_CAPI UConverter* U_EXPORT2 |
michael@0 | 69 | ucnv_open (const char *name, |
michael@0 | 70 | UErrorCode * err) |
michael@0 | 71 | { |
michael@0 | 72 | UConverter *r; |
michael@0 | 73 | |
michael@0 | 74 | if (err == NULL || U_FAILURE (*err)) { |
michael@0 | 75 | return NULL; |
michael@0 | 76 | } |
michael@0 | 77 | |
michael@0 | 78 | r = ucnv_createConverter(NULL, name, err); |
michael@0 | 79 | return r; |
michael@0 | 80 | } |
michael@0 | 81 | |
michael@0 | 82 | U_CAPI UConverter* U_EXPORT2 |
michael@0 | 83 | ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) |
michael@0 | 84 | { |
michael@0 | 85 | return ucnv_createConverterFromPackage(packageName, converterName, err); |
michael@0 | 86 | } |
michael@0 | 87 | |
michael@0 | 88 | /*Extracts the UChar* to a char* and calls through createConverter */ |
michael@0 | 89 | U_CAPI UConverter* U_EXPORT2 |
michael@0 | 90 | ucnv_openU (const UChar * name, |
michael@0 | 91 | UErrorCode * err) |
michael@0 | 92 | { |
michael@0 | 93 | char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
michael@0 | 94 | |
michael@0 | 95 | if (err == NULL || U_FAILURE(*err)) |
michael@0 | 96 | return NULL; |
michael@0 | 97 | if (name == NULL) |
michael@0 | 98 | return ucnv_open (NULL, err); |
michael@0 | 99 | if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) |
michael@0 | 100 | { |
michael@0 | 101 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 102 | return NULL; |
michael@0 | 103 | } |
michael@0 | 104 | return ucnv_open(u_austrcpy(asciiName, name), err); |
michael@0 | 105 | } |
michael@0 | 106 | |
michael@0 | 107 | /* Copy the string that is represented by the UConverterPlatform enum |
michael@0 | 108 | * @param platformString An output buffer |
michael@0 | 109 | * @param platform An enum representing a platform |
michael@0 | 110 | * @return the length of the copied string. |
michael@0 | 111 | */ |
michael@0 | 112 | static int32_t |
michael@0 | 113 | ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) |
michael@0 | 114 | { |
michael@0 | 115 | switch (pltfrm) |
michael@0 | 116 | { |
michael@0 | 117 | case UCNV_IBM: |
michael@0 | 118 | uprv_strcpy(platformString, "ibm-"); |
michael@0 | 119 | return 4; |
michael@0 | 120 | case UCNV_UNKNOWN: |
michael@0 | 121 | break; |
michael@0 | 122 | } |
michael@0 | 123 | |
michael@0 | 124 | /* default to empty string */ |
michael@0 | 125 | *platformString = 0; |
michael@0 | 126 | return 0; |
michael@0 | 127 | } |
michael@0 | 128 | |
michael@0 | 129 | /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls |
michael@0 | 130 | *through createConverter*/ |
michael@0 | 131 | U_CAPI UConverter* U_EXPORT2 |
michael@0 | 132 | ucnv_openCCSID (int32_t codepage, |
michael@0 | 133 | UConverterPlatform platform, |
michael@0 | 134 | UErrorCode * err) |
michael@0 | 135 | { |
michael@0 | 136 | char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; |
michael@0 | 137 | int32_t myNameLen; |
michael@0 | 138 | |
michael@0 | 139 | if (err == NULL || U_FAILURE (*err)) |
michael@0 | 140 | return NULL; |
michael@0 | 141 | |
michael@0 | 142 | /* ucnv_copyPlatformString could return "ibm-" or "cp" */ |
michael@0 | 143 | myNameLen = ucnv_copyPlatformString(myName, platform); |
michael@0 | 144 | T_CString_integerToString(myName + myNameLen, codepage, 10); |
michael@0 | 145 | |
michael@0 | 146 | return ucnv_createConverter(NULL, myName, err); |
michael@0 | 147 | } |
michael@0 | 148 | |
michael@0 | 149 | /* Creating a temporary stack-based object that can be used in one thread, |
michael@0 | 150 | and created from a converter that is shared across threads. |
michael@0 | 151 | */ |
michael@0 | 152 | |
michael@0 | 153 | U_CAPI UConverter* U_EXPORT2 |
michael@0 | 154 | ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) |
michael@0 | 155 | { |
michael@0 | 156 | UConverter *localConverter, *allocatedConverter; |
michael@0 | 157 | int32_t stackBufferSize; |
michael@0 | 158 | int32_t bufferSizeNeeded; |
michael@0 | 159 | char *stackBufferChars = (char *)stackBuffer; |
michael@0 | 160 | UErrorCode cbErr; |
michael@0 | 161 | UConverterToUnicodeArgs toUArgs = { |
michael@0 | 162 | sizeof(UConverterToUnicodeArgs), |
michael@0 | 163 | TRUE, |
michael@0 | 164 | NULL, |
michael@0 | 165 | NULL, |
michael@0 | 166 | NULL, |
michael@0 | 167 | NULL, |
michael@0 | 168 | NULL, |
michael@0 | 169 | NULL |
michael@0 | 170 | }; |
michael@0 | 171 | UConverterFromUnicodeArgs fromUArgs = { |
michael@0 | 172 | sizeof(UConverterFromUnicodeArgs), |
michael@0 | 173 | TRUE, |
michael@0 | 174 | NULL, |
michael@0 | 175 | NULL, |
michael@0 | 176 | NULL, |
michael@0 | 177 | NULL, |
michael@0 | 178 | NULL, |
michael@0 | 179 | NULL |
michael@0 | 180 | }; |
michael@0 | 181 | |
michael@0 | 182 | UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); |
michael@0 | 183 | |
michael@0 | 184 | if (status == NULL || U_FAILURE(*status)){ |
michael@0 | 185 | UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); |
michael@0 | 186 | return NULL; |
michael@0 | 187 | } |
michael@0 | 188 | |
michael@0 | 189 | if (cnv == NULL) { |
michael@0 | 190 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 191 | UTRACE_EXIT_STATUS(*status); |
michael@0 | 192 | return NULL; |
michael@0 | 193 | } |
michael@0 | 194 | |
michael@0 | 195 | UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", |
michael@0 | 196 | ucnv_getName(cnv, status), cnv, stackBuffer); |
michael@0 | 197 | |
michael@0 | 198 | if (cnv->sharedData->impl->safeClone != NULL) { |
michael@0 | 199 | /* call the custom safeClone function for sizing */ |
michael@0 | 200 | bufferSizeNeeded = 0; |
michael@0 | 201 | cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); |
michael@0 | 202 | if (U_FAILURE(*status)) { |
michael@0 | 203 | UTRACE_EXIT_STATUS(*status); |
michael@0 | 204 | return NULL; |
michael@0 | 205 | } |
michael@0 | 206 | } |
michael@0 | 207 | else |
michael@0 | 208 | { |
michael@0 | 209 | /* inherent sizing */ |
michael@0 | 210 | bufferSizeNeeded = sizeof(UConverter); |
michael@0 | 211 | } |
michael@0 | 212 | |
michael@0 | 213 | if (pBufferSize == NULL) { |
michael@0 | 214 | stackBufferSize = 1; |
michael@0 | 215 | pBufferSize = &stackBufferSize; |
michael@0 | 216 | } else { |
michael@0 | 217 | stackBufferSize = *pBufferSize; |
michael@0 | 218 | if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
michael@0 | 219 | *pBufferSize = bufferSizeNeeded; |
michael@0 | 220 | UTRACE_EXIT_VALUE(bufferSizeNeeded); |
michael@0 | 221 | return NULL; |
michael@0 | 222 | } |
michael@0 | 223 | } |
michael@0 | 224 | |
michael@0 | 225 | |
michael@0 | 226 | /* Pointers on 64-bit platforms need to be aligned |
michael@0 | 227 | * on a 64-bit boundary in memory. |
michael@0 | 228 | */ |
michael@0 | 229 | if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { |
michael@0 | 230 | int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); |
michael@0 | 231 | if(stackBufferSize > offsetUp) { |
michael@0 | 232 | stackBufferSize -= offsetUp; |
michael@0 | 233 | stackBufferChars += offsetUp; |
michael@0 | 234 | } else { |
michael@0 | 235 | /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ |
michael@0 | 236 | stackBufferSize = 1; |
michael@0 | 237 | } |
michael@0 | 238 | } |
michael@0 | 239 | |
michael@0 | 240 | stackBuffer = (void *)stackBufferChars; |
michael@0 | 241 | |
michael@0 | 242 | /* Now, see if we must allocate any memory */ |
michael@0 | 243 | if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) |
michael@0 | 244 | { |
michael@0 | 245 | /* allocate one here...*/ |
michael@0 | 246 | localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); |
michael@0 | 247 | |
michael@0 | 248 | if(localConverter == NULL) { |
michael@0 | 249 | *status = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 250 | UTRACE_EXIT_STATUS(*status); |
michael@0 | 251 | return NULL; |
michael@0 | 252 | } |
michael@0 | 253 | *status = U_SAFECLONE_ALLOCATED_WARNING; |
michael@0 | 254 | |
michael@0 | 255 | /* record the fact that memory was allocated */ |
michael@0 | 256 | *pBufferSize = bufferSizeNeeded; |
michael@0 | 257 | } else { |
michael@0 | 258 | /* just use the stack buffer */ |
michael@0 | 259 | localConverter = (UConverter*) stackBuffer; |
michael@0 | 260 | allocatedConverter = NULL; |
michael@0 | 261 | } |
michael@0 | 262 | |
michael@0 | 263 | uprv_memset(localConverter, 0, bufferSizeNeeded); |
michael@0 | 264 | |
michael@0 | 265 | /* Copy initial state */ |
michael@0 | 266 | uprv_memcpy(localConverter, cnv, sizeof(UConverter)); |
michael@0 | 267 | localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; |
michael@0 | 268 | |
michael@0 | 269 | /* copy the substitution string */ |
michael@0 | 270 | if (cnv->subChars == (uint8_t *)cnv->subUChars) { |
michael@0 | 271 | localConverter->subChars = (uint8_t *)localConverter->subUChars; |
michael@0 | 272 | } else { |
michael@0 | 273 | localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
michael@0 | 274 | if (localConverter->subChars == NULL) { |
michael@0 | 275 | uprv_free(allocatedConverter); |
michael@0 | 276 | UTRACE_EXIT_STATUS(*status); |
michael@0 | 277 | return NULL; |
michael@0 | 278 | } |
michael@0 | 279 | uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
michael@0 | 280 | } |
michael@0 | 281 | |
michael@0 | 282 | /* now either call the safeclone fcn or not */ |
michael@0 | 283 | if (cnv->sharedData->impl->safeClone != NULL) { |
michael@0 | 284 | /* call the custom safeClone function */ |
michael@0 | 285 | localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); |
michael@0 | 286 | } |
michael@0 | 287 | |
michael@0 | 288 | if(localConverter==NULL || U_FAILURE(*status)) { |
michael@0 | 289 | if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { |
michael@0 | 290 | uprv_free(allocatedConverter->subChars); |
michael@0 | 291 | } |
michael@0 | 292 | uprv_free(allocatedConverter); |
michael@0 | 293 | UTRACE_EXIT_STATUS(*status); |
michael@0 | 294 | return NULL; |
michael@0 | 295 | } |
michael@0 | 296 | |
michael@0 | 297 | /* increment refcount of shared data if needed */ |
michael@0 | 298 | /* |
michael@0 | 299 | Checking whether it's an algorithic converter is okay |
michael@0 | 300 | in multithreaded applications because the value never changes. |
michael@0 | 301 | Don't check referenceCounter for any other value. |
michael@0 | 302 | */ |
michael@0 | 303 | if (cnv->sharedData->referenceCounter != ~0) { |
michael@0 | 304 | ucnv_incrementRefCount(cnv->sharedData); |
michael@0 | 305 | } |
michael@0 | 306 | |
michael@0 | 307 | if(localConverter == (UConverter*)stackBuffer) { |
michael@0 | 308 | /* we're using user provided data - set to not destroy */ |
michael@0 | 309 | localConverter->isCopyLocal = TRUE; |
michael@0 | 310 | } |
michael@0 | 311 | |
michael@0 | 312 | /* allow callback functions to handle any memory allocation */ |
michael@0 | 313 | toUArgs.converter = fromUArgs.converter = localConverter; |
michael@0 | 314 | cbErr = U_ZERO_ERROR; |
michael@0 | 315 | cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); |
michael@0 | 316 | cbErr = U_ZERO_ERROR; |
michael@0 | 317 | cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); |
michael@0 | 318 | |
michael@0 | 319 | UTRACE_EXIT_PTR_STATUS(localConverter, *status); |
michael@0 | 320 | return localConverter; |
michael@0 | 321 | } |
michael@0 | 322 | |
michael@0 | 323 | |
michael@0 | 324 | |
michael@0 | 325 | /*Decreases the reference counter in the shared immutable section of the object |
michael@0 | 326 | *and frees the mutable part*/ |
michael@0 | 327 | |
michael@0 | 328 | U_CAPI void U_EXPORT2 |
michael@0 | 329 | ucnv_close (UConverter * converter) |
michael@0 | 330 | { |
michael@0 | 331 | UErrorCode errorCode = U_ZERO_ERROR; |
michael@0 | 332 | |
michael@0 | 333 | UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); |
michael@0 | 334 | |
michael@0 | 335 | if (converter == NULL) |
michael@0 | 336 | { |
michael@0 | 337 | UTRACE_EXIT(); |
michael@0 | 338 | return; |
michael@0 | 339 | } |
michael@0 | 340 | |
michael@0 | 341 | UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", |
michael@0 | 342 | ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); |
michael@0 | 343 | |
michael@0 | 344 | /* In order to speed up the close, only call the callbacks when they have been changed. |
michael@0 | 345 | This performance check will only work when the callbacks are set within a shared library |
michael@0 | 346 | or from user code that statically links this code. */ |
michael@0 | 347 | /* first, notify the callback functions that the converter is closed */ |
michael@0 | 348 | if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { |
michael@0 | 349 | UConverterToUnicodeArgs toUArgs = { |
michael@0 | 350 | sizeof(UConverterToUnicodeArgs), |
michael@0 | 351 | TRUE, |
michael@0 | 352 | NULL, |
michael@0 | 353 | NULL, |
michael@0 | 354 | NULL, |
michael@0 | 355 | NULL, |
michael@0 | 356 | NULL, |
michael@0 | 357 | NULL |
michael@0 | 358 | }; |
michael@0 | 359 | |
michael@0 | 360 | toUArgs.converter = converter; |
michael@0 | 361 | errorCode = U_ZERO_ERROR; |
michael@0 | 362 | converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); |
michael@0 | 363 | } |
michael@0 | 364 | if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { |
michael@0 | 365 | UConverterFromUnicodeArgs fromUArgs = { |
michael@0 | 366 | sizeof(UConverterFromUnicodeArgs), |
michael@0 | 367 | TRUE, |
michael@0 | 368 | NULL, |
michael@0 | 369 | NULL, |
michael@0 | 370 | NULL, |
michael@0 | 371 | NULL, |
michael@0 | 372 | NULL, |
michael@0 | 373 | NULL |
michael@0 | 374 | }; |
michael@0 | 375 | fromUArgs.converter = converter; |
michael@0 | 376 | errorCode = U_ZERO_ERROR; |
michael@0 | 377 | converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); |
michael@0 | 378 | } |
michael@0 | 379 | |
michael@0 | 380 | if (converter->sharedData->impl->close != NULL) { |
michael@0 | 381 | converter->sharedData->impl->close(converter); |
michael@0 | 382 | } |
michael@0 | 383 | |
michael@0 | 384 | if (converter->subChars != (uint8_t *)converter->subUChars) { |
michael@0 | 385 | uprv_free(converter->subChars); |
michael@0 | 386 | } |
michael@0 | 387 | |
michael@0 | 388 | /* |
michael@0 | 389 | Checking whether it's an algorithic converter is okay |
michael@0 | 390 | in multithreaded applications because the value never changes. |
michael@0 | 391 | Don't check referenceCounter for any other value. |
michael@0 | 392 | */ |
michael@0 | 393 | if (converter->sharedData->referenceCounter != ~0) { |
michael@0 | 394 | ucnv_unloadSharedDataIfReady(converter->sharedData); |
michael@0 | 395 | } |
michael@0 | 396 | |
michael@0 | 397 | if(!converter->isCopyLocal){ |
michael@0 | 398 | uprv_free(converter); |
michael@0 | 399 | } |
michael@0 | 400 | |
michael@0 | 401 | UTRACE_EXIT(); |
michael@0 | 402 | } |
michael@0 | 403 | |
michael@0 | 404 | /*returns a single Name from the list, will return NULL if out of bounds |
michael@0 | 405 | */ |
michael@0 | 406 | U_CAPI const char* U_EXPORT2 |
michael@0 | 407 | ucnv_getAvailableName (int32_t n) |
michael@0 | 408 | { |
michael@0 | 409 | if (0 <= n && n <= 0xffff) { |
michael@0 | 410 | UErrorCode err = U_ZERO_ERROR; |
michael@0 | 411 | const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); |
michael@0 | 412 | if (U_SUCCESS(err)) { |
michael@0 | 413 | return name; |
michael@0 | 414 | } |
michael@0 | 415 | } |
michael@0 | 416 | return NULL; |
michael@0 | 417 | } |
michael@0 | 418 | |
michael@0 | 419 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 420 | ucnv_countAvailable () |
michael@0 | 421 | { |
michael@0 | 422 | UErrorCode err = U_ZERO_ERROR; |
michael@0 | 423 | return ucnv_bld_countAvailableConverters(&err); |
michael@0 | 424 | } |
michael@0 | 425 | |
michael@0 | 426 | U_CAPI void U_EXPORT2 |
michael@0 | 427 | ucnv_getSubstChars (const UConverter * converter, |
michael@0 | 428 | char *mySubChar, |
michael@0 | 429 | int8_t * len, |
michael@0 | 430 | UErrorCode * err) |
michael@0 | 431 | { |
michael@0 | 432 | if (U_FAILURE (*err)) |
michael@0 | 433 | return; |
michael@0 | 434 | |
michael@0 | 435 | if (converter->subCharLen <= 0) { |
michael@0 | 436 | /* Unicode string or empty string from ucnv_setSubstString(). */ |
michael@0 | 437 | *len = 0; |
michael@0 | 438 | return; |
michael@0 | 439 | } |
michael@0 | 440 | |
michael@0 | 441 | if (*len < converter->subCharLen) /*not enough space in subChars */ |
michael@0 | 442 | { |
michael@0 | 443 | *err = U_INDEX_OUTOFBOUNDS_ERROR; |
michael@0 | 444 | return; |
michael@0 | 445 | } |
michael@0 | 446 | |
michael@0 | 447 | uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ |
michael@0 | 448 | *len = converter->subCharLen; /*store # of bytes copied to buffer */ |
michael@0 | 449 | } |
michael@0 | 450 | |
michael@0 | 451 | U_CAPI void U_EXPORT2 |
michael@0 | 452 | ucnv_setSubstChars (UConverter * converter, |
michael@0 | 453 | const char *mySubChar, |
michael@0 | 454 | int8_t len, |
michael@0 | 455 | UErrorCode * err) |
michael@0 | 456 | { |
michael@0 | 457 | if (U_FAILURE (*err)) |
michael@0 | 458 | return; |
michael@0 | 459 | |
michael@0 | 460 | /*Makes sure that the subChar is within the codepages char length boundaries */ |
michael@0 | 461 | if ((len > converter->sharedData->staticData->maxBytesPerChar) |
michael@0 | 462 | || (len < converter->sharedData->staticData->minBytesPerChar)) |
michael@0 | 463 | { |
michael@0 | 464 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 465 | return; |
michael@0 | 466 | } |
michael@0 | 467 | |
michael@0 | 468 | uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ |
michael@0 | 469 | converter->subCharLen = len; /*sets the new len */ |
michael@0 | 470 | |
michael@0 | 471 | /* |
michael@0 | 472 | * There is currently (2001Feb) no separate API to set/get subChar1. |
michael@0 | 473 | * In order to always have subChar written after it is explicitly set, |
michael@0 | 474 | * we set subChar1 to 0. |
michael@0 | 475 | */ |
michael@0 | 476 | converter->subChar1 = 0; |
michael@0 | 477 | |
michael@0 | 478 | return; |
michael@0 | 479 | } |
michael@0 | 480 | |
michael@0 | 481 | U_CAPI void U_EXPORT2 |
michael@0 | 482 | ucnv_setSubstString(UConverter *cnv, |
michael@0 | 483 | const UChar *s, |
michael@0 | 484 | int32_t length, |
michael@0 | 485 | UErrorCode *err) { |
michael@0 | 486 | UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; |
michael@0 | 487 | char chars[UCNV_ERROR_BUFFER_LENGTH]; |
michael@0 | 488 | |
michael@0 | 489 | UConverter *clone; |
michael@0 | 490 | uint8_t *subChars; |
michael@0 | 491 | int32_t cloneSize, length8; |
michael@0 | 492 | |
michael@0 | 493 | /* Let the following functions check all arguments. */ |
michael@0 | 494 | cloneSize = sizeof(cloneBuffer); |
michael@0 | 495 | clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); |
michael@0 | 496 | ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); |
michael@0 | 497 | length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); |
michael@0 | 498 | ucnv_close(clone); |
michael@0 | 499 | if (U_FAILURE(*err)) { |
michael@0 | 500 | return; |
michael@0 | 501 | } |
michael@0 | 502 | |
michael@0 | 503 | if (cnv->sharedData->impl->writeSub == NULL |
michael@0 | 504 | #if !UCONFIG_NO_LEGACY_CONVERSION |
michael@0 | 505 | || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && |
michael@0 | 506 | ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) |
michael@0 | 507 | #endif |
michael@0 | 508 | ) { |
michael@0 | 509 | /* The converter is not stateful. Store the charset bytes as a fixed string. */ |
michael@0 | 510 | subChars = (uint8_t *)chars; |
michael@0 | 511 | } else { |
michael@0 | 512 | /* |
michael@0 | 513 | * The converter has a non-default writeSub() function, indicating |
michael@0 | 514 | * that it is stateful. |
michael@0 | 515 | * Store the Unicode string for on-the-fly conversion for correct |
michael@0 | 516 | * state handling. |
michael@0 | 517 | */ |
michael@0 | 518 | if (length > UCNV_ERROR_BUFFER_LENGTH) { |
michael@0 | 519 | /* |
michael@0 | 520 | * Should not occur. The converter should output at least one byte |
michael@0 | 521 | * per UChar, which means that ucnv_fromUChars() should catch all |
michael@0 | 522 | * overflows. |
michael@0 | 523 | */ |
michael@0 | 524 | *err = U_BUFFER_OVERFLOW_ERROR; |
michael@0 | 525 | return; |
michael@0 | 526 | } |
michael@0 | 527 | subChars = (uint8_t *)s; |
michael@0 | 528 | if (length < 0) { |
michael@0 | 529 | length = u_strlen(s); |
michael@0 | 530 | } |
michael@0 | 531 | length8 = length * U_SIZEOF_UCHAR; |
michael@0 | 532 | } |
michael@0 | 533 | |
michael@0 | 534 | /* |
michael@0 | 535 | * For storing the substitution string, select either the small buffer inside |
michael@0 | 536 | * UConverter or allocate a subChars buffer. |
michael@0 | 537 | */ |
michael@0 | 538 | if (length8 > UCNV_MAX_SUBCHAR_LEN) { |
michael@0 | 539 | /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ |
michael@0 | 540 | if (cnv->subChars == (uint8_t *)cnv->subUChars) { |
michael@0 | 541 | /* Allocate a new buffer for the string. */ |
michael@0 | 542 | cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
michael@0 | 543 | if (cnv->subChars == NULL) { |
michael@0 | 544 | cnv->subChars = (uint8_t *)cnv->subUChars; |
michael@0 | 545 | *err = U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 546 | return; |
michael@0 | 547 | } |
michael@0 | 548 | uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); |
michael@0 | 549 | } |
michael@0 | 550 | } |
michael@0 | 551 | |
michael@0 | 552 | /* Copy the substitution string into the UConverter or its subChars buffer. */ |
michael@0 | 553 | if (length8 == 0) { |
michael@0 | 554 | cnv->subCharLen = 0; |
michael@0 | 555 | } else { |
michael@0 | 556 | uprv_memcpy(cnv->subChars, subChars, length8); |
michael@0 | 557 | if (subChars == (uint8_t *)chars) { |
michael@0 | 558 | cnv->subCharLen = (int8_t)length8; |
michael@0 | 559 | } else /* subChars == s */ { |
michael@0 | 560 | cnv->subCharLen = (int8_t)-length; |
michael@0 | 561 | } |
michael@0 | 562 | } |
michael@0 | 563 | |
michael@0 | 564 | /* See comment in ucnv_setSubstChars(). */ |
michael@0 | 565 | cnv->subChar1 = 0; |
michael@0 | 566 | } |
michael@0 | 567 | |
michael@0 | 568 | /*resets the internal states of a converter |
michael@0 | 569 | *goal : have the same behaviour than a freshly created converter |
michael@0 | 570 | */ |
michael@0 | 571 | static void _reset(UConverter *converter, UConverterResetChoice choice, |
michael@0 | 572 | UBool callCallback) { |
michael@0 | 573 | if(converter == NULL) { |
michael@0 | 574 | return; |
michael@0 | 575 | } |
michael@0 | 576 | |
michael@0 | 577 | if(callCallback) { |
michael@0 | 578 | /* first, notify the callback functions that the converter is reset */ |
michael@0 | 579 | UErrorCode errorCode; |
michael@0 | 580 | |
michael@0 | 581 | if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { |
michael@0 | 582 | UConverterToUnicodeArgs toUArgs = { |
michael@0 | 583 | sizeof(UConverterToUnicodeArgs), |
michael@0 | 584 | TRUE, |
michael@0 | 585 | NULL, |
michael@0 | 586 | NULL, |
michael@0 | 587 | NULL, |
michael@0 | 588 | NULL, |
michael@0 | 589 | NULL, |
michael@0 | 590 | NULL |
michael@0 | 591 | }; |
michael@0 | 592 | toUArgs.converter = converter; |
michael@0 | 593 | errorCode = U_ZERO_ERROR; |
michael@0 | 594 | converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); |
michael@0 | 595 | } |
michael@0 | 596 | if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { |
michael@0 | 597 | UConverterFromUnicodeArgs fromUArgs = { |
michael@0 | 598 | sizeof(UConverterFromUnicodeArgs), |
michael@0 | 599 | TRUE, |
michael@0 | 600 | NULL, |
michael@0 | 601 | NULL, |
michael@0 | 602 | NULL, |
michael@0 | 603 | NULL, |
michael@0 | 604 | NULL, |
michael@0 | 605 | NULL |
michael@0 | 606 | }; |
michael@0 | 607 | fromUArgs.converter = converter; |
michael@0 | 608 | errorCode = U_ZERO_ERROR; |
michael@0 | 609 | converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); |
michael@0 | 610 | } |
michael@0 | 611 | } |
michael@0 | 612 | |
michael@0 | 613 | /* now reset the converter itself */ |
michael@0 | 614 | if(choice<=UCNV_RESET_TO_UNICODE) { |
michael@0 | 615 | converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; |
michael@0 | 616 | converter->mode = 0; |
michael@0 | 617 | converter->toULength = 0; |
michael@0 | 618 | converter->invalidCharLength = converter->UCharErrorBufferLength = 0; |
michael@0 | 619 | converter->preToULength = 0; |
michael@0 | 620 | } |
michael@0 | 621 | if(choice!=UCNV_RESET_TO_UNICODE) { |
michael@0 | 622 | converter->fromUnicodeStatus = 0; |
michael@0 | 623 | converter->fromUChar32 = 0; |
michael@0 | 624 | converter->invalidUCharLength = converter->charErrorBufferLength = 0; |
michael@0 | 625 | converter->preFromUFirstCP = U_SENTINEL; |
michael@0 | 626 | converter->preFromULength = 0; |
michael@0 | 627 | } |
michael@0 | 628 | |
michael@0 | 629 | if (converter->sharedData->impl->reset != NULL) { |
michael@0 | 630 | /* call the custom reset function */ |
michael@0 | 631 | converter->sharedData->impl->reset(converter, choice); |
michael@0 | 632 | } |
michael@0 | 633 | } |
michael@0 | 634 | |
michael@0 | 635 | U_CAPI void U_EXPORT2 |
michael@0 | 636 | ucnv_reset(UConverter *converter) |
michael@0 | 637 | { |
michael@0 | 638 | _reset(converter, UCNV_RESET_BOTH, TRUE); |
michael@0 | 639 | } |
michael@0 | 640 | |
michael@0 | 641 | U_CAPI void U_EXPORT2 |
michael@0 | 642 | ucnv_resetToUnicode(UConverter *converter) |
michael@0 | 643 | { |
michael@0 | 644 | _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); |
michael@0 | 645 | } |
michael@0 | 646 | |
michael@0 | 647 | U_CAPI void U_EXPORT2 |
michael@0 | 648 | ucnv_resetFromUnicode(UConverter *converter) |
michael@0 | 649 | { |
michael@0 | 650 | _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); |
michael@0 | 651 | } |
michael@0 | 652 | |
michael@0 | 653 | U_CAPI int8_t U_EXPORT2 |
michael@0 | 654 | ucnv_getMaxCharSize (const UConverter * converter) |
michael@0 | 655 | { |
michael@0 | 656 | return converter->maxBytesPerUChar; |
michael@0 | 657 | } |
michael@0 | 658 | |
michael@0 | 659 | |
michael@0 | 660 | U_CAPI int8_t U_EXPORT2 |
michael@0 | 661 | ucnv_getMinCharSize (const UConverter * converter) |
michael@0 | 662 | { |
michael@0 | 663 | return converter->sharedData->staticData->minBytesPerChar; |
michael@0 | 664 | } |
michael@0 | 665 | |
michael@0 | 666 | U_CAPI const char* U_EXPORT2 |
michael@0 | 667 | ucnv_getName (const UConverter * converter, UErrorCode * err) |
michael@0 | 668 | |
michael@0 | 669 | { |
michael@0 | 670 | if (U_FAILURE (*err)) |
michael@0 | 671 | return NULL; |
michael@0 | 672 | if(converter->sharedData->impl->getName){ |
michael@0 | 673 | const char* temp= converter->sharedData->impl->getName(converter); |
michael@0 | 674 | if(temp) |
michael@0 | 675 | return temp; |
michael@0 | 676 | } |
michael@0 | 677 | return converter->sharedData->staticData->name; |
michael@0 | 678 | } |
michael@0 | 679 | |
michael@0 | 680 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 681 | ucnv_getCCSID(const UConverter * converter, |
michael@0 | 682 | UErrorCode * err) |
michael@0 | 683 | { |
michael@0 | 684 | int32_t ccsid; |
michael@0 | 685 | if (U_FAILURE (*err)) |
michael@0 | 686 | return -1; |
michael@0 | 687 | |
michael@0 | 688 | ccsid = converter->sharedData->staticData->codepage; |
michael@0 | 689 | if (ccsid == 0) { |
michael@0 | 690 | /* Rare case. This is for cases like gb18030, |
michael@0 | 691 | which doesn't have an IBM canonical name, but does have an IBM alias. */ |
michael@0 | 692 | const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); |
michael@0 | 693 | if (U_SUCCESS(*err) && standardName) { |
michael@0 | 694 | const char *ccsidStr = uprv_strchr(standardName, '-'); |
michael@0 | 695 | if (ccsidStr) { |
michael@0 | 696 | ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ |
michael@0 | 697 | } |
michael@0 | 698 | } |
michael@0 | 699 | } |
michael@0 | 700 | return ccsid; |
michael@0 | 701 | } |
michael@0 | 702 | |
michael@0 | 703 | |
michael@0 | 704 | U_CAPI UConverterPlatform U_EXPORT2 |
michael@0 | 705 | ucnv_getPlatform (const UConverter * converter, |
michael@0 | 706 | UErrorCode * err) |
michael@0 | 707 | { |
michael@0 | 708 | if (U_FAILURE (*err)) |
michael@0 | 709 | return UCNV_UNKNOWN; |
michael@0 | 710 | |
michael@0 | 711 | return (UConverterPlatform)converter->sharedData->staticData->platform; |
michael@0 | 712 | } |
michael@0 | 713 | |
michael@0 | 714 | U_CAPI void U_EXPORT2 |
michael@0 | 715 | ucnv_getToUCallBack (const UConverter * converter, |
michael@0 | 716 | UConverterToUCallback *action, |
michael@0 | 717 | const void **context) |
michael@0 | 718 | { |
michael@0 | 719 | *action = converter->fromCharErrorBehaviour; |
michael@0 | 720 | *context = converter->toUContext; |
michael@0 | 721 | } |
michael@0 | 722 | |
michael@0 | 723 | U_CAPI void U_EXPORT2 |
michael@0 | 724 | ucnv_getFromUCallBack (const UConverter * converter, |
michael@0 | 725 | UConverterFromUCallback *action, |
michael@0 | 726 | const void **context) |
michael@0 | 727 | { |
michael@0 | 728 | *action = converter->fromUCharErrorBehaviour; |
michael@0 | 729 | *context = converter->fromUContext; |
michael@0 | 730 | } |
michael@0 | 731 | |
michael@0 | 732 | U_CAPI void U_EXPORT2 |
michael@0 | 733 | ucnv_setToUCallBack (UConverter * converter, |
michael@0 | 734 | UConverterToUCallback newAction, |
michael@0 | 735 | const void* newContext, |
michael@0 | 736 | UConverterToUCallback *oldAction, |
michael@0 | 737 | const void** oldContext, |
michael@0 | 738 | UErrorCode * err) |
michael@0 | 739 | { |
michael@0 | 740 | if (U_FAILURE (*err)) |
michael@0 | 741 | return; |
michael@0 | 742 | if (oldAction) *oldAction = converter->fromCharErrorBehaviour; |
michael@0 | 743 | converter->fromCharErrorBehaviour = newAction; |
michael@0 | 744 | if (oldContext) *oldContext = converter->toUContext; |
michael@0 | 745 | converter->toUContext = newContext; |
michael@0 | 746 | } |
michael@0 | 747 | |
michael@0 | 748 | U_CAPI void U_EXPORT2 |
michael@0 | 749 | ucnv_setFromUCallBack (UConverter * converter, |
michael@0 | 750 | UConverterFromUCallback newAction, |
michael@0 | 751 | const void* newContext, |
michael@0 | 752 | UConverterFromUCallback *oldAction, |
michael@0 | 753 | const void** oldContext, |
michael@0 | 754 | UErrorCode * err) |
michael@0 | 755 | { |
michael@0 | 756 | if (U_FAILURE (*err)) |
michael@0 | 757 | return; |
michael@0 | 758 | if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; |
michael@0 | 759 | converter->fromUCharErrorBehaviour = newAction; |
michael@0 | 760 | if (oldContext) *oldContext = converter->fromUContext; |
michael@0 | 761 | converter->fromUContext = newContext; |
michael@0 | 762 | } |
michael@0 | 763 | |
/*
 * Adjusts the first `length` entries of `offsets` in place.
 *
 * The shift applied is sourceIndex-errorInputLength when sourceIndex>=0,
 * and -1 otherwise (the conversion function does not handle offsets).
 *
 *   shift==0: nothing is changed
 *   shift>0:  the shift is added to every entry that is >=0
 *   shift<0:  every entry is set to -1 (no offsets available, or the
 *             error input sequence started in a previous buffer)
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift = (sourceIndex >= 0) ? sourceIndex - errorInputLength : -1;
    int32_t i;

    if (shift == 0) {
        /* most common case, nothing to do */
        return;
    }

    if (shift > 0) {
        /* move each valid offset forward; negative entries stay as they are */
        for (i = 0; i < length; ++i) {
            if (offsets[i] >= 0) {
                offsets[i] += shift;
            }
        }
    } else {
        /* no usable source index: mark every entry as unknown */
        for (i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
    }
}
michael@0 | 808 | |
michael@0 | 809 | /* ucnv_fromUnicode --------------------------------------------------------- */ |
michael@0 | 810 | |
michael@0 | 811 | /* |
michael@0 | 812 | * Implementation note for m:n conversions |
michael@0 | 813 | * |
michael@0 | 814 | * While collecting source units to find the longest match for m:n conversion, |
michael@0 | 815 | * some source units may need to be stored for a partial match. |
michael@0 | 816 | * When a second buffer does not yield a match on all of the previously stored |
michael@0 | 817 | * source units, then they must be "replayed", i.e., fed back into the converter. |
michael@0 | 818 | * |
michael@0 | 819 | * The code relies on the fact that replaying will not nest - |
michael@0 | 820 | * converting a replay buffer will not result in a replay. |
michael@0 | 821 | * This is because a replay is necessary only after the _continuation_ of a |
michael@0 | 822 | * partial match failed, but a replay buffer is converted as a whole. |
michael@0 | 823 | * It may result in some of its units being stored again for a partial match, |
michael@0 | 824 | * but there will not be a continuation _during_ the replay which could fail. |
michael@0 | 825 | * |
michael@0 | 826 | * It is conceivable that a callback function could call the converter |
michael@0 | 827 | * recursively in a way that causes another replay to be stored, but that |
michael@0 | 828 | * would be an error in the callback function. |
michael@0 | 829 | * Such violations will cause assertion failures in a debug build, |
michael@0 | 830 | * and wrong output, but they will not cause a crash. |
michael@0 | 831 | */ |
michael@0 | 832 | |
/*
 * Drives a from-Unicode conversion described by pArgs.
 *
 * Repeatedly calls the converter implementation's fromUnicode function
 * (or fromUnicodeWithOffsets when offsets are requested and available) and,
 * after each call, fixes up the offsets, switches to/from the m:n "replay"
 * buffer, handles end of input, and invokes the converter's from-Unicode
 * error callback for U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND and
 * U_TRUNCATED_CHAR_FOUND.
 *
 * Returns when all input has been consumed successfully, or when an error
 * remains that the callback did not or cannot resolve (for example
 * U_BUFFER_OVERFLOW_ERROR). pArgs->source, pArgs->target and pArgs->offsets
 * are advanced as conversion proceeds; if the function returns with an
 * error while replaying, the unconsumed replay units are stored back into
 * cnv->preFromU and the real source arguments are restored in pArgs.
 *
 * @param pArgs conversion arguments (converter, source, target, offsets, flush)
 * @param err   in/out error code; may already indicate a failure on entry,
 *              in which case the conversion function is not called and the
 *              error is handled directly
 */
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterFromUnicode fromUnicode;
    UConverter *cnv;
    const UChar *s;
    char *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    UChar replay[UCNV_EXT_MAX_UCHARS];
    const UChar *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        fromUnicode=cnv->sharedData->impl->fromUnicode;
    } else {
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
        if(fromUnicode==NULL) {
            /* there is no WithOffsets implementation */
            fromUnicode=cnv->sharedData->impl->fromUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    /* a negative preFromULength marks stored units that must be replayed */
    if(cnv->preFromULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preFromULength;
        pArgs->flush=FALSE;
        /* replayed units come from an earlier buffer: no valid source index */
        sourceIndex=-1;

        cnv->preFromULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *     convert
     *     loop {
     *         update offsets
     *         handle end of input
     *         handle errors/call callback
     *     }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            fromUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->fromUChar32==0);
        } else {
            /* handle error from ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                /* number of target bytes written since the last pointer update */
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offsets should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preFromULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preFromULength;
                    pArgs->flush=FALSE;
                    /*
                     * preFromULength is negative here, so this moves sourceIndex
                     * back by the number of replay units; clamp to -1 if that
                     * falls before the start of the current buffer
                     */
                    if((sourceIndex+=cnv->preFromULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preFromULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                /*
                 * e is assigned inside the condition; it is only read when
                 * calledCallback is FALSE, i.e. after the assignment ran
                 */
                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preFromULength==0);

                        /* replay units not yet consumed by the conversion function */
                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
                            /* stored negated so that the next call replays them */
                            cnv->preFromULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* callback handling */
            {
                UChar32 codePoint;

                /* get and write the code point */
                codePoint=cnv->fromUChar32;
                errorInputLength=0;
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
                cnv->fromUChar32=0;

                /* call the callback function */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
                    err);
            }

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}
michael@0 | 1118 | |
michael@0 | 1119 | /* |
michael@0 | 1120 | * Output the fromUnicode overflow buffer. |
michael@0 | 1121 | * Call this function if(cnv->charErrorBufferLength>0). |
michael@0 | 1122 | * @return TRUE if overflow |
michael@0 | 1123 | */ |
michael@0 | 1124 | static UBool |
michael@0 | 1125 | ucnv_outputOverflowFromUnicode(UConverter *cnv, |
michael@0 | 1126 | char **target, const char *targetLimit, |
michael@0 | 1127 | int32_t **pOffsets, |
michael@0 | 1128 | UErrorCode *err) { |
michael@0 | 1129 | int32_t *offsets; |
michael@0 | 1130 | char *overflow, *t; |
michael@0 | 1131 | int32_t i, length; |
michael@0 | 1132 | |
michael@0 | 1133 | t=*target; |
michael@0 | 1134 | if(pOffsets!=NULL) { |
michael@0 | 1135 | offsets=*pOffsets; |
michael@0 | 1136 | } else { |
michael@0 | 1137 | offsets=NULL; |
michael@0 | 1138 | } |
michael@0 | 1139 | |
michael@0 | 1140 | overflow=(char *)cnv->charErrorBuffer; |
michael@0 | 1141 | length=cnv->charErrorBufferLength; |
michael@0 | 1142 | i=0; |
michael@0 | 1143 | while(i<length) { |
michael@0 | 1144 | if(t==targetLimit) { |
michael@0 | 1145 | /* the overflow buffer contains too much, keep the rest */ |
michael@0 | 1146 | int32_t j=0; |
michael@0 | 1147 | |
michael@0 | 1148 | do { |
michael@0 | 1149 | overflow[j++]=overflow[i++]; |
michael@0 | 1150 | } while(i<length); |
michael@0 | 1151 | |
michael@0 | 1152 | cnv->charErrorBufferLength=(int8_t)j; |
michael@0 | 1153 | *target=t; |
michael@0 | 1154 | if(offsets!=NULL) { |
michael@0 | 1155 | *pOffsets=offsets; |
michael@0 | 1156 | } |
michael@0 | 1157 | *err=U_BUFFER_OVERFLOW_ERROR; |
michael@0 | 1158 | return TRUE; |
michael@0 | 1159 | } |
michael@0 | 1160 | |
michael@0 | 1161 | /* copy the overflow contents to the target */ |
michael@0 | 1162 | *t++=overflow[i++]; |
michael@0 | 1163 | if(offsets!=NULL) { |
michael@0 | 1164 | *offsets++=-1; /* no source index available for old output */ |
michael@0 | 1165 | } |
michael@0 | 1166 | } |
michael@0 | 1167 | |
michael@0 | 1168 | /* the overflow buffer is completely copied to the target */ |
michael@0 | 1169 | cnv->charErrorBufferLength=0; |
michael@0 | 1170 | *target=t; |
michael@0 | 1171 | if(offsets!=NULL) { |
michael@0 | 1172 | *pOffsets=offsets; |
michael@0 | 1173 | } |
michael@0 | 1174 | return FALSE; |
michael@0 | 1175 | } |
michael@0 | 1176 | |
michael@0 | 1177 | U_CAPI void U_EXPORT2 |
michael@0 | 1178 | ucnv_fromUnicode(UConverter *cnv, |
michael@0 | 1179 | char **target, const char *targetLimit, |
michael@0 | 1180 | const UChar **source, const UChar *sourceLimit, |
michael@0 | 1181 | int32_t *offsets, |
michael@0 | 1182 | UBool flush, |
michael@0 | 1183 | UErrorCode *err) { |
michael@0 | 1184 | UConverterFromUnicodeArgs args; |
michael@0 | 1185 | const UChar *s; |
michael@0 | 1186 | char *t; |
michael@0 | 1187 | |
michael@0 | 1188 | /* check parameters */ |
michael@0 | 1189 | if(err==NULL || U_FAILURE(*err)) { |
michael@0 | 1190 | return; |
michael@0 | 1191 | } |
michael@0 | 1192 | |
michael@0 | 1193 | if(cnv==NULL || target==NULL || source==NULL) { |
michael@0 | 1194 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1195 | return; |
michael@0 | 1196 | } |
michael@0 | 1197 | |
michael@0 | 1198 | s=*source; |
michael@0 | 1199 | t=*target; |
michael@0 | 1200 | |
michael@0 | 1201 | if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { |
michael@0 | 1202 | /* |
michael@0 | 1203 | Prevent code from going into an infinite loop in case we do hit this |
michael@0 | 1204 | limit. The limit pointer is expected to be on a UChar * boundary. |
michael@0 | 1205 | This also prevents the next argument check from failing. |
michael@0 | 1206 | */ |
michael@0 | 1207 | sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); |
michael@0 | 1208 | } |
michael@0 | 1209 | |
michael@0 | 1210 | /* |
michael@0 | 1211 | * All these conditions should never happen. |
michael@0 | 1212 | * |
michael@0 | 1213 | * 1) Make sure that the limits are >= to the address source or target |
michael@0 | 1214 | * |
michael@0 | 1215 | * 2) Make sure that the buffer sizes do not exceed the number range for |
michael@0 | 1216 | * int32_t because some functions use the size (in units or bytes) |
michael@0 | 1217 | * rather than comparing pointers, and because offsets are int32_t values. |
michael@0 | 1218 | * |
michael@0 | 1219 | * size_t is guaranteed to be unsigned and large enough for the job. |
michael@0 | 1220 | * |
michael@0 | 1221 | * Return with an error instead of adjusting the limits because we would |
michael@0 | 1222 | * not be able to maintain the semantics that either the source must be |
michael@0 | 1223 | * consumed or the target filled (unless an error occurs). |
michael@0 | 1224 | * An adjustment would be targetLimit=t+0x7fffffff; for example. |
michael@0 | 1225 | * |
michael@0 | 1226 | * 3) Make sure that the user didn't incorrectly cast a UChar * pointer |
michael@0 | 1227 | * to a char * pointer and provide an incomplete UChar code unit. |
michael@0 | 1228 | */ |
michael@0 | 1229 | if (sourceLimit<s || targetLimit<t || |
michael@0 | 1230 | ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || |
michael@0 | 1231 | ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || |
michael@0 | 1232 | (((const char *)sourceLimit-(const char *)s) & 1) != 0) |
michael@0 | 1233 | { |
michael@0 | 1234 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1235 | return; |
michael@0 | 1236 | } |
michael@0 | 1237 | |
michael@0 | 1238 | /* output the target overflow buffer */ |
michael@0 | 1239 | if( cnv->charErrorBufferLength>0 && |
michael@0 | 1240 | ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) |
michael@0 | 1241 | ) { |
michael@0 | 1242 | /* U_BUFFER_OVERFLOW_ERROR */ |
michael@0 | 1243 | return; |
michael@0 | 1244 | } |
michael@0 | 1245 | /* *target may have moved, therefore stop using t */ |
michael@0 | 1246 | |
michael@0 | 1247 | if(!flush && s==sourceLimit && cnv->preFromULength>=0) { |
michael@0 | 1248 | /* the overflow buffer is emptied and there is no new input: we are done */ |
michael@0 | 1249 | return; |
michael@0 | 1250 | } |
michael@0 | 1251 | |
michael@0 | 1252 | /* |
michael@0 | 1253 | * Do not simply return with a buffer overflow error if |
michael@0 | 1254 | * !flush && t==targetLimit |
michael@0 | 1255 | * because it is possible that the source will not generate any output. |
michael@0 | 1256 | * For example, the skip callback may be called; |
michael@0 | 1257 | * it does not output anything. |
michael@0 | 1258 | */ |
michael@0 | 1259 | |
michael@0 | 1260 | /* prepare the converter arguments */ |
michael@0 | 1261 | args.converter=cnv; |
michael@0 | 1262 | args.flush=flush; |
michael@0 | 1263 | args.offsets=offsets; |
michael@0 | 1264 | args.source=s; |
michael@0 | 1265 | args.sourceLimit=sourceLimit; |
michael@0 | 1266 | args.target=*target; |
michael@0 | 1267 | args.targetLimit=targetLimit; |
michael@0 | 1268 | args.size=sizeof(args); |
michael@0 | 1269 | |
michael@0 | 1270 | _fromUnicodeWithCallback(&args, err); |
michael@0 | 1271 | |
michael@0 | 1272 | *source=args.source; |
michael@0 | 1273 | *target=args.target; |
michael@0 | 1274 | } |
michael@0 | 1275 | |
michael@0 | 1276 | /* ucnv_toUnicode() --------------------------------------------------------- */ |
michael@0 | 1277 | |
michael@0 | 1278 | static void |
michael@0 | 1279 | _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { |
michael@0 | 1280 | UConverterToUnicode toUnicode; |
michael@0 | 1281 | UConverter *cnv; |
michael@0 | 1282 | const char *s; |
michael@0 | 1283 | UChar *t; |
michael@0 | 1284 | int32_t *offsets; |
michael@0 | 1285 | int32_t sourceIndex; |
michael@0 | 1286 | int32_t errorInputLength; |
michael@0 | 1287 | UBool converterSawEndOfInput, calledCallback; |
michael@0 | 1288 | |
michael@0 | 1289 | /* variables for m:n conversion */ |
michael@0 | 1290 | char replay[UCNV_EXT_MAX_BYTES]; |
michael@0 | 1291 | const char *realSource, *realSourceLimit; |
michael@0 | 1292 | int32_t realSourceIndex; |
michael@0 | 1293 | UBool realFlush; |
michael@0 | 1294 | |
michael@0 | 1295 | cnv=pArgs->converter; |
michael@0 | 1296 | s=pArgs->source; |
michael@0 | 1297 | t=pArgs->target; |
michael@0 | 1298 | offsets=pArgs->offsets; |
michael@0 | 1299 | |
michael@0 | 1300 | /* get the converter implementation function */ |
michael@0 | 1301 | sourceIndex=0; |
michael@0 | 1302 | if(offsets==NULL) { |
michael@0 | 1303 | toUnicode=cnv->sharedData->impl->toUnicode; |
michael@0 | 1304 | } else { |
michael@0 | 1305 | toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; |
michael@0 | 1306 | if(toUnicode==NULL) { |
michael@0 | 1307 | /* there is no WithOffsets implementation */ |
michael@0 | 1308 | toUnicode=cnv->sharedData->impl->toUnicode; |
michael@0 | 1309 | /* we will write -1 for each offset */ |
michael@0 | 1310 | sourceIndex=-1; |
michael@0 | 1311 | } |
michael@0 | 1312 | } |
michael@0 | 1313 | |
michael@0 | 1314 | if(cnv->preToULength>=0) { |
michael@0 | 1315 | /* normal mode */ |
michael@0 | 1316 | realSource=NULL; |
michael@0 | 1317 | |
michael@0 | 1318 | /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ |
michael@0 | 1319 | realSourceLimit=NULL; |
michael@0 | 1320 | realFlush=FALSE; |
michael@0 | 1321 | realSourceIndex=0; |
michael@0 | 1322 | } else { |
michael@0 | 1323 | /* |
michael@0 | 1324 | * Previous m:n conversion stored source units from a partial match |
michael@0 | 1325 | * and failed to consume all of them. |
michael@0 | 1326 | * We need to "replay" them from a temporary buffer and convert them first. |
michael@0 | 1327 | */ |
michael@0 | 1328 | realSource=pArgs->source; |
michael@0 | 1329 | realSourceLimit=pArgs->sourceLimit; |
michael@0 | 1330 | realFlush=pArgs->flush; |
michael@0 | 1331 | realSourceIndex=sourceIndex; |
michael@0 | 1332 | |
michael@0 | 1333 | uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); |
michael@0 | 1334 | pArgs->source=replay; |
michael@0 | 1335 | pArgs->sourceLimit=replay-cnv->preToULength; |
michael@0 | 1336 | pArgs->flush=FALSE; |
michael@0 | 1337 | sourceIndex=-1; |
michael@0 | 1338 | |
michael@0 | 1339 | cnv->preToULength=0; |
michael@0 | 1340 | } |
michael@0 | 1341 | |
michael@0 | 1342 | /* |
michael@0 | 1343 | * loop for conversion and error handling |
michael@0 | 1344 | * |
michael@0 | 1345 | * loop { |
michael@0 | 1346 | * convert |
michael@0 | 1347 | * loop { |
michael@0 | 1348 | * update offsets |
michael@0 | 1349 | * handle end of input |
michael@0 | 1350 | * handle errors/call callback |
michael@0 | 1351 | * } |
michael@0 | 1352 | * } |
michael@0 | 1353 | */ |
michael@0 | 1354 | for(;;) { |
michael@0 | 1355 | if(U_SUCCESS(*err)) { |
michael@0 | 1356 | /* convert */ |
michael@0 | 1357 | toUnicode(pArgs, err); |
michael@0 | 1358 | |
michael@0 | 1359 | /* |
michael@0 | 1360 | * set a flag for whether the converter |
michael@0 | 1361 | * successfully processed the end of the input |
michael@0 | 1362 | * |
michael@0 | 1363 | * need not check cnv->preToULength==0 because a replay (<0) will cause |
michael@0 | 1364 | * s<sourceLimit before converterSawEndOfInput is checked |
michael@0 | 1365 | */ |
michael@0 | 1366 | converterSawEndOfInput= |
michael@0 | 1367 | (UBool)(U_SUCCESS(*err) && |
michael@0 | 1368 | pArgs->flush && pArgs->source==pArgs->sourceLimit && |
michael@0 | 1369 | cnv->toULength==0); |
michael@0 | 1370 | } else { |
michael@0 | 1371 | /* handle error from getNextUChar() or ucnv_convertEx() */ |
michael@0 | 1372 | converterSawEndOfInput=FALSE; |
michael@0 | 1373 | } |
michael@0 | 1374 | |
michael@0 | 1375 | /* no callback called yet for this iteration */ |
michael@0 | 1376 | calledCallback=FALSE; |
michael@0 | 1377 | |
michael@0 | 1378 | /* no sourceIndex adjustment for conversion, only for callback output */ |
michael@0 | 1379 | errorInputLength=0; |
michael@0 | 1380 | |
michael@0 | 1381 | /* |
michael@0 | 1382 | * loop for offsets and error handling |
michael@0 | 1383 | * |
michael@0 | 1384 | * iterates at most 3 times: |
michael@0 | 1385 | * 1. to clean up after the conversion function |
michael@0 | 1386 | * 2. after the callback |
michael@0 | 1387 | * 3. after the callback again if there was truncated input |
michael@0 | 1388 | */ |
michael@0 | 1389 | for(;;) { |
michael@0 | 1390 | /* update offsets if we write any */ |
michael@0 | 1391 | if(offsets!=NULL) { |
michael@0 | 1392 | int32_t length=(int32_t)(pArgs->target-t); |
michael@0 | 1393 | if(length>0) { |
michael@0 | 1394 | _updateOffsets(offsets, length, sourceIndex, errorInputLength); |
michael@0 | 1395 | |
michael@0 | 1396 | /* |
michael@0 | 1397 | * if a converter handles offsets and updates the offsets |
michael@0 | 1398 | * pointer at the end, then pArgs->offset should not change |
michael@0 | 1399 | * here; |
michael@0 | 1400 | * however, some converters do not handle offsets at all |
michael@0 | 1401 | * (sourceIndex<0) or may not update the offsets pointer |
michael@0 | 1402 | */ |
michael@0 | 1403 | pArgs->offsets=offsets+=length; |
michael@0 | 1404 | } |
michael@0 | 1405 | |
michael@0 | 1406 | if(sourceIndex>=0) { |
michael@0 | 1407 | sourceIndex+=(int32_t)(pArgs->source-s); |
michael@0 | 1408 | } |
michael@0 | 1409 | } |
michael@0 | 1410 | |
michael@0 | 1411 | if(cnv->preToULength<0) { |
michael@0 | 1412 | /* |
michael@0 | 1413 | * switch the source to new replay units (cannot occur while replaying) |
michael@0 | 1414 | * after offset handling and before end-of-input and callback handling |
michael@0 | 1415 | */ |
michael@0 | 1416 | if(realSource==NULL) { |
michael@0 | 1417 | realSource=pArgs->source; |
michael@0 | 1418 | realSourceLimit=pArgs->sourceLimit; |
michael@0 | 1419 | realFlush=pArgs->flush; |
michael@0 | 1420 | realSourceIndex=sourceIndex; |
michael@0 | 1421 | |
michael@0 | 1422 | uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); |
michael@0 | 1423 | pArgs->source=replay; |
michael@0 | 1424 | pArgs->sourceLimit=replay-cnv->preToULength; |
michael@0 | 1425 | pArgs->flush=FALSE; |
michael@0 | 1426 | if((sourceIndex+=cnv->preToULength)<0) { |
michael@0 | 1427 | sourceIndex=-1; |
michael@0 | 1428 | } |
michael@0 | 1429 | |
michael@0 | 1430 | cnv->preToULength=0; |
michael@0 | 1431 | } else { |
michael@0 | 1432 | /* see implementation note before _fromUnicodeWithCallback() */ |
michael@0 | 1433 | U_ASSERT(realSource==NULL); |
michael@0 | 1434 | *err=U_INTERNAL_PROGRAM_ERROR; |
michael@0 | 1435 | } |
michael@0 | 1436 | } |
michael@0 | 1437 | |
michael@0 | 1438 | /* update pointers */ |
michael@0 | 1439 | s=pArgs->source; |
michael@0 | 1440 | t=pArgs->target; |
michael@0 | 1441 | |
michael@0 | 1442 | if(U_SUCCESS(*err)) { |
michael@0 | 1443 | if(s<pArgs->sourceLimit) { |
michael@0 | 1444 | /* |
michael@0 | 1445 | * continue with the conversion loop while there is still input left |
michael@0 | 1446 | * (continue converting by breaking out of only the inner loop) |
michael@0 | 1447 | */ |
michael@0 | 1448 | break; |
michael@0 | 1449 | } else if(realSource!=NULL) { |
michael@0 | 1450 | /* switch back from replaying to the real source and continue */ |
michael@0 | 1451 | pArgs->source=realSource; |
michael@0 | 1452 | pArgs->sourceLimit=realSourceLimit; |
michael@0 | 1453 | pArgs->flush=realFlush; |
michael@0 | 1454 | sourceIndex=realSourceIndex; |
michael@0 | 1455 | |
michael@0 | 1456 | realSource=NULL; |
michael@0 | 1457 | break; |
michael@0 | 1458 | } else if(pArgs->flush && cnv->toULength>0) { |
michael@0 | 1459 | /* |
michael@0 | 1460 | * the entire input stream is consumed |
michael@0 | 1461 | * and there is a partial, truncated input sequence left |
michael@0 | 1462 | */ |
michael@0 | 1463 | |
michael@0 | 1464 | /* inject an error and continue with callback handling */ |
michael@0 | 1465 | *err=U_TRUNCATED_CHAR_FOUND; |
michael@0 | 1466 | calledCallback=FALSE; /* new error condition */ |
michael@0 | 1467 | } else { |
michael@0 | 1468 | /* input consumed */ |
michael@0 | 1469 | if(pArgs->flush) { |
michael@0 | 1470 | /* |
michael@0 | 1471 | * return to the conversion loop once more if the flush |
michael@0 | 1472 | * flag is set and the conversion function has not |
michael@0 | 1473 | * successfully processed the end of the input yet |
michael@0 | 1474 | * |
michael@0 | 1475 | * (continue converting by breaking out of only the inner loop) |
michael@0 | 1476 | */ |
michael@0 | 1477 | if(!converterSawEndOfInput) { |
michael@0 | 1478 | break; |
michael@0 | 1479 | } |
michael@0 | 1480 | |
michael@0 | 1481 | /* reset the converter without calling the callback function */ |
michael@0 | 1482 | _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); |
michael@0 | 1483 | } |
michael@0 | 1484 | |
michael@0 | 1485 | /* done successfully */ |
michael@0 | 1486 | return; |
michael@0 | 1487 | } |
michael@0 | 1488 | } |
michael@0 | 1489 | |
michael@0 | 1490 | /* U_FAILURE(*err) */ |
michael@0 | 1491 | { |
michael@0 | 1492 | UErrorCode e; |
michael@0 | 1493 | |
michael@0 | 1494 | if( calledCallback || |
michael@0 | 1495 | (e=*err)==U_BUFFER_OVERFLOW_ERROR || |
michael@0 | 1496 | (e!=U_INVALID_CHAR_FOUND && |
michael@0 | 1497 | e!=U_ILLEGAL_CHAR_FOUND && |
michael@0 | 1498 | e!=U_TRUNCATED_CHAR_FOUND && |
michael@0 | 1499 | e!=U_ILLEGAL_ESCAPE_SEQUENCE && |
michael@0 | 1500 | e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) |
michael@0 | 1501 | ) { |
michael@0 | 1502 | /* |
michael@0 | 1503 | * the callback did not or cannot resolve the error: |
michael@0 | 1504 | * set output pointers and return |
michael@0 | 1505 | * |
michael@0 | 1506 | * the check for buffer overflow is redundant but it is |
michael@0 | 1507 | * a high-runner case and hopefully documents the intent |
michael@0 | 1508 | * well |
michael@0 | 1509 | * |
michael@0 | 1510 | * if we were replaying, then the replay buffer must be |
michael@0 | 1511 | * copied back into the UConverter |
michael@0 | 1512 | * and the real arguments must be restored |
michael@0 | 1513 | */ |
michael@0 | 1514 | if(realSource!=NULL) { |
michael@0 | 1515 | int32_t length; |
michael@0 | 1516 | |
michael@0 | 1517 | U_ASSERT(cnv->preToULength==0); |
michael@0 | 1518 | |
michael@0 | 1519 | length=(int32_t)(pArgs->sourceLimit-pArgs->source); |
michael@0 | 1520 | if(length>0) { |
michael@0 | 1521 | uprv_memcpy(cnv->preToU, pArgs->source, length); |
michael@0 | 1522 | cnv->preToULength=(int8_t)-length; |
michael@0 | 1523 | } |
michael@0 | 1524 | |
michael@0 | 1525 | pArgs->source=realSource; |
michael@0 | 1526 | pArgs->sourceLimit=realSourceLimit; |
michael@0 | 1527 | pArgs->flush=realFlush; |
michael@0 | 1528 | } |
michael@0 | 1529 | |
michael@0 | 1530 | return; |
michael@0 | 1531 | } |
michael@0 | 1532 | } |
michael@0 | 1533 | |
michael@0 | 1534 | /* copy toUBytes[] to invalidCharBuffer[] */ |
michael@0 | 1535 | errorInputLength=cnv->invalidCharLength=cnv->toULength; |
michael@0 | 1536 | if(errorInputLength>0) { |
michael@0 | 1537 | uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); |
michael@0 | 1538 | } |
michael@0 | 1539 | |
michael@0 | 1540 | /* set the converter state to deal with the next character */ |
michael@0 | 1541 | cnv->toULength=0; |
michael@0 | 1542 | |
michael@0 | 1543 | /* call the callback function */ |
michael@0 | 1544 | if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { |
michael@0 | 1545 | cnv->toUCallbackReason = UCNV_UNASSIGNED; |
michael@0 | 1546 | } |
michael@0 | 1547 | cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, |
michael@0 | 1548 | cnv->invalidCharBuffer, errorInputLength, |
michael@0 | 1549 | cnv->toUCallbackReason, |
michael@0 | 1550 | err); |
michael@0 | 1551 | cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ |
michael@0 | 1552 | |
michael@0 | 1553 | /* |
michael@0 | 1554 | * loop back to the offset handling |
michael@0 | 1555 | * |
michael@0 | 1556 | * this flag will indicate after offset handling |
michael@0 | 1557 | * that a callback was called; |
michael@0 | 1558 | * if the callback did not resolve the error, then we return |
michael@0 | 1559 | */ |
michael@0 | 1560 | calledCallback=TRUE; |
michael@0 | 1561 | } |
michael@0 | 1562 | } |
michael@0 | 1563 | } |
michael@0 | 1564 | |
michael@0 | 1565 | /* |
michael@0 | 1566 | * Output the toUnicode overflow buffer. |
michael@0 | 1567 | * Call this function if(cnv->UCharErrorBufferLength>0). |
michael@0 | 1568 | * @return TRUE if overflow |
michael@0 | 1569 | */ |
michael@0 | 1570 | static UBool |
michael@0 | 1571 | ucnv_outputOverflowToUnicode(UConverter *cnv, |
michael@0 | 1572 | UChar **target, const UChar *targetLimit, |
michael@0 | 1573 | int32_t **pOffsets, |
michael@0 | 1574 | UErrorCode *err) { |
michael@0 | 1575 | int32_t *offsets; |
michael@0 | 1576 | UChar *overflow, *t; |
michael@0 | 1577 | int32_t i, length; |
michael@0 | 1578 | |
michael@0 | 1579 | t=*target; |
michael@0 | 1580 | if(pOffsets!=NULL) { |
michael@0 | 1581 | offsets=*pOffsets; |
michael@0 | 1582 | } else { |
michael@0 | 1583 | offsets=NULL; |
michael@0 | 1584 | } |
michael@0 | 1585 | |
michael@0 | 1586 | overflow=cnv->UCharErrorBuffer; |
michael@0 | 1587 | length=cnv->UCharErrorBufferLength; |
michael@0 | 1588 | i=0; |
michael@0 | 1589 | while(i<length) { |
michael@0 | 1590 | if(t==targetLimit) { |
michael@0 | 1591 | /* the overflow buffer contains too much, keep the rest */ |
michael@0 | 1592 | int32_t j=0; |
michael@0 | 1593 | |
michael@0 | 1594 | do { |
michael@0 | 1595 | overflow[j++]=overflow[i++]; |
michael@0 | 1596 | } while(i<length); |
michael@0 | 1597 | |
michael@0 | 1598 | cnv->UCharErrorBufferLength=(int8_t)j; |
michael@0 | 1599 | *target=t; |
michael@0 | 1600 | if(offsets!=NULL) { |
michael@0 | 1601 | *pOffsets=offsets; |
michael@0 | 1602 | } |
michael@0 | 1603 | *err=U_BUFFER_OVERFLOW_ERROR; |
michael@0 | 1604 | return TRUE; |
michael@0 | 1605 | } |
michael@0 | 1606 | |
michael@0 | 1607 | /* copy the overflow contents to the target */ |
michael@0 | 1608 | *t++=overflow[i++]; |
michael@0 | 1609 | if(offsets!=NULL) { |
michael@0 | 1610 | *offsets++=-1; /* no source index available for old output */ |
michael@0 | 1611 | } |
michael@0 | 1612 | } |
michael@0 | 1613 | |
michael@0 | 1614 | /* the overflow buffer is completely copied to the target */ |
michael@0 | 1615 | cnv->UCharErrorBufferLength=0; |
michael@0 | 1616 | *target=t; |
michael@0 | 1617 | if(offsets!=NULL) { |
michael@0 | 1618 | *pOffsets=offsets; |
michael@0 | 1619 | } |
michael@0 | 1620 | return FALSE; |
michael@0 | 1621 | } |
michael@0 | 1622 | |
michael@0 | 1623 | U_CAPI void U_EXPORT2 |
michael@0 | 1624 | ucnv_toUnicode(UConverter *cnv, |
michael@0 | 1625 | UChar **target, const UChar *targetLimit, |
michael@0 | 1626 | const char **source, const char *sourceLimit, |
michael@0 | 1627 | int32_t *offsets, |
michael@0 | 1628 | UBool flush, |
michael@0 | 1629 | UErrorCode *err) { |
michael@0 | 1630 | UConverterToUnicodeArgs args; |
michael@0 | 1631 | const char *s; |
michael@0 | 1632 | UChar *t; |
michael@0 | 1633 | |
michael@0 | 1634 | /* check parameters */ |
michael@0 | 1635 | if(err==NULL || U_FAILURE(*err)) { |
michael@0 | 1636 | return; |
michael@0 | 1637 | } |
michael@0 | 1638 | |
michael@0 | 1639 | if(cnv==NULL || target==NULL || source==NULL) { |
michael@0 | 1640 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1641 | return; |
michael@0 | 1642 | } |
michael@0 | 1643 | |
michael@0 | 1644 | s=*source; |
michael@0 | 1645 | t=*target; |
michael@0 | 1646 | |
michael@0 | 1647 | if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { |
michael@0 | 1648 | /* |
michael@0 | 1649 | Prevent code from going into an infinite loop in case we do hit this |
michael@0 | 1650 | limit. The limit pointer is expected to be on a UChar * boundary. |
michael@0 | 1651 | This also prevents the next argument check from failing. |
michael@0 | 1652 | */ |
michael@0 | 1653 | targetLimit = (const UChar *)(((const char *)targetLimit) - 1); |
michael@0 | 1654 | } |
michael@0 | 1655 | |
michael@0 | 1656 | /* |
michael@0 | 1657 | * All these conditions should never happen. |
michael@0 | 1658 | * |
michael@0 | 1659 | * 1) Make sure that the limits are >= to the address source or target |
michael@0 | 1660 | * |
michael@0 | 1661 | * 2) Make sure that the buffer sizes do not exceed the number range for |
michael@0 | 1662 | * int32_t because some functions use the size (in units or bytes) |
michael@0 | 1663 | * rather than comparing pointers, and because offsets are int32_t values. |
michael@0 | 1664 | * |
michael@0 | 1665 | * size_t is guaranteed to be unsigned and large enough for the job. |
michael@0 | 1666 | * |
michael@0 | 1667 | * Return with an error instead of adjusting the limits because we would |
michael@0 | 1668 | * not be able to maintain the semantics that either the source must be |
michael@0 | 1669 | * consumed or the target filled (unless an error occurs). |
michael@0 | 1670 | * An adjustment would be sourceLimit=t+0x7fffffff; for example. |
michael@0 | 1671 | * |
michael@0 | 1672 | * 3) Make sure that the user didn't incorrectly cast a UChar * pointer |
michael@0 | 1673 | * to a char * pointer and provide an incomplete UChar code unit. |
michael@0 | 1674 | */ |
michael@0 | 1675 | if (sourceLimit<s || targetLimit<t || |
michael@0 | 1676 | ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || |
michael@0 | 1677 | ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || |
michael@0 | 1678 | (((const char *)targetLimit-(const char *)t) & 1) != 0 |
michael@0 | 1679 | ) { |
michael@0 | 1680 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1681 | return; |
michael@0 | 1682 | } |
michael@0 | 1683 | |
michael@0 | 1684 | /* output the target overflow buffer */ |
michael@0 | 1685 | if( cnv->UCharErrorBufferLength>0 && |
michael@0 | 1686 | ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) |
michael@0 | 1687 | ) { |
michael@0 | 1688 | /* U_BUFFER_OVERFLOW_ERROR */ |
michael@0 | 1689 | return; |
michael@0 | 1690 | } |
michael@0 | 1691 | /* *target may have moved, therefore stop using t */ |
michael@0 | 1692 | |
michael@0 | 1693 | if(!flush && s==sourceLimit && cnv->preToULength>=0) { |
michael@0 | 1694 | /* the overflow buffer is emptied and there is no new input: we are done */ |
michael@0 | 1695 | return; |
michael@0 | 1696 | } |
michael@0 | 1697 | |
michael@0 | 1698 | /* |
michael@0 | 1699 | * Do not simply return with a buffer overflow error if |
michael@0 | 1700 | * !flush && t==targetLimit |
michael@0 | 1701 | * because it is possible that the source will not generate any output. |
michael@0 | 1702 | * For example, the skip callback may be called; |
michael@0 | 1703 | * it does not output anything. |
michael@0 | 1704 | */ |
michael@0 | 1705 | |
michael@0 | 1706 | /* prepare the converter arguments */ |
michael@0 | 1707 | args.converter=cnv; |
michael@0 | 1708 | args.flush=flush; |
michael@0 | 1709 | args.offsets=offsets; |
michael@0 | 1710 | args.source=s; |
michael@0 | 1711 | args.sourceLimit=sourceLimit; |
michael@0 | 1712 | args.target=*target; |
michael@0 | 1713 | args.targetLimit=targetLimit; |
michael@0 | 1714 | args.size=sizeof(args); |
michael@0 | 1715 | |
michael@0 | 1716 | _toUnicodeWithCallback(&args, err); |
michael@0 | 1717 | |
michael@0 | 1718 | *source=args.source; |
michael@0 | 1719 | *target=args.target; |
michael@0 | 1720 | } |
michael@0 | 1721 | |
michael@0 | 1722 | /* ucnv_to/fromUChars() ----------------------------------------------------- */ |
michael@0 | 1723 | |
michael@0 | 1724 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 1725 | ucnv_fromUChars(UConverter *cnv, |
michael@0 | 1726 | char *dest, int32_t destCapacity, |
michael@0 | 1727 | const UChar *src, int32_t srcLength, |
michael@0 | 1728 | UErrorCode *pErrorCode) { |
michael@0 | 1729 | const UChar *srcLimit; |
michael@0 | 1730 | char *originalDest, *destLimit; |
michael@0 | 1731 | int32_t destLength; |
michael@0 | 1732 | |
michael@0 | 1733 | /* check arguments */ |
michael@0 | 1734 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 1735 | return 0; |
michael@0 | 1736 | } |
michael@0 | 1737 | |
michael@0 | 1738 | if( cnv==NULL || |
michael@0 | 1739 | destCapacity<0 || (destCapacity>0 && dest==NULL) || |
michael@0 | 1740 | srcLength<-1 || (srcLength!=0 && src==NULL) |
michael@0 | 1741 | ) { |
michael@0 | 1742 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1743 | return 0; |
michael@0 | 1744 | } |
michael@0 | 1745 | |
michael@0 | 1746 | /* initialize */ |
michael@0 | 1747 | ucnv_resetFromUnicode(cnv); |
michael@0 | 1748 | originalDest=dest; |
michael@0 | 1749 | if(srcLength==-1) { |
michael@0 | 1750 | srcLength=u_strlen(src); |
michael@0 | 1751 | } |
michael@0 | 1752 | if(srcLength>0) { |
michael@0 | 1753 | srcLimit=src+srcLength; |
michael@0 | 1754 | destLimit=dest+destCapacity; |
michael@0 | 1755 | |
michael@0 | 1756 | /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ |
michael@0 | 1757 | if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { |
michael@0 | 1758 | destLimit=(char *)U_MAX_PTR(dest); |
michael@0 | 1759 | } |
michael@0 | 1760 | |
michael@0 | 1761 | /* perform the conversion */ |
michael@0 | 1762 | ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
michael@0 | 1763 | destLength=(int32_t)(dest-originalDest); |
michael@0 | 1764 | |
michael@0 | 1765 | /* if an overflow occurs, then get the preflighting length */ |
michael@0 | 1766 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
michael@0 | 1767 | char buffer[1024]; |
michael@0 | 1768 | |
michael@0 | 1769 | destLimit=buffer+sizeof(buffer); |
michael@0 | 1770 | do { |
michael@0 | 1771 | dest=buffer; |
michael@0 | 1772 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 1773 | ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
michael@0 | 1774 | destLength+=(int32_t)(dest-buffer); |
michael@0 | 1775 | } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 1776 | } |
michael@0 | 1777 | } else { |
michael@0 | 1778 | destLength=0; |
michael@0 | 1779 | } |
michael@0 | 1780 | |
michael@0 | 1781 | return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); |
michael@0 | 1782 | } |
michael@0 | 1783 | |
michael@0 | 1784 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 1785 | ucnv_toUChars(UConverter *cnv, |
michael@0 | 1786 | UChar *dest, int32_t destCapacity, |
michael@0 | 1787 | const char *src, int32_t srcLength, |
michael@0 | 1788 | UErrorCode *pErrorCode) { |
michael@0 | 1789 | const char *srcLimit; |
michael@0 | 1790 | UChar *originalDest, *destLimit; |
michael@0 | 1791 | int32_t destLength; |
michael@0 | 1792 | |
michael@0 | 1793 | /* check arguments */ |
michael@0 | 1794 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 1795 | return 0; |
michael@0 | 1796 | } |
michael@0 | 1797 | |
michael@0 | 1798 | if( cnv==NULL || |
michael@0 | 1799 | destCapacity<0 || (destCapacity>0 && dest==NULL) || |
michael@0 | 1800 | srcLength<-1 || (srcLength!=0 && src==NULL)) |
michael@0 | 1801 | { |
michael@0 | 1802 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1803 | return 0; |
michael@0 | 1804 | } |
michael@0 | 1805 | |
michael@0 | 1806 | /* initialize */ |
michael@0 | 1807 | ucnv_resetToUnicode(cnv); |
michael@0 | 1808 | originalDest=dest; |
michael@0 | 1809 | if(srcLength==-1) { |
michael@0 | 1810 | srcLength=(int32_t)uprv_strlen(src); |
michael@0 | 1811 | } |
michael@0 | 1812 | if(srcLength>0) { |
michael@0 | 1813 | srcLimit=src+srcLength; |
michael@0 | 1814 | destLimit=dest+destCapacity; |
michael@0 | 1815 | |
michael@0 | 1816 | /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ |
michael@0 | 1817 | if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { |
michael@0 | 1818 | destLimit=(UChar *)U_MAX_PTR(dest); |
michael@0 | 1819 | } |
michael@0 | 1820 | |
michael@0 | 1821 | /* perform the conversion */ |
michael@0 | 1822 | ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
michael@0 | 1823 | destLength=(int32_t)(dest-originalDest); |
michael@0 | 1824 | |
michael@0 | 1825 | /* if an overflow occurs, then get the preflighting length */ |
michael@0 | 1826 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) |
michael@0 | 1827 | { |
michael@0 | 1828 | UChar buffer[1024]; |
michael@0 | 1829 | |
michael@0 | 1830 | destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR; |
michael@0 | 1831 | do { |
michael@0 | 1832 | dest=buffer; |
michael@0 | 1833 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 1834 | ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); |
michael@0 | 1835 | destLength+=(int32_t)(dest-buffer); |
michael@0 | 1836 | } |
michael@0 | 1837 | while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 1838 | } |
michael@0 | 1839 | } else { |
michael@0 | 1840 | destLength=0; |
michael@0 | 1841 | } |
michael@0 | 1842 | |
michael@0 | 1843 | return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); |
michael@0 | 1844 | } |
michael@0 | 1845 | |
michael@0 | 1846 | /* ucnv_getNextUChar() ------------------------------------------------------ */ |
michael@0 | 1847 | |
michael@0 | 1848 | U_CAPI UChar32 U_EXPORT2 |
michael@0 | 1849 | ucnv_getNextUChar(UConverter *cnv, |
michael@0 | 1850 | const char **source, const char *sourceLimit, |
michael@0 | 1851 | UErrorCode *err) { |
michael@0 | 1852 | UConverterToUnicodeArgs args; |
michael@0 | 1853 | UChar buffer[U16_MAX_LENGTH]; |
michael@0 | 1854 | const char *s; |
michael@0 | 1855 | UChar32 c; |
michael@0 | 1856 | int32_t i, length; |
michael@0 | 1857 | |
michael@0 | 1858 | /* check parameters */ |
michael@0 | 1859 | if(err==NULL || U_FAILURE(*err)) { |
michael@0 | 1860 | return 0xffff; |
michael@0 | 1861 | } |
michael@0 | 1862 | |
michael@0 | 1863 | if(cnv==NULL || source==NULL) { |
michael@0 | 1864 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1865 | return 0xffff; |
michael@0 | 1866 | } |
michael@0 | 1867 | |
michael@0 | 1868 | s=*source; |
michael@0 | 1869 | if(sourceLimit<s) { |
michael@0 | 1870 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1871 | return 0xffff; |
michael@0 | 1872 | } |
michael@0 | 1873 | |
michael@0 | 1874 | /* |
michael@0 | 1875 | * Make sure that the buffer sizes do not exceed the number range for |
michael@0 | 1876 | * int32_t because some functions use the size (in units or bytes) |
michael@0 | 1877 | * rather than comparing pointers, and because offsets are int32_t values. |
michael@0 | 1878 | * |
michael@0 | 1879 | * size_t is guaranteed to be unsigned and large enough for the job. |
michael@0 | 1880 | * |
michael@0 | 1881 | * Return with an error instead of adjusting the limits because we would |
michael@0 | 1882 | * not be able to maintain the semantics that either the source must be |
michael@0 | 1883 | * consumed or the target filled (unless an error occurs). |
michael@0 | 1884 | * An adjustment would be sourceLimit=t+0x7fffffff; for example. |
michael@0 | 1885 | */ |
michael@0 | 1886 | if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { |
michael@0 | 1887 | *err=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 1888 | return 0xffff; |
michael@0 | 1889 | } |
michael@0 | 1890 | |
michael@0 | 1891 | c=U_SENTINEL; |
michael@0 | 1892 | |
michael@0 | 1893 | /* flush the target overflow buffer */ |
michael@0 | 1894 | if(cnv->UCharErrorBufferLength>0) { |
michael@0 | 1895 | UChar *overflow; |
michael@0 | 1896 | |
michael@0 | 1897 | overflow=cnv->UCharErrorBuffer; |
michael@0 | 1898 | i=0; |
michael@0 | 1899 | length=cnv->UCharErrorBufferLength; |
michael@0 | 1900 | U16_NEXT(overflow, i, length, c); |
michael@0 | 1901 | |
michael@0 | 1902 | /* move the remaining overflow contents up to the beginning */ |
michael@0 | 1903 | if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { |
michael@0 | 1904 | uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, |
michael@0 | 1905 | cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); |
michael@0 | 1906 | } |
michael@0 | 1907 | |
michael@0 | 1908 | if(!U16_IS_LEAD(c) || i<length) { |
michael@0 | 1909 | return c; |
michael@0 | 1910 | } |
michael@0 | 1911 | /* |
michael@0 | 1912 | * Continue if the overflow buffer contained only a lead surrogate, |
michael@0 | 1913 | * in case the converter outputs single surrogates from complete |
michael@0 | 1914 | * input sequences. |
michael@0 | 1915 | */ |
michael@0 | 1916 | } |
michael@0 | 1917 | |
michael@0 | 1918 | /* |
michael@0 | 1919 | * flush==TRUE is implied for ucnv_getNextUChar() |
michael@0 | 1920 | * |
michael@0 | 1921 | * do not simply return even if s==sourceLimit because the converter may |
michael@0 | 1922 | * not have seen flush==TRUE before |
michael@0 | 1923 | */ |
michael@0 | 1924 | |
michael@0 | 1925 | /* prepare the converter arguments */ |
michael@0 | 1926 | args.converter=cnv; |
michael@0 | 1927 | args.flush=TRUE; |
michael@0 | 1928 | args.offsets=NULL; |
michael@0 | 1929 | args.source=s; |
michael@0 | 1930 | args.sourceLimit=sourceLimit; |
michael@0 | 1931 | args.target=buffer; |
michael@0 | 1932 | args.targetLimit=buffer+1; |
michael@0 | 1933 | args.size=sizeof(args); |
michael@0 | 1934 | |
michael@0 | 1935 | if(c<0) { |
michael@0 | 1936 | /* |
michael@0 | 1937 | * call the native getNextUChar() implementation if we are |
michael@0 | 1938 | * at a character boundary (toULength==0) |
michael@0 | 1939 | * |
michael@0 | 1940 | * unlike with _toUnicode(), getNextUChar() implementations must set |
michael@0 | 1941 | * U_TRUNCATED_CHAR_FOUND for truncated input, |
michael@0 | 1942 | * in addition to setting toULength/toUBytes[] |
michael@0 | 1943 | */ |
michael@0 | 1944 | if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { |
michael@0 | 1945 | c=cnv->sharedData->impl->getNextUChar(&args, err); |
michael@0 | 1946 | *source=s=args.source; |
michael@0 | 1947 | if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { |
michael@0 | 1948 | /* reset the converter without calling the callback function */ |
michael@0 | 1949 | _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); |
michael@0 | 1950 | return 0xffff; /* no output */ |
michael@0 | 1951 | } else if(U_SUCCESS(*err) && c>=0) { |
michael@0 | 1952 | return c; |
michael@0 | 1953 | /* |
michael@0 | 1954 | * else fall through to use _toUnicode() because |
michael@0 | 1955 | * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all |
michael@0 | 1956 | * U_FAILURE: call _toUnicode() for callback handling (do not output c) |
michael@0 | 1957 | */ |
michael@0 | 1958 | } |
michael@0 | 1959 | } |
michael@0 | 1960 | |
michael@0 | 1961 | /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ |
michael@0 | 1962 | _toUnicodeWithCallback(&args, err); |
michael@0 | 1963 | |
michael@0 | 1964 | if(*err==U_BUFFER_OVERFLOW_ERROR) { |
michael@0 | 1965 | *err=U_ZERO_ERROR; |
michael@0 | 1966 | } |
michael@0 | 1967 | |
michael@0 | 1968 | i=0; |
michael@0 | 1969 | length=(int32_t)(args.target-buffer); |
michael@0 | 1970 | } else { |
michael@0 | 1971 | /* write the lead surrogate from the overflow buffer */ |
michael@0 | 1972 | buffer[0]=(UChar)c; |
michael@0 | 1973 | args.target=buffer+1; |
michael@0 | 1974 | i=0; |
michael@0 | 1975 | length=1; |
michael@0 | 1976 | } |
michael@0 | 1977 | |
michael@0 | 1978 | /* buffer contents starts at i and ends before length */ |
michael@0 | 1979 | |
michael@0 | 1980 | if(U_FAILURE(*err)) { |
michael@0 | 1981 | c=0xffff; /* no output */ |
michael@0 | 1982 | } else if(length==0) { |
michael@0 | 1983 | /* no input or only state changes */ |
michael@0 | 1984 | *err=U_INDEX_OUTOFBOUNDS_ERROR; |
michael@0 | 1985 | /* no need to reset explicitly because _toUnicodeWithCallback() did it */ |
michael@0 | 1986 | c=0xffff; /* no output */ |
michael@0 | 1987 | } else { |
michael@0 | 1988 | c=buffer[0]; |
michael@0 | 1989 | i=1; |
michael@0 | 1990 | if(!U16_IS_LEAD(c)) { |
michael@0 | 1991 | /* consume c=buffer[0], done */ |
michael@0 | 1992 | } else { |
michael@0 | 1993 | /* got a lead surrogate, see if a trail surrogate follows */ |
michael@0 | 1994 | UChar c2; |
michael@0 | 1995 | |
michael@0 | 1996 | if(cnv->UCharErrorBufferLength>0) { |
michael@0 | 1997 | /* got overflow output from the conversion */ |
michael@0 | 1998 | if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { |
michael@0 | 1999 | /* got a trail surrogate, too */ |
michael@0 | 2000 | c=U16_GET_SUPPLEMENTARY(c, c2); |
michael@0 | 2001 | |
michael@0 | 2002 | /* move the remaining overflow contents up to the beginning */ |
michael@0 | 2003 | if((--cnv->UCharErrorBufferLength)>0) { |
michael@0 | 2004 | uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, |
michael@0 | 2005 | cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); |
michael@0 | 2006 | } |
michael@0 | 2007 | } else { |
michael@0 | 2008 | /* c is an unpaired lead surrogate, just return it */ |
michael@0 | 2009 | } |
michael@0 | 2010 | } else if(args.source<sourceLimit) { |
michael@0 | 2011 | /* convert once more, to buffer[1] */ |
michael@0 | 2012 | args.targetLimit=buffer+2; |
michael@0 | 2013 | _toUnicodeWithCallback(&args, err); |
michael@0 | 2014 | if(*err==U_BUFFER_OVERFLOW_ERROR) { |
michael@0 | 2015 | *err=U_ZERO_ERROR; |
michael@0 | 2016 | } |
michael@0 | 2017 | |
michael@0 | 2018 | length=(int32_t)(args.target-buffer); |
michael@0 | 2019 | if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { |
michael@0 | 2020 | /* got a trail surrogate, too */ |
michael@0 | 2021 | c=U16_GET_SUPPLEMENTARY(c, c2); |
michael@0 | 2022 | i=2; |
michael@0 | 2023 | } |
michael@0 | 2024 | } |
michael@0 | 2025 | } |
michael@0 | 2026 | } |
michael@0 | 2027 | |
michael@0 | 2028 | /* |
michael@0 | 2029 | * move leftover output from buffer[i..length[ |
michael@0 | 2030 | * into the beginning of the overflow buffer |
michael@0 | 2031 | */ |
michael@0 | 2032 | if(i<length) { |
michael@0 | 2033 | /* move further overflow back */ |
michael@0 | 2034 | int32_t delta=length-i; |
michael@0 | 2035 | if((length=cnv->UCharErrorBufferLength)>0) { |
michael@0 | 2036 | uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, |
michael@0 | 2037 | length*U_SIZEOF_UCHAR); |
michael@0 | 2038 | } |
michael@0 | 2039 | cnv->UCharErrorBufferLength=(int8_t)(length+delta); |
michael@0 | 2040 | |
michael@0 | 2041 | cnv->UCharErrorBuffer[0]=buffer[i++]; |
michael@0 | 2042 | if(delta>1) { |
michael@0 | 2043 | cnv->UCharErrorBuffer[1]=buffer[i]; |
michael@0 | 2044 | } |
michael@0 | 2045 | } |
michael@0 | 2046 | |
michael@0 | 2047 | *source=args.source; |
michael@0 | 2048 | return c; |
michael@0 | 2049 | } |
michael@0 | 2050 | |
michael@0 | 2051 | /* ucnv_convert() and siblings ---------------------------------------------- */ |
michael@0 | 2052 | |
michael@0 | 2053 | U_CAPI void U_EXPORT2 |
michael@0 | 2054 | ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, |
michael@0 | 2055 | char **target, const char *targetLimit, |
michael@0 | 2056 | const char **source, const char *sourceLimit, |
michael@0 | 2057 | UChar *pivotStart, UChar **pivotSource, |
michael@0 | 2058 | UChar **pivotTarget, const UChar *pivotLimit, |
michael@0 | 2059 | UBool reset, UBool flush, |
michael@0 | 2060 | UErrorCode *pErrorCode) { |
michael@0 | 2061 | UChar pivotBuffer[CHUNK_SIZE]; |
michael@0 | 2062 | const UChar *myPivotSource; |
michael@0 | 2063 | UChar *myPivotTarget; |
michael@0 | 2064 | const char *s; |
michael@0 | 2065 | char *t; |
michael@0 | 2066 | |
michael@0 | 2067 | UConverterToUnicodeArgs toUArgs; |
michael@0 | 2068 | UConverterFromUnicodeArgs fromUArgs; |
michael@0 | 2069 | UConverterConvert convert; |
michael@0 | 2070 | |
michael@0 | 2071 | /* error checking */ |
michael@0 | 2072 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 2073 | return; |
michael@0 | 2074 | } |
michael@0 | 2075 | |
michael@0 | 2076 | if( targetCnv==NULL || sourceCnv==NULL || |
michael@0 | 2077 | source==NULL || *source==NULL || |
michael@0 | 2078 | target==NULL || *target==NULL || targetLimit==NULL |
michael@0 | 2079 | ) { |
michael@0 | 2080 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2081 | return; |
michael@0 | 2082 | } |
michael@0 | 2083 | |
michael@0 | 2084 | s=*source; |
michael@0 | 2085 | t=*target; |
michael@0 | 2086 | if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { |
michael@0 | 2087 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2088 | return; |
michael@0 | 2089 | } |
michael@0 | 2090 | |
michael@0 | 2091 | /* |
michael@0 | 2092 | * Make sure that the buffer sizes do not exceed the number range for |
michael@0 | 2093 | * int32_t. See ucnv_toUnicode() for a more detailed comment. |
michael@0 | 2094 | */ |
michael@0 | 2095 | if( |
michael@0 | 2096 | (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || |
michael@0 | 2097 | ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) |
michael@0 | 2098 | ) { |
michael@0 | 2099 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2100 | return; |
michael@0 | 2101 | } |
michael@0 | 2102 | |
michael@0 | 2103 | if(pivotStart==NULL) { |
michael@0 | 2104 | if(!flush) { |
michael@0 | 2105 | /* streaming conversion requires an explicit pivot buffer */ |
michael@0 | 2106 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2107 | return; |
michael@0 | 2108 | } |
michael@0 | 2109 | |
michael@0 | 2110 | /* use the stack pivot buffer */ |
michael@0 | 2111 | myPivotSource=myPivotTarget=pivotStart=pivotBuffer; |
michael@0 | 2112 | pivotSource=(UChar **)&myPivotSource; |
michael@0 | 2113 | pivotTarget=&myPivotTarget; |
michael@0 | 2114 | pivotLimit=pivotBuffer+CHUNK_SIZE; |
michael@0 | 2115 | } else if( pivotStart>=pivotLimit || |
michael@0 | 2116 | pivotSource==NULL || *pivotSource==NULL || |
michael@0 | 2117 | pivotTarget==NULL || *pivotTarget==NULL || |
michael@0 | 2118 | pivotLimit==NULL |
michael@0 | 2119 | ) { |
michael@0 | 2120 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2121 | return; |
michael@0 | 2122 | } |
michael@0 | 2123 | |
michael@0 | 2124 | if(sourceLimit==NULL) { |
michael@0 | 2125 | /* get limit of single-byte-NUL-terminated source string */ |
michael@0 | 2126 | sourceLimit=uprv_strchr(*source, 0); |
michael@0 | 2127 | } |
michael@0 | 2128 | |
michael@0 | 2129 | if(reset) { |
michael@0 | 2130 | ucnv_resetToUnicode(sourceCnv); |
michael@0 | 2131 | ucnv_resetFromUnicode(targetCnv); |
michael@0 | 2132 | *pivotSource=*pivotTarget=pivotStart; |
michael@0 | 2133 | } else if(targetCnv->charErrorBufferLength>0) { |
michael@0 | 2134 | /* output the targetCnv overflow buffer */ |
michael@0 | 2135 | if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { |
michael@0 | 2136 | /* U_BUFFER_OVERFLOW_ERROR */ |
michael@0 | 2137 | return; |
michael@0 | 2138 | } |
michael@0 | 2139 | /* *target has moved, therefore stop using t */ |
michael@0 | 2140 | |
michael@0 | 2141 | if( !flush && |
michael@0 | 2142 | targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && |
michael@0 | 2143 | sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit |
michael@0 | 2144 | ) { |
michael@0 | 2145 | /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ |
michael@0 | 2146 | return; |
michael@0 | 2147 | } |
michael@0 | 2148 | } |
michael@0 | 2149 | |
michael@0 | 2150 | /* Is direct-UTF-8 conversion available? */ |
michael@0 | 2151 | if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && |
michael@0 | 2152 | targetCnv->sharedData->impl->fromUTF8!=NULL |
michael@0 | 2153 | ) { |
michael@0 | 2154 | convert=targetCnv->sharedData->impl->fromUTF8; |
michael@0 | 2155 | } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && |
michael@0 | 2156 | sourceCnv->sharedData->impl->toUTF8!=NULL |
michael@0 | 2157 | ) { |
michael@0 | 2158 | convert=sourceCnv->sharedData->impl->toUTF8; |
michael@0 | 2159 | } else { |
michael@0 | 2160 | convert=NULL; |
michael@0 | 2161 | } |
michael@0 | 2162 | |
michael@0 | 2163 | /* |
michael@0 | 2164 | * If direct-UTF-8 conversion is available, then we use a smaller |
michael@0 | 2165 | * pivot buffer for error handling and partial matches |
michael@0 | 2166 | * so that we quickly return to direct conversion. |
michael@0 | 2167 | * |
michael@0 | 2168 | * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. |
michael@0 | 2169 | * |
michael@0 | 2170 | * We could reduce the pivot buffer size further, at the cost of |
michael@0 | 2171 | * buffer overflows from callbacks. |
michael@0 | 2172 | * The pivot buffer should not be smaller than the maximum number of |
michael@0 | 2173 | * fromUnicode extension table input UChars |
michael@0 | 2174 | * (for m:n conversion, see |
michael@0 | 2175 | * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) |
michael@0 | 2176 | * or 2 for surrogate pairs. |
michael@0 | 2177 | * |
michael@0 | 2178 | * Too small a buffer can cause thrashing between pivoting and direct |
michael@0 | 2179 | * conversion, with function call overhead outweighing the benefits |
michael@0 | 2180 | * of direct conversion. |
michael@0 | 2181 | */ |
michael@0 | 2182 | if(convert!=NULL && (pivotLimit-pivotStart)>32) { |
michael@0 | 2183 | pivotLimit=pivotStart+32; |
michael@0 | 2184 | } |
michael@0 | 2185 | |
michael@0 | 2186 | /* prepare the converter arguments */ |
michael@0 | 2187 | fromUArgs.converter=targetCnv; |
michael@0 | 2188 | fromUArgs.flush=FALSE; |
michael@0 | 2189 | fromUArgs.offsets=NULL; |
michael@0 | 2190 | fromUArgs.target=*target; |
michael@0 | 2191 | fromUArgs.targetLimit=targetLimit; |
michael@0 | 2192 | fromUArgs.size=sizeof(fromUArgs); |
michael@0 | 2193 | |
michael@0 | 2194 | toUArgs.converter=sourceCnv; |
michael@0 | 2195 | toUArgs.flush=flush; |
michael@0 | 2196 | toUArgs.offsets=NULL; |
michael@0 | 2197 | toUArgs.source=s; |
michael@0 | 2198 | toUArgs.sourceLimit=sourceLimit; |
michael@0 | 2199 | toUArgs.targetLimit=pivotLimit; |
michael@0 | 2200 | toUArgs.size=sizeof(toUArgs); |
michael@0 | 2201 | |
michael@0 | 2202 | /* |
michael@0 | 2203 | * TODO: Consider separating this function into two functions, |
michael@0 | 2204 | * extracting exactly the conversion loop, |
michael@0 | 2205 | * for readability and to reduce the set of visible variables. |
michael@0 | 2206 | * |
michael@0 | 2207 | * Otherwise stop using s and t from here on. |
michael@0 | 2208 | */ |
michael@0 | 2209 | s=t=NULL; |
michael@0 | 2210 | |
michael@0 | 2211 | /* |
michael@0 | 2212 | * conversion loop |
michael@0 | 2213 | * |
michael@0 | 2214 | * The sequence of steps in the loop may appear backward, |
michael@0 | 2215 | * but the principle is simple: |
michael@0 | 2216 | * In the chain of |
michael@0 | 2217 | * source - sourceCnv overflow - pivot - targetCnv overflow - target |
michael@0 | 2218 | * empty out later buffers before refilling them from earlier ones. |
michael@0 | 2219 | * |
michael@0 | 2220 | * The targetCnv overflow buffer is flushed out only once before the loop. |
michael@0 | 2221 | */ |
michael@0 | 2222 | for(;;) { |
michael@0 | 2223 | /* |
michael@0 | 2224 | * if(pivot not empty or error or replay or flush fromUnicode) { |
michael@0 | 2225 | * fromUnicode(pivot -> target); |
michael@0 | 2226 | * } |
michael@0 | 2227 | * |
michael@0 | 2228 | * For pivoting conversion; and for direct conversion for |
michael@0 | 2229 | * error callback handling and flushing the replay buffer. |
michael@0 | 2230 | */ |
michael@0 | 2231 | if( *pivotSource<*pivotTarget || |
michael@0 | 2232 | U_FAILURE(*pErrorCode) || |
michael@0 | 2233 | targetCnv->preFromULength<0 || |
michael@0 | 2234 | fromUArgs.flush |
michael@0 | 2235 | ) { |
michael@0 | 2236 | fromUArgs.source=*pivotSource; |
michael@0 | 2237 | fromUArgs.sourceLimit=*pivotTarget; |
michael@0 | 2238 | _fromUnicodeWithCallback(&fromUArgs, pErrorCode); |
michael@0 | 2239 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 2240 | /* target overflow, or conversion error */ |
michael@0 | 2241 | *pivotSource=(UChar *)fromUArgs.source; |
michael@0 | 2242 | break; |
michael@0 | 2243 | } |
michael@0 | 2244 | |
michael@0 | 2245 | /* |
michael@0 | 2246 | * _fromUnicodeWithCallback() must have consumed the pivot contents |
michael@0 | 2247 | * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() |
michael@0 | 2248 | */ |
michael@0 | 2249 | } |
michael@0 | 2250 | |
michael@0 | 2251 | /* The pivot buffer is empty; reset it so we start at pivotStart. */ |
michael@0 | 2252 | *pivotSource=*pivotTarget=pivotStart; |
michael@0 | 2253 | |
michael@0 | 2254 | /* |
michael@0 | 2255 | * if(sourceCnv overflow buffer not empty) { |
michael@0 | 2256 | * move(sourceCnv overflow buffer -> pivot); |
michael@0 | 2257 | * continue; |
michael@0 | 2258 | * } |
michael@0 | 2259 | */ |
michael@0 | 2260 | /* output the sourceCnv overflow buffer */ |
michael@0 | 2261 | if(sourceCnv->UCharErrorBufferLength>0) { |
michael@0 | 2262 | if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { |
michael@0 | 2263 | /* U_BUFFER_OVERFLOW_ERROR */ |
michael@0 | 2264 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 2265 | } |
michael@0 | 2266 | continue; |
michael@0 | 2267 | } |
michael@0 | 2268 | |
michael@0 | 2269 | /* |
michael@0 | 2270 | * check for end of input and break if done |
michael@0 | 2271 | * |
michael@0 | 2272 | * Checking both flush and fromUArgs.flush ensures that the converters |
michael@0 | 2273 | * have been called with the flush flag set if the ucnv_convertEx() |
michael@0 | 2274 | * caller set it. |
michael@0 | 2275 | */ |
michael@0 | 2276 | if( toUArgs.source==sourceLimit && |
michael@0 | 2277 | sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && |
michael@0 | 2278 | (!flush || fromUArgs.flush) |
michael@0 | 2279 | ) { |
michael@0 | 2280 | /* done successfully */ |
michael@0 | 2281 | break; |
michael@0 | 2282 | } |
michael@0 | 2283 | |
michael@0 | 2284 | /* |
michael@0 | 2285 | * use direct conversion if available |
michael@0 | 2286 | * but not if continuing a partial match |
michael@0 | 2287 | * or flushing the toUnicode replay buffer |
michael@0 | 2288 | */ |
michael@0 | 2289 | if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { |
michael@0 | 2290 | if(*pErrorCode==U_USING_DEFAULT_WARNING) { |
michael@0 | 2291 | /* remove a warning that may be set by this function */ |
michael@0 | 2292 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 2293 | } |
michael@0 | 2294 | convert(&fromUArgs, &toUArgs, pErrorCode); |
michael@0 | 2295 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
michael@0 | 2296 | break; |
michael@0 | 2297 | } else if(U_FAILURE(*pErrorCode)) { |
michael@0 | 2298 | if(sourceCnv->toULength>0) { |
michael@0 | 2299 | /* |
michael@0 | 2300 | * Fall through to calling _toUnicodeWithCallback() |
michael@0 | 2301 | * for callback handling. |
michael@0 | 2302 | * |
michael@0 | 2303 | * The pivot buffer will be reset with |
michael@0 | 2304 | * *pivotSource=*pivotTarget=pivotStart; |
michael@0 | 2305 | * which indicates a toUnicode error to the caller |
michael@0 | 2306 | * (*pivotSource==pivotStart shows no pivot UChars consumed). |
michael@0 | 2307 | */ |
michael@0 | 2308 | } else { |
michael@0 | 2309 | /* |
michael@0 | 2310 | * Indicate a fromUnicode error to the caller |
michael@0 | 2311 | * (*pivotSource>pivotStart shows some pivot UChars consumed). |
michael@0 | 2312 | */ |
michael@0 | 2313 | *pivotSource=*pivotTarget=pivotStart+1; |
michael@0 | 2314 | /* |
michael@0 | 2315 | * Loop around to calling _fromUnicodeWithCallbacks() |
michael@0 | 2316 | * for callback handling. |
michael@0 | 2317 | */ |
michael@0 | 2318 | continue; |
michael@0 | 2319 | } |
michael@0 | 2320 | } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { |
michael@0 | 2321 | /* |
michael@0 | 2322 | * No error, but the implementation requested to temporarily |
michael@0 | 2323 | * fall back to pivoting. |
michael@0 | 2324 | */ |
michael@0 | 2325 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 2326 | /* |
michael@0 | 2327 | * The following else branches are almost identical to the end-of-input |
michael@0 | 2328 | * handling in _toUnicodeWithCallback(). |
michael@0 | 2329 | * Avoid calling it just for the end of input. |
michael@0 | 2330 | */ |
michael@0 | 2331 | } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ |
michael@0 | 2332 | /* |
michael@0 | 2333 | * the entire input stream is consumed |
michael@0 | 2334 | * and there is a partial, truncated input sequence left |
michael@0 | 2335 | */ |
michael@0 | 2336 | |
michael@0 | 2337 | /* inject an error and continue with callback handling */ |
michael@0 | 2338 | *pErrorCode=U_TRUNCATED_CHAR_FOUND; |
michael@0 | 2339 | } else { |
michael@0 | 2340 | /* input consumed */ |
michael@0 | 2341 | if(flush) { |
michael@0 | 2342 | /* reset the converters without calling the callback functions */ |
michael@0 | 2343 | _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); |
michael@0 | 2344 | _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); |
michael@0 | 2345 | } |
michael@0 | 2346 | |
michael@0 | 2347 | /* done successfully */ |
michael@0 | 2348 | break; |
michael@0 | 2349 | } |
michael@0 | 2350 | } |
michael@0 | 2351 | |
michael@0 | 2352 | /* |
michael@0 | 2353 | * toUnicode(source -> pivot); |
michael@0 | 2354 | * |
michael@0 | 2355 | * For pivoting conversion; and for direct conversion for |
michael@0 | 2356 | * error callback handling, continuing partial matches |
michael@0 | 2357 | * and flushing the replay buffer. |
michael@0 | 2358 | * |
michael@0 | 2359 | * The pivot buffer is empty and reset. |
michael@0 | 2360 | */ |
michael@0 | 2361 | toUArgs.target=pivotStart; /* ==*pivotTarget */ |
michael@0 | 2362 | /* toUArgs.targetLimit=pivotLimit; already set before the loop */ |
michael@0 | 2363 | _toUnicodeWithCallback(&toUArgs, pErrorCode); |
michael@0 | 2364 | *pivotTarget=toUArgs.target; |
michael@0 | 2365 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { |
michael@0 | 2366 | /* pivot overflow: continue with the conversion loop */ |
michael@0 | 2367 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 2368 | } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { |
michael@0 | 2369 | /* conversion error, or there was nothing left to convert */ |
michael@0 | 2370 | break; |
michael@0 | 2371 | } |
michael@0 | 2372 | /* |
michael@0 | 2373 | * else: |
michael@0 | 2374 | * _toUnicodeWithCallback() wrote into the pivot buffer, |
michael@0 | 2375 | * continue with fromUnicode conversion. |
michael@0 | 2376 | * |
michael@0 | 2377 | * Set the fromUnicode flush flag if we flush and if toUnicode has |
michael@0 | 2378 | * processed the end of the input. |
michael@0 | 2379 | */ |
michael@0 | 2380 | if( flush && toUArgs.source==sourceLimit && |
michael@0 | 2381 | sourceCnv->preToULength>=0 && |
michael@0 | 2382 | sourceCnv->UCharErrorBufferLength==0 |
michael@0 | 2383 | ) { |
michael@0 | 2384 | fromUArgs.flush=TRUE; |
michael@0 | 2385 | } |
michael@0 | 2386 | } |
michael@0 | 2387 | |
michael@0 | 2388 | /* |
michael@0 | 2389 | * The conversion loop is exited when one of the following is true: |
michael@0 | 2390 | * - the entire source text has been converted successfully to the target buffer |
michael@0 | 2391 | * - a target buffer overflow occurred |
michael@0 | 2392 | * - a conversion error occurred |
michael@0 | 2393 | */ |
michael@0 | 2394 | |
michael@0 | 2395 | *source=toUArgs.source; |
michael@0 | 2396 | *target=fromUArgs.target; |
michael@0 | 2397 | |
michael@0 | 2398 | /* terminate the target buffer if possible */ |
michael@0 | 2399 | if(flush && U_SUCCESS(*pErrorCode)) { |
michael@0 | 2400 | if(*target!=targetLimit) { |
michael@0 | 2401 | **target=0; |
michael@0 | 2402 | if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { |
michael@0 | 2403 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 2404 | } |
michael@0 | 2405 | } else { |
michael@0 | 2406 | *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; |
michael@0 | 2407 | } |
michael@0 | 2408 | } |
michael@0 | 2409 | } |
michael@0 | 2410 | |
michael@0 | 2411 | /* Internal worker behind ucnv_convert() and ucnv_convertAlgorithmic(): converts source through a UChar pivot buffer into target and returns the total output length in bytes, continuing to count past a full or absent target (preflighting). Does not validate its arguments; both callers do that first. */ |
michael@0 | 2412 | static int32_t |
michael@0 | 2413 | ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, |
michael@0 | 2414 | char *target, int32_t targetCapacity, |
michael@0 | 2415 | const char *source, int32_t sourceLength, |
michael@0 | 2416 | UErrorCode *pErrorCode) { |
michael@0 | 2417 | UChar pivotBuffer[CHUNK_SIZE]; |
michael@0 | 2418 | UChar *pivot, *pivot2; |
michael@0 | 2419 | /* pivot and pivot2 are handed to ucnv_convertEx() as its pivot cursors and keep their values between the calls below */ |
michael@0 | 2420 | char *myTarget; |
michael@0 | 2421 | const char *sourceLimit; |
michael@0 | 2422 | const char *targetLimit; |
michael@0 | 2423 | int32_t targetLength=0; |
michael@0 | 2424 | |
michael@0 | 2425 | /* set up: a negative sourceLength means that source is NUL-terminated */ |
michael@0 | 2426 | if(sourceLength<0) { |
michael@0 | 2427 | sourceLimit=uprv_strchr(source, 0); |
michael@0 | 2428 | } else { |
michael@0 | 2429 | sourceLimit=source+sourceLength; |
michael@0 | 2430 | } |
michael@0 | 2431 | |
michael@0 | 2432 | /* if there is no input data, we're done: only terminate the (empty) output */ |
michael@0 | 2433 | if(source==sourceLimit) { |
michael@0 | 2434 | return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
michael@0 | 2435 | } |
michael@0 | 2436 | /* start with an empty pivot buffer and nothing written to target */ |
michael@0 | 2437 | pivot=pivot2=pivotBuffer; |
michael@0 | 2438 | myTarget=target; |
michael@0 | 2439 | targetLength=0; |
michael@0 | 2440 | |
michael@0 | 2441 | if(targetCapacity>0) { |
michael@0 | 2442 | /* perform real conversion directly into the caller's buffer */ |
michael@0 | 2443 | targetLimit=target+targetCapacity; |
michael@0 | 2444 | ucnv_convertEx(outConverter, inConverter, |
michael@0 | 2445 | &myTarget, targetLimit, |
michael@0 | 2446 | &source, sourceLimit, |
michael@0 | 2447 | pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, |
michael@0 | 2448 | FALSE, /* reset: keep the converter state and the pivot contents */ |
michael@0 | 2449 | TRUE, /* flush: the complete input is available in this one call */ |
michael@0 | 2450 | pErrorCode); |
michael@0 | 2451 | targetLength=(int32_t)(myTarget-target); |
michael@0 | 2452 | } |
michael@0 | 2453 | |
michael@0 | 2454 | /* |
michael@0 | 2455 | * If the output buffer is exhausted (or we are only "preflighting" with targetCapacity==0), |
michael@0 | 2456 | * stop writing to it but continue the conversion into a scratch buffer in order to |
michael@0 | 2457 | * accumulate in targetLength the number of bytes that the full output requires. |
michael@0 | 2458 | */ |
michael@0 | 2459 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) |
michael@0 | 2460 | { |
michael@0 | 2461 | char targetBuffer[CHUNK_SIZE]; |
michael@0 | 2462 | /* targetBuffer is overwritten from its start on every pass; only the byte count is kept */ |
michael@0 | 2463 | targetLimit=targetBuffer+CHUNK_SIZE; |
michael@0 | 2464 | do { |
michael@0 | 2465 | *pErrorCode=U_ZERO_ERROR; |
michael@0 | 2466 | myTarget=targetBuffer; |
michael@0 | 2467 | ucnv_convertEx(outConverter, inConverter, |
michael@0 | 2468 | &myTarget, targetLimit, |
michael@0 | 2469 | &source, sourceLimit, |
michael@0 | 2470 | pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, |
michael@0 | 2471 | FALSE, /* reset: continue from where the previous call stopped */ |
michael@0 | 2472 | TRUE, /* flush */ |
michael@0 | 2473 | pErrorCode); |
michael@0 | 2474 | targetLength+=(int32_t)(myTarget-targetBuffer); |
michael@0 | 2475 | } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); |
michael@0 | 2476 | |
michael@0 | 2477 | /* done with preflighting, set warnings and errors as appropriate for the caller's real capacity */ |
michael@0 | 2478 | return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); |
michael@0 | 2479 | } |
michael@0 | 2480 | |
michael@0 | 2481 | /* no need to call u_terminateChars() because ucnv_convertEx() took care of that (it terminates the target when flushing succeeds) */ |
michael@0 | 2482 | return targetLength; |
michael@0 | 2483 | } |
michael@0 | 2484 | |
michael@0 | 2485 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 2486 | ucnv_convert(const char *toConverterName, const char *fromConverterName, |
michael@0 | 2487 | char *target, int32_t targetCapacity, |
michael@0 | 2488 | const char *source, int32_t sourceLength, |
michael@0 | 2489 | UErrorCode *pErrorCode) { |
michael@0 | 2490 | UConverter in, out; /* stack-allocated */ |
michael@0 | 2491 | UConverter *inConverter, *outConverter; |
michael@0 | 2492 | int32_t targetLength; |
michael@0 | 2493 | |
michael@0 | 2494 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 2495 | return 0; |
michael@0 | 2496 | } |
michael@0 | 2497 | |
michael@0 | 2498 | if( source==NULL || sourceLength<-1 || |
michael@0 | 2499 | targetCapacity<0 || (targetCapacity>0 && target==NULL) |
michael@0 | 2500 | ) { |
michael@0 | 2501 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2502 | return 0; |
michael@0 | 2503 | } |
michael@0 | 2504 | |
michael@0 | 2505 | /* if there is no input data, we're done */ |
michael@0 | 2506 | if(sourceLength==0 || (sourceLength<0 && *source==0)) { |
michael@0 | 2507 | return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
michael@0 | 2508 | } |
michael@0 | 2509 | |
michael@0 | 2510 | /* create the converters */ |
michael@0 | 2511 | inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); |
michael@0 | 2512 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 2513 | return 0; |
michael@0 | 2514 | } |
michael@0 | 2515 | |
michael@0 | 2516 | outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); |
michael@0 | 2517 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 2518 | ucnv_close(inConverter); |
michael@0 | 2519 | return 0; |
michael@0 | 2520 | } |
michael@0 | 2521 | |
michael@0 | 2522 | targetLength=ucnv_internalConvert(outConverter, inConverter, |
michael@0 | 2523 | target, targetCapacity, |
michael@0 | 2524 | source, sourceLength, |
michael@0 | 2525 | pErrorCode); |
michael@0 | 2526 | |
michael@0 | 2527 | ucnv_close(inConverter); |
michael@0 | 2528 | ucnv_close(outConverter); |
michael@0 | 2529 | |
michael@0 | 2530 | return targetLength; |
michael@0 | 2531 | } |
michael@0 | 2532 | |
michael@0 | 2533 | /* @internal */ |
michael@0 | 2534 | static int32_t |
michael@0 | 2535 | ucnv_convertAlgorithmic(UBool convertToAlgorithmic, |
michael@0 | 2536 | UConverterType algorithmicType, |
michael@0 | 2537 | UConverter *cnv, |
michael@0 | 2538 | char *target, int32_t targetCapacity, |
michael@0 | 2539 | const char *source, int32_t sourceLength, |
michael@0 | 2540 | UErrorCode *pErrorCode) { |
michael@0 | 2541 | UConverter algoConverterStatic; /* stack-allocated */ |
michael@0 | 2542 | UConverter *algoConverter, *to, *from; |
michael@0 | 2543 | int32_t targetLength; |
michael@0 | 2544 | |
michael@0 | 2545 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 2546 | return 0; |
michael@0 | 2547 | } |
michael@0 | 2548 | |
michael@0 | 2549 | if( cnv==NULL || source==NULL || sourceLength<-1 || |
michael@0 | 2550 | targetCapacity<0 || (targetCapacity>0 && target==NULL) |
michael@0 | 2551 | ) { |
michael@0 | 2552 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2553 | return 0; |
michael@0 | 2554 | } |
michael@0 | 2555 | |
michael@0 | 2556 | /* if there is no input data, we're done */ |
michael@0 | 2557 | if(sourceLength==0 || (sourceLength<0 && *source==0)) { |
michael@0 | 2558 | return u_terminateChars(target, targetCapacity, 0, pErrorCode); |
michael@0 | 2559 | } |
michael@0 | 2560 | |
michael@0 | 2561 | /* create the algorithmic converter */ |
michael@0 | 2562 | algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, |
michael@0 | 2563 | "", 0, pErrorCode); |
michael@0 | 2564 | if(U_FAILURE(*pErrorCode)) { |
michael@0 | 2565 | return 0; |
michael@0 | 2566 | } |
michael@0 | 2567 | |
michael@0 | 2568 | /* reset the other converter */ |
michael@0 | 2569 | if(convertToAlgorithmic) { |
michael@0 | 2570 | /* cnv->Unicode->algo */ |
michael@0 | 2571 | ucnv_resetToUnicode(cnv); |
michael@0 | 2572 | to=algoConverter; |
michael@0 | 2573 | from=cnv; |
michael@0 | 2574 | } else { |
michael@0 | 2575 | /* algo->Unicode->cnv */ |
michael@0 | 2576 | ucnv_resetFromUnicode(cnv); |
michael@0 | 2577 | from=algoConverter; |
michael@0 | 2578 | to=cnv; |
michael@0 | 2579 | } |
michael@0 | 2580 | |
michael@0 | 2581 | targetLength=ucnv_internalConvert(to, from, |
michael@0 | 2582 | target, targetCapacity, |
michael@0 | 2583 | source, sourceLength, |
michael@0 | 2584 | pErrorCode); |
michael@0 | 2585 | |
michael@0 | 2586 | ucnv_close(algoConverter); |
michael@0 | 2587 | |
michael@0 | 2588 | return targetLength; |
michael@0 | 2589 | } |
michael@0 | 2590 | /* Converts source from cnv's charset to the algorithmic charset algorithmicType; thin wrapper that forwards all arguments to ucnv_convertAlgorithmic() and returns its result. */ |
michael@0 | 2591 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 2592 | ucnv_toAlgorithmic(UConverterType algorithmicType, |
michael@0 | 2593 | UConverter *cnv, |
michael@0 | 2594 | char *target, int32_t targetCapacity, |
michael@0 | 2595 | const char *source, int32_t sourceLength, |
michael@0 | 2596 | UErrorCode *pErrorCode) { |
michael@0 | 2597 | return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, /* TRUE: cnv->Unicode->algo */ |
michael@0 | 2598 | target, targetCapacity, |
michael@0 | 2599 | source, sourceLength, |
michael@0 | 2600 | pErrorCode); |
michael@0 | 2601 | } |
michael@0 | 2602 | /* Converts source from the algorithmic charset algorithmicType to cnv's charset; thin wrapper that forwards all arguments to ucnv_convertAlgorithmic() and returns its result. */ |
michael@0 | 2603 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 2604 | ucnv_fromAlgorithmic(UConverter *cnv, |
michael@0 | 2605 | UConverterType algorithmicType, |
michael@0 | 2606 | char *target, int32_t targetCapacity, |
michael@0 | 2607 | const char *source, int32_t sourceLength, |
michael@0 | 2608 | UErrorCode *pErrorCode) { |
michael@0 | 2609 | return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, /* FALSE: algo->Unicode->cnv */ |
michael@0 | 2610 | target, targetCapacity, |
michael@0 | 2611 | source, sourceLength, |
michael@0 | 2612 | pErrorCode); |
michael@0 | 2613 | } |
michael@0 | 2614 | /* Returns the conversion type recorded in the converter's static data; converter must not be NULL (it is dereferenced unchecked). */ |
michael@0 | 2615 | U_CAPI UConverterType U_EXPORT2 |
michael@0 | 2616 | ucnv_getType(const UConverter* converter) |
michael@0 | 2617 | { |
michael@0 | 2618 | const int8_t staticType = converter->sharedData->staticData->conversionType; |
michael@0 | 2619 | #if !UCONFIG_NO_LEGACY_CONVERSION |
michael@0 | 2620 | /* for UCNV_MBCS the MBCS implementation decides which type is reported */ |
michael@0 | 2621 | return (UCNV_MBCS == staticType) ? ucnv_MBCSGetType(converter) : (UConverterType)staticType; |
michael@0 | 2622 | #else |
michael@0 | 2623 | return (UConverterType)staticType; |
michael@0 | 2624 | #endif |
michael@0 | 2625 | } |
michael@0 | 2626 | |
michael@0 | 2627 | U_CAPI void U_EXPORT2 |
michael@0 | 2628 | ucnv_getStarters(const UConverter* converter, |
michael@0 | 2629 | UBool starters[256], |
michael@0 | 2630 | UErrorCode* err) |
michael@0 | 2631 | { |
michael@0 | 2632 | if (err == NULL || U_FAILURE(*err)) { |
michael@0 | 2633 | return; |
michael@0 | 2634 | } |
michael@0 | 2635 | |
michael@0 | 2636 | if(converter->sharedData->impl->getStarters != NULL) { |
michael@0 | 2637 | converter->sharedData->impl->getStarters(converter, starters, err); |
michael@0 | 2638 | } else { |
michael@0 | 2639 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2640 | } |
michael@0 | 2641 | } |
michael@0 | 2642 | /* Looks up cnv's name in the ambiguousConverters table; returns the matching entry, or NULL if cnv is NULL, its name cannot be fetched, or no entry has that name. */ |
michael@0 | 2643 | static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) |
michael@0 | 2644 | { |
michael@0 | 2645 | const int32_t count=(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); |
michael@0 | 2646 | const UAmbiguousConverter *match=NULL; |
michael@0 | 2647 | UErrorCode status=U_ZERO_ERROR; |
michael@0 | 2648 | const char *cnvName; |
michael@0 | 2649 | int32_t idx; |
michael@0 | 2650 | |
michael@0 | 2651 | if(cnv==NULL) { |
michael@0 | 2652 | return NULL; |
michael@0 | 2653 | } |
michael@0 | 2654 | |
michael@0 | 2655 | /* a converter whose name cannot be fetched is reported as not found */ |
michael@0 | 2656 | cnvName=ucnv_getName(cnv, &status); |
michael@0 | 2657 | if(U_FAILURE(status)) { |
michael@0 | 2658 | return NULL; |
michael@0 | 2659 | } |
michael@0 | 2660 | |
michael@0 | 2661 | /* exact string comparison against each table entry; the loop ends at the first match */ |
michael@0 | 2662 | for(idx=0; match==NULL && idx<count; ++idx) { |
michael@0 | 2663 | if(uprv_strcmp(cnvName, ambiguousConverters[idx].name)==0) { |
michael@0 | 2664 | match=ambiguousConverters+idx; |
michael@0 | 2665 | } |
michael@0 | 2666 | } |
michael@0 | 2667 | return match; |
michael@0 | 2668 | } |
michael@0 | 2669 | /* Replaces, in place, every UChar in source[0..sourceLength) that equals the converter's variant5c value with 0x5c; does nothing unless cnv has an entry in the ambiguous-converter table. */ |
michael@0 | 2670 | U_CAPI void U_EXPORT2 |
michael@0 | 2671 | ucnv_fixFileSeparator(const UConverter *cnv, |
michael@0 | 2672 | UChar* source, |
michael@0 | 2673 | int32_t sourceLength) { |
michael@0 | 2674 | const UAmbiguousConverter *ambiguous; |
michael@0 | 2675 | UChar *cursor, *limit; |
michael@0 | 2676 | UChar alt; |
michael@0 | 2677 | |
michael@0 | 2678 | /* check the cheap argument conditions first; the table lookup runs only if they pass */ |
michael@0 | 2679 | if(cnv==NULL || source==NULL || sourceLength<=0) { return; } |
michael@0 | 2680 | ambiguous=ucnv_getAmbiguous(cnv); |
michael@0 | 2681 | if(ambiguous==NULL) { return; } |
michael@0 | 2682 | |
michael@0 | 2683 | /* walk the buffer once and overwrite each occurrence of the variant code unit */ |
michael@0 | 2684 | alt=ambiguous->variant5c; |
michael@0 | 2685 | limit=source+sourceLength; |
michael@0 | 2686 | for(cursor=source; cursor<limit; ++cursor) { |
michael@0 | 2687 | if(*cursor==alt) { *cursor=0x5c; } |
michael@0 | 2688 | } |
michael@0 | 2689 | } |
michael@0 | 2690 | /* Returns TRUE if ucnv_getAmbiguous() finds a table entry for cnv, FALSE otherwise (including for a NULL cnv, which ucnv_getAmbiguous() maps to NULL). */ |
michael@0 | 2691 | U_CAPI UBool U_EXPORT2 |
michael@0 | 2692 | ucnv_isAmbiguous(const UConverter *cnv) { |
michael@0 | 2693 | return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); |
michael@0 | 2694 | } |
michael@0 | 2695 | /* Stores usesFallback in the converter's useFallback field; cnv must not be NULL (it is dereferenced unchecked). */ |
michael@0 | 2696 | U_CAPI void U_EXPORT2 |
michael@0 | 2697 | ucnv_setFallback(UConverter *cnv, UBool usesFallback) |
michael@0 | 2698 | { |
michael@0 | 2699 | cnv->useFallback = usesFallback; |
michael@0 | 2700 | } |
michael@0 | 2701 | /* Returns the converter's useFallback field as set by ucnv_setFallback(); cnv must not be NULL (it is dereferenced unchecked). */ |
michael@0 | 2702 | U_CAPI UBool U_EXPORT2 |
michael@0 | 2703 | ucnv_usesFallback(const UConverter *cnv) |
michael@0 | 2704 | { |
michael@0 | 2705 | return cnv->useFallback; |
michael@0 | 2706 | } |
michael@0 | 2707 | |
michael@0 | 2708 | U_CAPI void U_EXPORT2 |
michael@0 | 2709 | ucnv_getInvalidChars (const UConverter * converter, |
michael@0 | 2710 | char *errBytes, |
michael@0 | 2711 | int8_t * len, |
michael@0 | 2712 | UErrorCode * err) |
michael@0 | 2713 | { |
michael@0 | 2714 | if (err == NULL || U_FAILURE(*err)) |
michael@0 | 2715 | { |
michael@0 | 2716 | return; |
michael@0 | 2717 | } |
michael@0 | 2718 | if (len == NULL || errBytes == NULL || converter == NULL) |
michael@0 | 2719 | { |
michael@0 | 2720 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2721 | return; |
michael@0 | 2722 | } |
michael@0 | 2723 | if (*len < converter->invalidCharLength) |
michael@0 | 2724 | { |
michael@0 | 2725 | *err = U_INDEX_OUTOFBOUNDS_ERROR; |
michael@0 | 2726 | return; |
michael@0 | 2727 | } |
michael@0 | 2728 | if ((*len = converter->invalidCharLength) > 0) |
michael@0 | 2729 | { |
michael@0 | 2730 | uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); |
michael@0 | 2731 | } |
michael@0 | 2732 | } |
michael@0 | 2733 | |
michael@0 | 2734 | U_CAPI void U_EXPORT2 |
michael@0 | 2735 | ucnv_getInvalidUChars (const UConverter * converter, |
michael@0 | 2736 | UChar *errChars, |
michael@0 | 2737 | int8_t * len, |
michael@0 | 2738 | UErrorCode * err) |
michael@0 | 2739 | { |
michael@0 | 2740 | if (err == NULL || U_FAILURE(*err)) |
michael@0 | 2741 | { |
michael@0 | 2742 | return; |
michael@0 | 2743 | } |
michael@0 | 2744 | if (len == NULL || errChars == NULL || converter == NULL) |
michael@0 | 2745 | { |
michael@0 | 2746 | *err = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2747 | return; |
michael@0 | 2748 | } |
michael@0 | 2749 | if (*len < converter->invalidUCharLength) |
michael@0 | 2750 | { |
michael@0 | 2751 | *err = U_INDEX_OUTOFBOUNDS_ERROR; |
michael@0 | 2752 | return; |
michael@0 | 2753 | } |
michael@0 | 2754 | if ((*len = converter->invalidUCharLength) > 0) |
michael@0 | 2755 | { |
michael@0 | 2756 | uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); |
michael@0 | 2757 | } |
michael@0 | 2758 | } |
michael@0 | 2759 | |
michael@0 | 2760 | #define SIG_MAX_LEN 5 /* length of the longest signature tested below (the 5-byte UTF-7 form +/v8-) */ |
michael@0 | 2761 | /* Checks whether source starts with a known Unicode signature (BOM). Returns the charset name as a string literal and stores the signature's byte length in *signatureLength (which may be NULL); returns NULL with length 0 if nothing matches, and NULL on bad arguments. */ |
michael@0 | 2762 | U_CAPI const char* U_EXPORT2 |
michael@0 | 2763 | ucnv_detectUnicodeSignature( const char* source, |
michael@0 | 2764 | int32_t sourceLength, |
michael@0 | 2765 | int32_t* signatureLength, |
michael@0 | 2766 | UErrorCode* pErrorCode) { |
michael@0 | 2767 | int32_t dummy; |
michael@0 | 2768 | |
michael@0 | 2769 | /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN |
michael@0 | 2770 | * bytes we don't misdetect something (0xa5 does not occur in any signature tested below) |
michael@0 | 2771 | */ |
michael@0 | 2772 | char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; |
michael@0 | 2773 | int i = 0; |
michael@0 | 2774 | |
michael@0 | 2775 | if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ |
michael@0 | 2776 | return NULL; |
michael@0 | 2777 | } |
michael@0 | 2778 | /* source is required, and -1 is the only negative length accepted */ |
michael@0 | 2779 | if(source == NULL || sourceLength < -1){ |
michael@0 | 2780 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2781 | return NULL; |
michael@0 | 2782 | } |
michael@0 | 2783 | /* let callers pass NULL if they do not want the length: write it to a local instead */ |
michael@0 | 2784 | if(signatureLength == NULL) { |
michael@0 | 2785 | signatureLength = &dummy; |
michael@0 | 2786 | } |
michael@0 | 2787 | /* NOTE: in the NUL-terminated case the length stops at the first 0 byte, so the 00 bytes of a UTF-32 signature are not part of the input that is examined */ |
michael@0 | 2788 | if(sourceLength==-1){ |
michael@0 | 2789 | sourceLength=(int32_t)uprv_strlen(source); |
michael@0 | 2790 | } |
michael@0 | 2791 | |
michael@0 | 2792 | /* copy at most SIG_MAX_LEN leading bytes; positions that are not overwritten keep the 0xa5 filler */ |
michael@0 | 2793 | while(i<sourceLength&& i<SIG_MAX_LEN){ |
michael@0 | 2794 | start[i]=source[i]; |
michael@0 | 2795 | i++; |
michael@0 | 2796 | } |
michael@0 | 2797 | /* the order of the tests matters: FF FE is a prefix of the UTF-32LE signature FF FE 00 00, which is therefore tested inside that branch */ |
michael@0 | 2798 | if(start[0] == '\xFE' && start[1] == '\xFF') { |
michael@0 | 2799 | *signatureLength=2; |
michael@0 | 2800 | return "UTF-16BE"; |
michael@0 | 2801 | } else if(start[0] == '\xFF' && start[1] == '\xFE') { |
michael@0 | 2802 | if(start[2] == '\x00' && start[3] =='\x00') { |
michael@0 | 2803 | *signatureLength=4; |
michael@0 | 2804 | return "UTF-32LE"; |
michael@0 | 2805 | } else { |
michael@0 | 2806 | *signatureLength=2; |
michael@0 | 2807 | return "UTF-16LE"; |
michael@0 | 2808 | } |
michael@0 | 2809 | } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { |
michael@0 | 2810 | *signatureLength=3; |
michael@0 | 2811 | return "UTF-8"; |
michael@0 | 2812 | } else if(start[0] == '\x00' && start[1] == '\x00' && |
michael@0 | 2813 | start[2] == '\xFE' && start[3]=='\xFF') { |
michael@0 | 2814 | *signatureLength=4; |
michael@0 | 2815 | return "UTF-32BE"; |
michael@0 | 2816 | } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { |
michael@0 | 2817 | *signatureLength=3; |
michael@0 | 2818 | return "SCSU"; |
michael@0 | 2819 | } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { |
michael@0 | 2820 | *signatureLength=3; |
michael@0 | 2821 | return "BOCU-1"; |
michael@0 | 2822 | } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { |
michael@0 | 2823 | /* |
michael@0 | 2824 | * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ |
michael@0 | 2825 | * depending on the second UTF-16 code unit. |
michael@0 | 2826 | * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF |
michael@0 | 2827 | * if it occurs. |
michael@0 | 2828 | * |
michael@0 | 2829 | * So far we have +/v; if the 4th byte fits none of the forms, control falls out of the chain to the "not recognized" return. |
michael@0 | 2830 | */ |
michael@0 | 2831 | if(start[3] == '\x38' && start[4] == '\x2D') { |
michael@0 | 2832 | /* 5 bytes +/v8- */ |
michael@0 | 2833 | *signatureLength=5; |
michael@0 | 2834 | return "UTF-7"; |
michael@0 | 2835 | } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { |
michael@0 | 2836 | /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ |
michael@0 | 2837 | *signatureLength=4; |
michael@0 | 2838 | return "UTF-7"; |
michael@0 | 2839 | } |
michael@0 | 2840 | }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ |
michael@0 | 2841 | *signatureLength=4; |
michael@0 | 2842 | return "UTF-EBCDIC"; |
michael@0 | 2843 | } |
michael@0 | 2844 | |
michael@0 | 2845 | |
michael@0 | 2846 | /* no known Unicode signature byte sequence recognized: report length 0 and no charset name */ |
michael@0 | 2847 | *signatureLength=0; |
michael@0 | 2848 | return NULL; |
michael@0 | 2849 | } |
michael@0 | 2850 | |
michael@0 | 2851 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 2852 | ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) |
michael@0 | 2853 | { |
michael@0 | 2854 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 2855 | return -1; |
michael@0 | 2856 | } |
michael@0 | 2857 | if(cnv == NULL){ |
michael@0 | 2858 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2859 | return -1; |
michael@0 | 2860 | } |
michael@0 | 2861 | |
michael@0 | 2862 | if(cnv->preFromUFirstCP >= 0){ |
michael@0 | 2863 | return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; |
michael@0 | 2864 | }else if(cnv->preFromULength < 0){ |
michael@0 | 2865 | return -cnv->preFromULength ; |
michael@0 | 2866 | }else if(cnv->fromUChar32 > 0){ |
michael@0 | 2867 | return 1; |
michael@0 | 2868 | } |
michael@0 | 2869 | return 0; |
michael@0 | 2870 | |
michael@0 | 2871 | } |
michael@0 | 2872 | |
michael@0 | 2873 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 2874 | ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ |
michael@0 | 2875 | |
michael@0 | 2876 | if(status == NULL || U_FAILURE(*status)){ |
michael@0 | 2877 | return -1; |
michael@0 | 2878 | } |
michael@0 | 2879 | if(cnv == NULL){ |
michael@0 | 2880 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2881 | return -1; |
michael@0 | 2882 | } |
michael@0 | 2883 | |
michael@0 | 2884 | if(cnv->preToULength > 0){ |
michael@0 | 2885 | return cnv->preToULength ; |
michael@0 | 2886 | }else if(cnv->preToULength < 0){ |
michael@0 | 2887 | return -cnv->preToULength; |
michael@0 | 2888 | }else if(cnv->toULength > 0){ |
michael@0 | 2889 | return cnv->toULength; |
michael@0 | 2890 | } |
michael@0 | 2891 | return 0; |
michael@0 | 2892 | } |
michael@0 | 2893 | |
michael@0 | 2894 | U_CAPI UBool U_EXPORT2 |
michael@0 | 2895 | ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ |
michael@0 | 2896 | if (U_FAILURE(*status)) { |
michael@0 | 2897 | return FALSE; |
michael@0 | 2898 | } |
michael@0 | 2899 | |
michael@0 | 2900 | if (cnv == NULL) { |
michael@0 | 2901 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 2902 | return FALSE; |
michael@0 | 2903 | } |
michael@0 | 2904 | |
michael@0 | 2905 | switch (ucnv_getType(cnv)) { |
michael@0 | 2906 | case UCNV_SBCS: |
michael@0 | 2907 | case UCNV_DBCS: |
michael@0 | 2908 | case UCNV_UTF32_BigEndian: |
michael@0 | 2909 | case UCNV_UTF32_LittleEndian: |
michael@0 | 2910 | case UCNV_UTF32: |
michael@0 | 2911 | case UCNV_US_ASCII: |
michael@0 | 2912 | return TRUE; |
michael@0 | 2913 | default: |
michael@0 | 2914 | return FALSE; |
michael@0 | 2915 | } |
michael@0 | 2916 | } |
michael@0 | 2917 | #endif |
michael@0 | 2918 | |
michael@0 | 2919 | /* |
michael@0 | 2920 | * Hey, Emacs, please set the following: |
michael@0 | 2921 | * |
michael@0 | 2922 | * Local Variables: |
michael@0 | 2923 | * indent-tabs-mode: nil |
michael@0 | 2924 | * End: |
michael@0 | 2925 | * |
michael@0 | 2926 | */ |