1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_u32.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1249 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2002-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* file name: ucnv_u32.c 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2002jul01 1.15 +* created by: Markus W. Scherer 1.16 +* 1.17 +* UTF-32 converter implementation. Used to be in ucnv_utf.c. 1.18 +*/ 1.19 + 1.20 +#include "unicode/utypes.h" 1.21 + 1.22 +#if !UCONFIG_NO_CONVERSION 1.23 + 1.24 +#include "unicode/ucnv.h" 1.25 +#include "unicode/utf.h" 1.26 +#include "ucnv_bld.h" 1.27 +#include "ucnv_cnv.h" 1.28 +#include "cmemory.h" 1.29 + 1.30 +#define MAXIMUM_UCS2 0x0000FFFF 1.31 +#define MAXIMUM_UTF 0x0010FFFF 1.32 +#define HALF_SHIFT 10 1.33 +#define HALF_BASE 0x0010000 1.34 +#define HALF_MASK 0x3FF 1.35 +#define SURROGATE_HIGH_START 0xD800 1.36 +#define SURROGATE_LOW_START 0xDC00 1.37 + 1.38 +/* -SURROGATE_LOW_START + HALF_BASE */ 1.39 +#define SURROGATE_LOW_BASE 9216 1.40 + 1.41 +enum { 1.42 + UCNV_NEED_TO_WRITE_BOM=1 1.43 +}; 1.44 + 1.45 +/* UTF-32BE ----------------------------------------------------------------- */ 1.46 + 1.47 +static void 1.48 +T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, 1.49 + UErrorCode * err) 1.50 +{ 1.51 + const unsigned char *mySource = (unsigned char *) args->source; 1.52 + UChar *myTarget = args->target; 1.53 + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 1.54 + const UChar *targetLimit = args->targetLimit; 1.55 + unsigned char *toUBytes = args->converter->toUBytes; 1.56 + uint32_t ch, i; 1.57 + 1.58 + /* Restore state of current sequence */ 1.59 + if (args->converter->toUnicodeStatus && myTarget < 
targetLimit) { 1.60 + i = args->converter->toULength; /* restore # of bytes consumed */ 1.61 + args->converter->toULength = 0; 1.62 + 1.63 + ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ 1.64 + args->converter->toUnicodeStatus = 0; 1.65 + goto morebytes; 1.66 + } 1.67 + 1.68 + while (mySource < sourceLimit && myTarget < targetLimit) { 1.69 + i = 0; 1.70 + ch = 0; 1.71 +morebytes: 1.72 + while (i < sizeof(uint32_t)) { 1.73 + if (mySource < sourceLimit) { 1.74 + ch = (ch << 8) | (uint8_t)(*mySource); 1.75 + toUBytes[i++] = (char) *(mySource++); 1.76 + } 1.77 + else { 1.78 + /* stores a partially calculated target*/ 1.79 + /* + 1 to make 0 a valid character */ 1.80 + args->converter->toUnicodeStatus = ch + 1; 1.81 + args->converter->toULength = (int8_t) i; 1.82 + goto donefornow; 1.83 + } 1.84 + } 1.85 + 1.86 + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { 1.87 + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 1.88 + if (ch <= MAXIMUM_UCS2) 1.89 + { 1.90 + /* fits in 16 bits */ 1.91 + *(myTarget++) = (UChar) ch; 1.92 + } 1.93 + else { 1.94 + /* write out the surrogates */ 1.95 + *(myTarget++) = U16_LEAD(ch); 1.96 + ch = U16_TRAIL(ch); 1.97 + if (myTarget < targetLimit) { 1.98 + *(myTarget++) = (UChar)ch; 1.99 + } 1.100 + else { 1.101 + /* Put in overflow buffer (not handled here) */ 1.102 + args->converter->UCharErrorBuffer[0] = (UChar) ch; 1.103 + args->converter->UCharErrorBufferLength = 1; 1.104 + *err = U_BUFFER_OVERFLOW_ERROR; 1.105 + break; 1.106 + } 1.107 + } 1.108 + } 1.109 + else { 1.110 + args->converter->toULength = (int8_t)i; 1.111 + *err = U_ILLEGAL_CHAR_FOUND; 1.112 + break; 1.113 + } 1.114 + } 1.115 + 1.116 +donefornow: 1.117 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { 1.118 + /* End of target buffer */ 1.119 + *err = U_BUFFER_OVERFLOW_ERROR; 1.120 + } 1.121 + 1.122 + args->target = myTarget; 1.123 + args->source = (const char 
*) mySource; 1.124 +} 1.125 + 1.126 +static void 1.127 +T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, 1.128 + UErrorCode * err) 1.129 +{ 1.130 + const unsigned char *mySource = (unsigned char *) args->source; 1.131 + UChar *myTarget = args->target; 1.132 + int32_t *myOffsets = args->offsets; 1.133 + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 1.134 + const UChar *targetLimit = args->targetLimit; 1.135 + unsigned char *toUBytes = args->converter->toUBytes; 1.136 + uint32_t ch, i; 1.137 + int32_t offsetNum = 0; 1.138 + 1.139 + /* Restore state of current sequence */ 1.140 + if (args->converter->toUnicodeStatus && myTarget < targetLimit) { 1.141 + i = args->converter->toULength; /* restore # of bytes consumed */ 1.142 + args->converter->toULength = 0; 1.143 + 1.144 + ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ 1.145 + args->converter->toUnicodeStatus = 0; 1.146 + goto morebytes; 1.147 + } 1.148 + 1.149 + while (mySource < sourceLimit && myTarget < targetLimit) { 1.150 + i = 0; 1.151 + ch = 0; 1.152 +morebytes: 1.153 + while (i < sizeof(uint32_t)) { 1.154 + if (mySource < sourceLimit) { 1.155 + ch = (ch << 8) | (uint8_t)(*mySource); 1.156 + toUBytes[i++] = (char) *(mySource++); 1.157 + } 1.158 + else { 1.159 + /* stores a partially calculated target*/ 1.160 + /* + 1 to make 0 a valid character */ 1.161 + args->converter->toUnicodeStatus = ch + 1; 1.162 + args->converter->toULength = (int8_t) i; 1.163 + goto donefornow; 1.164 + } 1.165 + } 1.166 + 1.167 + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { 1.168 + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 1.169 + if (ch <= MAXIMUM_UCS2) { 1.170 + /* fits in 16 bits */ 1.171 + *(myTarget++) = (UChar) ch; 1.172 + *(myOffsets++) = offsetNum; 1.173 + } 1.174 + else { 1.175 + /* write out the surrogates */ 1.176 + *(myTarget++) = U16_LEAD(ch); 1.177 + *myOffsets++ = 
offsetNum; 1.178 + ch = U16_TRAIL(ch); 1.179 + if (myTarget < targetLimit) 1.180 + { 1.181 + *(myTarget++) = (UChar)ch; 1.182 + *(myOffsets++) = offsetNum; 1.183 + } 1.184 + else { 1.185 + /* Put in overflow buffer (not handled here) */ 1.186 + args->converter->UCharErrorBuffer[0] = (UChar) ch; 1.187 + args->converter->UCharErrorBufferLength = 1; 1.188 + *err = U_BUFFER_OVERFLOW_ERROR; 1.189 + break; 1.190 + } 1.191 + } 1.192 + } 1.193 + else { 1.194 + args->converter->toULength = (int8_t)i; 1.195 + *err = U_ILLEGAL_CHAR_FOUND; 1.196 + break; 1.197 + } 1.198 + offsetNum += i; 1.199 + } 1.200 + 1.201 +donefornow: 1.202 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 1.203 + { 1.204 + /* End of target buffer */ 1.205 + *err = U_BUFFER_OVERFLOW_ERROR; 1.206 + } 1.207 + 1.208 + args->target = myTarget; 1.209 + args->source = (const char *) mySource; 1.210 + args->offsets = myOffsets; 1.211 +} 1.212 + 1.213 +static void 1.214 +T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, 1.215 + UErrorCode * err) 1.216 +{ 1.217 + const UChar *mySource = args->source; 1.218 + unsigned char *myTarget; 1.219 + const UChar *sourceLimit = args->sourceLimit; 1.220 + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 1.221 + UChar32 ch, ch2; 1.222 + unsigned int indexToWrite; 1.223 + unsigned char temp[sizeof(uint32_t)]; 1.224 + 1.225 + if(mySource >= sourceLimit) { 1.226 + /* no input, nothing to do */ 1.227 + return; 1.228 + } 1.229 + 1.230 + /* write the BOM if necessary */ 1.231 + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 1.232 + static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; 1.233 + ucnv_fromUWriteBytes(args->converter, 1.234 + bom, 4, 1.235 + &args->target, args->targetLimit, 1.236 + &args->offsets, -1, 1.237 + err); 1.238 + args->converter->fromUnicodeStatus=0; 1.239 + } 1.240 + 1.241 + myTarget = (unsigned char *) args->target; 1.242 + temp[0] = 0; 1.243 + 1.244 + if 
(args->converter->fromUChar32) { 1.245 + ch = args->converter->fromUChar32; 1.246 + args->converter->fromUChar32 = 0; 1.247 + goto lowsurogate; 1.248 + } 1.249 + 1.250 + while (mySource < sourceLimit && myTarget < targetLimit) { 1.251 + ch = *(mySource++); 1.252 + 1.253 + if (U_IS_SURROGATE(ch)) { 1.254 + if (U_IS_LEAD(ch)) { 1.255 +lowsurogate: 1.256 + if (mySource < sourceLimit) { 1.257 + ch2 = *mySource; 1.258 + if (U_IS_TRAIL(ch2)) { 1.259 + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 1.260 + mySource++; 1.261 + } 1.262 + else { 1.263 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.264 + /* callback(illegal) */ 1.265 + args->converter->fromUChar32 = ch; 1.266 + *err = U_ILLEGAL_CHAR_FOUND; 1.267 + break; 1.268 + } 1.269 + } 1.270 + else { 1.271 + /* ran out of source */ 1.272 + args->converter->fromUChar32 = ch; 1.273 + if (args->flush) { 1.274 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.275 + /* callback(illegal) */ 1.276 + *err = U_ILLEGAL_CHAR_FOUND; 1.277 + } 1.278 + break; 1.279 + } 1.280 + } 1.281 + else { 1.282 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.283 + /* callback(illegal) */ 1.284 + args->converter->fromUChar32 = ch; 1.285 + *err = U_ILLEGAL_CHAR_FOUND; 1.286 + break; 1.287 + } 1.288 + } 1.289 + 1.290 + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 1.291 + temp[1] = (uint8_t) (ch >> 16 & 0x1F); 1.292 + temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 1.293 + temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 1.294 + 1.295 + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { 1.296 + if (myTarget < targetLimit) { 1.297 + *(myTarget++) = temp[indexToWrite]; 1.298 + } 1.299 + else { 1.300 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 1.301 + *err = U_BUFFER_OVERFLOW_ERROR; 1.302 + } 1.303 + } 1.304 + } 
1.305 + 1.306 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { 1.307 + *err = U_BUFFER_OVERFLOW_ERROR; 1.308 + } 1.309 + 1.310 + args->target = (char *) myTarget; 1.311 + args->source = mySource; 1.312 +} 1.313 + 1.314 +static void 1.315 +T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, 1.316 + UErrorCode * err) 1.317 +{ 1.318 + const UChar *mySource = args->source; 1.319 + unsigned char *myTarget; 1.320 + int32_t *myOffsets; 1.321 + const UChar *sourceLimit = args->sourceLimit; 1.322 + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 1.323 + UChar32 ch, ch2; 1.324 + int32_t offsetNum = 0; 1.325 + unsigned int indexToWrite; 1.326 + unsigned char temp[sizeof(uint32_t)]; 1.327 + 1.328 + if(mySource >= sourceLimit) { 1.329 + /* no input, nothing to do */ 1.330 + return; 1.331 + } 1.332 + 1.333 + /* write the BOM if necessary */ 1.334 + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 1.335 + static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; 1.336 + ucnv_fromUWriteBytes(args->converter, 1.337 + bom, 4, 1.338 + &args->target, args->targetLimit, 1.339 + &args->offsets, -1, 1.340 + err); 1.341 + args->converter->fromUnicodeStatus=0; 1.342 + } 1.343 + 1.344 + myTarget = (unsigned char *) args->target; 1.345 + myOffsets = args->offsets; 1.346 + temp[0] = 0; 1.347 + 1.348 + if (args->converter->fromUChar32) { 1.349 + ch = args->converter->fromUChar32; 1.350 + args->converter->fromUChar32 = 0; 1.351 + goto lowsurogate; 1.352 + } 1.353 + 1.354 + while (mySource < sourceLimit && myTarget < targetLimit) { 1.355 + ch = *(mySource++); 1.356 + 1.357 + if (U_IS_SURROGATE(ch)) { 1.358 + if (U_IS_LEAD(ch)) { 1.359 +lowsurogate: 1.360 + if (mySource < sourceLimit) { 1.361 + ch2 = *mySource; 1.362 + if (U_IS_TRAIL(ch2)) { 1.363 + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 1.364 + mySource++; 1.365 + } 1.366 + else { 1.367 + /* this is an unmatched 
trail code unit (2nd surrogate) */ 1.368 + /* callback(illegal) */ 1.369 + args->converter->fromUChar32 = ch; 1.370 + *err = U_ILLEGAL_CHAR_FOUND; 1.371 + break; 1.372 + } 1.373 + } 1.374 + else { 1.375 + /* ran out of source */ 1.376 + args->converter->fromUChar32 = ch; 1.377 + if (args->flush) { 1.378 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.379 + /* callback(illegal) */ 1.380 + *err = U_ILLEGAL_CHAR_FOUND; 1.381 + } 1.382 + break; 1.383 + } 1.384 + } 1.385 + else { 1.386 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.387 + /* callback(illegal) */ 1.388 + args->converter->fromUChar32 = ch; 1.389 + *err = U_ILLEGAL_CHAR_FOUND; 1.390 + break; 1.391 + } 1.392 + } 1.393 + 1.394 + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 1.395 + temp[1] = (uint8_t) (ch >> 16 & 0x1F); 1.396 + temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 1.397 + temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 1.398 + 1.399 + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { 1.400 + if (myTarget < targetLimit) { 1.401 + *(myTarget++) = temp[indexToWrite]; 1.402 + *(myOffsets++) = offsetNum; 1.403 + } 1.404 + else { 1.405 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 1.406 + *err = U_BUFFER_OVERFLOW_ERROR; 1.407 + } 1.408 + } 1.409 + offsetNum = offsetNum + 1 + (temp[1] != 0); 1.410 + } 1.411 + 1.412 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { 1.413 + *err = U_BUFFER_OVERFLOW_ERROR; 1.414 + } 1.415 + 1.416 + args->target = (char *) myTarget; 1.417 + args->source = mySource; 1.418 + args->offsets = myOffsets; 1.419 +} 1.420 + 1.421 +static UChar32 1.422 +T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, 1.423 + UErrorCode* err) 1.424 +{ 1.425 + const uint8_t *mySource; 1.426 + UChar32 myUChar; 1.427 + int32_t length; 1.428 + 1.429 + mySource = 
(const uint8_t *)args->source; 1.430 + if (mySource >= (const uint8_t *)args->sourceLimit) 1.431 + { 1.432 + /* no input */ 1.433 + *err = U_INDEX_OUTOFBOUNDS_ERROR; 1.434 + return 0xffff; 1.435 + } 1.436 + 1.437 + length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); 1.438 + if (length < 4) 1.439 + { 1.440 + /* got a partial character */ 1.441 + uprv_memcpy(args->converter->toUBytes, mySource, length); 1.442 + args->converter->toULength = (int8_t)length; 1.443 + args->source = (const char *)(mySource + length); 1.444 + *err = U_TRUNCATED_CHAR_FOUND; 1.445 + return 0xffff; 1.446 + } 1.447 + 1.448 + /* Don't even try to do a direct cast because the value may be on an odd address. */ 1.449 + myUChar = ((UChar32)mySource[0] << 24) 1.450 + | ((UChar32)mySource[1] << 16) 1.451 + | ((UChar32)mySource[2] << 8) 1.452 + | ((UChar32)mySource[3]); 1.453 + 1.454 + args->source = (const char *)(mySource + 4); 1.455 + if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { 1.456 + return myUChar; 1.457 + } 1.458 + 1.459 + uprv_memcpy(args->converter->toUBytes, mySource, 4); 1.460 + args->converter->toULength = 4; 1.461 + 1.462 + *err = U_ILLEGAL_CHAR_FOUND; 1.463 + return 0xffff; 1.464 +} 1.465 + 1.466 +static const UConverterImpl _UTF32BEImpl = { 1.467 + UCNV_UTF32_BigEndian, 1.468 + 1.469 + NULL, 1.470 + NULL, 1.471 + 1.472 + NULL, 1.473 + NULL, 1.474 + NULL, 1.475 + 1.476 + T_UConverter_toUnicode_UTF32_BE, 1.477 + T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, 1.478 + T_UConverter_fromUnicode_UTF32_BE, 1.479 + T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, 1.480 + T_UConverter_getNextUChar_UTF32_BE, 1.481 + 1.482 + NULL, 1.483 + NULL, 1.484 + NULL, 1.485 + NULL, 1.486 + ucnv_getNonSurrogateUnicodeSet 1.487 +}; 1.488 + 1.489 +/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ 1.490 +static const UConverterStaticData _UTF32BEStaticData = { 1.491 + sizeof(UConverterStaticData), 1.492 + "UTF-32BE", 1.493 + 1232, 1.494 + 
UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, 1.495 + { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, 1.496 + 0, 1.497 + 0, 1.498 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1.499 +}; 1.500 + 1.501 +const UConverterSharedData _UTF32BEData = { 1.502 + sizeof(UConverterSharedData), ~((uint32_t) 0), 1.503 + NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl, 1.504 + 0 1.505 +}; 1.506 + 1.507 +/* UTF-32LE ---------------------------------------------------------- */ 1.508 + 1.509 +static void 1.510 +T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, 1.511 + UErrorCode * err) 1.512 +{ 1.513 + const unsigned char *mySource = (unsigned char *) args->source; 1.514 + UChar *myTarget = args->target; 1.515 + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 1.516 + const UChar *targetLimit = args->targetLimit; 1.517 + unsigned char *toUBytes = args->converter->toUBytes; 1.518 + uint32_t ch, i; 1.519 + 1.520 + /* Restore state of current sequence */ 1.521 + if (args->converter->toUnicodeStatus && myTarget < targetLimit) 1.522 + { 1.523 + i = args->converter->toULength; /* restore # of bytes consumed */ 1.524 + args->converter->toULength = 0; 1.525 + 1.526 + /* Stores the previously calculated ch from a previous call*/ 1.527 + ch = args->converter->toUnicodeStatus - 1; 1.528 + args->converter->toUnicodeStatus = 0; 1.529 + goto morebytes; 1.530 + } 1.531 + 1.532 + while (mySource < sourceLimit && myTarget < targetLimit) 1.533 + { 1.534 + i = 0; 1.535 + ch = 0; 1.536 +morebytes: 1.537 + while (i < sizeof(uint32_t)) 1.538 + { 1.539 + if (mySource < sourceLimit) 1.540 + { 1.541 + ch |= ((uint8_t)(*mySource)) << (i * 8); 1.542 + toUBytes[i++] = (char) *(mySource++); 1.543 + } 1.544 + else 1.545 + { 1.546 + /* stores a partially calculated target*/ 1.547 + /* + 1 to make 0 a valid character */ 1.548 + args->converter->toUnicodeStatus = ch + 1; 1.549 + args->converter->toULength = (int8_t) i; 1.550 + goto donefornow; 1.551 + } 1.552 + } 1.553 + 
1.554 + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { 1.555 + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 1.556 + if (ch <= MAXIMUM_UCS2) { 1.557 + /* fits in 16 bits */ 1.558 + *(myTarget++) = (UChar) ch; 1.559 + } 1.560 + else { 1.561 + /* write out the surrogates */ 1.562 + *(myTarget++) = U16_LEAD(ch); 1.563 + ch = U16_TRAIL(ch); 1.564 + if (myTarget < targetLimit) { 1.565 + *(myTarget++) = (UChar)ch; 1.566 + } 1.567 + else { 1.568 + /* Put in overflow buffer (not handled here) */ 1.569 + args->converter->UCharErrorBuffer[0] = (UChar) ch; 1.570 + args->converter->UCharErrorBufferLength = 1; 1.571 + *err = U_BUFFER_OVERFLOW_ERROR; 1.572 + break; 1.573 + } 1.574 + } 1.575 + } 1.576 + else { 1.577 + args->converter->toULength = (int8_t)i; 1.578 + *err = U_ILLEGAL_CHAR_FOUND; 1.579 + break; 1.580 + } 1.581 + } 1.582 + 1.583 +donefornow: 1.584 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 1.585 + { 1.586 + /* End of target buffer */ 1.587 + *err = U_BUFFER_OVERFLOW_ERROR; 1.588 + } 1.589 + 1.590 + args->target = myTarget; 1.591 + args->source = (const char *) mySource; 1.592 +} 1.593 + 1.594 +static void 1.595 +T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, 1.596 + UErrorCode * err) 1.597 +{ 1.598 + const unsigned char *mySource = (unsigned char *) args->source; 1.599 + UChar *myTarget = args->target; 1.600 + int32_t *myOffsets = args->offsets; 1.601 + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 1.602 + const UChar *targetLimit = args->targetLimit; 1.603 + unsigned char *toUBytes = args->converter->toUBytes; 1.604 + uint32_t ch, i; 1.605 + int32_t offsetNum = 0; 1.606 + 1.607 + /* Restore state of current sequence */ 1.608 + if (args->converter->toUnicodeStatus && myTarget < targetLimit) 1.609 + { 1.610 + i = args->converter->toULength; /* restore # of bytes consumed */ 1.611 + args->converter->toULength = 0; 1.612 + 1.613 + /* Stores the 
previously calculated ch from a previous call*/ 1.614 + ch = args->converter->toUnicodeStatus - 1; 1.615 + args->converter->toUnicodeStatus = 0; 1.616 + goto morebytes; 1.617 + } 1.618 + 1.619 + while (mySource < sourceLimit && myTarget < targetLimit) 1.620 + { 1.621 + i = 0; 1.622 + ch = 0; 1.623 +morebytes: 1.624 + while (i < sizeof(uint32_t)) 1.625 + { 1.626 + if (mySource < sourceLimit) 1.627 + { 1.628 + ch |= ((uint8_t)(*mySource)) << (i * 8); 1.629 + toUBytes[i++] = (char) *(mySource++); 1.630 + } 1.631 + else 1.632 + { 1.633 + /* stores a partially calculated target*/ 1.634 + /* + 1 to make 0 a valid character */ 1.635 + args->converter->toUnicodeStatus = ch + 1; 1.636 + args->converter->toULength = (int8_t) i; 1.637 + goto donefornow; 1.638 + } 1.639 + } 1.640 + 1.641 + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) 1.642 + { 1.643 + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 1.644 + if (ch <= MAXIMUM_UCS2) 1.645 + { 1.646 + /* fits in 16 bits */ 1.647 + *(myTarget++) = (UChar) ch; 1.648 + *(myOffsets++) = offsetNum; 1.649 + } 1.650 + else { 1.651 + /* write out the surrogates */ 1.652 + *(myTarget++) = U16_LEAD(ch); 1.653 + *(myOffsets++) = offsetNum; 1.654 + ch = U16_TRAIL(ch); 1.655 + if (myTarget < targetLimit) 1.656 + { 1.657 + *(myTarget++) = (UChar)ch; 1.658 + *(myOffsets++) = offsetNum; 1.659 + } 1.660 + else 1.661 + { 1.662 + /* Put in overflow buffer (not handled here) */ 1.663 + args->converter->UCharErrorBuffer[0] = (UChar) ch; 1.664 + args->converter->UCharErrorBufferLength = 1; 1.665 + *err = U_BUFFER_OVERFLOW_ERROR; 1.666 + break; 1.667 + } 1.668 + } 1.669 + } 1.670 + else 1.671 + { 1.672 + args->converter->toULength = (int8_t)i; 1.673 + *err = U_ILLEGAL_CHAR_FOUND; 1.674 + break; 1.675 + } 1.676 + offsetNum += i; 1.677 + } 1.678 + 1.679 +donefornow: 1.680 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 1.681 + { 1.682 + /* End of target buffer */ 1.683 + *err = 
U_BUFFER_OVERFLOW_ERROR; 1.684 + } 1.685 + 1.686 + args->target = myTarget; 1.687 + args->source = (const char *) mySource; 1.688 + args->offsets = myOffsets; 1.689 +} 1.690 + 1.691 +static void 1.692 +T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, 1.693 + UErrorCode * err) 1.694 +{ 1.695 + const UChar *mySource = args->source; 1.696 + unsigned char *myTarget; 1.697 + const UChar *sourceLimit = args->sourceLimit; 1.698 + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 1.699 + UChar32 ch, ch2; 1.700 + unsigned int indexToWrite; 1.701 + unsigned char temp[sizeof(uint32_t)]; 1.702 + 1.703 + if(mySource >= sourceLimit) { 1.704 + /* no input, nothing to do */ 1.705 + return; 1.706 + } 1.707 + 1.708 + /* write the BOM if necessary */ 1.709 + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 1.710 + static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; 1.711 + ucnv_fromUWriteBytes(args->converter, 1.712 + bom, 4, 1.713 + &args->target, args->targetLimit, 1.714 + &args->offsets, -1, 1.715 + err); 1.716 + args->converter->fromUnicodeStatus=0; 1.717 + } 1.718 + 1.719 + myTarget = (unsigned char *) args->target; 1.720 + temp[3] = 0; 1.721 + 1.722 + if (args->converter->fromUChar32) 1.723 + { 1.724 + ch = args->converter->fromUChar32; 1.725 + args->converter->fromUChar32 = 0; 1.726 + goto lowsurogate; 1.727 + } 1.728 + 1.729 + while (mySource < sourceLimit && myTarget < targetLimit) 1.730 + { 1.731 + ch = *(mySource++); 1.732 + 1.733 + if (U16_IS_SURROGATE(ch)) { 1.734 + if (U16_IS_LEAD(ch)) 1.735 + { 1.736 +lowsurogate: 1.737 + if (mySource < sourceLimit) 1.738 + { 1.739 + ch2 = *mySource; 1.740 + if (U16_IS_TRAIL(ch2)) { 1.741 + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 1.742 + mySource++; 1.743 + } 1.744 + else { 1.745 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.746 + /* callback(illegal) */ 1.747 + args->converter->fromUChar32 = ch; 1.748 + *err = 
U_ILLEGAL_CHAR_FOUND; 1.749 + break; 1.750 + } 1.751 + } 1.752 + else { 1.753 + /* ran out of source */ 1.754 + args->converter->fromUChar32 = ch; 1.755 + if (args->flush) { 1.756 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.757 + /* callback(illegal) */ 1.758 + *err = U_ILLEGAL_CHAR_FOUND; 1.759 + } 1.760 + break; 1.761 + } 1.762 + } 1.763 + else { 1.764 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.765 + /* callback(illegal) */ 1.766 + args->converter->fromUChar32 = ch; 1.767 + *err = U_ILLEGAL_CHAR_FOUND; 1.768 + break; 1.769 + } 1.770 + } 1.771 + 1.772 + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 1.773 + temp[2] = (uint8_t) (ch >> 16 & 0x1F); 1.774 + temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 1.775 + temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 1.776 + 1.777 + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) 1.778 + { 1.779 + if (myTarget < targetLimit) 1.780 + { 1.781 + *(myTarget++) = temp[indexToWrite]; 1.782 + } 1.783 + else 1.784 + { 1.785 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 1.786 + *err = U_BUFFER_OVERFLOW_ERROR; 1.787 + } 1.788 + } 1.789 + } 1.790 + 1.791 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 1.792 + { 1.793 + *err = U_BUFFER_OVERFLOW_ERROR; 1.794 + } 1.795 + 1.796 + args->target = (char *) myTarget; 1.797 + args->source = mySource; 1.798 +} 1.799 + 1.800 +static void 1.801 +T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, 1.802 + UErrorCode * err) 1.803 +{ 1.804 + const UChar *mySource = args->source; 1.805 + unsigned char *myTarget; 1.806 + int32_t *myOffsets; 1.807 + const UChar *sourceLimit = args->sourceLimit; 1.808 + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; 1.809 + UChar32 ch, ch2; 1.810 + unsigned int indexToWrite; 1.811 + unsigned 
char temp[sizeof(uint32_t)]; 1.812 + int32_t offsetNum = 0; 1.813 + 1.814 + if(mySource >= sourceLimit) { 1.815 + /* no input, nothing to do */ 1.816 + return; 1.817 + } 1.818 + 1.819 + /* write the BOM if necessary */ 1.820 + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 1.821 + static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; 1.822 + ucnv_fromUWriteBytes(args->converter, 1.823 + bom, 4, 1.824 + &args->target, args->targetLimit, 1.825 + &args->offsets, -1, 1.826 + err); 1.827 + args->converter->fromUnicodeStatus=0; 1.828 + } 1.829 + 1.830 + myTarget = (unsigned char *) args->target; 1.831 + myOffsets = args->offsets; 1.832 + temp[3] = 0; 1.833 + 1.834 + if (args->converter->fromUChar32) 1.835 + { 1.836 + ch = args->converter->fromUChar32; 1.837 + args->converter->fromUChar32 = 0; 1.838 + goto lowsurogate; 1.839 + } 1.840 + 1.841 + while (mySource < sourceLimit && myTarget < targetLimit) 1.842 + { 1.843 + ch = *(mySource++); 1.844 + 1.845 + if (U16_IS_SURROGATE(ch)) { 1.846 + if (U16_IS_LEAD(ch)) 1.847 + { 1.848 +lowsurogate: 1.849 + if (mySource < sourceLimit) 1.850 + { 1.851 + ch2 = *mySource; 1.852 + if (U16_IS_TRAIL(ch2)) 1.853 + { 1.854 + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; 1.855 + mySource++; 1.856 + } 1.857 + else { 1.858 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.859 + /* callback(illegal) */ 1.860 + args->converter->fromUChar32 = ch; 1.861 + *err = U_ILLEGAL_CHAR_FOUND; 1.862 + break; 1.863 + } 1.864 + } 1.865 + else { 1.866 + /* ran out of source */ 1.867 + args->converter->fromUChar32 = ch; 1.868 + if (args->flush) { 1.869 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.870 + /* callback(illegal) */ 1.871 + *err = U_ILLEGAL_CHAR_FOUND; 1.872 + } 1.873 + break; 1.874 + } 1.875 + } 1.876 + else { 1.877 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.878 + /* callback(illegal) */ 1.879 + args->converter->fromUChar32 = ch; 1.880 + *err = 
U_ILLEGAL_CHAR_FOUND; 1.881 + break; 1.882 + } 1.883 + } 1.884 + 1.885 + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ 1.886 + temp[2] = (uint8_t) (ch >> 16 & 0x1F); 1.887 + temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ 1.888 + temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ 1.889 + 1.890 + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) 1.891 + { 1.892 + if (myTarget < targetLimit) 1.893 + { 1.894 + *(myTarget++) = temp[indexToWrite]; 1.895 + *(myOffsets++) = offsetNum; 1.896 + } 1.897 + else 1.898 + { 1.899 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; 1.900 + *err = U_BUFFER_OVERFLOW_ERROR; 1.901 + } 1.902 + } 1.903 + offsetNum = offsetNum + 1 + (temp[2] != 0); 1.904 + } 1.905 + 1.906 + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 1.907 + { 1.908 + *err = U_BUFFER_OVERFLOW_ERROR; 1.909 + } 1.910 + 1.911 + args->target = (char *) myTarget; 1.912 + args->source = mySource; 1.913 + args->offsets = myOffsets; 1.914 +} 1.915 + 1.916 +static UChar32 1.917 +T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, 1.918 + UErrorCode* err) 1.919 +{ 1.920 + const uint8_t *mySource; 1.921 + UChar32 myUChar; 1.922 + int32_t length; 1.923 + 1.924 + mySource = (const uint8_t *)args->source; 1.925 + if (mySource >= (const uint8_t *)args->sourceLimit) 1.926 + { 1.927 + /* no input */ 1.928 + *err = U_INDEX_OUTOFBOUNDS_ERROR; 1.929 + return 0xffff; 1.930 + } 1.931 + 1.932 + length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); 1.933 + if (length < 4) 1.934 + { 1.935 + /* got a partial character */ 1.936 + uprv_memcpy(args->converter->toUBytes, mySource, length); 1.937 + args->converter->toULength = (int8_t)length; 1.938 + args->source = (const char *)(mySource + length); 1.939 + *err = U_TRUNCATED_CHAR_FOUND; 1.940 + return 0xffff; 1.941 + } 1.942 + 1.943 + /* 
Don't even try to do a direct cast because the value may be on an odd address. */ 1.944 + myUChar = ((UChar32)mySource[3] << 24) 1.945 + | ((UChar32)mySource[2] << 16) 1.946 + | ((UChar32)mySource[1] << 8) 1.947 + | ((UChar32)mySource[0]); 1.948 + 1.949 + args->source = (const char *)(mySource + 4); 1.950 + if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { 1.951 + return myUChar; 1.952 + } 1.953 + 1.954 + uprv_memcpy(args->converter->toUBytes, mySource, 4); 1.955 + args->converter->toULength = 4; 1.956 + 1.957 + *err = U_ILLEGAL_CHAR_FOUND; 1.958 + return 0xffff; 1.959 +} 1.960 + 1.961 +static const UConverterImpl _UTF32LEImpl = { 1.962 + UCNV_UTF32_LittleEndian, 1.963 + 1.964 + NULL, 1.965 + NULL, 1.966 + 1.967 + NULL, 1.968 + NULL, 1.969 + NULL, 1.970 + 1.971 + T_UConverter_toUnicode_UTF32_LE, 1.972 + T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, 1.973 + T_UConverter_fromUnicode_UTF32_LE, 1.974 + T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, 1.975 + T_UConverter_getNextUChar_UTF32_LE, 1.976 + 1.977 + NULL, 1.978 + NULL, 1.979 + NULL, 1.980 + NULL, 1.981 + ucnv_getNonSurrogateUnicodeSet 1.982 +}; 1.983 + 1.984 +/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ 1.985 +static const UConverterStaticData _UTF32LEStaticData = { 1.986 + sizeof(UConverterStaticData), 1.987 + "UTF-32LE", 1.988 + 1234, 1.989 + UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, 1.990 + { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, 1.991 + 0, 1.992 + 0, 1.993 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1.994 +}; 1.995 + 1.996 + 1.997 +const UConverterSharedData _UTF32LEData = { 1.998 + sizeof(UConverterSharedData), ~((uint32_t) 0), 1.999 + NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl, 1.1000 + 0 1.1001 +}; 1.1002 + 1.1003 +/* UTF-32 (Detect BOM) ------------------------------------------------------ */ 1.1004 + 1.1005 +/* 1.1006 + * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE 1.1007 + * accordingly. 
1.1008 + * 1.1009 + * State values: 1.1010 + * 0 initial state 1.1011 + * 1 saw 00 1.1012 + * 2 saw 00 00 1.1013 + * 3 saw 00 00 FE 1.1014 + * 4 - (never stored: on reaching 4 the code switches straight to state 8) 1.1015 + * 5 saw FF 1.1016 + * 6 saw FF FE 1.1017 + * 7 saw FF FE 00 1.1018 + * 8 UTF-32BE mode 1.1019 + * 9 UTF-32LE mode 1.1020 + * 1.1021 + * During detection: state&3==number of matching bytes so far, and state&4 is the offset of the candidate BOM inside utf32BOM. 1.1022 + * 1.1023 + * On output, emit U+FEFF as the first code point. 1.1024 + */ 1.1025 + /* Reset callback. When choice<=UCNV_RESET_TO_UNICODE the detection state (cnv->mode) goes back to 0; when choice!=UCNV_RESET_TO_UNICODE the fromUnicode side is flagged to write a BOM. */ 1.1026 +static void 1.1027 +_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { 1.1028 + if(choice<=UCNV_RESET_TO_UNICODE) { 1.1029 + /* reset toUnicode: state=0 */ 1.1030 + cnv->mode=0; 1.1031 + } 1.1032 + if(choice!=UCNV_RESET_TO_UNICODE) { 1.1033 + /* reset fromUnicode: prepare to output the UTF-32PE BOM (PE: presumably platform endian - confirm) */ 1.1034 + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 1.1035 + } 1.1036 +} 1.1037 + /* Open callback: puts the converter into the fully reset state (both directions). pArgs and pErrorCode are not used here. */ 1.1038 +static void 1.1039 +_UTF32Open(UConverter *cnv, 1.1040 + UConverterLoadArgs *pArgs, 1.1041 + UErrorCode *pErrorCode) { 1.1042 + _UTF32Reset(cnv, UCNV_RESET_BOTH); 1.1043 +} 1.1044 + /* Both BOM byte sequences back to back: bytes 0..3 are 00 00 FE FF (UTF-32BE), bytes 4..7 are FF FE 00 00 (UTF-32LE). Indexed directly by the detection state. */ 1.1045 +static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 }; 1.1046 + /* toUnicode entry point of the BOM-detecting UTF-32 converter. While the input keeps matching one of the two BOMs the bytes are consumed and the state (kept in cnv->mode across calls) advances; after that the work is delegated to the UTF-32BE or UTF-32LE functions. If the input stops matching a BOM, the bytes seen so far are handed to the BE functions as ordinary data. pArgs->offsets may be NULL, in which case the variants without offset logic are called. Errors are reported through *pErrorCode. */ 1.1047 +static void 1.1048 +_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 1.1049 + UErrorCode *pErrorCode) { 1.1050 + UConverter *cnv=pArgs->converter; 1.1051 + const char *source=pArgs->source; 1.1052 + const char *sourceLimit=pArgs->sourceLimit; 1.1053 + int32_t *offsets=pArgs->offsets; 1.1054 + 1.1055 + int32_t state, offsetDelta; 1.1056 + char b; 1.1057 + 1.1058 + state=cnv->mode; 1.1059 + 1.1060 + /* 1.1061 + * If we detect a BOM in this buffer, then we must add the BOM size to the 1.1062 + * offsets because the actual converter function will not see and count the BOM. 1.1063 + * offsetDelta will have the number of the BOM bytes that are in the current buffer.
1.1064 + */ 1.1065 + offsetDelta=0; 1.1066 + 1.1067 + while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { 1.1068 + switch(state) { 1.1069 + case 0: 1.1070 + b=*source; /* peek only: the byte is consumed below only if it can start a BOM */ 1.1071 + if(b==0) { 1.1072 + state=1; /* could be 00 00 FE FF */ 1.1073 + } else if(b==(char)0xff) { 1.1074 + state=5; /* could be FF FE 00 00 */ 1.1075 + } else { 1.1076 + state=8; /* default to UTF-32BE */ 1.1077 + continue; /* re-dispatch in state 8 without consuming the byte */ 1.1078 + } 1.1079 + ++source; 1.1080 + break; 1.1081 + case 1: 1.1082 + case 2: 1.1083 + case 3: 1.1084 + case 5: 1.1085 + case 6: 1.1086 + case 7: 1.1087 + if(*source==utf32BOM[state]) { /* utf32BOM[state] is the next byte expected in the candidate BOM */ 1.1088 + ++state; 1.1089 + ++source; 1.1090 + if(state==4) { 1.1091 + state=8; /* detect UTF-32BE */ 1.1092 + offsetDelta=(int32_t)(source-pArgs->source); 1.1093 + } else if(state==8) { 1.1094 + state=9; /* detect UTF-32LE */ 1.1095 + offsetDelta=(int32_t)(source-pArgs->source); 1.1096 + } 1.1097 + } else { 1.1098 + /* switch to UTF-32BE and pass the previous bytes */ 1.1099 + int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ 1.1100 + 1.1101 + /* reset the source */ 1.1102 + source=pArgs->source; 1.1103 + 1.1104 + if(count==(state&3)) { 1.1105 + /* simple: all in the same buffer, just reset source */ 1.1106 + } else { 1.1107 + UBool oldFlush=pArgs->flush; 1.1108 + 1.1109 + /* some of the bytes are from a previous buffer, replay those first */ 1.1110 + pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ 1.1111 + pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ 1.1112 + pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ 1.1113 + 1.1114 + /* no offsets: bytes from previous buffer, and not enough for output */ 1.1115 + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1.1116 + 1.1117 + /* restore real pointers; pArgs->source will be set in case 8/9 */ 1.1118 + pArgs->sourceLimit=sourceLimit; 1.1119 + pArgs->flush=oldFlush; 1.1120 + } 1.1121 + state=8; 1.1122 + continue; /* re-dispatch in state 8 from the reset source position */ 1.1123 + } 1.1124 +
break; 1.1125 + case 8: 1.1126 + /* call UTF-32BE */ 1.1127 + pArgs->source=source; 1.1128 + if(offsets==NULL) { 1.1129 + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1.1130 + } else { 1.1131 + T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); 1.1132 + } 1.1133 + source=pArgs->source; /* pick up how far the delegated function advanced */ 1.1134 + break; 1.1135 + case 9: 1.1136 + /* call UTF-32LE */ 1.1137 + pArgs->source=source; 1.1138 + if(offsets==NULL) { 1.1139 + T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); 1.1140 + } else { 1.1141 + T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); 1.1142 + } 1.1143 + source=pArgs->source; /* pick up how far the delegated function advanced */ 1.1144 + break; 1.1145 + default: 1.1146 + break; /* does not occur */ 1.1147 + } 1.1148 + } 1.1149 + 1.1150 + /* add BOM size to offsets - see comment at offsetDelta declaration */ 1.1151 + if(offsets!=NULL && offsetDelta!=0) { 1.1152 + int32_t *offsetsLimit=pArgs->offsets; /* assumes the delegated functions advanced pArgs->offsets past what they wrote - TODO confirm */ 1.1153 + while(offsets<offsetsLimit) { 1.1154 + *offsets++ += offsetDelta; 1.1155 + } 1.1156 + } 1.1157 + 1.1158 + pArgs->source=source; 1.1159 + 1.1160 + if(source==sourceLimit && pArgs->flush) { 1.1161 + /* handle truncated input */ 1.1162 + switch(state) { 1.1163 + case 0: 1.1164 + break; /* no input at all, nothing to do */ 1.1165 + case 8: 1.1166 + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); /* called with no input left and flush set */ 1.1167 + break; 1.1168 + case 9: 1.1169 + T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); /* called with no input left and flush set */ 1.1170 + break; 1.1171 + default: 1.1172 + /* handle 0<state<8: call UTF-32BE with too-short input */ 1.1173 + pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ 1.1174 + pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ 1.1175 + 1.1176 + /* no offsets: not enough for output */ 1.1177 + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); 1.1178 + pArgs->source=source; /* restore the caller's real pointers after the replay */ 1.1179 + pArgs->sourceLimit=sourceLimit; 1.1180 + state=8; 1.1181 + break; 1.1182 + } 1.1183 + } 1.1184 + 1.1185 + cnv->mode=state; /* remember the detection state for the next call */ 1.1186 +} 1.1187 + 1.1188 +static UChar32 1.1189
+_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, 1.1190 + UErrorCode *pErrorCode) { /* Once the byte order is fixed (mode 8 = UTF-32BE, mode 9 = UTF-32LE) the matching per-code-point function is called directly. In any other mode (BOM detection not finished) UCNV_GET_NEXT_UCHAR_USE_TO_U is returned; presumably this makes the caller fall back to the toUnicode function - confirm. */ 1.1191 + switch(pArgs->converter->mode) { 1.1192 + case 8: 1.1193 + return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); 1.1194 + case 9: 1.1195 + return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); 1.1196 + default: 1.1197 + return UCNV_GET_NEXT_UCHAR_USE_TO_U; 1.1198 + } 1.1199 +} 1.1200 + /* Function table for the BOM-detecting UTF-32 converter. The same function fills both toUnicode slots (it checks pArgs->offsets itself); the fromUnicode functions are chosen at compile time from U_IS_BIG_ENDIAN. */ 1.1201 +static const UConverterImpl _UTF32Impl = { 1.1202 + UCNV_UTF32, 1.1203 + 1.1204 + NULL, 1.1205 + NULL, 1.1206 + 1.1207 + _UTF32Open, 1.1208 + NULL, 1.1209 + _UTF32Reset, 1.1210 + 1.1211 + _UTF32ToUnicodeWithOffsets, 1.1212 + _UTF32ToUnicodeWithOffsets, 1.1213 +#if U_IS_BIG_ENDIAN 1.1214 + T_UConverter_fromUnicode_UTF32_BE, 1.1215 + T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, 1.1216 +#else 1.1217 + T_UConverter_fromUnicode_UTF32_LE, 1.1218 + T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, 1.1219 +#endif 1.1220 + _UTF32GetNextUChar, 1.1221 + 1.1222 + NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 1.1223 + NULL, 1.1224 + NULL, 1.1225 + NULL, 1.1226 + ucnv_getNonSurrogateUnicodeSet 1.1227 +}; 1.1228 + 1.1229 +/* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */ 1.1230 +static const UConverterStaticData _UTF32StaticData = { 1.1231 + sizeof(UConverterStaticData), 1.1232 + "UTF-32", 1.1233 + 1236, 1.1234 + UCNV_IBM, UCNV_UTF32, 4, 4, 1.1235 +#if U_IS_BIG_ENDIAN 1.1236 + { 0, 0, 0xff, 0xfd }, 4, /* bytes 00 00 FF FD are U+FFFD in big-endian order */ 1.1237 +#else 1.1238 + { 0xfd, 0xff, 0, 0 }, 4, /* bytes FD FF 00 00 are U+FFFD in little-endian order */ 1.1239 +#endif 1.1240 + FALSE, FALSE, 1.1241 + 0, 1.1242 + 0, 1.1243 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1.1244 +}; 1.1245 + /* Shared-data record that ties the UTF-32 static data to its function table. */ 1.1246 +const UConverterSharedData _UTF32Data = { 1.1247 + sizeof(UConverterSharedData), ~((uint32_t) 0), 1.1248 + NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl, 1.1249 + 0 1.1250 +}; 1.1251 + 1.1252 +#endif