|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2002-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * file name: ucnv_u32.c |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2002jul01 |
|
12 * created by: Markus W. Scherer |
|
13 * |
|
14 * UTF-32 converter implementation. Used to be in ucnv_utf.c. |
|
15 */ |
|
16 |
|
17 #include "unicode/utypes.h" |
|
18 |
|
19 #if !UCONFIG_NO_CONVERSION |
|
20 |
|
21 #include "unicode/ucnv.h" |
|
22 #include "unicode/utf.h" |
|
23 #include "ucnv_bld.h" |
|
24 #include "ucnv_cnv.h" |
|
25 #include "cmemory.h" |
|
26 |
|
/* Code point range limits. */
#define MAXIMUM_UCS2            0x0000FFFF  /* largest value that fits in one 16-bit unit */
#define MAXIMUM_UTF             0x0010FFFF  /* largest value accepted as a code point */

/* Constants used to combine a surrogate pair into one code point. */
#define HALF_SHIFT              10
#define HALF_BASE               0x0010000
#define HALF_MASK               0x3FF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_LOW_START     0xDC00

/* HALF_BASE - SURROGATE_LOW_START, precomputed */
#define SURROGATE_LOW_BASE      9216

/* Value of fromUnicodeStatus that makes the fromUnicode functions emit a BOM first. */
enum {
    UCNV_NEED_TO_WRITE_BOM = 1
};
|
41 |
|
42 /* UTF-32BE ----------------------------------------------------------------- */ |
|
43 |
|
44 static void |
|
45 T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, |
|
46 UErrorCode * err) |
|
47 { |
|
48 const unsigned char *mySource = (unsigned char *) args->source; |
|
49 UChar *myTarget = args->target; |
|
50 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
|
51 const UChar *targetLimit = args->targetLimit; |
|
52 unsigned char *toUBytes = args->converter->toUBytes; |
|
53 uint32_t ch, i; |
|
54 |
|
55 /* Restore state of current sequence */ |
|
56 if (args->converter->toUnicodeStatus && myTarget < targetLimit) { |
|
57 i = args->converter->toULength; /* restore # of bytes consumed */ |
|
58 args->converter->toULength = 0; |
|
59 |
|
60 ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ |
|
61 args->converter->toUnicodeStatus = 0; |
|
62 goto morebytes; |
|
63 } |
|
64 |
|
65 while (mySource < sourceLimit && myTarget < targetLimit) { |
|
66 i = 0; |
|
67 ch = 0; |
|
68 morebytes: |
|
69 while (i < sizeof(uint32_t)) { |
|
70 if (mySource < sourceLimit) { |
|
71 ch = (ch << 8) | (uint8_t)(*mySource); |
|
72 toUBytes[i++] = (char) *(mySource++); |
|
73 } |
|
74 else { |
|
75 /* stores a partially calculated target*/ |
|
76 /* + 1 to make 0 a valid character */ |
|
77 args->converter->toUnicodeStatus = ch + 1; |
|
78 args->converter->toULength = (int8_t) i; |
|
79 goto donefornow; |
|
80 } |
|
81 } |
|
82 |
|
83 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
|
84 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
|
85 if (ch <= MAXIMUM_UCS2) |
|
86 { |
|
87 /* fits in 16 bits */ |
|
88 *(myTarget++) = (UChar) ch; |
|
89 } |
|
90 else { |
|
91 /* write out the surrogates */ |
|
92 *(myTarget++) = U16_LEAD(ch); |
|
93 ch = U16_TRAIL(ch); |
|
94 if (myTarget < targetLimit) { |
|
95 *(myTarget++) = (UChar)ch; |
|
96 } |
|
97 else { |
|
98 /* Put in overflow buffer (not handled here) */ |
|
99 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
|
100 args->converter->UCharErrorBufferLength = 1; |
|
101 *err = U_BUFFER_OVERFLOW_ERROR; |
|
102 break; |
|
103 } |
|
104 } |
|
105 } |
|
106 else { |
|
107 args->converter->toULength = (int8_t)i; |
|
108 *err = U_ILLEGAL_CHAR_FOUND; |
|
109 break; |
|
110 } |
|
111 } |
|
112 |
|
113 donefornow: |
|
114 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
|
115 /* End of target buffer */ |
|
116 *err = U_BUFFER_OVERFLOW_ERROR; |
|
117 } |
|
118 |
|
119 args->target = myTarget; |
|
120 args->source = (const char *) mySource; |
|
121 } |
|
122 |
|
123 static void |
|
124 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, |
|
125 UErrorCode * err) |
|
126 { |
|
127 const unsigned char *mySource = (unsigned char *) args->source; |
|
128 UChar *myTarget = args->target; |
|
129 int32_t *myOffsets = args->offsets; |
|
130 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
|
131 const UChar *targetLimit = args->targetLimit; |
|
132 unsigned char *toUBytes = args->converter->toUBytes; |
|
133 uint32_t ch, i; |
|
134 int32_t offsetNum = 0; |
|
135 |
|
136 /* Restore state of current sequence */ |
|
137 if (args->converter->toUnicodeStatus && myTarget < targetLimit) { |
|
138 i = args->converter->toULength; /* restore # of bytes consumed */ |
|
139 args->converter->toULength = 0; |
|
140 |
|
141 ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ |
|
142 args->converter->toUnicodeStatus = 0; |
|
143 goto morebytes; |
|
144 } |
|
145 |
|
146 while (mySource < sourceLimit && myTarget < targetLimit) { |
|
147 i = 0; |
|
148 ch = 0; |
|
149 morebytes: |
|
150 while (i < sizeof(uint32_t)) { |
|
151 if (mySource < sourceLimit) { |
|
152 ch = (ch << 8) | (uint8_t)(*mySource); |
|
153 toUBytes[i++] = (char) *(mySource++); |
|
154 } |
|
155 else { |
|
156 /* stores a partially calculated target*/ |
|
157 /* + 1 to make 0 a valid character */ |
|
158 args->converter->toUnicodeStatus = ch + 1; |
|
159 args->converter->toULength = (int8_t) i; |
|
160 goto donefornow; |
|
161 } |
|
162 } |
|
163 |
|
164 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
|
165 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
|
166 if (ch <= MAXIMUM_UCS2) { |
|
167 /* fits in 16 bits */ |
|
168 *(myTarget++) = (UChar) ch; |
|
169 *(myOffsets++) = offsetNum; |
|
170 } |
|
171 else { |
|
172 /* write out the surrogates */ |
|
173 *(myTarget++) = U16_LEAD(ch); |
|
174 *myOffsets++ = offsetNum; |
|
175 ch = U16_TRAIL(ch); |
|
176 if (myTarget < targetLimit) |
|
177 { |
|
178 *(myTarget++) = (UChar)ch; |
|
179 *(myOffsets++) = offsetNum; |
|
180 } |
|
181 else { |
|
182 /* Put in overflow buffer (not handled here) */ |
|
183 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
|
184 args->converter->UCharErrorBufferLength = 1; |
|
185 *err = U_BUFFER_OVERFLOW_ERROR; |
|
186 break; |
|
187 } |
|
188 } |
|
189 } |
|
190 else { |
|
191 args->converter->toULength = (int8_t)i; |
|
192 *err = U_ILLEGAL_CHAR_FOUND; |
|
193 break; |
|
194 } |
|
195 offsetNum += i; |
|
196 } |
|
197 |
|
198 donefornow: |
|
199 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
|
200 { |
|
201 /* End of target buffer */ |
|
202 *err = U_BUFFER_OVERFLOW_ERROR; |
|
203 } |
|
204 |
|
205 args->target = myTarget; |
|
206 args->source = (const char *) mySource; |
|
207 args->offsets = myOffsets; |
|
208 } |
|
209 |
|
210 static void |
|
211 T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, |
|
212 UErrorCode * err) |
|
213 { |
|
214 const UChar *mySource = args->source; |
|
215 unsigned char *myTarget; |
|
216 const UChar *sourceLimit = args->sourceLimit; |
|
217 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
|
218 UChar32 ch, ch2; |
|
219 unsigned int indexToWrite; |
|
220 unsigned char temp[sizeof(uint32_t)]; |
|
221 |
|
222 if(mySource >= sourceLimit) { |
|
223 /* no input, nothing to do */ |
|
224 return; |
|
225 } |
|
226 |
|
227 /* write the BOM if necessary */ |
|
228 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
|
229 static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; |
|
230 ucnv_fromUWriteBytes(args->converter, |
|
231 bom, 4, |
|
232 &args->target, args->targetLimit, |
|
233 &args->offsets, -1, |
|
234 err); |
|
235 args->converter->fromUnicodeStatus=0; |
|
236 } |
|
237 |
|
238 myTarget = (unsigned char *) args->target; |
|
239 temp[0] = 0; |
|
240 |
|
241 if (args->converter->fromUChar32) { |
|
242 ch = args->converter->fromUChar32; |
|
243 args->converter->fromUChar32 = 0; |
|
244 goto lowsurogate; |
|
245 } |
|
246 |
|
247 while (mySource < sourceLimit && myTarget < targetLimit) { |
|
248 ch = *(mySource++); |
|
249 |
|
250 if (U_IS_SURROGATE(ch)) { |
|
251 if (U_IS_LEAD(ch)) { |
|
252 lowsurogate: |
|
253 if (mySource < sourceLimit) { |
|
254 ch2 = *mySource; |
|
255 if (U_IS_TRAIL(ch2)) { |
|
256 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
|
257 mySource++; |
|
258 } |
|
259 else { |
|
260 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
261 /* callback(illegal) */ |
|
262 args->converter->fromUChar32 = ch; |
|
263 *err = U_ILLEGAL_CHAR_FOUND; |
|
264 break; |
|
265 } |
|
266 } |
|
267 else { |
|
268 /* ran out of source */ |
|
269 args->converter->fromUChar32 = ch; |
|
270 if (args->flush) { |
|
271 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
272 /* callback(illegal) */ |
|
273 *err = U_ILLEGAL_CHAR_FOUND; |
|
274 } |
|
275 break; |
|
276 } |
|
277 } |
|
278 else { |
|
279 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
280 /* callback(illegal) */ |
|
281 args->converter->fromUChar32 = ch; |
|
282 *err = U_ILLEGAL_CHAR_FOUND; |
|
283 break; |
|
284 } |
|
285 } |
|
286 |
|
287 /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
|
288 temp[1] = (uint8_t) (ch >> 16 & 0x1F); |
|
289 temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
|
290 temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
|
291 |
|
292 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { |
|
293 if (myTarget < targetLimit) { |
|
294 *(myTarget++) = temp[indexToWrite]; |
|
295 } |
|
296 else { |
|
297 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
|
298 *err = U_BUFFER_OVERFLOW_ERROR; |
|
299 } |
|
300 } |
|
301 } |
|
302 |
|
303 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
|
304 *err = U_BUFFER_OVERFLOW_ERROR; |
|
305 } |
|
306 |
|
307 args->target = (char *) myTarget; |
|
308 args->source = mySource; |
|
309 } |
|
310 |
|
311 static void |
|
312 T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, |
|
313 UErrorCode * err) |
|
314 { |
|
315 const UChar *mySource = args->source; |
|
316 unsigned char *myTarget; |
|
317 int32_t *myOffsets; |
|
318 const UChar *sourceLimit = args->sourceLimit; |
|
319 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
|
320 UChar32 ch, ch2; |
|
321 int32_t offsetNum = 0; |
|
322 unsigned int indexToWrite; |
|
323 unsigned char temp[sizeof(uint32_t)]; |
|
324 |
|
325 if(mySource >= sourceLimit) { |
|
326 /* no input, nothing to do */ |
|
327 return; |
|
328 } |
|
329 |
|
330 /* write the BOM if necessary */ |
|
331 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
|
332 static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; |
|
333 ucnv_fromUWriteBytes(args->converter, |
|
334 bom, 4, |
|
335 &args->target, args->targetLimit, |
|
336 &args->offsets, -1, |
|
337 err); |
|
338 args->converter->fromUnicodeStatus=0; |
|
339 } |
|
340 |
|
341 myTarget = (unsigned char *) args->target; |
|
342 myOffsets = args->offsets; |
|
343 temp[0] = 0; |
|
344 |
|
345 if (args->converter->fromUChar32) { |
|
346 ch = args->converter->fromUChar32; |
|
347 args->converter->fromUChar32 = 0; |
|
348 goto lowsurogate; |
|
349 } |
|
350 |
|
351 while (mySource < sourceLimit && myTarget < targetLimit) { |
|
352 ch = *(mySource++); |
|
353 |
|
354 if (U_IS_SURROGATE(ch)) { |
|
355 if (U_IS_LEAD(ch)) { |
|
356 lowsurogate: |
|
357 if (mySource < sourceLimit) { |
|
358 ch2 = *mySource; |
|
359 if (U_IS_TRAIL(ch2)) { |
|
360 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
|
361 mySource++; |
|
362 } |
|
363 else { |
|
364 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
365 /* callback(illegal) */ |
|
366 args->converter->fromUChar32 = ch; |
|
367 *err = U_ILLEGAL_CHAR_FOUND; |
|
368 break; |
|
369 } |
|
370 } |
|
371 else { |
|
372 /* ran out of source */ |
|
373 args->converter->fromUChar32 = ch; |
|
374 if (args->flush) { |
|
375 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
376 /* callback(illegal) */ |
|
377 *err = U_ILLEGAL_CHAR_FOUND; |
|
378 } |
|
379 break; |
|
380 } |
|
381 } |
|
382 else { |
|
383 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
384 /* callback(illegal) */ |
|
385 args->converter->fromUChar32 = ch; |
|
386 *err = U_ILLEGAL_CHAR_FOUND; |
|
387 break; |
|
388 } |
|
389 } |
|
390 |
|
391 /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
|
392 temp[1] = (uint8_t) (ch >> 16 & 0x1F); |
|
393 temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
|
394 temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
|
395 |
|
396 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { |
|
397 if (myTarget < targetLimit) { |
|
398 *(myTarget++) = temp[indexToWrite]; |
|
399 *(myOffsets++) = offsetNum; |
|
400 } |
|
401 else { |
|
402 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
|
403 *err = U_BUFFER_OVERFLOW_ERROR; |
|
404 } |
|
405 } |
|
406 offsetNum = offsetNum + 1 + (temp[1] != 0); |
|
407 } |
|
408 |
|
409 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
|
410 *err = U_BUFFER_OVERFLOW_ERROR; |
|
411 } |
|
412 |
|
413 args->target = (char *) myTarget; |
|
414 args->source = mySource; |
|
415 args->offsets = myOffsets; |
|
416 } |
|
417 |
|
418 static UChar32 |
|
419 T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, |
|
420 UErrorCode* err) |
|
421 { |
|
422 const uint8_t *mySource; |
|
423 UChar32 myUChar; |
|
424 int32_t length; |
|
425 |
|
426 mySource = (const uint8_t *)args->source; |
|
427 if (mySource >= (const uint8_t *)args->sourceLimit) |
|
428 { |
|
429 /* no input */ |
|
430 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
|
431 return 0xffff; |
|
432 } |
|
433 |
|
434 length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); |
|
435 if (length < 4) |
|
436 { |
|
437 /* got a partial character */ |
|
438 uprv_memcpy(args->converter->toUBytes, mySource, length); |
|
439 args->converter->toULength = (int8_t)length; |
|
440 args->source = (const char *)(mySource + length); |
|
441 *err = U_TRUNCATED_CHAR_FOUND; |
|
442 return 0xffff; |
|
443 } |
|
444 |
|
445 /* Don't even try to do a direct cast because the value may be on an odd address. */ |
|
446 myUChar = ((UChar32)mySource[0] << 24) |
|
447 | ((UChar32)mySource[1] << 16) |
|
448 | ((UChar32)mySource[2] << 8) |
|
449 | ((UChar32)mySource[3]); |
|
450 |
|
451 args->source = (const char *)(mySource + 4); |
|
452 if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { |
|
453 return myUChar; |
|
454 } |
|
455 |
|
456 uprv_memcpy(args->converter->toUBytes, mySource, 4); |
|
457 args->converter->toULength = 4; |
|
458 |
|
459 *err = U_ILLEGAL_CHAR_FOUND; |
|
460 return 0xffff; |
|
461 } |
|
462 |
|
463 static const UConverterImpl _UTF32BEImpl = { |
|
464 UCNV_UTF32_BigEndian, |
|
465 |
|
466 NULL, |
|
467 NULL, |
|
468 |
|
469 NULL, |
|
470 NULL, |
|
471 NULL, |
|
472 |
|
473 T_UConverter_toUnicode_UTF32_BE, |
|
474 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, |
|
475 T_UConverter_fromUnicode_UTF32_BE, |
|
476 T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, |
|
477 T_UConverter_getNextUChar_UTF32_BE, |
|
478 |
|
479 NULL, |
|
480 NULL, |
|
481 NULL, |
|
482 NULL, |
|
483 ucnv_getNonSurrogateUnicodeSet |
|
484 }; |
|
485 |
|
486 /* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ |
|
487 static const UConverterStaticData _UTF32BEStaticData = { |
|
488 sizeof(UConverterStaticData), |
|
489 "UTF-32BE", |
|
490 1232, |
|
491 UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, |
|
492 { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, |
|
493 0, |
|
494 0, |
|
495 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
|
496 }; |
|
497 |
|
498 const UConverterSharedData _UTF32BEData = { |
|
499 sizeof(UConverterSharedData), ~((uint32_t) 0), |
|
500 NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl, |
|
501 0 |
|
502 }; |
|
503 |
|
504 /* UTF-32LE ---------------------------------------------------------- */ |
|
505 |
|
506 static void |
|
507 T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, |
|
508 UErrorCode * err) |
|
509 { |
|
510 const unsigned char *mySource = (unsigned char *) args->source; |
|
511 UChar *myTarget = args->target; |
|
512 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
|
513 const UChar *targetLimit = args->targetLimit; |
|
514 unsigned char *toUBytes = args->converter->toUBytes; |
|
515 uint32_t ch, i; |
|
516 |
|
517 /* Restore state of current sequence */ |
|
518 if (args->converter->toUnicodeStatus && myTarget < targetLimit) |
|
519 { |
|
520 i = args->converter->toULength; /* restore # of bytes consumed */ |
|
521 args->converter->toULength = 0; |
|
522 |
|
523 /* Stores the previously calculated ch from a previous call*/ |
|
524 ch = args->converter->toUnicodeStatus - 1; |
|
525 args->converter->toUnicodeStatus = 0; |
|
526 goto morebytes; |
|
527 } |
|
528 |
|
529 while (mySource < sourceLimit && myTarget < targetLimit) |
|
530 { |
|
531 i = 0; |
|
532 ch = 0; |
|
533 morebytes: |
|
534 while (i < sizeof(uint32_t)) |
|
535 { |
|
536 if (mySource < sourceLimit) |
|
537 { |
|
538 ch |= ((uint8_t)(*mySource)) << (i * 8); |
|
539 toUBytes[i++] = (char) *(mySource++); |
|
540 } |
|
541 else |
|
542 { |
|
543 /* stores a partially calculated target*/ |
|
544 /* + 1 to make 0 a valid character */ |
|
545 args->converter->toUnicodeStatus = ch + 1; |
|
546 args->converter->toULength = (int8_t) i; |
|
547 goto donefornow; |
|
548 } |
|
549 } |
|
550 |
|
551 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
|
552 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
|
553 if (ch <= MAXIMUM_UCS2) { |
|
554 /* fits in 16 bits */ |
|
555 *(myTarget++) = (UChar) ch; |
|
556 } |
|
557 else { |
|
558 /* write out the surrogates */ |
|
559 *(myTarget++) = U16_LEAD(ch); |
|
560 ch = U16_TRAIL(ch); |
|
561 if (myTarget < targetLimit) { |
|
562 *(myTarget++) = (UChar)ch; |
|
563 } |
|
564 else { |
|
565 /* Put in overflow buffer (not handled here) */ |
|
566 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
|
567 args->converter->UCharErrorBufferLength = 1; |
|
568 *err = U_BUFFER_OVERFLOW_ERROR; |
|
569 break; |
|
570 } |
|
571 } |
|
572 } |
|
573 else { |
|
574 args->converter->toULength = (int8_t)i; |
|
575 *err = U_ILLEGAL_CHAR_FOUND; |
|
576 break; |
|
577 } |
|
578 } |
|
579 |
|
580 donefornow: |
|
581 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
|
582 { |
|
583 /* End of target buffer */ |
|
584 *err = U_BUFFER_OVERFLOW_ERROR; |
|
585 } |
|
586 |
|
587 args->target = myTarget; |
|
588 args->source = (const char *) mySource; |
|
589 } |
|
590 |
|
591 static void |
|
592 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, |
|
593 UErrorCode * err) |
|
594 { |
|
595 const unsigned char *mySource = (unsigned char *) args->source; |
|
596 UChar *myTarget = args->target; |
|
597 int32_t *myOffsets = args->offsets; |
|
598 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
|
599 const UChar *targetLimit = args->targetLimit; |
|
600 unsigned char *toUBytes = args->converter->toUBytes; |
|
601 uint32_t ch, i; |
|
602 int32_t offsetNum = 0; |
|
603 |
|
604 /* Restore state of current sequence */ |
|
605 if (args->converter->toUnicodeStatus && myTarget < targetLimit) |
|
606 { |
|
607 i = args->converter->toULength; /* restore # of bytes consumed */ |
|
608 args->converter->toULength = 0; |
|
609 |
|
610 /* Stores the previously calculated ch from a previous call*/ |
|
611 ch = args->converter->toUnicodeStatus - 1; |
|
612 args->converter->toUnicodeStatus = 0; |
|
613 goto morebytes; |
|
614 } |
|
615 |
|
616 while (mySource < sourceLimit && myTarget < targetLimit) |
|
617 { |
|
618 i = 0; |
|
619 ch = 0; |
|
620 morebytes: |
|
621 while (i < sizeof(uint32_t)) |
|
622 { |
|
623 if (mySource < sourceLimit) |
|
624 { |
|
625 ch |= ((uint8_t)(*mySource)) << (i * 8); |
|
626 toUBytes[i++] = (char) *(mySource++); |
|
627 } |
|
628 else |
|
629 { |
|
630 /* stores a partially calculated target*/ |
|
631 /* + 1 to make 0 a valid character */ |
|
632 args->converter->toUnicodeStatus = ch + 1; |
|
633 args->converter->toULength = (int8_t) i; |
|
634 goto donefornow; |
|
635 } |
|
636 } |
|
637 |
|
638 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) |
|
639 { |
|
640 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ |
|
641 if (ch <= MAXIMUM_UCS2) |
|
642 { |
|
643 /* fits in 16 bits */ |
|
644 *(myTarget++) = (UChar) ch; |
|
645 *(myOffsets++) = offsetNum; |
|
646 } |
|
647 else { |
|
648 /* write out the surrogates */ |
|
649 *(myTarget++) = U16_LEAD(ch); |
|
650 *(myOffsets++) = offsetNum; |
|
651 ch = U16_TRAIL(ch); |
|
652 if (myTarget < targetLimit) |
|
653 { |
|
654 *(myTarget++) = (UChar)ch; |
|
655 *(myOffsets++) = offsetNum; |
|
656 } |
|
657 else |
|
658 { |
|
659 /* Put in overflow buffer (not handled here) */ |
|
660 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
|
661 args->converter->UCharErrorBufferLength = 1; |
|
662 *err = U_BUFFER_OVERFLOW_ERROR; |
|
663 break; |
|
664 } |
|
665 } |
|
666 } |
|
667 else |
|
668 { |
|
669 args->converter->toULength = (int8_t)i; |
|
670 *err = U_ILLEGAL_CHAR_FOUND; |
|
671 break; |
|
672 } |
|
673 offsetNum += i; |
|
674 } |
|
675 |
|
676 donefornow: |
|
677 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
|
678 { |
|
679 /* End of target buffer */ |
|
680 *err = U_BUFFER_OVERFLOW_ERROR; |
|
681 } |
|
682 |
|
683 args->target = myTarget; |
|
684 args->source = (const char *) mySource; |
|
685 args->offsets = myOffsets; |
|
686 } |
|
687 |
|
688 static void |
|
689 T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, |
|
690 UErrorCode * err) |
|
691 { |
|
692 const UChar *mySource = args->source; |
|
693 unsigned char *myTarget; |
|
694 const UChar *sourceLimit = args->sourceLimit; |
|
695 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
|
696 UChar32 ch, ch2; |
|
697 unsigned int indexToWrite; |
|
698 unsigned char temp[sizeof(uint32_t)]; |
|
699 |
|
700 if(mySource >= sourceLimit) { |
|
701 /* no input, nothing to do */ |
|
702 return; |
|
703 } |
|
704 |
|
705 /* write the BOM if necessary */ |
|
706 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
|
707 static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; |
|
708 ucnv_fromUWriteBytes(args->converter, |
|
709 bom, 4, |
|
710 &args->target, args->targetLimit, |
|
711 &args->offsets, -1, |
|
712 err); |
|
713 args->converter->fromUnicodeStatus=0; |
|
714 } |
|
715 |
|
716 myTarget = (unsigned char *) args->target; |
|
717 temp[3] = 0; |
|
718 |
|
719 if (args->converter->fromUChar32) |
|
720 { |
|
721 ch = args->converter->fromUChar32; |
|
722 args->converter->fromUChar32 = 0; |
|
723 goto lowsurogate; |
|
724 } |
|
725 |
|
726 while (mySource < sourceLimit && myTarget < targetLimit) |
|
727 { |
|
728 ch = *(mySource++); |
|
729 |
|
730 if (U16_IS_SURROGATE(ch)) { |
|
731 if (U16_IS_LEAD(ch)) |
|
732 { |
|
733 lowsurogate: |
|
734 if (mySource < sourceLimit) |
|
735 { |
|
736 ch2 = *mySource; |
|
737 if (U16_IS_TRAIL(ch2)) { |
|
738 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
|
739 mySource++; |
|
740 } |
|
741 else { |
|
742 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
743 /* callback(illegal) */ |
|
744 args->converter->fromUChar32 = ch; |
|
745 *err = U_ILLEGAL_CHAR_FOUND; |
|
746 break; |
|
747 } |
|
748 } |
|
749 else { |
|
750 /* ran out of source */ |
|
751 args->converter->fromUChar32 = ch; |
|
752 if (args->flush) { |
|
753 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
754 /* callback(illegal) */ |
|
755 *err = U_ILLEGAL_CHAR_FOUND; |
|
756 } |
|
757 break; |
|
758 } |
|
759 } |
|
760 else { |
|
761 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
762 /* callback(illegal) */ |
|
763 args->converter->fromUChar32 = ch; |
|
764 *err = U_ILLEGAL_CHAR_FOUND; |
|
765 break; |
|
766 } |
|
767 } |
|
768 |
|
769 /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
|
770 temp[2] = (uint8_t) (ch >> 16 & 0x1F); |
|
771 temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
|
772 temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
|
773 |
|
774 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) |
|
775 { |
|
776 if (myTarget < targetLimit) |
|
777 { |
|
778 *(myTarget++) = temp[indexToWrite]; |
|
779 } |
|
780 else |
|
781 { |
|
782 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
|
783 *err = U_BUFFER_OVERFLOW_ERROR; |
|
784 } |
|
785 } |
|
786 } |
|
787 |
|
788 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
|
789 { |
|
790 *err = U_BUFFER_OVERFLOW_ERROR; |
|
791 } |
|
792 |
|
793 args->target = (char *) myTarget; |
|
794 args->source = mySource; |
|
795 } |
|
796 |
|
797 static void |
|
798 T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, |
|
799 UErrorCode * err) |
|
800 { |
|
801 const UChar *mySource = args->source; |
|
802 unsigned char *myTarget; |
|
803 int32_t *myOffsets; |
|
804 const UChar *sourceLimit = args->sourceLimit; |
|
805 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
|
806 UChar32 ch, ch2; |
|
807 unsigned int indexToWrite; |
|
808 unsigned char temp[sizeof(uint32_t)]; |
|
809 int32_t offsetNum = 0; |
|
810 |
|
811 if(mySource >= sourceLimit) { |
|
812 /* no input, nothing to do */ |
|
813 return; |
|
814 } |
|
815 |
|
816 /* write the BOM if necessary */ |
|
817 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
|
818 static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; |
|
819 ucnv_fromUWriteBytes(args->converter, |
|
820 bom, 4, |
|
821 &args->target, args->targetLimit, |
|
822 &args->offsets, -1, |
|
823 err); |
|
824 args->converter->fromUnicodeStatus=0; |
|
825 } |
|
826 |
|
827 myTarget = (unsigned char *) args->target; |
|
828 myOffsets = args->offsets; |
|
829 temp[3] = 0; |
|
830 |
|
831 if (args->converter->fromUChar32) |
|
832 { |
|
833 ch = args->converter->fromUChar32; |
|
834 args->converter->fromUChar32 = 0; |
|
835 goto lowsurogate; |
|
836 } |
|
837 |
|
838 while (mySource < sourceLimit && myTarget < targetLimit) |
|
839 { |
|
840 ch = *(mySource++); |
|
841 |
|
842 if (U16_IS_SURROGATE(ch)) { |
|
843 if (U16_IS_LEAD(ch)) |
|
844 { |
|
845 lowsurogate: |
|
846 if (mySource < sourceLimit) |
|
847 { |
|
848 ch2 = *mySource; |
|
849 if (U16_IS_TRAIL(ch2)) |
|
850 { |
|
851 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; |
|
852 mySource++; |
|
853 } |
|
854 else { |
|
855 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
856 /* callback(illegal) */ |
|
857 args->converter->fromUChar32 = ch; |
|
858 *err = U_ILLEGAL_CHAR_FOUND; |
|
859 break; |
|
860 } |
|
861 } |
|
862 else { |
|
863 /* ran out of source */ |
|
864 args->converter->fromUChar32 = ch; |
|
865 if (args->flush) { |
|
866 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
867 /* callback(illegal) */ |
|
868 *err = U_ILLEGAL_CHAR_FOUND; |
|
869 } |
|
870 break; |
|
871 } |
|
872 } |
|
873 else { |
|
874 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
875 /* callback(illegal) */ |
|
876 args->converter->fromUChar32 = ch; |
|
877 *err = U_ILLEGAL_CHAR_FOUND; |
|
878 break; |
|
879 } |
|
880 } |
|
881 |
|
882 /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ |
|
883 temp[2] = (uint8_t) (ch >> 16 & 0x1F); |
|
884 temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ |
|
885 temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ |
|
886 |
|
887 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) |
|
888 { |
|
889 if (myTarget < targetLimit) |
|
890 { |
|
891 *(myTarget++) = temp[indexToWrite]; |
|
892 *(myOffsets++) = offsetNum; |
|
893 } |
|
894 else |
|
895 { |
|
896 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; |
|
897 *err = U_BUFFER_OVERFLOW_ERROR; |
|
898 } |
|
899 } |
|
900 offsetNum = offsetNum + 1 + (temp[2] != 0); |
|
901 } |
|
902 |
|
903 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
|
904 { |
|
905 *err = U_BUFFER_OVERFLOW_ERROR; |
|
906 } |
|
907 |
|
908 args->target = (char *) myTarget; |
|
909 args->source = mySource; |
|
910 args->offsets = myOffsets; |
|
911 } |
|
912 |
|
913 static UChar32 |
|
914 T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, |
|
915 UErrorCode* err) |
|
916 { |
|
917 const uint8_t *mySource; |
|
918 UChar32 myUChar; |
|
919 int32_t length; |
|
920 |
|
921 mySource = (const uint8_t *)args->source; |
|
922 if (mySource >= (const uint8_t *)args->sourceLimit) |
|
923 { |
|
924 /* no input */ |
|
925 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
|
926 return 0xffff; |
|
927 } |
|
928 |
|
929 length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); |
|
930 if (length < 4) |
|
931 { |
|
932 /* got a partial character */ |
|
933 uprv_memcpy(args->converter->toUBytes, mySource, length); |
|
934 args->converter->toULength = (int8_t)length; |
|
935 args->source = (const char *)(mySource + length); |
|
936 *err = U_TRUNCATED_CHAR_FOUND; |
|
937 return 0xffff; |
|
938 } |
|
939 |
|
940 /* Don't even try to do a direct cast because the value may be on an odd address. */ |
|
941 myUChar = ((UChar32)mySource[3] << 24) |
|
942 | ((UChar32)mySource[2] << 16) |
|
943 | ((UChar32)mySource[1] << 8) |
|
944 | ((UChar32)mySource[0]); |
|
945 |
|
946 args->source = (const char *)(mySource + 4); |
|
947 if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { |
|
948 return myUChar; |
|
949 } |
|
950 |
|
951 uprv_memcpy(args->converter->toUBytes, mySource, 4); |
|
952 args->converter->toULength = 4; |
|
953 |
|
954 *err = U_ILLEGAL_CHAR_FOUND; |
|
955 return 0xffff; |
|
956 } |
|
957 |
|
958 static const UConverterImpl _UTF32LEImpl = { |
|
959 UCNV_UTF32_LittleEndian, |
|
960 |
|
961 NULL, |
|
962 NULL, |
|
963 |
|
964 NULL, |
|
965 NULL, |
|
966 NULL, |
|
967 |
|
968 T_UConverter_toUnicode_UTF32_LE, |
|
969 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, |
|
970 T_UConverter_fromUnicode_UTF32_LE, |
|
971 T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, |
|
972 T_UConverter_getNextUChar_UTF32_LE, |
|
973 |
|
974 NULL, |
|
975 NULL, |
|
976 NULL, |
|
977 NULL, |
|
978 ucnv_getNonSurrogateUnicodeSet |
|
979 }; |
|
980 |
|
981 /* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ |
|
982 static const UConverterStaticData _UTF32LEStaticData = { |
|
983 sizeof(UConverterStaticData), |
|
984 "UTF-32LE", |
|
985 1234, |
|
986 UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, |
|
987 { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, |
|
988 0, |
|
989 0, |
|
990 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
|
991 }; |
|
992 |
|
993 |
|
994 const UConverterSharedData _UTF32LEData = { |
|
995 sizeof(UConverterSharedData), ~((uint32_t) 0), |
|
996 NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl, |
|
997 0 |
|
998 }; |
|
999 |
|
1000 /* UTF-32 (Detect BOM) ------------------------------------------------------ */ |
|
1001 |
|
1002 /* |
|
1003 * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE |
|
1004 * accordingly. |
|
1005 * |
|
1006 * State values: |
|
1007 * 0 initial state |
|
1008 * 1 saw 00 |
|
1009 * 2 saw 00 00 |
|
1010 * 3 saw 00 00 FE |
|
1011 * 4 - |
|
1012 * 5 saw FF |
|
1013 * 6 saw FF FE |
|
1014 * 7 saw FF FE 00 |
|
1015 * 8 UTF-32BE mode |
|
1016 * 9 UTF-32LE mode |
|
1017 * |
|
1018 * During detection: state&3==number of matching bytes so far. |
|
1019 * |
|
1020 * On output, emit U+FEFF as the first code point. |
|
1021 */ |
|
1022 |
|
1023 static void |
|
1024 _UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { |
|
1025 if(choice<=UCNV_RESET_TO_UNICODE) { |
|
1026 /* reset toUnicode: state=0 */ |
|
1027 cnv->mode=0; |
|
1028 } |
|
1029 if(choice!=UCNV_RESET_TO_UNICODE) { |
|
1030 /* reset fromUnicode: prepare to output the UTF-32PE BOM */ |
|
1031 cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; |
|
1032 } |
|
1033 } |
|
1034 |
|
1035 static void |
|
1036 _UTF32Open(UConverter *cnv, |
|
1037 UConverterLoadArgs *pArgs, |
|
1038 UErrorCode *pErrorCode) { |
|
1039 _UTF32Reset(cnv, UCNV_RESET_BOTH); |
|
1040 } |
|
1041 |
|
/*
 * Both UTF-32 byte order marks, back to back, indexed directly by the
 * BOM-detection state: bytes 0..3 are the big-endian BOM (00 00 FE FF),
 * bytes 4..7 the little-endian BOM (FF FE 00 00).
 */
static const char utf32BOM[8]={
    0, 0, (char)0xfe, (char)0xff,   /* UTF-32BE */
    (char)0xff, (char)0xfe, 0, 0    /* UTF-32LE */
};
|
1043 |
|
1044 static void |
|
1045 _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, |
|
1046 UErrorCode *pErrorCode) { |
|
1047 UConverter *cnv=pArgs->converter; |
|
1048 const char *source=pArgs->source; |
|
1049 const char *sourceLimit=pArgs->sourceLimit; |
|
1050 int32_t *offsets=pArgs->offsets; |
|
1051 |
|
1052 int32_t state, offsetDelta; |
|
1053 char b; |
|
1054 |
|
1055 state=cnv->mode; |
|
1056 |
|
1057 /* |
|
1058 * If we detect a BOM in this buffer, then we must add the BOM size to the |
|
1059 * offsets because the actual converter function will not see and count the BOM. |
|
1060 * offsetDelta will have the number of the BOM bytes that are in the current buffer. |
|
1061 */ |
|
1062 offsetDelta=0; |
|
1063 |
|
1064 while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { |
|
1065 switch(state) { |
|
1066 case 0: |
|
1067 b=*source; |
|
1068 if(b==0) { |
|
1069 state=1; /* could be 00 00 FE FF */ |
|
1070 } else if(b==(char)0xff) { |
|
1071 state=5; /* could be FF FE 00 00 */ |
|
1072 } else { |
|
1073 state=8; /* default to UTF-32BE */ |
|
1074 continue; |
|
1075 } |
|
1076 ++source; |
|
1077 break; |
|
1078 case 1: |
|
1079 case 2: |
|
1080 case 3: |
|
1081 case 5: |
|
1082 case 6: |
|
1083 case 7: |
|
1084 if(*source==utf32BOM[state]) { |
|
1085 ++state; |
|
1086 ++source; |
|
1087 if(state==4) { |
|
1088 state=8; /* detect UTF-32BE */ |
|
1089 offsetDelta=(int32_t)(source-pArgs->source); |
|
1090 } else if(state==8) { |
|
1091 state=9; /* detect UTF-32LE */ |
|
1092 offsetDelta=(int32_t)(source-pArgs->source); |
|
1093 } |
|
1094 } else { |
|
1095 /* switch to UTF-32BE and pass the previous bytes */ |
|
1096 int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ |
|
1097 |
|
1098 /* reset the source */ |
|
1099 source=pArgs->source; |
|
1100 |
|
1101 if(count==(state&3)) { |
|
1102 /* simple: all in the same buffer, just reset source */ |
|
1103 } else { |
|
1104 UBool oldFlush=pArgs->flush; |
|
1105 |
|
1106 /* some of the bytes are from a previous buffer, replay those first */ |
|
1107 pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ |
|
1108 pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ |
|
1109 pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ |
|
1110 |
|
1111 /* no offsets: bytes from previous buffer, and not enough for output */ |
|
1112 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
|
1113 |
|
1114 /* restore real pointers; pArgs->source will be set in case 8/9 */ |
|
1115 pArgs->sourceLimit=sourceLimit; |
|
1116 pArgs->flush=oldFlush; |
|
1117 } |
|
1118 state=8; |
|
1119 continue; |
|
1120 } |
|
1121 break; |
|
1122 case 8: |
|
1123 /* call UTF-32BE */ |
|
1124 pArgs->source=source; |
|
1125 if(offsets==NULL) { |
|
1126 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
|
1127 } else { |
|
1128 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); |
|
1129 } |
|
1130 source=pArgs->source; |
|
1131 break; |
|
1132 case 9: |
|
1133 /* call UTF-32LE */ |
|
1134 pArgs->source=source; |
|
1135 if(offsets==NULL) { |
|
1136 T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); |
|
1137 } else { |
|
1138 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); |
|
1139 } |
|
1140 source=pArgs->source; |
|
1141 break; |
|
1142 default: |
|
1143 break; /* does not occur */ |
|
1144 } |
|
1145 } |
|
1146 |
|
1147 /* add BOM size to offsets - see comment at offsetDelta declaration */ |
|
1148 if(offsets!=NULL && offsetDelta!=0) { |
|
1149 int32_t *offsetsLimit=pArgs->offsets; |
|
1150 while(offsets<offsetsLimit) { |
|
1151 *offsets++ += offsetDelta; |
|
1152 } |
|
1153 } |
|
1154 |
|
1155 pArgs->source=source; |
|
1156 |
|
1157 if(source==sourceLimit && pArgs->flush) { |
|
1158 /* handle truncated input */ |
|
1159 switch(state) { |
|
1160 case 0: |
|
1161 break; /* no input at all, nothing to do */ |
|
1162 case 8: |
|
1163 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
|
1164 break; |
|
1165 case 9: |
|
1166 T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); |
|
1167 break; |
|
1168 default: |
|
1169 /* handle 0<state<8: call UTF-32BE with too-short input */ |
|
1170 pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ |
|
1171 pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ |
|
1172 |
|
1173 /* no offsets: not enough for output */ |
|
1174 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
|
1175 pArgs->source=source; |
|
1176 pArgs->sourceLimit=sourceLimit; |
|
1177 state=8; |
|
1178 break; |
|
1179 } |
|
1180 } |
|
1181 |
|
1182 cnv->mode=state; |
|
1183 } |
|
1184 |
|
1185 static UChar32 |
|
1186 _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, |
|
1187 UErrorCode *pErrorCode) { |
|
1188 switch(pArgs->converter->mode) { |
|
1189 case 8: |
|
1190 return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); |
|
1191 case 9: |
|
1192 return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); |
|
1193 default: |
|
1194 return UCNV_GET_NEXT_UCHAR_USE_TO_U; |
|
1195 } |
|
1196 } |
|
1197 |
|
1198 static const UConverterImpl _UTF32Impl = { |
|
1199 UCNV_UTF32, |
|
1200 |
|
1201 NULL, |
|
1202 NULL, |
|
1203 |
|
1204 _UTF32Open, |
|
1205 NULL, |
|
1206 _UTF32Reset, |
|
1207 |
|
1208 _UTF32ToUnicodeWithOffsets, |
|
1209 _UTF32ToUnicodeWithOffsets, |
|
1210 #if U_IS_BIG_ENDIAN |
|
1211 T_UConverter_fromUnicode_UTF32_BE, |
|
1212 T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, |
|
1213 #else |
|
1214 T_UConverter_fromUnicode_UTF32_LE, |
|
1215 T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, |
|
1216 #endif |
|
1217 _UTF32GetNextUChar, |
|
1218 |
|
1219 NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ |
|
1220 NULL, |
|
1221 NULL, |
|
1222 NULL, |
|
1223 ucnv_getNonSurrogateUnicodeSet |
|
1224 }; |
|
1225 |
|
1226 /* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */ |
|
1227 static const UConverterStaticData _UTF32StaticData = { |
|
1228 sizeof(UConverterStaticData), |
|
1229 "UTF-32", |
|
1230 1236, |
|
1231 UCNV_IBM, UCNV_UTF32, 4, 4, |
|
1232 #if U_IS_BIG_ENDIAN |
|
1233 { 0, 0, 0xff, 0xfd }, 4, |
|
1234 #else |
|
1235 { 0xfd, 0xff, 0, 0 }, 4, |
|
1236 #endif |
|
1237 FALSE, FALSE, |
|
1238 0, |
|
1239 0, |
|
1240 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
|
1241 }; |
|
1242 |
|
1243 const UConverterSharedData _UTF32Data = { |
|
1244 sizeof(UConverterSharedData), ~((uint32_t) 0), |
|
1245 NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl, |
|
1246 0 |
|
1247 }; |
|
1248 |
|
1249 #endif |