|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2000-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * file name: ucnvhz.c |
|
7 * encoding: US-ASCII |
|
8 * tab size: 8 (not used) |
|
9 * indentation:4 |
|
10 * |
|
11 * created on: 2000oct16 |
|
12 * created by: Ram Viswanadha |
|
13 * 10/31/2000 Ram Implemented offsets logic function |
|
14 * |
|
15 */ |
|
16 |
|
17 #include "unicode/utypes.h" |
|
18 |
|
19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
|
20 |
|
21 #include "cmemory.h" |
|
22 #include "unicode/ucnv.h" |
|
23 #include "unicode/ucnv_cb.h" |
|
24 #include "unicode/uset.h" |
|
25 #include "unicode/utf16.h" |
|
26 #include "ucnv_bld.h" |
|
27 #include "ucnv_cnv.h" |
|
28 #include "ucnv_imp.h" |
|
29 |
|
30 #define UCNV_TILDE 0x7E /* ~ */ |
|
31 #define UCNV_OPEN_BRACE 0x7B /* { */ |
|
32 #define UCNV_CLOSE_BRACE 0x7D /* } */ |
|
33 #define SB_ESCAPE "\x7E\x7D" |
|
34 #define DB_ESCAPE "\x7E\x7B" |
|
35 #define TILDE_ESCAPE "\x7E\x7E" |
|
36 #define ESC_LEN 2 |
|
37 |
|
38 |
|
/*
 * Appends the `len` bytes starting at `strToAppend` to args->target.
 *
 * Bytes that fit (targetIndex < targetLength) are stored at
 * args->target[targetIndex] and targetIndex is advanced; bytes that do not
 * fit are stored in args->converter->charErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * Side effects on the arguments (callers must pass modifiable lvalues):
 *   - `len` is counted down (it ends below zero),
 *   - `strToAppend` is advanced past the bytes that were consumed,
 *   - `targetIndex` is advanced by the number of bytes written to the target.
 *
 * NOTE: when args->offsets is not NULL the macro writes through a variable
 * named `offsets` that must exist in the invoking scope (it is not a macro
 * parameter); each written byte records the offset sourceIndex-1.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * ';' is exactly one statement, which keeps it safe inside unbraced
 * if/else constructs.
 */
#define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) \
    do { \
        while((len)-- > 0) { \
            if((targetIndex) < (targetLength)) { \
                (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
                if((args)->offsets != NULL) { \
                    *(offsets++) = (sourceIndex) - 1; \
                } \
                (targetIndex)++; \
            } else { \
                (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = \
                    (unsigned char) *(strToAppend); \
                *(err) = U_BUFFER_OVERFLOW_ERROR; \
            } \
            (strToAppend)++; \
        } \
    } while(0)
|
55 |
|
56 |
|
57 typedef struct{ |
|
58 UConverter* gbConverter; |
|
59 int32_t targetIndex; |
|
60 int32_t sourceIndex; |
|
61 UBool isEscapeAppended; |
|
62 UBool isStateDBCS; |
|
63 UBool isTargetUCharDBCS; |
|
64 UBool isEmptySegment; |
|
65 }UConverterDataHZ; |
|
66 |
|
67 |
|
68 |
|
69 static void |
|
70 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
|
71 UConverter *gbConverter; |
|
72 if(pArgs->onlyTestIsLoadable) { |
|
73 ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ |
|
74 return; |
|
75 } |
|
76 gbConverter = ucnv_open("GBK", errorCode); |
|
77 if(U_FAILURE(*errorCode)) { |
|
78 return; |
|
79 } |
|
80 cnv->toUnicodeStatus = 0; |
|
81 cnv->fromUnicodeStatus= 0; |
|
82 cnv->mode=0; |
|
83 cnv->fromUChar32=0x0000; |
|
84 cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); |
|
85 if(cnv->extraInfo != NULL){ |
|
86 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; |
|
87 } |
|
88 else { |
|
89 ucnv_close(gbConverter); |
|
90 *errorCode = U_MEMORY_ALLOCATION_ERROR; |
|
91 return; |
|
92 } |
|
93 } |
|
94 |
|
95 static void |
|
96 _HZClose(UConverter *cnv){ |
|
97 if(cnv->extraInfo != NULL) { |
|
98 ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); |
|
99 if(!cnv->isExtraLocal) { |
|
100 uprv_free(cnv->extraInfo); |
|
101 } |
|
102 cnv->extraInfo = NULL; |
|
103 } |
|
104 } |
|
105 |
|
106 static void |
|
107 _HZReset(UConverter *cnv, UConverterResetChoice choice){ |
|
108 if(choice<=UCNV_RESET_TO_UNICODE) { |
|
109 cnv->toUnicodeStatus = 0; |
|
110 cnv->mode=0; |
|
111 if(cnv->extraInfo != NULL){ |
|
112 ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; |
|
113 ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; |
|
114 } |
|
115 } |
|
116 if(choice!=UCNV_RESET_TO_UNICODE) { |
|
117 cnv->fromUnicodeStatus= 0; |
|
118 cnv->fromUChar32=0x0000; |
|
119 if(cnv->extraInfo != NULL){ |
|
120 ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; |
|
121 ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; |
|
122 ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; |
|
123 ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; |
|
124 } |
|
125 } |
|
126 } |
|
127 |
|
/**************************************HZ Encoding*************************************************
 * Rules for HZ encoding
 *
 * In ASCII mode, a byte is interpreted as an ASCII character, unless a
 * '~' is encountered. The character '~' is an escape character. By
 * convention, it must be immediately followed ONLY by '~', '{' or '\n'
 * (<LF>), with the following special meaning.
 *
 * 1. The escape sequence '~~' is interpreted as a '~'.
 * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
 * 3. The escape sequence '~\n' is a line-continuation marker to be
 *    consumed with no output produced.
 *
 * In GB mode, characters are interpreted two bytes at a time as (pure)
 * GB codes until the escape-from-GB code '~}' is read. This code
 * switches the mode from GB back to ASCII. (Note that the escape-
 * from-GB code '~}' ($7E7D) is outside the defined GB range.)
 *
 * Source: RFC 1842
 *
 * Note that the formal syntax in RFC 1842 is invalid. I assume that the
 * intended definition of single-byte-segment is as follows (pedberg):
 *     single-byte-segment = single-byte-seq 1*single-byte-char
 */
|
151 |
|
152 |
|
153 static void |
|
154 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, |
|
155 UErrorCode* err){ |
|
156 char tempBuf[2]; |
|
157 const char *mySource = ( char *) args->source; |
|
158 UChar *myTarget = args->target; |
|
159 const char *mySourceLimit = args->sourceLimit; |
|
160 UChar32 targetUniChar = 0x0000; |
|
161 int32_t mySourceChar = 0x0000; |
|
162 UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); |
|
163 tempBuf[0]=0; |
|
164 tempBuf[1]=0; |
|
165 |
|
166 /* Calling code already handles this situation. */ |
|
167 /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ |
|
168 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
169 return; |
|
170 }*/ |
|
171 |
|
172 while(mySource< mySourceLimit){ |
|
173 |
|
174 if(myTarget < args->targetLimit){ |
|
175 |
|
176 mySourceChar= (unsigned char) *mySource++; |
|
177 |
|
178 if(args->converter->mode == UCNV_TILDE) { |
|
179 /* second byte after ~ */ |
|
180 args->converter->mode=0; |
|
181 switch(mySourceChar) { |
|
182 case 0x0A: |
|
183 /* no output for ~\n (line-continuation marker) */ |
|
184 continue; |
|
185 case UCNV_TILDE: |
|
186 if(args->offsets) { |
|
187 args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); |
|
188 } |
|
189 *(myTarget++)=(UChar)mySourceChar; |
|
190 myData->isEmptySegment = FALSE; |
|
191 continue; |
|
192 case UCNV_OPEN_BRACE: |
|
193 case UCNV_CLOSE_BRACE: |
|
194 myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); |
|
195 if (myData->isEmptySegment) { |
|
196 myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ |
|
197 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
|
198 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
|
199 args->converter->toUBytes[0] = UCNV_TILDE; |
|
200 args->converter->toUBytes[1] = mySourceChar; |
|
201 args->converter->toULength = 2; |
|
202 args->target = myTarget; |
|
203 args->source = mySource; |
|
204 return; |
|
205 } |
|
206 myData->isEmptySegment = TRUE; |
|
207 continue; |
|
208 default: |
|
209 /* if the first byte is equal to TILDE and the trail byte |
|
210 * is not a valid byte then it is an error condition |
|
211 */ |
|
212 /* |
|
213 * Ticket 5691: consistent illegal sequences: |
|
214 * - We include at least the first byte in the illegal sequence. |
|
215 * - If any of the non-initial bytes could be the start of a character, |
|
216 * we stop the illegal sequence before the first one of those. |
|
217 */ |
|
218 myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ |
|
219 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
|
220 args->converter->toUBytes[0] = UCNV_TILDE; |
|
221 if( myData->isStateDBCS ? |
|
222 (0x21 <= mySourceChar && mySourceChar <= 0x7e) : |
|
223 mySourceChar <= 0x7f |
|
224 ) { |
|
225 /* The current byte could be the start of a character: Back it out. */ |
|
226 args->converter->toULength = 1; |
|
227 --mySource; |
|
228 } else { |
|
229 /* Include the current byte in the illegal sequence. */ |
|
230 args->converter->toUBytes[1] = mySourceChar; |
|
231 args->converter->toULength = 2; |
|
232 } |
|
233 args->target = myTarget; |
|
234 args->source = mySource; |
|
235 return; |
|
236 } |
|
237 } else if(myData->isStateDBCS) { |
|
238 if(args->converter->toUnicodeStatus == 0x00){ |
|
239 /* lead byte */ |
|
240 if(mySourceChar == UCNV_TILDE) { |
|
241 args->converter->mode = UCNV_TILDE; |
|
242 } else { |
|
243 /* add another bit to distinguish a 0 byte from not having seen a lead byte */ |
|
244 args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); |
|
245 myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ |
|
246 } |
|
247 continue; |
|
248 } |
|
249 else{ |
|
250 /* trail byte */ |
|
251 int leadIsOk, trailIsOk; |
|
252 uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; |
|
253 targetUniChar = 0xffff; |
|
254 /* |
|
255 * Ticket 5691: consistent illegal sequences: |
|
256 * - We include at least the first byte in the illegal sequence. |
|
257 * - If any of the non-initial bytes could be the start of a character, |
|
258 * we stop the illegal sequence before the first one of those. |
|
259 * |
|
260 * In HZ DBCS, if the second byte is in the 21..7e range, |
|
261 * we report only the first byte as the illegal sequence. |
|
262 * Otherwise we convert or report the pair of bytes. |
|
263 */ |
|
264 leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); |
|
265 trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); |
|
266 if (leadIsOk && trailIsOk) { |
|
267 tempBuf[0] = (char) (leadByte+0x80) ; |
|
268 tempBuf[1] = (char) (mySourceChar+0x80); |
|
269 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, |
|
270 tempBuf, 2, args->converter->useFallback); |
|
271 mySourceChar= (leadByte << 8) | mySourceChar; |
|
272 } else if (trailIsOk) { |
|
273 /* report a single illegal byte and continue with the following DBCS starter byte */ |
|
274 --mySource; |
|
275 mySourceChar = (int32_t)leadByte; |
|
276 } else { |
|
277 /* report a pair of illegal bytes if the second byte is not a DBCS starter */ |
|
278 /* add another bit so that the code below writes 2 bytes in case of error */ |
|
279 mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; |
|
280 } |
|
281 args->converter->toUnicodeStatus =0x00; |
|
282 } |
|
283 } |
|
284 else{ |
|
285 if(mySourceChar == UCNV_TILDE) { |
|
286 args->converter->mode = UCNV_TILDE; |
|
287 continue; |
|
288 } else if(mySourceChar <= 0x7f) { |
|
289 targetUniChar = (UChar)mySourceChar; /* ASCII */ |
|
290 myData->isEmptySegment = FALSE; /* the segment has something valid */ |
|
291 } else { |
|
292 targetUniChar = 0xffff; |
|
293 myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ |
|
294 } |
|
295 } |
|
296 if(targetUniChar < 0xfffe){ |
|
297 if(args->offsets) { |
|
298 args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); |
|
299 } |
|
300 |
|
301 *(myTarget++)=(UChar)targetUniChar; |
|
302 } |
|
303 else /* targetUniChar>=0xfffe */ { |
|
304 if(targetUniChar == 0xfffe){ |
|
305 *err = U_INVALID_CHAR_FOUND; |
|
306 } |
|
307 else{ |
|
308 *err = U_ILLEGAL_CHAR_FOUND; |
|
309 } |
|
310 if(mySourceChar > 0xff){ |
|
311 args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); |
|
312 args->converter->toUBytes[1] = (uint8_t)mySourceChar; |
|
313 args->converter->toULength=2; |
|
314 } |
|
315 else{ |
|
316 args->converter->toUBytes[0] = (uint8_t)mySourceChar; |
|
317 args->converter->toULength=1; |
|
318 } |
|
319 break; |
|
320 } |
|
321 } |
|
322 else{ |
|
323 *err =U_BUFFER_OVERFLOW_ERROR; |
|
324 break; |
|
325 } |
|
326 } |
|
327 |
|
328 args->target = myTarget; |
|
329 args->source = mySource; |
|
330 } |
|
331 |
|
332 |
|
333 static void |
|
334 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, |
|
335 UErrorCode * err){ |
|
336 const UChar *mySource = args->source; |
|
337 char *myTarget = args->target; |
|
338 int32_t* offsets = args->offsets; |
|
339 int32_t mySourceIndex = 0; |
|
340 int32_t myTargetIndex = 0; |
|
341 int32_t targetLength = (int32_t)(args->targetLimit - myTarget); |
|
342 int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); |
|
343 int32_t length=0; |
|
344 uint32_t targetUniChar = 0x0000; |
|
345 UChar32 mySourceChar = 0x0000; |
|
346 UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; |
|
347 UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; |
|
348 UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; |
|
349 int len =0; |
|
350 const char* escSeq=NULL; |
|
351 |
|
352 /* Calling code already handles this situation. */ |
|
353 /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ |
|
354 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
355 return; |
|
356 }*/ |
|
357 if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { |
|
358 goto getTrail; |
|
359 } |
|
360 /*writing the char to the output stream */ |
|
361 while (mySourceIndex < mySourceLength){ |
|
362 targetUniChar = missingCharMarker; |
|
363 if (myTargetIndex < targetLength){ |
|
364 |
|
365 mySourceChar = (UChar) mySource[mySourceIndex++]; |
|
366 |
|
367 |
|
368 oldIsTargetUCharDBCS = isTargetUCharDBCS; |
|
369 if(mySourceChar ==UCNV_TILDE){ |
|
370 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ |
|
371 len = ESC_LEN; |
|
372 escSeq = TILDE_ESCAPE; |
|
373 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
|
374 continue; |
|
375 } else if(mySourceChar <= 0x7f) { |
|
376 length = 1; |
|
377 targetUniChar = mySourceChar; |
|
378 } else { |
|
379 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, |
|
380 mySourceChar,&targetUniChar,args->converter->useFallback); |
|
381 /* we can only use lead bytes 21..7D and trail bytes 21..7E */ |
|
382 if( length == 2 && |
|
383 (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && |
|
384 (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) |
|
385 ) { |
|
386 targetUniChar -= 0x8080; |
|
387 } else { |
|
388 targetUniChar = missingCharMarker; |
|
389 } |
|
390 } |
|
391 if (targetUniChar != missingCharMarker){ |
|
392 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); |
|
393 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ |
|
394 /*Shifting from a double byte to single byte mode*/ |
|
395 if(!isTargetUCharDBCS){ |
|
396 len =ESC_LEN; |
|
397 escSeq = SB_ESCAPE; |
|
398 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
|
399 myConverterData->isEscapeAppended = TRUE; |
|
400 } |
|
401 else{ /* Shifting from a single byte to double byte mode*/ |
|
402 len =ESC_LEN; |
|
403 escSeq = DB_ESCAPE; |
|
404 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); |
|
405 myConverterData->isEscapeAppended = TRUE; |
|
406 |
|
407 } |
|
408 } |
|
409 |
|
410 if(isTargetUCharDBCS){ |
|
411 if( myTargetIndex <targetLength){ |
|
412 myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); |
|
413 if(offsets){ |
|
414 *(offsets++) = mySourceIndex-1; |
|
415 } |
|
416 if(myTargetIndex < targetLength){ |
|
417 myTarget[myTargetIndex++] =(char) targetUniChar; |
|
418 if(offsets){ |
|
419 *(offsets++) = mySourceIndex-1; |
|
420 } |
|
421 }else{ |
|
422 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
|
423 *err = U_BUFFER_OVERFLOW_ERROR; |
|
424 } |
|
425 }else{ |
|
426 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); |
|
427 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
|
428 *err = U_BUFFER_OVERFLOW_ERROR; |
|
429 } |
|
430 |
|
431 }else{ |
|
432 if( myTargetIndex <targetLength){ |
|
433 myTarget[myTargetIndex++] = (char) (targetUniChar ); |
|
434 if(offsets){ |
|
435 *(offsets++) = mySourceIndex-1; |
|
436 } |
|
437 |
|
438 }else{ |
|
439 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; |
|
440 *err = U_BUFFER_OVERFLOW_ERROR; |
|
441 } |
|
442 } |
|
443 |
|
444 } |
|
445 else{ |
|
446 /* oops.. the code point is unassigned */ |
|
447 /*Handle surrogates */ |
|
448 /*check if the char is a First surrogate*/ |
|
449 if(U16_IS_SURROGATE(mySourceChar)) { |
|
450 if(U16_IS_SURROGATE_LEAD(mySourceChar)) { |
|
451 args->converter->fromUChar32=mySourceChar; |
|
452 getTrail: |
|
453 /*look ahead to find the trail surrogate*/ |
|
454 if(mySourceIndex < mySourceLength) { |
|
455 /* test the following code unit */ |
|
456 UChar trail=(UChar) args->source[mySourceIndex]; |
|
457 if(U16_IS_TRAIL(trail)) { |
|
458 ++mySourceIndex; |
|
459 mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); |
|
460 args->converter->fromUChar32=0x00; |
|
461 /* there are no surrogates in GB2312*/ |
|
462 *err = U_INVALID_CHAR_FOUND; |
|
463 /* exit this condition tree */ |
|
464 } else { |
|
465 /* this is an unmatched lead code unit (1st surrogate) */ |
|
466 /* callback(illegal) */ |
|
467 *err=U_ILLEGAL_CHAR_FOUND; |
|
468 } |
|
469 } else { |
|
470 /* no more input */ |
|
471 *err = U_ZERO_ERROR; |
|
472 } |
|
473 } else { |
|
474 /* this is an unmatched trail code unit (2nd surrogate) */ |
|
475 /* callback(illegal) */ |
|
476 *err=U_ILLEGAL_CHAR_FOUND; |
|
477 } |
|
478 } else { |
|
479 /* callback(unassigned) for a BMP code point */ |
|
480 *err = U_INVALID_CHAR_FOUND; |
|
481 } |
|
482 |
|
483 args->converter->fromUChar32=mySourceChar; |
|
484 break; |
|
485 } |
|
486 } |
|
487 else{ |
|
488 *err = U_BUFFER_OVERFLOW_ERROR; |
|
489 break; |
|
490 } |
|
491 targetUniChar=missingCharMarker; |
|
492 } |
|
493 |
|
494 args->target += myTargetIndex; |
|
495 args->source += mySourceIndex; |
|
496 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; |
|
497 } |
|
498 |
|
499 static void |
|
500 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { |
|
501 UConverter *cnv = args->converter; |
|
502 UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; |
|
503 char *p; |
|
504 char buffer[4]; |
|
505 p = buffer; |
|
506 |
|
507 if( convData->isTargetUCharDBCS){ |
|
508 *p++= UCNV_TILDE; |
|
509 *p++= UCNV_CLOSE_BRACE; |
|
510 convData->isTargetUCharDBCS=FALSE; |
|
511 } |
|
512 *p++= (char)cnv->subChars[0]; |
|
513 |
|
514 ucnv_cbFromUWriteBytes(args, |
|
515 buffer, (int32_t)(p - buffer), |
|
516 offsetIndex, err); |
|
517 } |
|
518 |
|
519 /* |
|
520 * Structure for cloning an HZ converter into a single memory block. |
|
521 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, |
|
522 * and then ucnv_safeClone() of the sub-converter may additionally align |
|
523 * subCnv inside the cloneHZStruct, for which we need the deadSpace after |
|
524 * subCnv. This is because UAlignedMemory may be larger than the actually |
|
525 * necessary alignment size for the platform. |
|
526 * The other cloneHZStruct fields will not be moved around, |
|
527 * and are aligned properly with cloneHZStruct's alignment. |
|
528 */ |
|
529 struct cloneHZStruct |
|
530 { |
|
531 UConverter cnv; |
|
532 UConverter subCnv; |
|
533 UAlignedMemory deadSpace; |
|
534 UConverterDataHZ mydata; |
|
535 }; |
|
536 |
|
537 |
|
538 static UConverter * |
|
539 _HZ_SafeClone(const UConverter *cnv, |
|
540 void *stackBuffer, |
|
541 int32_t *pBufferSize, |
|
542 UErrorCode *status) |
|
543 { |
|
544 struct cloneHZStruct * localClone; |
|
545 int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); |
|
546 |
|
547 if (U_FAILURE(*status)){ |
|
548 return 0; |
|
549 } |
|
550 |
|
551 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ |
|
552 *pBufferSize = bufferSizeNeeded; |
|
553 return 0; |
|
554 } |
|
555 |
|
556 localClone = (struct cloneHZStruct *)stackBuffer; |
|
557 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ |
|
558 |
|
559 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); |
|
560 localClone->cnv.extraInfo = &localClone->mydata; |
|
561 localClone->cnv.isExtraLocal = TRUE; |
|
562 |
|
563 /* deep-clone the sub-converter */ |
|
564 size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ |
|
565 ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = |
|
566 ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); |
|
567 |
|
568 return &localClone->cnv; |
|
569 } |
|
570 |
|
571 static void |
|
572 _HZ_GetUnicodeSet(const UConverter *cnv, |
|
573 const USetAdder *sa, |
|
574 UConverterUnicodeSet which, |
|
575 UErrorCode *pErrorCode) { |
|
576 /* HZ converts all of ASCII */ |
|
577 sa->addRange(sa->set, 0, 0x7f); |
|
578 |
|
579 /* add all of the code points that the sub-converter handles */ |
|
580 ucnv_MBCSGetFilteredUnicodeSetForUnicode( |
|
581 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, |
|
582 sa, which, UCNV_SET_FILTER_HZ, |
|
583 pErrorCode); |
|
584 } |
|
585 |
|
586 static const UConverterImpl _HZImpl={ |
|
587 |
|
588 UCNV_HZ, |
|
589 |
|
590 NULL, |
|
591 NULL, |
|
592 |
|
593 _HZOpen, |
|
594 _HZClose, |
|
595 _HZReset, |
|
596 |
|
597 UConverter_toUnicode_HZ_OFFSETS_LOGIC, |
|
598 UConverter_toUnicode_HZ_OFFSETS_LOGIC, |
|
599 UConverter_fromUnicode_HZ_OFFSETS_LOGIC, |
|
600 UConverter_fromUnicode_HZ_OFFSETS_LOGIC, |
|
601 NULL, |
|
602 |
|
603 NULL, |
|
604 NULL, |
|
605 _HZ_WriteSub, |
|
606 _HZ_SafeClone, |
|
607 _HZ_GetUnicodeSet |
|
608 }; |
|
609 |
|
610 static const UConverterStaticData _HZStaticData={ |
|
611 sizeof(UConverterStaticData), |
|
612 "HZ", |
|
613 0, |
|
614 UCNV_IBM, |
|
615 UCNV_HZ, |
|
616 1, |
|
617 4, |
|
618 { 0x1a, 0, 0, 0 }, |
|
619 1, |
|
620 FALSE, |
|
621 FALSE, |
|
622 0, |
|
623 0, |
|
624 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ |
|
625 |
|
626 }; |
|
627 |
|
628 |
|
629 const UConverterSharedData _HZData={ |
|
630 sizeof(UConverterSharedData), |
|
631 ~((uint32_t) 0), |
|
632 NULL, |
|
633 NULL, |
|
634 &_HZStaticData, |
|
635 FALSE, |
|
636 &_HZImpl, |
|
637 0 |
|
638 }; |
|
639 |
|
640 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |