intl/icu/source/common/ucnvhz.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2000-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * file name: ucnvhz.c
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2000oct16
michael@0 12 * created by: Ram Viswanadha
michael@0 13 * 10/31/2000 Ram Implemented offsets logic function
michael@0 14 *
michael@0 15 */
michael@0 16
michael@0 17 #include "unicode/utypes.h"
michael@0 18
michael@0 19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
michael@0 20
michael@0 21 #include "cmemory.h"
michael@0 22 #include "unicode/ucnv.h"
michael@0 23 #include "unicode/ucnv_cb.h"
michael@0 24 #include "unicode/uset.h"
michael@0 25 #include "unicode/utf16.h"
michael@0 26 #include "ucnv_bld.h"
michael@0 27 #include "ucnv_cnv.h"
michael@0 28 #include "ucnv_imp.h"
michael@0 29
/* Individual bytes that take part in HZ escape sequences. */
#define UCNV_TILDE       0x7E   /* ~ : first byte of every escape */
#define UCNV_OPEN_BRACE  0x7B   /* { */
#define UCNV_CLOSE_BRACE 0x7D   /* } */

/*
 * Two-byte escape sequences written by the from-Unicode code path:
 *   SB_ESCAPE    "~}"  emitted when shifting to single-byte output
 *   DB_ESCAPE    "~{"  emitted when shifting to double-byte output
 *   TILDE_ESCAPE "~~"  emitted for a literal '~' in the input
 * ESC_LEN is the number of bytes in each of them.
 */
#define SB_ESCAPE    "\x7E\x7D"
#define DB_ESCAPE    "\x7E\x7B"
#define TILDE_ESCAPE "\x7E\x7E"
#define ESC_LEN      2
michael@0 37
michael@0 38
/*
 * CONCAT_ESCAPE_MACRO: append `len` bytes starting at `strToAppend` to the
 * output of a from-Unicode conversion.
 *
 *   args          pointer to the conversion arguments; ->target, ->offsets and
 *                 ->converter are accessed
 *   targetIndex   lvalue: next free index in args->target, advanced per byte
 *   targetLength  number of bytes available in args->target
 *   strToAppend   lvalue: cursor into the bytes to write, advanced per byte
 *   err           pointer that receives U_BUFFER_OVERFLOW_ERROR on overflow
 *   len           lvalue: byte count; counted down (ends at -1)
 *   sourceIndex   index just past the source unit that caused the output;
 *                 sourceIndex-1 is recorded as the offset of every byte
 *
 * Bytes that do not fit into the target are appended to the converter's
 * charErrorBuffer instead, and *err is set to U_BUFFER_OVERFLOW_ERROR.
 *
 * NOTE: the macro also uses (and advances) a variable named `offsets` that
 * must exist in the expanding scope; it is not a macro parameter.
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by
 * ';' is exactly one statement, including under an unbraced if/else.
 */
#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex) do { \
    while((len)-- > 0){ \
        if((targetIndex) < (targetLength)){ \
            (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
            if((args)->offsets!=NULL){ \
                *(offsets++) = (sourceIndex)-1; \
            } \
            (targetIndex)++; \
        } \
        else{ \
            (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = (unsigned char) *(strToAppend); \
            *(err) =U_BUFFER_OVERFLOW_ERROR; \
        } \
        (strToAppend)++; \
    } \
} while(0)
michael@0 55
michael@0 56
michael@0 57 typedef struct{
michael@0 58 UConverter* gbConverter;
michael@0 59 int32_t targetIndex;
michael@0 60 int32_t sourceIndex;
michael@0 61 UBool isEscapeAppended;
michael@0 62 UBool isStateDBCS;
michael@0 63 UBool isTargetUCharDBCS;
michael@0 64 UBool isEmptySegment;
michael@0 65 }UConverterDataHZ;
michael@0 66
michael@0 67
michael@0 68
michael@0 69 static void
michael@0 70 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
michael@0 71 UConverter *gbConverter;
michael@0 72 if(pArgs->onlyTestIsLoadable) {
michael@0 73 ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */
michael@0 74 return;
michael@0 75 }
michael@0 76 gbConverter = ucnv_open("GBK", errorCode);
michael@0 77 if(U_FAILURE(*errorCode)) {
michael@0 78 return;
michael@0 79 }
michael@0 80 cnv->toUnicodeStatus = 0;
michael@0 81 cnv->fromUnicodeStatus= 0;
michael@0 82 cnv->mode=0;
michael@0 83 cnv->fromUChar32=0x0000;
michael@0 84 cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));
michael@0 85 if(cnv->extraInfo != NULL){
michael@0 86 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;
michael@0 87 }
michael@0 88 else {
michael@0 89 ucnv_close(gbConverter);
michael@0 90 *errorCode = U_MEMORY_ALLOCATION_ERROR;
michael@0 91 return;
michael@0 92 }
michael@0 93 }
michael@0 94
michael@0 95 static void
michael@0 96 _HZClose(UConverter *cnv){
michael@0 97 if(cnv->extraInfo != NULL) {
michael@0 98 ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
michael@0 99 if(!cnv->isExtraLocal) {
michael@0 100 uprv_free(cnv->extraInfo);
michael@0 101 }
michael@0 102 cnv->extraInfo = NULL;
michael@0 103 }
michael@0 104 }
michael@0 105
michael@0 106 static void
michael@0 107 _HZReset(UConverter *cnv, UConverterResetChoice choice){
michael@0 108 if(choice<=UCNV_RESET_TO_UNICODE) {
michael@0 109 cnv->toUnicodeStatus = 0;
michael@0 110 cnv->mode=0;
michael@0 111 if(cnv->extraInfo != NULL){
michael@0 112 ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
michael@0 113 ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
michael@0 114 }
michael@0 115 }
michael@0 116 if(choice!=UCNV_RESET_TO_UNICODE) {
michael@0 117 cnv->fromUnicodeStatus= 0;
michael@0 118 cnv->fromUChar32=0x0000;
michael@0 119 if(cnv->extraInfo != NULL){
michael@0 120 ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
michael@0 121 ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
michael@0 122 ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
michael@0 123 ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
michael@0 124 }
michael@0 125 }
michael@0 126 }
michael@0 127
michael@0 128 /**************************************HZ Encoding*************************************************
michael@0 129 * Rules for HZ encoding
michael@0 130 *
michael@0 131 * In ASCII mode, a byte is interpreted as an ASCII character, unless a
michael@0 132 * '~' is encountered. The character '~' is an escape character. By
michael@0 133 * convention, it must be immediately followed ONLY by '~', '{' or '\n'
michael@0 134 * (<LF>), with the following special meaning.
michael@0 135
michael@0 136 * 1. The escape sequence '~~' is interpreted as a '~'.
michael@0 137 * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
michael@0 138 * 3. The escape sequence '~\n' is a line-continuation marker to be
michael@0 139 * consumed with no output produced.
michael@0 140 * In GB mode, characters are interpreted two bytes at a time as (pure)
michael@0 141 * GB codes until the escape-from-GB code '~}' is read. This code
michael@0 142 * switches the mode from GB back to ASCII. (Note that the escape-
michael@0 143 * from-GB code '~}' ($7E7D) is outside the defined GB range.)
michael@0 144 *
michael@0 145 * Source: RFC 1842
michael@0 146 *
michael@0 147 * Note that the formal syntax in RFC 1842 is invalid. I assume that the
michael@0 148 * intended definition of single-byte-segment is as follows (pedberg):
michael@0 149 * single-byte-segment = single-byte-seq 1*single-byte-char
michael@0 150 */
michael@0 151
michael@0 152
michael@0 153 static void
michael@0 154 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
michael@0 155 UErrorCode* err){
michael@0 156 char tempBuf[2];
michael@0 157 const char *mySource = ( char *) args->source;
michael@0 158 UChar *myTarget = args->target;
michael@0 159 const char *mySourceLimit = args->sourceLimit;
michael@0 160 UChar32 targetUniChar = 0x0000;
michael@0 161 int32_t mySourceChar = 0x0000;
michael@0 162 UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
michael@0 163 tempBuf[0]=0;
michael@0 164 tempBuf[1]=0;
michael@0 165
michael@0 166 /* Calling code already handles this situation. */
michael@0 167 /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
michael@0 168 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 169 return;
michael@0 170 }*/
michael@0 171
michael@0 172 while(mySource< mySourceLimit){
michael@0 173
michael@0 174 if(myTarget < args->targetLimit){
michael@0 175
michael@0 176 mySourceChar= (unsigned char) *mySource++;
michael@0 177
michael@0 178 if(args->converter->mode == UCNV_TILDE) {
michael@0 179 /* second byte after ~ */
michael@0 180 args->converter->mode=0;
michael@0 181 switch(mySourceChar) {
michael@0 182 case 0x0A:
michael@0 183 /* no output for ~\n (line-continuation marker) */
michael@0 184 continue;
michael@0 185 case UCNV_TILDE:
michael@0 186 if(args->offsets) {
michael@0 187 args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
michael@0 188 }
michael@0 189 *(myTarget++)=(UChar)mySourceChar;
michael@0 190 myData->isEmptySegment = FALSE;
michael@0 191 continue;
michael@0 192 case UCNV_OPEN_BRACE:
michael@0 193 case UCNV_CLOSE_BRACE:
michael@0 194 myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
michael@0 195 if (myData->isEmptySegment) {
michael@0 196 myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
michael@0 197 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
michael@0 198 args->converter->toUCallbackReason = UCNV_IRREGULAR;
michael@0 199 args->converter->toUBytes[0] = UCNV_TILDE;
michael@0 200 args->converter->toUBytes[1] = mySourceChar;
michael@0 201 args->converter->toULength = 2;
michael@0 202 args->target = myTarget;
michael@0 203 args->source = mySource;
michael@0 204 return;
michael@0 205 }
michael@0 206 myData->isEmptySegment = TRUE;
michael@0 207 continue;
michael@0 208 default:
michael@0 209 /* if the first byte is equal to TILDE and the trail byte
michael@0 210 * is not a valid byte then it is an error condition
michael@0 211 */
michael@0 212 /*
michael@0 213 * Ticket 5691: consistent illegal sequences:
michael@0 214 * - We include at least the first byte in the illegal sequence.
michael@0 215 * - If any of the non-initial bytes could be the start of a character,
michael@0 216 * we stop the illegal sequence before the first one of those.
michael@0 217 */
michael@0 218 myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
michael@0 219 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
michael@0 220 args->converter->toUBytes[0] = UCNV_TILDE;
michael@0 221 if( myData->isStateDBCS ?
michael@0 222 (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
michael@0 223 mySourceChar <= 0x7f
michael@0 224 ) {
michael@0 225 /* The current byte could be the start of a character: Back it out. */
michael@0 226 args->converter->toULength = 1;
michael@0 227 --mySource;
michael@0 228 } else {
michael@0 229 /* Include the current byte in the illegal sequence. */
michael@0 230 args->converter->toUBytes[1] = mySourceChar;
michael@0 231 args->converter->toULength = 2;
michael@0 232 }
michael@0 233 args->target = myTarget;
michael@0 234 args->source = mySource;
michael@0 235 return;
michael@0 236 }
michael@0 237 } else if(myData->isStateDBCS) {
michael@0 238 if(args->converter->toUnicodeStatus == 0x00){
michael@0 239 /* lead byte */
michael@0 240 if(mySourceChar == UCNV_TILDE) {
michael@0 241 args->converter->mode = UCNV_TILDE;
michael@0 242 } else {
michael@0 243 /* add another bit to distinguish a 0 byte from not having seen a lead byte */
michael@0 244 args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
michael@0 245 myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
michael@0 246 }
michael@0 247 continue;
michael@0 248 }
michael@0 249 else{
michael@0 250 /* trail byte */
michael@0 251 int leadIsOk, trailIsOk;
michael@0 252 uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
michael@0 253 targetUniChar = 0xffff;
michael@0 254 /*
michael@0 255 * Ticket 5691: consistent illegal sequences:
michael@0 256 * - We include at least the first byte in the illegal sequence.
michael@0 257 * - If any of the non-initial bytes could be the start of a character,
michael@0 258 * we stop the illegal sequence before the first one of those.
michael@0 259 *
michael@0 260 * In HZ DBCS, if the second byte is in the 21..7e range,
michael@0 261 * we report only the first byte as the illegal sequence.
michael@0 262 * Otherwise we convert or report the pair of bytes.
michael@0 263 */
michael@0 264 leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
michael@0 265 trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
michael@0 266 if (leadIsOk && trailIsOk) {
michael@0 267 tempBuf[0] = (char) (leadByte+0x80) ;
michael@0 268 tempBuf[1] = (char) (mySourceChar+0x80);
michael@0 269 targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
michael@0 270 tempBuf, 2, args->converter->useFallback);
michael@0 271 mySourceChar= (leadByte << 8) | mySourceChar;
michael@0 272 } else if (trailIsOk) {
michael@0 273 /* report a single illegal byte and continue with the following DBCS starter byte */
michael@0 274 --mySource;
michael@0 275 mySourceChar = (int32_t)leadByte;
michael@0 276 } else {
michael@0 277 /* report a pair of illegal bytes if the second byte is not a DBCS starter */
michael@0 278 /* add another bit so that the code below writes 2 bytes in case of error */
michael@0 279 mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
michael@0 280 }
michael@0 281 args->converter->toUnicodeStatus =0x00;
michael@0 282 }
michael@0 283 }
michael@0 284 else{
michael@0 285 if(mySourceChar == UCNV_TILDE) {
michael@0 286 args->converter->mode = UCNV_TILDE;
michael@0 287 continue;
michael@0 288 } else if(mySourceChar <= 0x7f) {
michael@0 289 targetUniChar = (UChar)mySourceChar; /* ASCII */
michael@0 290 myData->isEmptySegment = FALSE; /* the segment has something valid */
michael@0 291 } else {
michael@0 292 targetUniChar = 0xffff;
michael@0 293 myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
michael@0 294 }
michael@0 295 }
michael@0 296 if(targetUniChar < 0xfffe){
michael@0 297 if(args->offsets) {
michael@0 298 args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
michael@0 299 }
michael@0 300
michael@0 301 *(myTarget++)=(UChar)targetUniChar;
michael@0 302 }
michael@0 303 else /* targetUniChar>=0xfffe */ {
michael@0 304 if(targetUniChar == 0xfffe){
michael@0 305 *err = U_INVALID_CHAR_FOUND;
michael@0 306 }
michael@0 307 else{
michael@0 308 *err = U_ILLEGAL_CHAR_FOUND;
michael@0 309 }
michael@0 310 if(mySourceChar > 0xff){
michael@0 311 args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
michael@0 312 args->converter->toUBytes[1] = (uint8_t)mySourceChar;
michael@0 313 args->converter->toULength=2;
michael@0 314 }
michael@0 315 else{
michael@0 316 args->converter->toUBytes[0] = (uint8_t)mySourceChar;
michael@0 317 args->converter->toULength=1;
michael@0 318 }
michael@0 319 break;
michael@0 320 }
michael@0 321 }
michael@0 322 else{
michael@0 323 *err =U_BUFFER_OVERFLOW_ERROR;
michael@0 324 break;
michael@0 325 }
michael@0 326 }
michael@0 327
michael@0 328 args->target = myTarget;
michael@0 329 args->source = mySource;
michael@0 330 }
michael@0 331
michael@0 332
michael@0 333 static void
michael@0 334 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
michael@0 335 UErrorCode * err){
michael@0 336 const UChar *mySource = args->source;
michael@0 337 char *myTarget = args->target;
michael@0 338 int32_t* offsets = args->offsets;
michael@0 339 int32_t mySourceIndex = 0;
michael@0 340 int32_t myTargetIndex = 0;
michael@0 341 int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
michael@0 342 int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
michael@0 343 int32_t length=0;
michael@0 344 uint32_t targetUniChar = 0x0000;
michael@0 345 UChar32 mySourceChar = 0x0000;
michael@0 346 UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
michael@0 347 UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
michael@0 348 UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
michael@0 349 int len =0;
michael@0 350 const char* escSeq=NULL;
michael@0 351
michael@0 352 /* Calling code already handles this situation. */
michael@0 353 /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
michael@0 354 *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 355 return;
michael@0 356 }*/
michael@0 357 if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
michael@0 358 goto getTrail;
michael@0 359 }
michael@0 360 /*writing the char to the output stream */
michael@0 361 while (mySourceIndex < mySourceLength){
michael@0 362 targetUniChar = missingCharMarker;
michael@0 363 if (myTargetIndex < targetLength){
michael@0 364
michael@0 365 mySourceChar = (UChar) mySource[mySourceIndex++];
michael@0 366
michael@0 367
michael@0 368 oldIsTargetUCharDBCS = isTargetUCharDBCS;
michael@0 369 if(mySourceChar ==UCNV_TILDE){
michael@0 370 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
michael@0 371 len = ESC_LEN;
michael@0 372 escSeq = TILDE_ESCAPE;
michael@0 373 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
michael@0 374 continue;
michael@0 375 } else if(mySourceChar <= 0x7f) {
michael@0 376 length = 1;
michael@0 377 targetUniChar = mySourceChar;
michael@0 378 } else {
michael@0 379 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
michael@0 380 mySourceChar,&targetUniChar,args->converter->useFallback);
michael@0 381 /* we can only use lead bytes 21..7D and trail bytes 21..7E */
michael@0 382 if( length == 2 &&
michael@0 383 (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
michael@0 384 (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
michael@0 385 ) {
michael@0 386 targetUniChar -= 0x8080;
michael@0 387 } else {
michael@0 388 targetUniChar = missingCharMarker;
michael@0 389 }
michael@0 390 }
michael@0 391 if (targetUniChar != missingCharMarker){
michael@0 392 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
michael@0 393 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
michael@0 394 /*Shifting from a double byte to single byte mode*/
michael@0 395 if(!isTargetUCharDBCS){
michael@0 396 len =ESC_LEN;
michael@0 397 escSeq = SB_ESCAPE;
michael@0 398 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
michael@0 399 myConverterData->isEscapeAppended = TRUE;
michael@0 400 }
michael@0 401 else{ /* Shifting from a single byte to double byte mode*/
michael@0 402 len =ESC_LEN;
michael@0 403 escSeq = DB_ESCAPE;
michael@0 404 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
michael@0 405 myConverterData->isEscapeAppended = TRUE;
michael@0 406
michael@0 407 }
michael@0 408 }
michael@0 409
michael@0 410 if(isTargetUCharDBCS){
michael@0 411 if( myTargetIndex <targetLength){
michael@0 412 myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
michael@0 413 if(offsets){
michael@0 414 *(offsets++) = mySourceIndex-1;
michael@0 415 }
michael@0 416 if(myTargetIndex < targetLength){
michael@0 417 myTarget[myTargetIndex++] =(char) targetUniChar;
michael@0 418 if(offsets){
michael@0 419 *(offsets++) = mySourceIndex-1;
michael@0 420 }
michael@0 421 }else{
michael@0 422 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
michael@0 423 *err = U_BUFFER_OVERFLOW_ERROR;
michael@0 424 }
michael@0 425 }else{
michael@0 426 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
michael@0 427 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
michael@0 428 *err = U_BUFFER_OVERFLOW_ERROR;
michael@0 429 }
michael@0 430
michael@0 431 }else{
michael@0 432 if( myTargetIndex <targetLength){
michael@0 433 myTarget[myTargetIndex++] = (char) (targetUniChar );
michael@0 434 if(offsets){
michael@0 435 *(offsets++) = mySourceIndex-1;
michael@0 436 }
michael@0 437
michael@0 438 }else{
michael@0 439 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
michael@0 440 *err = U_BUFFER_OVERFLOW_ERROR;
michael@0 441 }
michael@0 442 }
michael@0 443
michael@0 444 }
michael@0 445 else{
michael@0 446 /* oops.. the code point is unassigned */
michael@0 447 /*Handle surrogates */
michael@0 448 /*check if the char is a First surrogate*/
michael@0 449 if(U16_IS_SURROGATE(mySourceChar)) {
michael@0 450 if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
michael@0 451 args->converter->fromUChar32=mySourceChar;
michael@0 452 getTrail:
michael@0 453 /*look ahead to find the trail surrogate*/
michael@0 454 if(mySourceIndex < mySourceLength) {
michael@0 455 /* test the following code unit */
michael@0 456 UChar trail=(UChar) args->source[mySourceIndex];
michael@0 457 if(U16_IS_TRAIL(trail)) {
michael@0 458 ++mySourceIndex;
michael@0 459 mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
michael@0 460 args->converter->fromUChar32=0x00;
michael@0 461 /* there are no surrogates in GB2312*/
michael@0 462 *err = U_INVALID_CHAR_FOUND;
michael@0 463 /* exit this condition tree */
michael@0 464 } else {
michael@0 465 /* this is an unmatched lead code unit (1st surrogate) */
michael@0 466 /* callback(illegal) */
michael@0 467 *err=U_ILLEGAL_CHAR_FOUND;
michael@0 468 }
michael@0 469 } else {
michael@0 470 /* no more input */
michael@0 471 *err = U_ZERO_ERROR;
michael@0 472 }
michael@0 473 } else {
michael@0 474 /* this is an unmatched trail code unit (2nd surrogate) */
michael@0 475 /* callback(illegal) */
michael@0 476 *err=U_ILLEGAL_CHAR_FOUND;
michael@0 477 }
michael@0 478 } else {
michael@0 479 /* callback(unassigned) for a BMP code point */
michael@0 480 *err = U_INVALID_CHAR_FOUND;
michael@0 481 }
michael@0 482
michael@0 483 args->converter->fromUChar32=mySourceChar;
michael@0 484 break;
michael@0 485 }
michael@0 486 }
michael@0 487 else{
michael@0 488 *err = U_BUFFER_OVERFLOW_ERROR;
michael@0 489 break;
michael@0 490 }
michael@0 491 targetUniChar=missingCharMarker;
michael@0 492 }
michael@0 493
michael@0 494 args->target += myTargetIndex;
michael@0 495 args->source += mySourceIndex;
michael@0 496 myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
michael@0 497 }
michael@0 498
michael@0 499 static void
michael@0 500 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
michael@0 501 UConverter *cnv = args->converter;
michael@0 502 UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
michael@0 503 char *p;
michael@0 504 char buffer[4];
michael@0 505 p = buffer;
michael@0 506
michael@0 507 if( convData->isTargetUCharDBCS){
michael@0 508 *p++= UCNV_TILDE;
michael@0 509 *p++= UCNV_CLOSE_BRACE;
michael@0 510 convData->isTargetUCharDBCS=FALSE;
michael@0 511 }
michael@0 512 *p++= (char)cnv->subChars[0];
michael@0 513
michael@0 514 ucnv_cbFromUWriteBytes(args,
michael@0 515 buffer, (int32_t)(p - buffer),
michael@0 516 offsetIndex, err);
michael@0 517 }
michael@0 518
michael@0 519 /*
michael@0 520 * Structure for cloning an HZ converter into a single memory block.
michael@0 521 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
michael@0 522 * and then ucnv_safeClone() of the sub-converter may additionally align
michael@0 523 * subCnv inside the cloneHZStruct, for which we need the deadSpace after
michael@0 524 * subCnv. This is because UAlignedMemory may be larger than the actually
michael@0 525 * necessary alignment size for the platform.
michael@0 526 * The other cloneHZStruct fields will not be moved around,
michael@0 527 * and are aligned properly with cloneHZStruct's alignment.
michael@0 528 */
michael@0 529 struct cloneHZStruct
michael@0 530 {
michael@0 531 UConverter cnv;
michael@0 532 UConverter subCnv;
michael@0 533 UAlignedMemory deadSpace;
michael@0 534 UConverterDataHZ mydata;
michael@0 535 };
michael@0 536
michael@0 537
michael@0 538 static UConverter *
michael@0 539 _HZ_SafeClone(const UConverter *cnv,
michael@0 540 void *stackBuffer,
michael@0 541 int32_t *pBufferSize,
michael@0 542 UErrorCode *status)
michael@0 543 {
michael@0 544 struct cloneHZStruct * localClone;
michael@0 545 int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
michael@0 546
michael@0 547 if (U_FAILURE(*status)){
michael@0 548 return 0;
michael@0 549 }
michael@0 550
michael@0 551 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
michael@0 552 *pBufferSize = bufferSizeNeeded;
michael@0 553 return 0;
michael@0 554 }
michael@0 555
michael@0 556 localClone = (struct cloneHZStruct *)stackBuffer;
michael@0 557 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
michael@0 558
michael@0 559 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
michael@0 560 localClone->cnv.extraInfo = &localClone->mydata;
michael@0 561 localClone->cnv.isExtraLocal = TRUE;
michael@0 562
michael@0 563 /* deep-clone the sub-converter */
michael@0 564 size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
michael@0 565 ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
michael@0 566 ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
michael@0 567
michael@0 568 return &localClone->cnv;
michael@0 569 }
michael@0 570
michael@0 571 static void
michael@0 572 _HZ_GetUnicodeSet(const UConverter *cnv,
michael@0 573 const USetAdder *sa,
michael@0 574 UConverterUnicodeSet which,
michael@0 575 UErrorCode *pErrorCode) {
michael@0 576 /* HZ converts all of ASCII */
michael@0 577 sa->addRange(sa->set, 0, 0x7f);
michael@0 578
michael@0 579 /* add all of the code points that the sub-converter handles */
michael@0 580 ucnv_MBCSGetFilteredUnicodeSetForUnicode(
michael@0 581 ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
michael@0 582 sa, which, UCNV_SET_FILTER_HZ,
michael@0 583 pErrorCode);
michael@0 584 }
michael@0 585
michael@0 586 static const UConverterImpl _HZImpl={
michael@0 587
michael@0 588 UCNV_HZ,
michael@0 589
michael@0 590 NULL,
michael@0 591 NULL,
michael@0 592
michael@0 593 _HZOpen,
michael@0 594 _HZClose,
michael@0 595 _HZReset,
michael@0 596
michael@0 597 UConverter_toUnicode_HZ_OFFSETS_LOGIC,
michael@0 598 UConverter_toUnicode_HZ_OFFSETS_LOGIC,
michael@0 599 UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
michael@0 600 UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
michael@0 601 NULL,
michael@0 602
michael@0 603 NULL,
michael@0 604 NULL,
michael@0 605 _HZ_WriteSub,
michael@0 606 _HZ_SafeClone,
michael@0 607 _HZ_GetUnicodeSet
michael@0 608 };
michael@0 609
michael@0 610 static const UConverterStaticData _HZStaticData={
michael@0 611 sizeof(UConverterStaticData),
michael@0 612 "HZ",
michael@0 613 0,
michael@0 614 UCNV_IBM,
michael@0 615 UCNV_HZ,
michael@0 616 1,
michael@0 617 4,
michael@0 618 { 0x1a, 0, 0, 0 },
michael@0 619 1,
michael@0 620 FALSE,
michael@0 621 FALSE,
michael@0 622 0,
michael@0 623 0,
michael@0 624 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
michael@0 625
michael@0 626 };
michael@0 627
michael@0 628
michael@0 629 const UConverterSharedData _HZData={
michael@0 630 sizeof(UConverterSharedData),
michael@0 631 ~((uint32_t) 0),
michael@0 632 NULL,
michael@0 633 NULL,
michael@0 634 &_HZStaticData,
michael@0 635 FALSE,
michael@0 636 &_HZImpl,
michael@0 637 0
michael@0 638 };
michael@0 639
michael@0 640 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

mercurial