1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnvhz.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,640 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2000-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* file name: ucnvhz.c 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2000oct16 1.15 +* created by: Ram Viswanadha 1.16 +* 10/31/2000 Ram Implemented offsets logic function 1.17 +* 1.18 +*/ 1.19 + 1.20 +#include "unicode/utypes.h" 1.21 + 1.22 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 1.23 + 1.24 +#include "cmemory.h" 1.25 +#include "unicode/ucnv.h" 1.26 +#include "unicode/ucnv_cb.h" 1.27 +#include "unicode/uset.h" 1.28 +#include "unicode/utf16.h" 1.29 +#include "ucnv_bld.h" 1.30 +#include "ucnv_cnv.h" 1.31 +#include "ucnv_imp.h" 1.32 + 1.33 +#define UCNV_TILDE 0x7E /* ~ */ 1.34 +#define UCNV_OPEN_BRACE 0x7B /* { */ 1.35 +#define UCNV_CLOSE_BRACE 0x7D /* } */ 1.36 +#define SB_ESCAPE "\x7E\x7D" 1.37 +#define DB_ESCAPE "\x7E\x7B" 1.38 +#define TILDE_ESCAPE "\x7E\x7E" 1.39 +#define ESC_LEN 2 1.40 + 1.41 + 1.42 +#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \ 1.43 + while(len-->0){ \ 1.44 + if(targetIndex < targetLength){ \ 1.45 + args->target[targetIndex] = (unsigned char) *strToAppend; \ 1.46 + if(args->offsets!=NULL){ \ 1.47 + *(offsets++) = sourceIndex-1; \ 1.48 + } \ 1.49 + targetIndex++; \ 1.50 + } \ 1.51 + else{ \ 1.52 + args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \ 1.53 + *err =U_BUFFER_OVERFLOW_ERROR; \ 1.54 + } \ 1.55 + strToAppend++; \ 1.56 + } \ 1.57 +} 1.58 + 1.59 + 1.60 +typedef struct{ 1.61 + UConverter* gbConverter; 1.62 + int32_t targetIndex; 1.63 + int32_t sourceIndex; 1.64 + UBool isEscapeAppended; 1.65 + UBool isStateDBCS; 1.66 + UBool isTargetUCharDBCS; 1.67 + UBool isEmptySegment; 1.68 +}UConverterDataHZ; 1.69 + 1.70 + 1.71 + 1.72 +static void 1.73 +_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ 1.74 + UConverter *gbConverter; 1.75 + if(pArgs->onlyTestIsLoadable) { 1.76 + ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ 1.77 + return; 1.78 + } 1.79 + gbConverter = ucnv_open("GBK", errorCode); 1.80 + if(U_FAILURE(*errorCode)) { 1.81 + return; 1.82 + } 1.83 + cnv->toUnicodeStatus = 0; 1.84 + cnv->fromUnicodeStatus= 0; 1.85 + cnv->mode=0; 1.86 + cnv->fromUChar32=0x0000; 1.87 + cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); 1.88 + if(cnv->extraInfo != NULL){ 1.89 + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; 1.90 + } 1.91 + else { 1.92 + ucnv_close(gbConverter); 1.93 + *errorCode = U_MEMORY_ALLOCATION_ERROR; 1.94 + return; 1.95 + } 1.96 +} 1.97 + 1.98 +static void 1.99 +_HZClose(UConverter *cnv){ 1.100 + if(cnv->extraInfo != NULL) { 1.101 + ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); 1.102 + if(!cnv->isExtraLocal) { 1.103 + uprv_free(cnv->extraInfo); 1.104 + } 1.105 + cnv->extraInfo = NULL; 1.106 + } 1.107 +} 1.108 + 1.109 +static void 1.110 +_HZReset(UConverter *cnv, UConverterResetChoice choice){ 1.111 + if(choice<=UCNV_RESET_TO_UNICODE) { 1.112 + cnv->toUnicodeStatus = 0; 1.113 + cnv->mode=0; 1.114 + if(cnv->extraInfo != NULL){ 1.115 + ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; 1.116 + ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; 1.117 + } 1.118 + } 1.119 + if(choice!=UCNV_RESET_TO_UNICODE) { 1.120 + cnv->fromUnicodeStatus= 0; 1.121 + cnv->fromUChar32=0x0000; 1.122 + if(cnv->extraInfo != NULL){ 1.123 + ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; 1.124 + ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; 1.125 + ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; 1.126 + ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; 1.127 + } 1.128 + } 1.129 +} 1.130 + 1.131 +/**************************************HZ Encoding************************************************* 1.132 +* Rules for HZ encoding 1.133 +* 1.134 +* In ASCII mode, a byte is interpreted as an ASCII character, unless a 1.135 +* '~' is encountered. The character '~' is an escape character. By 1.136 +* convention, it must be immediately followed ONLY by '~', '{' or '\n' 1.137 +* (<LF>), with the following special meaning. 1.138 + 1.139 +* 1. The escape sequence '~~' is interpreted as a '~'. 1.140 +* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. 1.141 +* 3. The escape sequence '~\n' is a line-continuation marker to be 1.142 +* consumed with no output produced. 1.143 +* In GB mode, characters are interpreted two bytes at a time as (pure) 1.144 +* GB codes until the escape-from-GB code '~}' is read. This code 1.145 +* switches the mode from GB back to ASCII. (Note that the escape- 1.146 +* from-GB code '~}' ($7E7D) is outside the defined GB range.) 1.147 +* 1.148 +* Source: RFC 1842 1.149 +* 1.150 +* Note that the formal syntax in RFC 1842 is invalid. I assume that the 1.151 +* intended definition of single-byte-segment is as follows (pedberg): 1.152 +* single-byte-segment = single-byte-seq 1*single-byte-char 1.153 +*/ 1.154 + 1.155 + 1.156 +static void 1.157 +UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 1.158 + UErrorCode* err){ 1.159 + char tempBuf[2]; 1.160 + const char *mySource = ( char *) args->source; 1.161 + UChar *myTarget = args->target; 1.162 + const char *mySourceLimit = args->sourceLimit; 1.163 + UChar32 targetUniChar = 0x0000; 1.164 + int32_t mySourceChar = 0x0000; 1.165 + UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); 1.166 + tempBuf[0]=0; 1.167 + tempBuf[1]=0; 1.168 + 1.169 + /* Calling code already handles this situation. */ 1.170 + /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ 1.171 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.172 + return; 1.173 + }*/ 1.174 + 1.175 + while(mySource< mySourceLimit){ 1.176 + 1.177 + if(myTarget < args->targetLimit){ 1.178 + 1.179 + mySourceChar= (unsigned char) *mySource++; 1.180 + 1.181 + if(args->converter->mode == UCNV_TILDE) { 1.182 + /* second byte after ~ */ 1.183 + args->converter->mode=0; 1.184 + switch(mySourceChar) { 1.185 + case 0x0A: 1.186 + /* no output for ~\n (line-continuation marker) */ 1.187 + continue; 1.188 + case UCNV_TILDE: 1.189 + if(args->offsets) { 1.190 + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); 1.191 + } 1.192 + *(myTarget++)=(UChar)mySourceChar; 1.193 + myData->isEmptySegment = FALSE; 1.194 + continue; 1.195 + case UCNV_OPEN_BRACE: 1.196 + case UCNV_CLOSE_BRACE: 1.197 + myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); 1.198 + if (myData->isEmptySegment) { 1.199 + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ 1.200 + *err = U_ILLEGAL_ESCAPE_SEQUENCE; 1.201 + args->converter->toUCallbackReason = UCNV_IRREGULAR; 1.202 + args->converter->toUBytes[0] = UCNV_TILDE; 1.203 + args->converter->toUBytes[1] = mySourceChar; 1.204 + args->converter->toULength = 2; 1.205 + args->target = myTarget; 1.206 + args->source = mySource; 1.207 + return; 1.208 + } 1.209 + myData->isEmptySegment = TRUE; 1.210 + continue; 1.211 + default: 1.212 + /* if the first byte is equal to TILDE and the trail byte 1.213 + * is not a valid byte then it is an error condition 1.214 + */ 1.215 + /* 1.216 + * Ticket 5691: consistent illegal sequences: 1.217 + * - We include at least the first byte in the illegal sequence. 1.218 + * - If any of the non-initial bytes could be the start of a character, 1.219 + * we stop the illegal sequence before the first one of those. 1.220 + */ 1.221 + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ 1.222 + *err = U_ILLEGAL_ESCAPE_SEQUENCE; 1.223 + args->converter->toUBytes[0] = UCNV_TILDE; 1.224 + if( myData->isStateDBCS ? 1.225 + (0x21 <= mySourceChar && mySourceChar <= 0x7e) : 1.226 + mySourceChar <= 0x7f 1.227 + ) { 1.228 + /* The current byte could be the start of a character: Back it out. */ 1.229 + args->converter->toULength = 1; 1.230 + --mySource; 1.231 + } else { 1.232 + /* Include the current byte in the illegal sequence. */ 1.233 + args->converter->toUBytes[1] = mySourceChar; 1.234 + args->converter->toULength = 2; 1.235 + } 1.236 + args->target = myTarget; 1.237 + args->source = mySource; 1.238 + return; 1.239 + } 1.240 + } else if(myData->isStateDBCS) { 1.241 + if(args->converter->toUnicodeStatus == 0x00){ 1.242 + /* lead byte */ 1.243 + if(mySourceChar == UCNV_TILDE) { 1.244 + args->converter->mode = UCNV_TILDE; 1.245 + } else { 1.246 + /* add another bit to distinguish a 0 byte from not having seen a lead byte */ 1.247 + args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); 1.248 + myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ 1.249 + } 1.250 + continue; 1.251 + } 1.252 + else{ 1.253 + /* trail byte */ 1.254 + int leadIsOk, trailIsOk; 1.255 + uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; 1.256 + targetUniChar = 0xffff; 1.257 + /* 1.258 + * Ticket 5691: consistent illegal sequences: 1.259 + * - We include at least the first byte in the illegal sequence. 1.260 + * - If any of the non-initial bytes could be the start of a character, 1.261 + * we stop the illegal sequence before the first one of those. 1.262 + * 1.263 + * In HZ DBCS, if the second byte is in the 21..7e range, 1.264 + * we report only the first byte as the illegal sequence. 1.265 + * Otherwise we convert or report the pair of bytes. 1.266 + */ 1.267 + leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); 1.268 + trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 1.269 + if (leadIsOk && trailIsOk) { 1.270 + tempBuf[0] = (char) (leadByte+0x80) ; 1.271 + tempBuf[1] = (char) (mySourceChar+0x80); 1.272 + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, 1.273 + tempBuf, 2, args->converter->useFallback); 1.274 + mySourceChar= (leadByte << 8) | mySourceChar; 1.275 + } else if (trailIsOk) { 1.276 + /* report a single illegal byte and continue with the following DBCS starter byte */ 1.277 + --mySource; 1.278 + mySourceChar = (int32_t)leadByte; 1.279 + } else { 1.280 + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 1.281 + /* add another bit so that the code below writes 2 bytes in case of error */ 1.282 + mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; 1.283 + } 1.284 + args->converter->toUnicodeStatus =0x00; 1.285 + } 1.286 + } 1.287 + else{ 1.288 + if(mySourceChar == UCNV_TILDE) { 1.289 + args->converter->mode = UCNV_TILDE; 1.290 + continue; 1.291 + } else if(mySourceChar <= 0x7f) { 1.292 + targetUniChar = (UChar)mySourceChar; /* ASCII */ 1.293 + myData->isEmptySegment = FALSE; /* the segment has something valid */ 1.294 + } else { 1.295 + targetUniChar = 0xffff; 1.296 + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ 1.297 + } 1.298 + } 1.299 + if(targetUniChar < 0xfffe){ 1.300 + if(args->offsets) { 1.301 + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); 1.302 + } 1.303 + 1.304 + *(myTarget++)=(UChar)targetUniChar; 1.305 + } 1.306 + else /* targetUniChar>=0xfffe */ { 1.307 + if(targetUniChar == 0xfffe){ 1.308 + *err = U_INVALID_CHAR_FOUND; 1.309 + } 1.310 + else{ 1.311 + *err = U_ILLEGAL_CHAR_FOUND; 1.312 + } 1.313 + if(mySourceChar > 0xff){ 1.314 + args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); 1.315 + args->converter->toUBytes[1] = (uint8_t)mySourceChar; 1.316 + args->converter->toULength=2; 1.317 + } 1.318 + else{ 1.319 + args->converter->toUBytes[0] = (uint8_t)mySourceChar; 1.320 + args->converter->toULength=1; 1.321 + } 1.322 + break; 1.323 + } 1.324 + } 1.325 + else{ 1.326 + *err =U_BUFFER_OVERFLOW_ERROR; 1.327 + break; 1.328 + } 1.329 + } 1.330 + 1.331 + args->target = myTarget; 1.332 + args->source = mySource; 1.333 +} 1.334 + 1.335 + 1.336 +static void 1.337 +UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, 1.338 + UErrorCode * err){ 1.339 + const UChar *mySource = args->source; 1.340 + char *myTarget = args->target; 1.341 + int32_t* offsets = args->offsets; 1.342 + int32_t mySourceIndex = 0; 1.343 + int32_t myTargetIndex = 0; 1.344 + int32_t targetLength = (int32_t)(args->targetLimit - myTarget); 1.345 + int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); 1.346 + int32_t length=0; 1.347 + uint32_t targetUniChar = 0x0000; 1.348 + UChar32 mySourceChar = 0x0000; 1.349 + UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; 1.350 + UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; 1.351 + UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; 1.352 + int len =0; 1.353 + const char* escSeq=NULL; 1.354 + 1.355 + /* Calling code already handles this situation. */ 1.356 + /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ 1.357 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.358 + return; 1.359 + }*/ 1.360 + if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { 1.361 + goto getTrail; 1.362 + } 1.363 + /*writing the char to the output stream */ 1.364 + while (mySourceIndex < mySourceLength){ 1.365 + targetUniChar = missingCharMarker; 1.366 + if (myTargetIndex < targetLength){ 1.367 + 1.368 + mySourceChar = (UChar) mySource[mySourceIndex++]; 1.369 + 1.370 + 1.371 + oldIsTargetUCharDBCS = isTargetUCharDBCS; 1.372 + if(mySourceChar ==UCNV_TILDE){ 1.373 + /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ 1.374 + len = ESC_LEN; 1.375 + escSeq = TILDE_ESCAPE; 1.376 + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); 1.377 + continue; 1.378 + } else if(mySourceChar <= 0x7f) { 1.379 + length = 1; 1.380 + targetUniChar = mySourceChar; 1.381 + } else { 1.382 + length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, 1.383 + mySourceChar,&targetUniChar,args->converter->useFallback); 1.384 + /* we can only use lead bytes 21..7D and trail bytes 21..7E */ 1.385 + if( length == 2 && 1.386 + (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && 1.387 + (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) 1.388 + ) { 1.389 + targetUniChar -= 0x8080; 1.390 + } else { 1.391 + targetUniChar = missingCharMarker; 1.392 + } 1.393 + } 1.394 + if (targetUniChar != missingCharMarker){ 1.395 + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); 1.396 + if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ 1.397 + /*Shifting from a double byte to single byte mode*/ 1.398 + if(!isTargetUCharDBCS){ 1.399 + len =ESC_LEN; 1.400 + escSeq = SB_ESCAPE; 1.401 + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); 1.402 + myConverterData->isEscapeAppended = TRUE; 1.403 + } 1.404 + else{ /* Shifting from a single byte to double byte mode*/ 1.405 + len =ESC_LEN; 1.406 + escSeq = DB_ESCAPE; 1.407 + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); 1.408 + myConverterData->isEscapeAppended = TRUE; 1.409 + 1.410 + } 1.411 + } 1.412 + 1.413 + if(isTargetUCharDBCS){ 1.414 + if( myTargetIndex <targetLength){ 1.415 + myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); 1.416 + if(offsets){ 1.417 + *(offsets++) = mySourceIndex-1; 1.418 + } 1.419 + if(myTargetIndex < targetLength){ 1.420 + myTarget[myTargetIndex++] =(char) targetUniChar; 1.421 + if(offsets){ 1.422 + *(offsets++) = mySourceIndex-1; 1.423 + } 1.424 + }else{ 1.425 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; 1.426 + *err = U_BUFFER_OVERFLOW_ERROR; 1.427 + } 1.428 + }else{ 1.429 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); 1.430 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; 1.431 + *err = U_BUFFER_OVERFLOW_ERROR; 1.432 + } 1.433 + 1.434 + }else{ 1.435 + if( myTargetIndex <targetLength){ 1.436 + myTarget[myTargetIndex++] = (char) (targetUniChar ); 1.437 + if(offsets){ 1.438 + *(offsets++) = mySourceIndex-1; 1.439 + } 1.440 + 1.441 + }else{ 1.442 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; 1.443 + *err = U_BUFFER_OVERFLOW_ERROR; 1.444 + } 1.445 + } 1.446 + 1.447 + } 1.448 + else{ 1.449 + /* oops.. the code point is unassigned */ 1.450 + /*Handle surrogates */ 1.451 + /*check if the char is a First surrogate*/ 1.452 + if(U16_IS_SURROGATE(mySourceChar)) { 1.453 + if(U16_IS_SURROGATE_LEAD(mySourceChar)) { 1.454 + args->converter->fromUChar32=mySourceChar; 1.455 +getTrail: 1.456 + /*look ahead to find the trail surrogate*/ 1.457 + if(mySourceIndex < mySourceLength) { 1.458 + /* test the following code unit */ 1.459 + UChar trail=(UChar) args->source[mySourceIndex]; 1.460 + if(U16_IS_TRAIL(trail)) { 1.461 + ++mySourceIndex; 1.462 + mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); 1.463 + args->converter->fromUChar32=0x00; 1.464 + /* there are no surrogates in GB2312*/ 1.465 + *err = U_INVALID_CHAR_FOUND; 1.466 + /* exit this condition tree */ 1.467 + } else { 1.468 + /* this is an unmatched lead code unit (1st surrogate) */ 1.469 + /* callback(illegal) */ 1.470 + *err=U_ILLEGAL_CHAR_FOUND; 1.471 + } 1.472 + } else { 1.473 + /* no more input */ 1.474 + *err = U_ZERO_ERROR; 1.475 + } 1.476 + } else { 1.477 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.478 + /* callback(illegal) */ 1.479 + *err=U_ILLEGAL_CHAR_FOUND; 1.480 + } 1.481 + } else { 1.482 + /* callback(unassigned) for a BMP code point */ 1.483 + *err = U_INVALID_CHAR_FOUND; 1.484 + } 1.485 + 1.486 + args->converter->fromUChar32=mySourceChar; 1.487 + break; 1.488 + } 1.489 + } 1.490 + else{ 1.491 + *err = U_BUFFER_OVERFLOW_ERROR; 1.492 + break; 1.493 + } 1.494 + targetUniChar=missingCharMarker; 1.495 + } 1.496 + 1.497 + args->target += myTargetIndex; 1.498 + args->source += mySourceIndex; 1.499 + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; 1.500 +} 1.501 + 1.502 +static void 1.503 +_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { 1.504 + UConverter *cnv = args->converter; 1.505 + UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; 1.506 + char *p; 1.507 + char buffer[4]; 1.508 + p = buffer; 1.509 + 1.510 + if( convData->isTargetUCharDBCS){ 1.511 + *p++= UCNV_TILDE; 1.512 + *p++= UCNV_CLOSE_BRACE; 1.513 + convData->isTargetUCharDBCS=FALSE; 1.514 + } 1.515 + *p++= (char)cnv->subChars[0]; 1.516 + 1.517 + ucnv_cbFromUWriteBytes(args, 1.518 + buffer, (int32_t)(p - buffer), 1.519 + offsetIndex, err); 1.520 +} 1.521 + 1.522 +/* 1.523 + * Structure for cloning an HZ converter into a single memory block. 1.524 + * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, 1.525 + * and then ucnv_safeClone() of the sub-converter may additionally align 1.526 + * subCnv inside the cloneHZStruct, for which we need the deadSpace after 1.527 + * subCnv. This is because UAlignedMemory may be larger than the actually 1.528 + * necessary alignment size for the platform. 1.529 + * The other cloneHZStruct fields will not be moved around, 1.530 + * and are aligned properly with cloneHZStruct's alignment. 1.531 + */ 1.532 +struct cloneHZStruct 1.533 +{ 1.534 + UConverter cnv; 1.535 + UConverter subCnv; 1.536 + UAlignedMemory deadSpace; 1.537 + UConverterDataHZ mydata; 1.538 +}; 1.539 + 1.540 + 1.541 +static UConverter * 1.542 +_HZ_SafeClone(const UConverter *cnv, 1.543 + void *stackBuffer, 1.544 + int32_t *pBufferSize, 1.545 + UErrorCode *status) 1.546 +{ 1.547 + struct cloneHZStruct * localClone; 1.548 + int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); 1.549 + 1.550 + if (U_FAILURE(*status)){ 1.551 + return 0; 1.552 + } 1.553 + 1.554 + if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 1.555 + *pBufferSize = bufferSizeNeeded; 1.556 + return 0; 1.557 + } 1.558 + 1.559 + localClone = (struct cloneHZStruct *)stackBuffer; 1.560 + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 1.561 + 1.562 + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); 1.563 + localClone->cnv.extraInfo = &localClone->mydata; 1.564 + localClone->cnv.isExtraLocal = TRUE; 1.565 + 1.566 + /* deep-clone the sub-converter */ 1.567 + size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ 1.568 + ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = 1.569 + ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); 1.570 + 1.571 + return &localClone->cnv; 1.572 +} 1.573 + 1.574 +static void 1.575 +_HZ_GetUnicodeSet(const UConverter *cnv, 1.576 + const USetAdder *sa, 1.577 + UConverterUnicodeSet which, 1.578 + UErrorCode *pErrorCode) { 1.579 + /* HZ converts all of ASCII */ 1.580 + sa->addRange(sa->set, 0, 0x7f); 1.581 + 1.582 + /* add all of the code points that the sub-converter handles */ 1.583 + ucnv_MBCSGetFilteredUnicodeSetForUnicode( 1.584 + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, 1.585 + sa, which, UCNV_SET_FILTER_HZ, 1.586 + pErrorCode); 1.587 +} 1.588 + 1.589 +static const UConverterImpl _HZImpl={ 1.590 + 1.591 + UCNV_HZ, 1.592 + 1.593 + NULL, 1.594 + NULL, 1.595 + 1.596 + _HZOpen, 1.597 + _HZClose, 1.598 + _HZReset, 1.599 + 1.600 + UConverter_toUnicode_HZ_OFFSETS_LOGIC, 1.601 + UConverter_toUnicode_HZ_OFFSETS_LOGIC, 1.602 + UConverter_fromUnicode_HZ_OFFSETS_LOGIC, 1.603 + UConverter_fromUnicode_HZ_OFFSETS_LOGIC, 1.604 + NULL, 1.605 + 1.606 + NULL, 1.607 + NULL, 1.608 + _HZ_WriteSub, 1.609 + _HZ_SafeClone, 1.610 + _HZ_GetUnicodeSet 1.611 +}; 1.612 + 1.613 +static const UConverterStaticData _HZStaticData={ 1.614 + sizeof(UConverterStaticData), 1.615 + "HZ", 1.616 + 0, 1.617 + UCNV_IBM, 1.618 + UCNV_HZ, 1.619 + 1, 1.620 + 4, 1.621 + { 0x1a, 0, 0, 0 }, 1.622 + 1, 1.623 + FALSE, 1.624 + FALSE, 1.625 + 0, 1.626 + 0, 1.627 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ 1.628 + 1.629 +}; 1.630 + 1.631 + 1.632 +const UConverterSharedData _HZData={ 1.633 + sizeof(UConverterSharedData), 1.634 + ~((uint32_t) 0), 1.635 + NULL, 1.636 + NULL, 1.637 + &_HZStaticData, 1.638 + FALSE, 1.639 + &_HZImpl, 1.640 + 0 1.641 +}; 1.642 + 1.643 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */