michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 2000-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * file name: ucnvhz.c michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2000oct16 michael@0: * created by: Ram Viswanadha michael@0: * 10/31/2000 Ram Implemented offsets logic function michael@0: * michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION michael@0: michael@0: #include "cmemory.h" michael@0: #include "unicode/ucnv.h" michael@0: #include "unicode/ucnv_cb.h" michael@0: #include "unicode/uset.h" michael@0: #include "unicode/utf16.h" michael@0: #include "ucnv_bld.h" michael@0: #include "ucnv_cnv.h" michael@0: #include "ucnv_imp.h" michael@0: michael@0: #define UCNV_TILDE 0x7E /* ~ */ michael@0: #define UCNV_OPEN_BRACE 0x7B /* { */ michael@0: #define UCNV_CLOSE_BRACE 0x7D /* } */ michael@0: #define SB_ESCAPE "\x7E\x7D" michael@0: #define DB_ESCAPE "\x7E\x7B" michael@0: #define TILDE_ESCAPE "\x7E\x7E" michael@0: #define ESC_LEN 2 michael@0: michael@0: michael@0: #define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \ michael@0: while(len-->0){ \ michael@0: if(targetIndex < targetLength){ \ michael@0: args->target[targetIndex] = (unsigned char) *strToAppend; \ michael@0: if(args->offsets!=NULL){ \ michael@0: *(offsets++) = sourceIndex-1; \ michael@0: } \ michael@0: targetIndex++; \ michael@0: } \ michael@0: else{ \ michael@0: args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \ michael@0: *err =U_BUFFER_OVERFLOW_ERROR; \ michael@0: } \ michael@0: strToAppend++; \ michael@0: } \ michael@0: } michael@0: michael@0: michael@0: typedef struct{ michael@0: UConverter* gbConverter; michael@0: int32_t targetIndex; michael@0: int32_t sourceIndex; michael@0: UBool isEscapeAppended; michael@0: UBool isStateDBCS; michael@0: UBool isTargetUCharDBCS; michael@0: UBool isEmptySegment; michael@0: }UConverterDataHZ; michael@0: michael@0: michael@0: michael@0: static void michael@0: _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ michael@0: UConverter *gbConverter; michael@0: if(pArgs->onlyTestIsLoadable) { michael@0: ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ michael@0: return; michael@0: } michael@0: gbConverter = ucnv_open("GBK", errorCode); michael@0: if(U_FAILURE(*errorCode)) { michael@0: return; michael@0: } michael@0: cnv->toUnicodeStatus = 0; michael@0: cnv->fromUnicodeStatus= 0; michael@0: cnv->mode=0; michael@0: cnv->fromUChar32=0x0000; michael@0: cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); michael@0: if(cnv->extraInfo != NULL){ michael@0: ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; michael@0: } michael@0: else { michael@0: ucnv_close(gbConverter); michael@0: *errorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: _HZClose(UConverter *cnv){ michael@0: if(cnv->extraInfo != NULL) { michael@0: ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); michael@0: if(!cnv->isExtraLocal) { michael@0: uprv_free(cnv->extraInfo); michael@0: } michael@0: cnv->extraInfo = NULL; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: _HZReset(UConverter *cnv, UConverterResetChoice choice){ michael@0: if(choice<=UCNV_RESET_TO_UNICODE) { michael@0: cnv->toUnicodeStatus = 0; michael@0: cnv->mode=0; michael@0: if(cnv->extraInfo != NULL){ michael@0: ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; michael@0: ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; michael@0: } michael@0: } michael@0: if(choice!=UCNV_RESET_TO_UNICODE) { michael@0: cnv->fromUnicodeStatus= 0; michael@0: cnv->fromUChar32=0x0000; michael@0: if(cnv->extraInfo != NULL){ michael@0: ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; michael@0: ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; michael@0: ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; michael@0: ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /**************************************HZ Encoding************************************************* michael@0: * Rules for HZ encoding michael@0: * michael@0: * In ASCII mode, a byte is interpreted as an ASCII character, unless a michael@0: * '~' is encountered. The character '~' is an escape character. By michael@0: * convention, it must be immediately followed ONLY by '~', '{' or '\n' michael@0: * (), with the following special meaning. michael@0: michael@0: * 1. The escape sequence '~~' is interpreted as a '~'. michael@0: * 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. michael@0: * 3. The escape sequence '~\n' is a line-continuation marker to be michael@0: * consumed with no output produced. michael@0: * In GB mode, characters are interpreted two bytes at a time as (pure) michael@0: * GB codes until the escape-from-GB code '~}' is read. This code michael@0: * switches the mode from GB back to ASCII. (Note that the escape- michael@0: * from-GB code '~}' ($7E7D) is outside the defined GB range.) michael@0: * michael@0: * Source: RFC 1842 michael@0: * michael@0: * Note that the formal syntax in RFC 1842 is invalid. I assume that the michael@0: * intended definition of single-byte-segment is as follows (pedberg): michael@0: * single-byte-segment = single-byte-seq 1*single-byte-char michael@0: */ michael@0: michael@0: michael@0: static void michael@0: UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, michael@0: UErrorCode* err){ michael@0: char tempBuf[2]; michael@0: const char *mySource = ( char *) args->source; michael@0: UChar *myTarget = args->target; michael@0: const char *mySourceLimit = args->sourceLimit; michael@0: UChar32 targetUniChar = 0x0000; michael@0: int32_t mySourceChar = 0x0000; michael@0: UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); michael@0: tempBuf[0]=0; michael@0: tempBuf[1]=0; michael@0: michael@0: /* Calling code already handles this situation. */ michael@0: /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: }*/ michael@0: michael@0: while(mySource< mySourceLimit){ michael@0: michael@0: if(myTarget < args->targetLimit){ michael@0: michael@0: mySourceChar= (unsigned char) *mySource++; michael@0: michael@0: if(args->converter->mode == UCNV_TILDE) { michael@0: /* second byte after ~ */ michael@0: args->converter->mode=0; michael@0: switch(mySourceChar) { michael@0: case 0x0A: michael@0: /* no output for ~\n (line-continuation marker) */ michael@0: continue; michael@0: case UCNV_TILDE: michael@0: if(args->offsets) { michael@0: args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); michael@0: } michael@0: *(myTarget++)=(UChar)mySourceChar; michael@0: myData->isEmptySegment = FALSE; michael@0: continue; michael@0: case UCNV_OPEN_BRACE: michael@0: case UCNV_CLOSE_BRACE: michael@0: myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); michael@0: if (myData->isEmptySegment) { michael@0: myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ michael@0: *err = U_ILLEGAL_ESCAPE_SEQUENCE; michael@0: args->converter->toUCallbackReason = UCNV_IRREGULAR; michael@0: args->converter->toUBytes[0] = UCNV_TILDE; michael@0: args->converter->toUBytes[1] = mySourceChar; michael@0: args->converter->toULength = 2; michael@0: args->target = myTarget; michael@0: args->source = mySource; michael@0: return; michael@0: } michael@0: myData->isEmptySegment = TRUE; michael@0: continue; michael@0: default: michael@0: /* if the first byte is equal to TILDE and the trail byte michael@0: * is not a valid byte then it is an error condition michael@0: */ michael@0: /* michael@0: * Ticket 5691: consistent illegal sequences: michael@0: * - We include at least the first byte in the illegal sequence. michael@0: * - If any of the non-initial bytes could be the start of a character, michael@0: * we stop the illegal sequence before the first one of those. michael@0: */ michael@0: myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ michael@0: *err = U_ILLEGAL_ESCAPE_SEQUENCE; michael@0: args->converter->toUBytes[0] = UCNV_TILDE; michael@0: if( myData->isStateDBCS ? michael@0: (0x21 <= mySourceChar && mySourceChar <= 0x7e) : michael@0: mySourceChar <= 0x7f michael@0: ) { michael@0: /* The current byte could be the start of a character: Back it out. */ michael@0: args->converter->toULength = 1; michael@0: --mySource; michael@0: } else { michael@0: /* Include the current byte in the illegal sequence. */ michael@0: args->converter->toUBytes[1] = mySourceChar; michael@0: args->converter->toULength = 2; michael@0: } michael@0: args->target = myTarget; michael@0: args->source = mySource; michael@0: return; michael@0: } michael@0: } else if(myData->isStateDBCS) { michael@0: if(args->converter->toUnicodeStatus == 0x00){ michael@0: /* lead byte */ michael@0: if(mySourceChar == UCNV_TILDE) { michael@0: args->converter->mode = UCNV_TILDE; michael@0: } else { michael@0: /* add another bit to distinguish a 0 byte from not having seen a lead byte */ michael@0: args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); michael@0: myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ michael@0: } michael@0: continue; michael@0: } michael@0: else{ michael@0: /* trail byte */ michael@0: int leadIsOk, trailIsOk; michael@0: uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; michael@0: targetUniChar = 0xffff; michael@0: /* michael@0: * Ticket 5691: consistent illegal sequences: michael@0: * - We include at least the first byte in the illegal sequence. michael@0: * - If any of the non-initial bytes could be the start of a character, michael@0: * we stop the illegal sequence before the first one of those. michael@0: * michael@0: * In HZ DBCS, if the second byte is in the 21..7e range, michael@0: * we report only the first byte as the illegal sequence. michael@0: * Otherwise we convert or report the pair of bytes. michael@0: */ michael@0: leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); michael@0: trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); michael@0: if (leadIsOk && trailIsOk) { michael@0: tempBuf[0] = (char) (leadByte+0x80) ; michael@0: tempBuf[1] = (char) (mySourceChar+0x80); michael@0: targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, michael@0: tempBuf, 2, args->converter->useFallback); michael@0: mySourceChar= (leadByte << 8) | mySourceChar; michael@0: } else if (trailIsOk) { michael@0: /* report a single illegal byte and continue with the following DBCS starter byte */ michael@0: --mySource; michael@0: mySourceChar = (int32_t)leadByte; michael@0: } else { michael@0: /* report a pair of illegal bytes if the second byte is not a DBCS starter */ michael@0: /* add another bit so that the code below writes 2 bytes in case of error */ michael@0: mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; michael@0: } michael@0: args->converter->toUnicodeStatus =0x00; michael@0: } michael@0: } michael@0: else{ michael@0: if(mySourceChar == UCNV_TILDE) { michael@0: args->converter->mode = UCNV_TILDE; michael@0: continue; michael@0: } else if(mySourceChar <= 0x7f) { michael@0: targetUniChar = (UChar)mySourceChar; /* ASCII */ michael@0: myData->isEmptySegment = FALSE; /* the segment has something valid */ michael@0: } else { michael@0: targetUniChar = 0xffff; michael@0: myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ michael@0: } michael@0: } michael@0: if(targetUniChar < 0xfffe){ michael@0: if(args->offsets) { michael@0: args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); michael@0: } michael@0: michael@0: *(myTarget++)=(UChar)targetUniChar; michael@0: } michael@0: else /* targetUniChar>=0xfffe */ { michael@0: if(targetUniChar == 0xfffe){ michael@0: *err = U_INVALID_CHAR_FOUND; michael@0: } michael@0: else{ michael@0: *err = U_ILLEGAL_CHAR_FOUND; michael@0: } michael@0: if(mySourceChar > 0xff){ michael@0: args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); michael@0: args->converter->toUBytes[1] = (uint8_t)mySourceChar; michael@0: args->converter->toULength=2; michael@0: } michael@0: else{ michael@0: args->converter->toUBytes[0] = (uint8_t)mySourceChar; michael@0: args->converter->toULength=1; michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: else{ michael@0: *err =U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: args->target = myTarget; michael@0: args->source = mySource; michael@0: } michael@0: michael@0: michael@0: static void michael@0: UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, michael@0: UErrorCode * err){ michael@0: const UChar *mySource = args->source; michael@0: char *myTarget = args->target; michael@0: int32_t* offsets = args->offsets; michael@0: int32_t mySourceIndex = 0; michael@0: int32_t myTargetIndex = 0; michael@0: int32_t targetLength = (int32_t)(args->targetLimit - myTarget); michael@0: int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); michael@0: int32_t length=0; michael@0: uint32_t targetUniChar = 0x0000; michael@0: UChar32 mySourceChar = 0x0000; michael@0: UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; michael@0: UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; michael@0: UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; michael@0: int len =0; michael@0: const char* escSeq=NULL; michael@0: michael@0: /* Calling code already handles this situation. */ michael@0: /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: }*/ michael@0: if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { michael@0: goto getTrail; michael@0: } michael@0: /*writing the char to the output stream */ michael@0: while (mySourceIndex < mySourceLength){ michael@0: targetUniChar = missingCharMarker; michael@0: if (myTargetIndex < targetLength){ michael@0: michael@0: mySourceChar = (UChar) mySource[mySourceIndex++]; michael@0: michael@0: michael@0: oldIsTargetUCharDBCS = isTargetUCharDBCS; michael@0: if(mySourceChar ==UCNV_TILDE){ michael@0: /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ michael@0: len = ESC_LEN; michael@0: escSeq = TILDE_ESCAPE; michael@0: CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); michael@0: continue; michael@0: } else if(mySourceChar <= 0x7f) { michael@0: length = 1; michael@0: targetUniChar = mySourceChar; michael@0: } else { michael@0: length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, michael@0: mySourceChar,&targetUniChar,args->converter->useFallback); michael@0: /* we can only use lead bytes 21..7D and trail bytes 21..7E */ michael@0: if( length == 2 && michael@0: (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && michael@0: (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) michael@0: ) { michael@0: targetUniChar -= 0x8080; michael@0: } else { michael@0: targetUniChar = missingCharMarker; michael@0: } michael@0: } michael@0: if (targetUniChar != missingCharMarker){ michael@0: myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); michael@0: if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ michael@0: /*Shifting from a double byte to single byte mode*/ michael@0: if(!isTargetUCharDBCS){ michael@0: len =ESC_LEN; michael@0: escSeq = SB_ESCAPE; michael@0: CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); michael@0: myConverterData->isEscapeAppended = TRUE; michael@0: } michael@0: else{ /* Shifting from a single byte to double byte mode*/ michael@0: len =ESC_LEN; michael@0: escSeq = DB_ESCAPE; michael@0: CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); michael@0: myConverterData->isEscapeAppended = TRUE; michael@0: michael@0: } michael@0: } michael@0: michael@0: if(isTargetUCharDBCS){ michael@0: if( myTargetIndex > 8); michael@0: if(offsets){ michael@0: *(offsets++) = mySourceIndex-1; michael@0: } michael@0: if(myTargetIndex < targetLength){ michael@0: myTarget[myTargetIndex++] =(char) targetUniChar; michael@0: if(offsets){ michael@0: *(offsets++) = mySourceIndex-1; michael@0: } michael@0: }else{ michael@0: args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; michael@0: *err = U_BUFFER_OVERFLOW_ERROR; michael@0: } michael@0: }else{ michael@0: args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); michael@0: args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; michael@0: *err = U_BUFFER_OVERFLOW_ERROR; michael@0: } michael@0: michael@0: }else{ michael@0: if( myTargetIndex converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; michael@0: *err = U_BUFFER_OVERFLOW_ERROR; michael@0: } michael@0: } michael@0: michael@0: } michael@0: else{ michael@0: /* oops.. the code point is unassigned */ michael@0: /*Handle surrogates */ michael@0: /*check if the char is a First surrogate*/ michael@0: if(U16_IS_SURROGATE(mySourceChar)) { michael@0: if(U16_IS_SURROGATE_LEAD(mySourceChar)) { michael@0: args->converter->fromUChar32=mySourceChar; michael@0: getTrail: michael@0: /*look ahead to find the trail surrogate*/ michael@0: if(mySourceIndex < mySourceLength) { michael@0: /* test the following code unit */ michael@0: UChar trail=(UChar) args->source[mySourceIndex]; michael@0: if(U16_IS_TRAIL(trail)) { michael@0: ++mySourceIndex; michael@0: mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); michael@0: args->converter->fromUChar32=0x00; michael@0: /* there are no surrogates in GB2312*/ michael@0: *err = U_INVALID_CHAR_FOUND; michael@0: /* exit this condition tree */ michael@0: } else { michael@0: /* this is an unmatched lead code unit (1st surrogate) */ michael@0: /* callback(illegal) */ michael@0: *err=U_ILLEGAL_CHAR_FOUND; michael@0: } michael@0: } else { michael@0: /* no more input */ michael@0: *err = U_ZERO_ERROR; michael@0: } michael@0: } else { michael@0: /* this is an unmatched trail code unit (2nd surrogate) */ michael@0: /* callback(illegal) */ michael@0: *err=U_ILLEGAL_CHAR_FOUND; michael@0: } michael@0: } else { michael@0: /* callback(unassigned) for a BMP code point */ michael@0: *err = U_INVALID_CHAR_FOUND; michael@0: } michael@0: michael@0: args->converter->fromUChar32=mySourceChar; michael@0: break; michael@0: } michael@0: } michael@0: else{ michael@0: *err = U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: targetUniChar=missingCharMarker; michael@0: } michael@0: michael@0: args->target += myTargetIndex; michael@0: args->source += mySourceIndex; michael@0: myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; michael@0: } michael@0: michael@0: static void michael@0: _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { michael@0: UConverter *cnv = args->converter; michael@0: UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; michael@0: char *p; michael@0: char buffer[4]; michael@0: p = buffer; michael@0: michael@0: if( convData->isTargetUCharDBCS){ michael@0: *p++= UCNV_TILDE; michael@0: *p++= UCNV_CLOSE_BRACE; michael@0: convData->isTargetUCharDBCS=FALSE; michael@0: } michael@0: *p++= (char)cnv->subChars[0]; michael@0: michael@0: ucnv_cbFromUWriteBytes(args, michael@0: buffer, (int32_t)(p - buffer), michael@0: offsetIndex, err); michael@0: } michael@0: michael@0: /* michael@0: * Structure for cloning an HZ converter into a single memory block. michael@0: * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, michael@0: * and then ucnv_safeClone() of the sub-converter may additionally align michael@0: * subCnv inside the cloneHZStruct, for which we need the deadSpace after michael@0: * subCnv. This is because UAlignedMemory may be larger than the actually michael@0: * necessary alignment size for the platform. michael@0: * The other cloneHZStruct fields will not be moved around, michael@0: * and are aligned properly with cloneHZStruct's alignment. michael@0: */ michael@0: struct cloneHZStruct michael@0: { michael@0: UConverter cnv; michael@0: UConverter subCnv; michael@0: UAlignedMemory deadSpace; michael@0: UConverterDataHZ mydata; michael@0: }; michael@0: michael@0: michael@0: static UConverter * michael@0: _HZ_SafeClone(const UConverter *cnv, michael@0: void *stackBuffer, michael@0: int32_t *pBufferSize, michael@0: UErrorCode *status) michael@0: { michael@0: struct cloneHZStruct * localClone; michael@0: int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); michael@0: michael@0: if (U_FAILURE(*status)){ michael@0: return 0; michael@0: } michael@0: michael@0: if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ michael@0: *pBufferSize = bufferSizeNeeded; michael@0: return 0; michael@0: } michael@0: michael@0: localClone = (struct cloneHZStruct *)stackBuffer; michael@0: /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ michael@0: michael@0: uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); michael@0: localClone->cnv.extraInfo = &localClone->mydata; michael@0: localClone->cnv.isExtraLocal = TRUE; michael@0: michael@0: /* deep-clone the sub-converter */ michael@0: size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ michael@0: ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = michael@0: ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); michael@0: michael@0: return &localClone->cnv; michael@0: } michael@0: michael@0: static void michael@0: _HZ_GetUnicodeSet(const UConverter *cnv, michael@0: const USetAdder *sa, michael@0: UConverterUnicodeSet which, michael@0: UErrorCode *pErrorCode) { michael@0: /* HZ converts all of ASCII */ michael@0: sa->addRange(sa->set, 0, 0x7f); michael@0: michael@0: /* add all of the code points that the sub-converter handles */ michael@0: ucnv_MBCSGetFilteredUnicodeSetForUnicode( michael@0: ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, michael@0: sa, which, UCNV_SET_FILTER_HZ, michael@0: pErrorCode); michael@0: } michael@0: michael@0: static const UConverterImpl _HZImpl={ michael@0: michael@0: UCNV_HZ, michael@0: michael@0: NULL, michael@0: NULL, michael@0: michael@0: _HZOpen, michael@0: _HZClose, michael@0: _HZReset, michael@0: michael@0: UConverter_toUnicode_HZ_OFFSETS_LOGIC, michael@0: UConverter_toUnicode_HZ_OFFSETS_LOGIC, michael@0: UConverter_fromUnicode_HZ_OFFSETS_LOGIC, michael@0: UConverter_fromUnicode_HZ_OFFSETS_LOGIC, michael@0: NULL, michael@0: michael@0: NULL, michael@0: NULL, michael@0: _HZ_WriteSub, michael@0: _HZ_SafeClone, michael@0: _HZ_GetUnicodeSet michael@0: }; michael@0: michael@0: static const UConverterStaticData _HZStaticData={ michael@0: sizeof(UConverterStaticData), michael@0: "HZ", michael@0: 0, michael@0: UCNV_IBM, michael@0: UCNV_HZ, michael@0: 1, michael@0: 4, michael@0: { 0x1a, 0, 0, 0 }, michael@0: 1, michael@0: FALSE, michael@0: FALSE, michael@0: 0, michael@0: 0, michael@0: { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ michael@0: michael@0: }; michael@0: michael@0: michael@0: const UConverterSharedData _HZData={ michael@0: sizeof(UConverterSharedData), michael@0: ~((uint32_t) 0), michael@0: NULL, michael@0: NULL, michael@0: &_HZStaticData, michael@0: FALSE, michael@0: &_HZImpl, michael@0: 0 michael@0: }; michael@0: michael@0: #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */