The Tor Browser: intl/icu/source/common/ucnvhz.c@fc2d59ddac77

     1 /*

     2 **********************************************************************

     3 *   Copyright (C) 2000-2011, International Business Machines

     4 *   Corporation and others.  All Rights Reserved.

     5 **********************************************************************

     6 *   file name:  ucnvhz.c

     7 *   encoding:   US-ASCII

     8 *   tab size:   8 (not used)

     9 *   indentation:4

    10 *

    11 *   created on: 2000oct16

    12 *   created by: Ram Viswanadha

    13 *   10/31/2000  Ram     Implemented offsets logic function

    14 *

    15 */

    17 #include "unicode/utypes.h"

    19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

    21 #include "cmemory.h"

    22 #include "unicode/ucnv.h"

    23 #include "unicode/ucnv_cb.h"

    24 #include "unicode/uset.h"

    25 #include "unicode/utf16.h"

    26 #include "ucnv_bld.h"

    27 #include "ucnv_cnv.h"

    28 #include "ucnv_imp.h"

    30 #define UCNV_TILDE 0x7E          /* ~ */

    31 #define UCNV_OPEN_BRACE 0x7B     /* { */

    32 #define UCNV_CLOSE_BRACE 0x7D   /* } */

    33 #define SB_ESCAPE    "\x7E\x7D"

    34 #define DB_ESCAPE    "\x7E\x7B"

    35 #define TILDE_ESCAPE "\x7E\x7E"

    36 #define ESC_LEN       2

    39 #define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){                             \

    40     while(len-->0){                                                                                                         \

    41         if(targetIndex < targetLength){                                                                                     \

    42             args->target[targetIndex] = (unsigned char) *strToAppend;                                                       \

    43             if(args->offsets!=NULL){                                                                                        \

    44                 *(offsets++) = sourceIndex-1;                                                                               \

    45             }                                                                                                               \

    46             targetIndex++;                                                                                                  \

    47         }                                                                                                                   \

    48         else{                                                                                                               \

    49             args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \

    50             *err =U_BUFFER_OVERFLOW_ERROR;                                                                                  \

    51         }                                                                                                                   \

    52         strToAppend++;                                                                                                      \

    53     }                                                                                                                       \

    54 }

    57 typedef struct{

    58     UConverter* gbConverter;

    59     int32_t targetIndex;

    60     int32_t sourceIndex;

    61     UBool isEscapeAppended;

    62     UBool isStateDBCS;

    63     UBool isTargetUCharDBCS;

    64     UBool isEmptySegment;

    65 }UConverterDataHZ;

    69 static void

    70 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

    71     UConverter *gbConverter;

    72     if(pArgs->onlyTestIsLoadable) {

    73         ucnv_canCreateConverter("GBK", errorCode);  /* errorCode carries result */

    74         return;

    75     }

    76     gbConverter = ucnv_open("GBK", errorCode);

    77     if(U_FAILURE(*errorCode)) {

    78         return;

    79     }

    80     cnv->toUnicodeStatus = 0;

    81     cnv->fromUnicodeStatus= 0;

    82     cnv->mode=0;

    83     cnv->fromUChar32=0x0000;

    84     cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));

    85     if(cnv->extraInfo != NULL){

    86         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;

    87     }

    88     else {

    89         ucnv_close(gbConverter);

    90         *errorCode = U_MEMORY_ALLOCATION_ERROR;

    91         return;

    92     }

    93 }

    95 static void

    96 _HZClose(UConverter *cnv){

    97     if(cnv->extraInfo != NULL) {

    98         ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);

    99         if(!cnv->isExtraLocal) {

   100             uprv_free(cnv->extraInfo);

   101         }

   102         cnv->extraInfo = NULL;

   103     }

   104 }

   106 static void

   107 _HZReset(UConverter *cnv, UConverterResetChoice choice){

   108     if(choice<=UCNV_RESET_TO_UNICODE) {

   109         cnv->toUnicodeStatus = 0;

   110         cnv->mode=0;

   111         if(cnv->extraInfo != NULL){

   112             ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;

   113             ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;

   114         }

   115     }

   116     if(choice!=UCNV_RESET_TO_UNICODE) {

   117         cnv->fromUnicodeStatus= 0;

   118         cnv->fromUChar32=0x0000;

   119         if(cnv->extraInfo != NULL){

   120             ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;

   121             ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;

   122             ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;

   123             ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;

   124         }

   125     }

   126 }

   128 /**************************************HZ Encoding*************************************************

   129 * Rules for HZ encoding

   130 *

   131 *   In ASCII mode, a byte is interpreted as an ASCII character, unless a

   132 *   '~' is encountered. The character '~' is an escape character. By

   133 *   convention, it must be immediately followed ONLY by '~', '{' or '\n'

   134 *   (<LF>), with the following special meaning.

   136 *   1. The escape sequence '~~' is interpreted as a '~'.

   137 *   2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.

   138 *   3. The escape sequence '~\n' is a line-continuation marker to be

   139 *     consumed with no output produced.

   140 *   In GB mode, characters are interpreted two bytes at a time as (pure)

   141 *   GB codes until the escape-from-GB code '~}' is read. This code

   142 *   switches the mode from GB back to ASCII.  (Note that the escape-

   143 *   from-GB code '~}' ($7E7D) is outside the defined GB range.)

   144 *

   145 *   Source: RFC 1842

   146 *

   147 *   Note that the formal syntax in RFC 1842 is invalid. I assume that the

   148 *   intended definition of single-byte-segment is as follows (pedberg):

   149 *   single-byte-segment = single-byte-seq 1*single-byte-char

   150 */

   153 static void

   154 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

   155                                                             UErrorCode* err){

   156     char tempBuf[2];

   157     const char *mySource = ( char *) args->source;

   158     UChar *myTarget = args->target;

   159     const char *mySourceLimit = args->sourceLimit;

   160     UChar32 targetUniChar = 0x0000;

   161     int32_t mySourceChar = 0x0000;

   162     UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);

   163     tempBuf[0]=0;

   164     tempBuf[1]=0;

   166     /* Calling code already handles this situation. */

   167     /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){

   168         *err = U_ILLEGAL_ARGUMENT_ERROR;

   169         return;

   170     }*/

   172     while(mySource< mySourceLimit){

   174         if(myTarget < args->targetLimit){

   176             mySourceChar= (unsigned char) *mySource++;

   178             if(args->converter->mode == UCNV_TILDE) {

   179                 /* second byte after ~ */

   180                 args->converter->mode=0;

   181                 switch(mySourceChar) {

   182                 case 0x0A:

   183                     /* no output for ~\n (line-continuation marker) */

   184                     continue;

   185                 case UCNV_TILDE:

   186                     if(args->offsets) {

   187                         args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);

   188                     }

   189                     *(myTarget++)=(UChar)mySourceChar;

   190                     myData->isEmptySegment = FALSE;

   191                     continue;

   192                 case UCNV_OPEN_BRACE:

   193                 case UCNV_CLOSE_BRACE:

   194                     myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);

   195                     if (myData->isEmptySegment) {

   196                         myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */

   197                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;

   198                         args->converter->toUCallbackReason = UCNV_IRREGULAR;

   199                         args->converter->toUBytes[0] = UCNV_TILDE;

   200                         args->converter->toUBytes[1] = mySourceChar;

   201                         args->converter->toULength = 2;

   202                         args->target = myTarget;

   203                         args->source = mySource;

   204                         return;

   205                     }

   206                     myData->isEmptySegment = TRUE;

   207                     continue;

   208                 default:

   209                      /* if the first byte is equal to TILDE and the trail byte

   210                      * is not a valid byte then it is an error condition

   211                      */

   212                     /*

   213                      * Ticket 5691: consistent illegal sequences:

   214                      * - We include at least the first byte in the illegal sequence.

   215                      * - If any of the non-initial bytes could be the start of a character,

   216                      *   we stop the illegal sequence before the first one of those.

   217                      */

   218                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */

   219                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;

   220                     args->converter->toUBytes[0] = UCNV_TILDE;

   221                     if( myData->isStateDBCS ?

   222                             (0x21 <= mySourceChar && mySourceChar <= 0x7e) :

   223                             mySourceChar <= 0x7f

   224                     ) {

   225                         /* The current byte could be the start of a character: Back it out. */

   226                         args->converter->toULength = 1;

   227                         --mySource;

   228                     } else {

   229                         /* Include the current byte in the illegal sequence. */

   230                         args->converter->toUBytes[1] = mySourceChar;

   231                         args->converter->toULength = 2;

   232                     }

   233                     args->target = myTarget;

   234                     args->source = mySource;

   235                     return;

   236                 }

   237             } else if(myData->isStateDBCS) {

   238                 if(args->converter->toUnicodeStatus == 0x00){

   239                     /* lead byte */

   240                     if(mySourceChar == UCNV_TILDE) {

   241                         args->converter->mode = UCNV_TILDE;

   242                     } else {

   243                         /* add another bit to distinguish a 0 byte from not having seen a lead byte */

   244                         args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);

   245                         myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */

   246                     }

   247                     continue;

   248                 }

   249                 else{

   250                     /* trail byte */

   251                     int leadIsOk, trailIsOk;

   252                     uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;

   253                     targetUniChar = 0xffff;

   254                     /*

   255                      * Ticket 5691: consistent illegal sequences:

   256                      * - We include at least the first byte in the illegal sequence.

   257                      * - If any of the non-initial bytes could be the start of a character,

   258                      *   we stop the illegal sequence before the first one of those.

   259                      *

   260                      * In HZ DBCS, if the second byte is in the 21..7e range,

   261                      * we report only the first byte as the illegal sequence.

   262                      * Otherwise we convert or report the pair of bytes.

   263                      */

   264                     leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);

   265                     trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);

   266                     if (leadIsOk && trailIsOk) {

   267                         tempBuf[0] = (char) (leadByte+0x80) ;

   268                         tempBuf[1] = (char) (mySourceChar+0x80);

   269                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,

   270                             tempBuf, 2, args->converter->useFallback);

   271                         mySourceChar= (leadByte << 8) | mySourceChar;

   272                     } else if (trailIsOk) {

   273                         /* report a single illegal byte and continue with the following DBCS starter byte */

   274                         --mySource;

   275                         mySourceChar = (int32_t)leadByte;

   276                     } else {

   277                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */

   278                         /* add another bit so that the code below writes 2 bytes in case of error */

   279                         mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;

   280                     }

   281                     args->converter->toUnicodeStatus =0x00;

   282                 }

   283             }

   284             else{

   285                 if(mySourceChar == UCNV_TILDE) {

   286                     args->converter->mode = UCNV_TILDE;

   287                     continue;

   288                 } else if(mySourceChar <= 0x7f) {

   289                     targetUniChar = (UChar)mySourceChar;  /* ASCII */

   290                     myData->isEmptySegment = FALSE; /* the segment has something valid */

   291                 } else {

   292                     targetUniChar = 0xffff;

   293                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */

   294                 }

   295             }

   296             if(targetUniChar < 0xfffe){

   297                 if(args->offsets) {

   298                     args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));

   299                 }

   301                 *(myTarget++)=(UChar)targetUniChar;

   302             }

   303             else /* targetUniChar>=0xfffe */ {

   304                 if(targetUniChar == 0xfffe){

   305                     *err = U_INVALID_CHAR_FOUND;

   306                 }

   307                 else{

   308                     *err = U_ILLEGAL_CHAR_FOUND;

   309                 }

   310                 if(mySourceChar > 0xff){

   311                     args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);

   312                     args->converter->toUBytes[1] = (uint8_t)mySourceChar;

   313                     args->converter->toULength=2;

   314                 }

   315                 else{

   316                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;

   317                     args->converter->toULength=1;

   318                 }

   319                 break;

   320             }

   321         }

   322         else{

   323             *err =U_BUFFER_OVERFLOW_ERROR;

   324             break;

   325         }

   326     }

   328     args->target = myTarget;

   329     args->source = mySource;

   330 }

   333 static void

   334 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,

   335                                                       UErrorCode * err){

   336     const UChar *mySource = args->source;

   337     char *myTarget = args->target;

   338     int32_t* offsets = args->offsets;

   339     int32_t mySourceIndex = 0;

   340     int32_t myTargetIndex = 0;

   341     int32_t targetLength = (int32_t)(args->targetLimit - myTarget);

   342     int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);

   343     int32_t length=0;

   344     uint32_t targetUniChar = 0x0000;

   345     UChar32 mySourceChar = 0x0000;

   346     UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;

   347     UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;

   348     UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;

   349     int len =0;

   350     const char* escSeq=NULL;

   352     /* Calling code already handles this situation. */

   353     /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){

   354         *err = U_ILLEGAL_ARGUMENT_ERROR;

   355         return;

   356     }*/

   357     if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {

   358         goto getTrail;

   359     }

   360     /*writing the char to the output stream */

   361     while (mySourceIndex < mySourceLength){

   362         targetUniChar = missingCharMarker;

   363         if (myTargetIndex < targetLength){

   365             mySourceChar = (UChar) mySource[mySourceIndex++];

   368             oldIsTargetUCharDBCS = isTargetUCharDBCS;

   369             if(mySourceChar ==UCNV_TILDE){

   370                 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/

   371                 len = ESC_LEN;

   372                 escSeq = TILDE_ESCAPE;

   373                 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);

   374                 continue;

   375             } else if(mySourceChar <= 0x7f) {

   376                 length = 1;

   377                 targetUniChar = mySourceChar;

   378             } else {

   379                 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,

   380                     mySourceChar,&targetUniChar,args->converter->useFallback);

   381                 /* we can only use lead bytes 21..7D and trail bytes 21..7E */

   382                 if( length == 2 &&

   383                     (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&

   384                     (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)

   385                 ) {

   386                     targetUniChar -= 0x8080;

   387                 } else {

   388                     targetUniChar = missingCharMarker;

   389                 }

   390             }

   391             if (targetUniChar != missingCharMarker){

   392                myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);

   393                  if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){

   394                     /*Shifting from a double byte to single byte mode*/

   395                     if(!isTargetUCharDBCS){

   396                         len =ESC_LEN;

   397                         escSeq = SB_ESCAPE;

   398                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);

   399                         myConverterData->isEscapeAppended = TRUE;

   400                     }

   401                     else{ /* Shifting from a single byte to double byte mode*/

   402                         len =ESC_LEN;

   403                         escSeq = DB_ESCAPE;

   404                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);

   405                         myConverterData->isEscapeAppended = TRUE;

   407                     }

   408                 }

   410                 if(isTargetUCharDBCS){

   411                     if( myTargetIndex <targetLength){

   412                         myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);

   413                         if(offsets){

   414                             *(offsets++) = mySourceIndex-1;

   415                         }

   416                         if(myTargetIndex < targetLength){

   417                             myTarget[myTargetIndex++] =(char) targetUniChar;

   418                             if(offsets){

   419                                 *(offsets++) = mySourceIndex-1;

   420                             }

   421                         }else{

   422                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;

   423                             *err = U_BUFFER_OVERFLOW_ERROR;

   424                         }

   425                     }else{

   426                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);

   427                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;

   428                         *err = U_BUFFER_OVERFLOW_ERROR;

   429                     }

   431                 }else{

   432                     if( myTargetIndex <targetLength){

   433                         myTarget[myTargetIndex++] = (char) (targetUniChar );

   434                         if(offsets){

   435                             *(offsets++) = mySourceIndex-1;

   436                         }

   438                     }else{

   439                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;

   440                         *err = U_BUFFER_OVERFLOW_ERROR;

   441                     }

   442                 }

   444             }

   445             else{

   446                 /* oops.. the code point is unassigned */

   447                 /*Handle surrogates */

   448                 /*check if the char is a First surrogate*/

   449                 if(U16_IS_SURROGATE(mySourceChar)) {

   450                     if(U16_IS_SURROGATE_LEAD(mySourceChar)) {

   451                         args->converter->fromUChar32=mySourceChar;

   452 getTrail:

   453                         /*look ahead to find the trail surrogate*/

   454                         if(mySourceIndex <  mySourceLength) {

   455                             /* test the following code unit */

   456                             UChar trail=(UChar) args->source[mySourceIndex];

   457                             if(U16_IS_TRAIL(trail)) {

   458                                 ++mySourceIndex;

   459                                 mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);

   460                                 args->converter->fromUChar32=0x00;

   461                                 /* there are no surrogates in GB2312*/

   462                                 *err = U_INVALID_CHAR_FOUND;

   463                                 /* exit this condition tree */

   464                             } else {

   465                                 /* this is an unmatched lead code unit (1st surrogate) */

   466                                 /* callback(illegal) */

   467                                 *err=U_ILLEGAL_CHAR_FOUND;

   468                             }

   469                         } else {

   470                             /* no more input */

   471                             *err = U_ZERO_ERROR;

   472                         }

   473                     } else {

   474                         /* this is an unmatched trail code unit (2nd surrogate) */

   475                         /* callback(illegal) */

   476                         *err=U_ILLEGAL_CHAR_FOUND;

   477                     }

   478                 } else {

   479                     /* callback(unassigned) for a BMP code point */

   480                     *err = U_INVALID_CHAR_FOUND;

   481                 }

   483                 args->converter->fromUChar32=mySourceChar;

   484                 break;

   485             }

   486         }

   487         else{

   488             *err = U_BUFFER_OVERFLOW_ERROR;

   489             break;

   490         }

   491         targetUniChar=missingCharMarker;

   492     }

   494     args->target += myTargetIndex;

   495     args->source += mySourceIndex;

   496     myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;

   497 }

   499 static void

   500 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {

   501     UConverter *cnv = args->converter;

   502     UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;

   503     char *p;

   504     char buffer[4];

   505     p = buffer;

   507     if( convData->isTargetUCharDBCS){

   508         *p++= UCNV_TILDE;

   509         *p++= UCNV_CLOSE_BRACE;

   510         convData->isTargetUCharDBCS=FALSE;

   511     }

   512     *p++= (char)cnv->subChars[0];

   514     ucnv_cbFromUWriteBytes(args,

   515                            buffer, (int32_t)(p - buffer),

   516                            offsetIndex, err);

   517 }

   519 /*

   520  * Structure for cloning an HZ converter into a single memory block.

   521  * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,

   522  * and then ucnv_safeClone() of the sub-converter may additionally align

   523  * subCnv inside the cloneHZStruct, for which we need the deadSpace after

   524  * subCnv. This is because UAlignedMemory may be larger than the actually

   525  * necessary alignment size for the platform.

   526  * The other cloneHZStruct fields will not be moved around,

   527  * and are aligned properly with cloneHZStruct's alignment.

   528  */

   529 struct cloneHZStruct

   530 {

   531     UConverter cnv;

   532     UConverter subCnv;

   533     UAlignedMemory deadSpace;

   534     UConverterDataHZ mydata;

   535 };

   538 static UConverter *

   539 _HZ_SafeClone(const UConverter *cnv,

   540               void *stackBuffer,

   541               int32_t *pBufferSize,

   542               UErrorCode *status)

   543 {

   544     struct cloneHZStruct * localClone;

   545     int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);

   547     if (U_FAILURE(*status)){

   548         return 0;

   549     }

   551     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */

   552         *pBufferSize = bufferSizeNeeded;

   553         return 0;

   554     }

   556     localClone = (struct cloneHZStruct *)stackBuffer;

   557     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */

   559     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));

   560     localClone->cnv.extraInfo = &localClone->mydata;

   561     localClone->cnv.isExtraLocal = TRUE;

   563     /* deep-clone the sub-converter */

   564     size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */

   565     ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =

   566         ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);

   568     return &localClone->cnv;

   569 }

   571 static void

   572 _HZ_GetUnicodeSet(const UConverter *cnv,

   573                   const USetAdder *sa,

   574                   UConverterUnicodeSet which,

   575                   UErrorCode *pErrorCode) {

   576     /* HZ converts all of ASCII */

   577     sa->addRange(sa->set, 0, 0x7f);

   579     /* add all of the code points that the sub-converter handles */

   580     ucnv_MBCSGetFilteredUnicodeSetForUnicode(

   581         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,

   582         sa, which, UCNV_SET_FILTER_HZ,

   583         pErrorCode);

   584 }

   586 static const UConverterImpl _HZImpl={

   588     UCNV_HZ,

   590     NULL,

   591     NULL,

   593     _HZOpen,

   594     _HZClose,

   595     _HZReset,

   597     UConverter_toUnicode_HZ_OFFSETS_LOGIC,

   598     UConverter_toUnicode_HZ_OFFSETS_LOGIC,

   599     UConverter_fromUnicode_HZ_OFFSETS_LOGIC,

   600     UConverter_fromUnicode_HZ_OFFSETS_LOGIC,

   601     NULL,

   603     NULL,

   604     NULL,

   605     _HZ_WriteSub,

   606     _HZ_SafeClone,

   607     _HZ_GetUnicodeSet

   608 };

   610 static const UConverterStaticData _HZStaticData={

   611     sizeof(UConverterStaticData),

   612         "HZ",

   613          0,

   614          UCNV_IBM,

   615          UCNV_HZ,

   616          1,

   617          4,

   618         { 0x1a, 0, 0, 0 },

   619         1,

   620         FALSE,

   621         FALSE,

   622         0,

   623         0,

   624         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

   626 };

   629 const UConverterSharedData _HZData={

   630     sizeof(UConverterSharedData),

   631         ~((uint32_t) 0),

   632         NULL,

   633         NULL,

   634         &_HZStaticData,

   635         FALSE,

   636         &_HZImpl,

   637         0

   638 };

   640 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

The Tor Browser / file revision

intl/icu/source/common/ucnvhz.c@fc2d59ddac77

intl/icu/source/common/ucnvhz.c