intl/uconv/util/uscan.c

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

     1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "unicpriv.h"
     6 #define CHK_GR94(b) ( (uint8_t) 0xa0 < (uint8_t) (b) && (uint8_t) (b) < (uint8_t) 0xff )
     7 #define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2))
     8 /*=================================================================================
    10 =================================================================================*/
    11 typedef  int (*uSubScannerFunc) (unsigned char* in, uint16_t* out);
    12 /*=================================================================================
    14 =================================================================================*/
    16 typedef int (*uScannerFunc) (
    17                              int32_t*    state,
    18                              unsigned char  *in,
    19                              uint16_t    *out,
    20                              uint32_t     inbuflen,
    21                              uint32_t*    inscanlen
    22                              );
    24 int uScan(
    25           uScanClassID scanClass,
    26           int32_t*    state,
    27           unsigned char  *in,
    28           uint16_t    *out,
    29           uint32_t     inbuflen,
    30           uint32_t*    inscanlen
    31           );
    33 #define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out))
    35 int uCheckAndScanAlways1Byte(
    36                             int32_t*    state,
    37                             unsigned char  *in,
    38                             uint16_t    *out,
    39                             uint32_t     inbuflen,
    40                             uint32_t*    inscanlen
    41                             );
    42 int uCheckAndScanAlways2Byte(
    43                              int32_t*    state,
    44                              unsigned char  *in,
    45                              uint16_t    *out,
    46                              uint32_t     inbuflen,
    47                              uint32_t*    inscanlen
    48                              );
    49 int uCheckAndScanAlways2ByteShiftGR(
    50                                     int32_t*    state,
    51                                     unsigned char  *in,
    52                                     uint16_t    *out,
    53                                     uint32_t     inbuflen,
    54                                     uint32_t*    inscanlen
    55                                     );
    56 int uCheckAndScanAlways2ByteGR128(
    57                                   int32_t*    state,
    58                                   unsigned char  *in,
    59                                   uint16_t    *out,
    60                                   uint32_t     inbuflen,
    61                                   uint32_t*    inscanlen
    62                                           );
    63 int uScanShift(
    64                uShiftInTable    *shift,
    65                int32_t*    state,
    66                unsigned char  *in,
    67                uint16_t    *out,
    68                uint32_t     inbuflen,
    69                uint32_t*    inscanlen
    70                );
    72 int uCheckAndScan2ByteGRPrefix8F(
    73                                  int32_t*    state,
    74                                  unsigned char  *in,
    75                                  uint16_t    *out,
    76                                  uint32_t     inbuflen,
    77                                  uint32_t*    inscanlen
    78                                  );
    79 int uCheckAndScan2ByteGRPrefix8EA2(
    80                                    int32_t*    state,
    81                                    unsigned char  *in,
    82                                    uint16_t    *out,
    83                                    uint32_t     inbuflen,
    84                                    uint32_t*    inscanlen
    85                                    );
    86 int uCheckAndScan2ByteGRPrefix8EA3(
    87                                    int32_t*    state,
    88                                    unsigned char  *in,
    89                                    uint16_t    *out,
    90                                    uint32_t     inbuflen,
    91                                    uint32_t*    inscanlen
    92                                    );
    93 int uCheckAndScan2ByteGRPrefix8EA4(
    94                                    int32_t*    state,
    95                                    unsigned char  *in,
    96                                    uint16_t    *out,
    97                                    uint32_t     inbuflen,
    98                                    uint32_t*    inscanlen
    99                                    );
   100 int uCheckAndScan2ByteGRPrefix8EA5(
   101                                    int32_t*    state,
   102                                    unsigned char  *in,
   103                                    uint16_t    *out,
   104                                    uint32_t     inbuflen,
   105                                    uint32_t*    inscanlen
   106                                    );
   107 int uCheckAndScan2ByteGRPrefix8EA6(
   108                                    int32_t*    state,
   109                                    unsigned char  *in,
   110                                    uint16_t    *out,
   111                                    uint32_t     inbuflen,
   112                                    uint32_t*    inscanlen
   113                                    );
   114 int uCheckAndScan2ByteGRPrefix8EA7(
   115                                    int32_t*    state,
   116                                    unsigned char  *in,
   117                                    uint16_t    *out,
   118                                    uint32_t     inbuflen,
   119                                    uint32_t*    inscanlen
   120                                    );
   121 int uCnSAlways8BytesDecomposedHangul(
   122                                      int32_t*    state,
   123                                      unsigned char  *in,
   124                                      uint16_t    *out,
   125                                      uint32_t     inbuflen,
   126                                      uint32_t*    inscanlen
   127                                      );
   128 int uCheckAndScanJohabHangul(
   129                              int32_t*    state,
   130                              unsigned char  *in,
   131                              uint16_t    *out,
   132                              uint32_t     inbuflen,
   133                              uint32_t*    inscanlen
   134                              );
   135 int uCheckAndScanJohabSymbol(
   136                              int32_t*    state,
   137                              unsigned char  *in,
   138                              uint16_t    *out,
   139                              uint32_t     inbuflen,
   140                              uint32_t*    inscanlen
   141                              );
   143 int uCheckAndScan4BytesGB18030(
   144                                int32_t*    state,
   145                                unsigned char  *in,
   146                                uint16_t    *out,
   147                                uint32_t     inbuflen,
   148                                uint32_t*    inscanlen
   149                                );
   151 int uScanAlways2Byte(
   152                      unsigned char*  in,
   153                      uint16_t*    out
   154                      );
   155 int uScanAlways2ByteShiftGR(
   156                             unsigned char*  in,
   157                             uint16_t*    out
   158                             );
   159 int uScanAlways1Byte(
   160                      unsigned char*  in,
   161                      uint16_t*    out
   162                      );
   163 int uScanAlways1BytePrefix8E(
   164                              unsigned char*  in,
   165                              uint16_t*    out
   166                              );
   167 /*=================================================================================
   169 =================================================================================*/
   170 const uScannerFunc m_scanner[uNumOfCharsetType] =
   171 {
   172     uCheckAndScanAlways1Byte,
   173     uCheckAndScanAlways2Byte,
   174     uCheckAndScanAlways2ByteShiftGR,
   175     uCheckAndScan2ByteGRPrefix8F,
   176     uCheckAndScan2ByteGRPrefix8EA2,
   177     uCheckAndScan2ByteGRPrefix8EA3,
   178     uCheckAndScan2ByteGRPrefix8EA4,
   179     uCheckAndScan2ByteGRPrefix8EA5,
   180     uCheckAndScan2ByteGRPrefix8EA6,
   181     uCheckAndScan2ByteGRPrefix8EA7,
   182     uCnSAlways8BytesDecomposedHangul,
   183     uCheckAndScanJohabHangul,
   184     uCheckAndScanJohabSymbol,
   185     uCheckAndScan4BytesGB18030,
   186     uCheckAndScanAlways2ByteGR128
   187 };
   189 /*=================================================================================
   191 =================================================================================*/
   193 const uSubScannerFunc m_subscanner[uNumOfCharType] =
   194 {
   195     uScanAlways1Byte,
   196     uScanAlways2Byte,
   197     uScanAlways2ByteShiftGR,
   198     uScanAlways1BytePrefix8E
   199 };
   200 /*=================================================================================
   202 =================================================================================*/
   203 int uScan(
   204           uScanClassID scanClass,
   205           int32_t*    state,
   206           unsigned char  *in,
   207           uint16_t    *out,
   208           uint32_t     inbuflen,
   209           uint32_t*    inscanlen
   210           )
   211 {
   212   return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen);
   213 }
   214 /*=================================================================================
   216 =================================================================================*/
   217 int uScanAlways1Byte(
   218                      unsigned char*  in,
   219                      uint16_t*    out
   220                      )
   221 {
   222   *out = (uint16_t) in[0];
   223   return 1;
   224 }
   226 /*=================================================================================
   228 =================================================================================*/
   229 int uScanAlways2Byte(
   230                      unsigned char*  in,
   231                      uint16_t*    out
   232                      )
   233 {
   234   *out = (uint16_t) (( in[0] << 8) | (in[1]));
   235   return 1;
   236 }
   237 /*=================================================================================
   239 =================================================================================*/
   240 int uScanAlways2ByteShiftGR(
   241                             unsigned char*  in,
   242                             uint16_t*    out
   243                             )
   244 {
   245   *out = (uint16_t) ((( in[0] << 8) | (in[1])) &  0x7F7F);
   246   return 1;
   247 }
   249 /*=================================================================================
   251 =================================================================================*/
   252 int uScanAlways1BytePrefix8E(
   253                              unsigned char*  in,
   254                              uint16_t*    out
   255                              )
   256 {
   257   *out = (uint16_t) in[1];
   258   return 1;
   259 }
   260 /*=================================================================================
   262 =================================================================================*/
   263 int uCheckAndScanAlways1Byte(
   264                              int32_t*    state,
   265                              unsigned char  *in,
   266                              uint16_t    *out,
   267                              uint32_t     inbuflen,
   268                              uint32_t*    inscanlen
   269                              )
   270 {
   271   /* Don't check inlen. The caller should ensure it is larger than 0 */
   272   *inscanlen = 1;
   273   *out = (uint16_t) in[0];
   275   return 1;
   276 }
   278 /*=================================================================================
   280 =================================================================================*/
   281 int uCheckAndScanAlways2Byte(
   282                              int32_t*    state,
   283                              unsigned char  *in,
   284                              uint16_t    *out,
   285                              uint32_t     inbuflen,
   286                              uint32_t*    inscanlen
   287                              )
   288 {
   289   if(inbuflen < 2)
   290     return 0;
   291   else
   292   {
   293     *inscanlen = 2;
   294     *out = ((in[0] << 8) | ( in[1])) ;
   295     return 1;
   296   }
   297 }
   298 /*=================================================================================
   300 =================================================================================*/
   301 int uCheckAndScanAlways2ByteShiftGR(
   302                                     int32_t*    state,
   303                                     unsigned char  *in,
   304                                     uint16_t    *out,
   305                                     uint32_t     inbuflen,
   306                                     uint32_t*    inscanlen
   307                                     )
   308 {
   309   /*
   310    * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets
   311    * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets.
   312    * Only 2nd byte range needs to be checked because 
   313    * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp 
   314    */
   315   if(inbuflen < 2)    /* will lead to NS_OK_UDEC_MOREINPUT */
   316     return 0;
   317   else if (! CHK_GR94(in[1]))  
   318   {
   319     *inscanlen = 2; 
   320     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   321     return 1;
   322   }
   323   else
   324   {
   325     *inscanlen = 2;
   326     *out = (((in[0] << 8) | ( in[1]))  & 0x7F7F);
   327     return 1;
   328   }
   329 }
   330 /*=================================================================================
   332 =================================================================================*/
   333 int uCheckAndScanAlways2ByteGR128(
   334                                   int32_t*    state,
   335                                   unsigned char  *in,
   336                                   uint16_t    *out,
   337                                   uint32_t     inbuflen,
   338                                   uint32_t*    inscanlen
   339                                   )
   340 {
   341   /*
   342    * The first byte should be in  [0xa1,0xfe] 
   343    * and the second byte in [0x41,0xfe]
   344    * Used by CP949 -> Unicode converter.
   345    * Only 2nd byte range needs to be checked because 
   346    * 1st byte is checked before calling this in nsUnicodeDecoderHelper.cpp 
   347    */
   348   if(inbuflen < 2)    /* will lead to NS_OK_UDEC_MOREINPUT */
   349     return 0;
   350   else if (in[1] < 0x41)     /* 2nd byte range check */
   351   {
   352     *inscanlen = 2; 
   353     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   354     return 1;
   355   }
   356   else
   357   {
   358     *inscanlen = 2;
   359     *out = (in[0] << 8) |  in[1];
   360     return 1;
   361   }
   362 }
   363 /*=================================================================================
   365 =================================================================================*/
   366 int uScanShift(
   367                uShiftInTable    *shift,
   368                int32_t*    state,
   369                unsigned char  *in,
   370                uint16_t    *out,
   371                uint32_t     inbuflen,
   372                uint32_t*    inscanlen
   373                )
   374 {
   375   int16_t i;
   376   const uShiftInCell* cell = &(shift->shiftcell[0]);
   377   int16_t itemnum = shift->numOfItem;
   378   for(i=0;i<itemnum;i++)
   379   {
   380     if( ( in[0] >=  cell[i].shiftin_Min) &&
   381       ( in[0] <=  cell[i].shiftin_Max))
   382     {
   383       if(inbuflen < cell[i].reserveLen)
   384         return 0;
   385       else
   386       {
   387         *inscanlen = cell[i].reserveLen;
   388         return (uSubScanner(cell[i].classID,in,out));
   389       }
   390     }
   391   }
   392   return 0;
   393 }
   394 /*=================================================================================
   396 =================================================================================*/
   397 int uCheckAndScan2ByteGRPrefix8F(
   398                                  int32_t*    state,
   399                                  unsigned char  *in,
   400                                  uint16_t    *out,
   401                                  uint32_t     inbuflen,
   402                                  uint32_t*    inscanlen
   403                                  )
   404 {
   405   if((inbuflen < 3) ||(in[0] != 0x8F)) 
   406     return 0;
   407   else if (! CHK_GR94(in[1]))  /* 2nd byte range check */
   408   {
   409     *inscanlen = 2; 
   410     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   411     return 1;
   412   }
   413   else if (! CHK_GR94(in[2]))  /* 3rd byte range check */
   414   {
   415     *inscanlen = 3; 
   416     *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   417     return 1;
   418   }
   419   else
   420   {
   421     *inscanlen = 3;
   422     *out = (((in[1] << 8) | ( in[2]))  & 0x7F7F);
   423     return 1;
   424   }
   425 }
   426 /*=================================================================================
   428 =================================================================================*/
   430 /* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX()
   431  * where X is 2,3,4,5,6,7 
   432  */
   433 #define CNS_8EAX_4BYTE(PREFIX)                    \
   434   if((inbuflen < 4) || (in[0] != 0x8E))           \
   435     return 0;                                     \
   436   else if((in[1] != (PREFIX)))                    \
   437   {                                               \
   438     *inscanlen = 2;                               \
   439     *out = 0xFF;                                  \
   440     return 1;                                     \
   441   }                                               \
   442   else if(! CHK_GR94(in[2]))                      \
   443   {                                               \
   444     *inscanlen = 3;                               \
   445     *out = 0xFF;                                  \
   446     return 1;                                     \
   447   }                                               \
   448   else if(! CHK_GR94(in[3]))                      \
   449   {                                               \
   450     *inscanlen = 4;                               \
   451     *out = 0xFF;                                  \
   452     return 1;                                     \
   453   }                                               \
   454   else                                            \
   455   {                                               \
   456     *inscanlen = 4;                               \
   457     *out = (((in[2] << 8) | ( in[3]))  & 0x7F7F); \
   458     return 1;                                     \
   459   }    
   461 int uCheckAndScan2ByteGRPrefix8EA2(
   462                                    int32_t*    state,
   463                                    unsigned char  *in,
   464                                    uint16_t    *out,
   465                                    uint32_t     inbuflen,
   466                                    uint32_t*    inscanlen
   467                                    )
   468 {
   469   CNS_8EAX_4BYTE(0xA2)
   470 }
   472 /*=================================================================================
   474 =================================================================================*/
   475 int uCheckAndScan2ByteGRPrefix8EA3(
   476                                    int32_t*    state,
   477                                    unsigned char  *in,
   478                                    uint16_t    *out,
   479                                    uint32_t     inbuflen,
   480                                    uint32_t*    inscanlen
   481                                    )
   482 {
   483   CNS_8EAX_4BYTE(0xA3)
   484 }
   485 /*=================================================================================
   487 =================================================================================*/
   488 int uCheckAndScan2ByteGRPrefix8EA4(
   489                                    int32_t*    state,
   490                                    unsigned char  *in,
   491                                    uint16_t    *out,
   492                                    uint32_t     inbuflen,
   493                                    uint32_t*    inscanlen
   494                                    )
   495 {
   496   CNS_8EAX_4BYTE(0xA4)
   497 }
   498 /*=================================================================================
   500 =================================================================================*/
   501 int uCheckAndScan2ByteGRPrefix8EA5(
   502                                    int32_t*    state,
   503                                    unsigned char  *in,
   504                                    uint16_t    *out,
   505                                    uint32_t     inbuflen,
   506                                    uint32_t*    inscanlen
   507                                    )
   508 {
   509   CNS_8EAX_4BYTE(0xA5)
   510 }
   511 /*=================================================================================
   513 =================================================================================*/
   514 int uCheckAndScan2ByteGRPrefix8EA6(
   515                                    int32_t*    state,
   516                                    unsigned char  *in,
   517                                    uint16_t    *out,
   518                                    uint32_t     inbuflen,
   519                                    uint32_t*    inscanlen
   520                                    )
   521 {
   522   CNS_8EAX_4BYTE(0xA6)
   523 }
   524 /*=================================================================================
   526 =================================================================================*/
   527 int uCheckAndScan2ByteGRPrefix8EA7(
   528                                    int32_t*    state,
   529                                    unsigned char  *in,
   530                                    uint16_t    *out,
   531                                    uint32_t     inbuflen,
   532                                    uint32_t*    inscanlen
   533                                    )
   534 {
   535   CNS_8EAX_4BYTE(0xA7)
   536 }
   537 /*=================================================================================
   539 =================================================================================*/
   540 #define SBase 0xAC00
   541 #define SCount 11172
   542 #define LCount 19
   543 #define VCount 21
   544 #define TCount 28
   545 #define NCount (VCount * TCount)
   547 int uCnSAlways8BytesDecomposedHangul(
   548                                      int32_t*    state,
   549                                      unsigned char  *in,
   550                                      uint16_t    *out,
   551                                      uint32_t     inbuflen,
   552                                      uint32_t*    inscanlen
   553                                      )
   554 {
   556   uint16_t LIndex, VIndex, TIndex;
   557   /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */
   558   if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
   559     (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6]))
   560     return 0;
   562   /* Compute LIndex  */
   563   if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant */
   564     return 0;
   565   } 
   566   else {
   567     static const uint8_t lMap[] = {
   568       /*        A1   A2   A3   A4   A5   A6   A7  */
   569       0,   1,0xff,   2,0xff,0xff,   3,
   570         /*   A8   A9   AA   AB   AC   AD   AE   AF  */
   571         4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
   572         /*   B0   B1   B2   B3   B4   B5   B6   B7  */
   573         0xff,   6,   7,   8,0xff,   9,  10,  11,
   574         /*   B8   B9   BA   BB   BC   BD   BE       */
   575         12,  13,  14,  15,  16,  17,  18     
   576     };
   578     LIndex = lMap[in[3] - 0xa1];
   579     if(0xff == (0xff & LIndex))
   580       return 0;
   581   }
   583   /* Compute VIndex  */
   584   if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel */
   585     return 0;
   586   } 
   587   else {
   588     VIndex = in[5] - 0xbf;
   589   }
   591   /* Compute TIndex  */
   592   if(0xd4 == in[7])  
   593   {
   594     TIndex = 0;
   595   } 
   596   else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant */
   597     return 0;
   598   } 
   599   else {
   600     static const uint8_t tMap[] = {
   601       /*        A1   A2   A3   A4   A5   A6   A7  */
   602       1,   2,   3,   4,   5,   6,   7,
   603         /*   A8   A9   AA   AB   AC   AD   AE   AF  */
   604         0xff,   8,   9,  10,  11,  12,  13,  14,
   605         /*   B0   B1   B2   B3   B4   B5   B6   B7  */
   606         15,  16,  17,0xff,  18,  19,  20,  21,
   607         /*   B8   B9   BA   BB   BC   BD   BE       */
   608         22,0xff,  23,  24,  25,  26,  27     
   609     };
   610     TIndex = tMap[in[7] - 0xa1];
   611     if(0xff == (0xff & TIndex))
   612       return 0;
   613   }
   615   *inscanlen = 8;
   616   /* the following line is from Unicode 2.0 page 3-13 item 5 */
   617   *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
   619   return 1;
   620 }
   621 /*=================================================================================
   623 =================================================================================*/
   625 int uCheckAndScanJohabHangul(
   626                              int32_t*    state,
   627                              unsigned char  *in,
   628                              uint16_t    *out,
   629                              uint32_t     inbuflen,
   630                              uint32_t*    inscanlen
   631                              )
   632 {
   633 /* since we don't have code to convert Johab to Unicode right now     *
   634   * make this part of code #if 0 to save space until we fully test it */
   635   if(inbuflen < 2)
   636     return 0;
   637   else {
   638   /*
   639   * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183
   640   * of "CJKV Information Processing" for details
   641     */
   642     static const uint8_t lMap[32]={ /* totaly 19  */
   643       0xff,0xff,0,   1,   2,   3,   4,   5,    /* 0-7    */
   644         6,   7,   8,   9,   10,  11,  12,  13,   /* 8-15   */
   645         14,  15,  16,  17,  18,  0xff,0xff,0xff, /* 16-23  */
   646         0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff  /* 24-31  */
   647     };
   648     static const uint8_t vMap[32]={ /* totaly 21 */
   649       0xff,0xff,0xff,0,   1,   2,   3,   4,    /* 0-7   */
   650         0xff,0xff,5,   6,   7,   8,   9,   10,   /* 8-15  */
   651         0xff,0xff,11,  12,  13,  14,  15,  16,   /* 16-23 */
   652         0xff,0xff,17,  18,  19,  20,  0xff,0xff  /* 24-31 */
   653     };
   654     static const uint8_t tMap[32]={ /* totaly 29 */
   655       0xff,0,   1,   2,   3,   4,   5,   6,    /* 0-7   */
   656         7,   8,   9,   10,  11,  12,  13,  14,   /* 8-15  */
   657         15,  16,  0xff,17,  18,  19,  20,  21,   /* 16-23 */
   658         22,  23,  24,  25,  26,  27,  0xff,0xff  /* 24-31 */
   659     };
   660     uint16_t ch = (in[0] << 8) | in[1];
   661     uint16_t LIndex, VIndex, TIndex;
   662     if(0 == (0x8000 & ch))
   663       return 0;
   664     LIndex=lMap[(ch>>10)& 0x1F];
   665     VIndex=vMap[(ch>>5) & 0x1F];
   666     TIndex=tMap[(ch>>0) & 0x1F];
   667     if((0xff==(LIndex)) || 
   668       (0xff==(VIndex)) || 
   669       (0xff==(TIndex)))
   670       return 0;
   671     /* the following line is from Unicode 2.0 page 3-13 item 5 */
   672     *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
   673     *inscanlen = 2;
   674     return 1;
   675   }
   676 }
   677 int uCheckAndScanJohabSymbol(
   678                              int32_t*    state,
   679                              unsigned char  *in,
   680                              uint16_t    *out,
   681                              uint32_t     inbuflen,
   682                              uint32_t*    inscanlen
   683                              )
   684 {
   685   if(inbuflen < 2)
   686     return 0;
   687   else {
   688   /*
   689   * The following code are based on the Perl code lised under
   690   * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of
   691   * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>
   692   *
   693   * sub johab2ks ($) { # Convert Johab to ISO-2022-KR
   694   *   my @johab = unpack("C*", $_[0]);
   695   *   my ($offset, $d8_off) = (0,0);
   696   *   my @out = ();
   697   *   while(($hi, $lo) = splice($johab, 0, 2)) {
   698   *     $offset = 1 if ($hi > 223 and $hi < 250);
   699   *     $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
   700   *     push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
   701   *            ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
   702   *            $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
   703   *   }
   704   *   return pack ("C*", @out);
   705   * }
   706   * additional comments from Ken Lunde
   707   * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
   708   * has three possible return values:
   709   * 0  if $hi is not equal to 216
   710   * 94 if $hi is euqal to 216 and if $lo is greater than 160
   711   * 42 if $hi is euqal to 216 and if $lo is not greater than 160
   712     */ 
   713     unsigned char hi = in[0];
   714     unsigned char lo = in[1];
   715     uint16_t offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0;
   716     uint16_t d8_off = 0;
   717     if(216 == hi) {
   718       if( lo > 160)
   719         d8_off = 94;
   720       else
   721         d8_off = 42;
   722     }
   724     *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) -
   725       (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) |
   726       (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : 
   727     128));
   728     *inscanlen = 2;
   729     return 1;
   730   }
   731 }
   732 int uCheckAndScan4BytesGB18030(
   733                                int32_t*    state,
   734                                unsigned char  *in,
   735                                uint16_t    *out,
   736                                uint32_t     inbuflen,
   737                                uint32_t*    inscanlen
   738                                )
   739 {
   740   uint32_t  data;
   741   if(inbuflen < 4) 
   742     return 0;
   744   if((in[0] < 0x81 ) || (0xfe < in[0])) 
   745     return 0;
   746   if((in[1] < 0x30 ) || (0x39 < in[1])) 
   747     return 0;
   748   if((in[2] < 0x81 ) || (0xfe < in[2])) 
   749     return 0;
   750   if((in[3] < 0x30 ) || (0x39 < in[3])) 
   751     return 0;
   753   data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) + 
   754     (in[2] - 0x81)) * 10 ) + (in[3] - 0x30);
   756   *inscanlen = 4;
   757   *out = (data < 0x00010000) ? data : 0xFFFD;
   758   return 1;
   759 }

mercurial