intl/uconv/util/uscan.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/util/uscan.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,759 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#include "unicpriv.h"
     1.9 +#define CHK_GR94(b) ( (uint8_t) 0xa0 < (uint8_t) (b) && (uint8_t) (b) < (uint8_t) 0xff )
    1.10 +#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2))
    1.11 +/*=================================================================================
    1.12 +
    1.13 +=================================================================================*/
    1.14 +typedef  int (*uSubScannerFunc) (unsigned char* in, uint16_t* out);
    1.15 +/*=================================================================================
    1.16 +
    1.17 +=================================================================================*/
    1.18 +
    1.19 +typedef int (*uScannerFunc) (
    1.20 +                             int32_t*    state,
    1.21 +                             unsigned char  *in,
    1.22 +                             uint16_t    *out,
    1.23 +                             uint32_t     inbuflen,
    1.24 +                             uint32_t*    inscanlen
    1.25 +                             );
    1.26 +
    1.27 +int uScan(
    1.28 +          uScanClassID scanClass,
    1.29 +          int32_t*    state,
    1.30 +          unsigned char  *in,
    1.31 +          uint16_t    *out,
    1.32 +          uint32_t     inbuflen,
    1.33 +          uint32_t*    inscanlen
    1.34 +          );
    1.35 +
    1.36 +#define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out))
    1.37 +
    1.38 +int uCheckAndScanAlways1Byte(
    1.39 +                            int32_t*    state,
    1.40 +                            unsigned char  *in,
    1.41 +                            uint16_t    *out,
    1.42 +                            uint32_t     inbuflen,
    1.43 +                            uint32_t*    inscanlen
    1.44 +                            );
    1.45 +int uCheckAndScanAlways2Byte(
    1.46 +                             int32_t*    state,
    1.47 +                             unsigned char  *in,
    1.48 +                             uint16_t    *out,
    1.49 +                             uint32_t     inbuflen,
    1.50 +                             uint32_t*    inscanlen
    1.51 +                             );
    1.52 +int uCheckAndScanAlways2ByteShiftGR(
    1.53 +                                    int32_t*    state,
    1.54 +                                    unsigned char  *in,
    1.55 +                                    uint16_t    *out,
    1.56 +                                    uint32_t     inbuflen,
    1.57 +                                    uint32_t*    inscanlen
    1.58 +                                    );
    1.59 +int uCheckAndScanAlways2ByteGR128(
    1.60 +                                  int32_t*    state,
    1.61 +                                  unsigned char  *in,
    1.62 +                                  uint16_t    *out,
    1.63 +                                  uint32_t     inbuflen,
    1.64 +                                  uint32_t*    inscanlen
    1.65 +                                          );
    1.66 +int uScanShift(
    1.67 +               uShiftInTable    *shift,
    1.68 +               int32_t*    state,
    1.69 +               unsigned char  *in,
    1.70 +               uint16_t    *out,
    1.71 +               uint32_t     inbuflen,
    1.72 +               uint32_t*    inscanlen
    1.73 +               );
    1.74 +
    1.75 +int uCheckAndScan2ByteGRPrefix8F(
    1.76 +                                 int32_t*    state,
    1.77 +                                 unsigned char  *in,
    1.78 +                                 uint16_t    *out,
    1.79 +                                 uint32_t     inbuflen,
    1.80 +                                 uint32_t*    inscanlen
    1.81 +                                 );
    1.82 +int uCheckAndScan2ByteGRPrefix8EA2(
    1.83 +                                   int32_t*    state,
    1.84 +                                   unsigned char  *in,
    1.85 +                                   uint16_t    *out,
    1.86 +                                   uint32_t     inbuflen,
    1.87 +                                   uint32_t*    inscanlen
    1.88 +                                   );
    1.89 +int uCheckAndScan2ByteGRPrefix8EA3(
    1.90 +                                   int32_t*    state,
    1.91 +                                   unsigned char  *in,
    1.92 +                                   uint16_t    *out,
    1.93 +                                   uint32_t     inbuflen,
    1.94 +                                   uint32_t*    inscanlen
    1.95 +                                   );
    1.96 +int uCheckAndScan2ByteGRPrefix8EA4(
    1.97 +                                   int32_t*    state,
    1.98 +                                   unsigned char  *in,
    1.99 +                                   uint16_t    *out,
   1.100 +                                   uint32_t     inbuflen,
   1.101 +                                   uint32_t*    inscanlen
   1.102 +                                   );
   1.103 +int uCheckAndScan2ByteGRPrefix8EA5(
   1.104 +                                   int32_t*    state,
   1.105 +                                   unsigned char  *in,
   1.106 +                                   uint16_t    *out,
   1.107 +                                   uint32_t     inbuflen,
   1.108 +                                   uint32_t*    inscanlen
   1.109 +                                   );
   1.110 +int uCheckAndScan2ByteGRPrefix8EA6(
   1.111 +                                   int32_t*    state,
   1.112 +                                   unsigned char  *in,
   1.113 +                                   uint16_t    *out,
   1.114 +                                   uint32_t     inbuflen,
   1.115 +                                   uint32_t*    inscanlen
   1.116 +                                   );
   1.117 +int uCheckAndScan2ByteGRPrefix8EA7(
   1.118 +                                   int32_t*    state,
   1.119 +                                   unsigned char  *in,
   1.120 +                                   uint16_t    *out,
   1.121 +                                   uint32_t     inbuflen,
   1.122 +                                   uint32_t*    inscanlen
   1.123 +                                   );
   1.124 +int uCnSAlways8BytesDecomposedHangul(
   1.125 +                                     int32_t*    state,
   1.126 +                                     unsigned char  *in,
   1.127 +                                     uint16_t    *out,
   1.128 +                                     uint32_t     inbuflen,
   1.129 +                                     uint32_t*    inscanlen
   1.130 +                                     );
   1.131 +int uCheckAndScanJohabHangul(
   1.132 +                             int32_t*    state,
   1.133 +                             unsigned char  *in,
   1.134 +                             uint16_t    *out,
   1.135 +                             uint32_t     inbuflen,
   1.136 +                             uint32_t*    inscanlen
   1.137 +                             );
   1.138 +int uCheckAndScanJohabSymbol(
   1.139 +                             int32_t*    state,
   1.140 +                             unsigned char  *in,
   1.141 +                             uint16_t    *out,
   1.142 +                             uint32_t     inbuflen,
   1.143 +                             uint32_t*    inscanlen
   1.144 +                             );
   1.145 +
   1.146 +int uCheckAndScan4BytesGB18030(
   1.147 +                               int32_t*    state,
   1.148 +                               unsigned char  *in,
   1.149 +                               uint16_t    *out,
   1.150 +                               uint32_t     inbuflen,
   1.151 +                               uint32_t*    inscanlen
   1.152 +                               );
   1.153 +
   1.154 +int uScanAlways2Byte(
   1.155 +                     unsigned char*  in,
   1.156 +                     uint16_t*    out
   1.157 +                     );
   1.158 +int uScanAlways2ByteShiftGR(
   1.159 +                            unsigned char*  in,
   1.160 +                            uint16_t*    out
   1.161 +                            );
   1.162 +int uScanAlways1Byte(
   1.163 +                     unsigned char*  in,
   1.164 +                     uint16_t*    out
   1.165 +                     );
   1.166 +int uScanAlways1BytePrefix8E(
   1.167 +                             unsigned char*  in,
   1.168 +                             uint16_t*    out
   1.169 +                             );
   1.170 +/*=================================================================================
   1.171 +
   1.172 +=================================================================================*/
   1.173 +const uScannerFunc m_scanner[uNumOfCharsetType] =
   1.174 +{
   1.175 +    uCheckAndScanAlways1Byte,
   1.176 +    uCheckAndScanAlways2Byte,
   1.177 +    uCheckAndScanAlways2ByteShiftGR,
   1.178 +    uCheckAndScan2ByteGRPrefix8F,
   1.179 +    uCheckAndScan2ByteGRPrefix8EA2,
   1.180 +    uCheckAndScan2ByteGRPrefix8EA3,
   1.181 +    uCheckAndScan2ByteGRPrefix8EA4,
   1.182 +    uCheckAndScan2ByteGRPrefix8EA5,
   1.183 +    uCheckAndScan2ByteGRPrefix8EA6,
   1.184 +    uCheckAndScan2ByteGRPrefix8EA7,
   1.185 +    uCnSAlways8BytesDecomposedHangul,
   1.186 +    uCheckAndScanJohabHangul,
   1.187 +    uCheckAndScanJohabSymbol,
   1.188 +    uCheckAndScan4BytesGB18030,
   1.189 +    uCheckAndScanAlways2ByteGR128
   1.190 +};
   1.191 +
   1.192 +/*=================================================================================
   1.193 +
   1.194 +=================================================================================*/
   1.195 +
   1.196 +const uSubScannerFunc m_subscanner[uNumOfCharType] =
   1.197 +{
   1.198 +    uScanAlways1Byte,
   1.199 +    uScanAlways2Byte,
   1.200 +    uScanAlways2ByteShiftGR,
   1.201 +    uScanAlways1BytePrefix8E
   1.202 +};
   1.203 +/*=================================================================================
   1.204 +
   1.205 +=================================================================================*/
   1.206 +int uScan(
   1.207 +          uScanClassID scanClass,
   1.208 +          int32_t*    state,
   1.209 +          unsigned char  *in,
   1.210 +          uint16_t    *out,
   1.211 +          uint32_t     inbuflen,
   1.212 +          uint32_t*    inscanlen
   1.213 +          )
   1.214 +{
   1.215 +  return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen);
   1.216 +}
   1.217 +/*=================================================================================
   1.218 +
   1.219 +=================================================================================*/
   1.220 +int uScanAlways1Byte(
   1.221 +                     unsigned char*  in,
   1.222 +                     uint16_t*    out
   1.223 +                     )
   1.224 +{
   1.225 +  *out = (uint16_t) in[0];
   1.226 +  return 1;
   1.227 +}
   1.228 +
   1.229 +/*=================================================================================
   1.230 +
   1.231 +=================================================================================*/
   1.232 +int uScanAlways2Byte(
   1.233 +                     unsigned char*  in,
   1.234 +                     uint16_t*    out
   1.235 +                     )
   1.236 +{
   1.237 +  *out = (uint16_t) (( in[0] << 8) | (in[1]));
   1.238 +  return 1;
   1.239 +}
   1.240 +/*=================================================================================
   1.241 +
   1.242 +=================================================================================*/
   1.243 +int uScanAlways2ByteShiftGR(
   1.244 +                            unsigned char*  in,
   1.245 +                            uint16_t*    out
   1.246 +                            )
   1.247 +{
   1.248 +  *out = (uint16_t) ((( in[0] << 8) | (in[1])) &  0x7F7F);
   1.249 +  return 1;
   1.250 +}
   1.251 +
   1.252 +/*=================================================================================
   1.253 +
   1.254 +=================================================================================*/
   1.255 +int uScanAlways1BytePrefix8E(
   1.256 +                             unsigned char*  in,
   1.257 +                             uint16_t*    out
   1.258 +                             )
   1.259 +{
   1.260 +  *out = (uint16_t) in[1];
   1.261 +  return 1;
   1.262 +}
   1.263 +/*=================================================================================
   1.264 +
   1.265 +=================================================================================*/
   1.266 +int uCheckAndScanAlways1Byte(
   1.267 +                             int32_t*    state,
   1.268 +                             unsigned char  *in,
   1.269 +                             uint16_t    *out,
   1.270 +                             uint32_t     inbuflen,
   1.271 +                             uint32_t*    inscanlen
   1.272 +                             )
   1.273 +{
   1.274 +  /* Don't check inlen. The caller should ensure it is larger than 0 */
   1.275 +  *inscanlen = 1;
   1.276 +  *out = (uint16_t) in[0];
   1.277 +  
   1.278 +  return 1;
   1.279 +}
   1.280 +
   1.281 +/*=================================================================================
   1.282 +
   1.283 +=================================================================================*/
   1.284 +int uCheckAndScanAlways2Byte(
   1.285 +                             int32_t*    state,
   1.286 +                             unsigned char  *in,
   1.287 +                             uint16_t    *out,
   1.288 +                             uint32_t     inbuflen,
   1.289 +                             uint32_t*    inscanlen
   1.290 +                             )
   1.291 +{
   1.292 +  if(inbuflen < 2)
   1.293 +    return 0;
   1.294 +  else
   1.295 +  {
   1.296 +    *inscanlen = 2;
   1.297 +    *out = ((in[0] << 8) | ( in[1])) ;
   1.298 +    return 1;
   1.299 +  }
   1.300 +}
   1.301 +/*=================================================================================
   1.302 +
   1.303 +=================================================================================*/
   1.304 +int uCheckAndScanAlways2ByteShiftGR(
   1.305 +                                    int32_t*    state,
   1.306 +                                    unsigned char  *in,
   1.307 +                                    uint16_t    *out,
   1.308 +                                    uint32_t     inbuflen,
   1.309 +                                    uint32_t*    inscanlen
   1.310 +                                    )
   1.311 +{
   1.312 +  /*
   1.313 +   * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets
   1.314 +   * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets.
   1.315 +   * Only 2nd byte range needs to be checked because 
   1.316 +   * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp 
   1.317 +   */
   1.318 +  if(inbuflen < 2)    /* will lead to NS_OK_UDEC_MOREINPUT */
   1.319 +    return 0;
   1.320 +  else if (! CHK_GR94(in[1]))  
   1.321 +  {
   1.322 +    *inscanlen = 2; 
   1.323 +    *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   1.324 +    return 1;
   1.325 +  }
   1.326 +  else
   1.327 +  {
   1.328 +    *inscanlen = 2;
   1.329 +    *out = (((in[0] << 8) | ( in[1]))  & 0x7F7F);
   1.330 +    return 1;
   1.331 +  }
   1.332 +}
   1.333 +/*=================================================================================
   1.334 +
   1.335 +=================================================================================*/
   1.336 +int uCheckAndScanAlways2ByteGR128(
   1.337 +                                  int32_t*    state,
   1.338 +                                  unsigned char  *in,
   1.339 +                                  uint16_t    *out,
   1.340 +                                  uint32_t     inbuflen,
   1.341 +                                  uint32_t*    inscanlen
   1.342 +                                  )
   1.343 +{
   1.344 +  /*
   1.345 +   * The first byte should be in  [0xa1,0xfe] 
   1.346 +   * and the second byte in [0x41,0xfe]
   1.347 +   * Used by CP949 -> Unicode converter.
   1.348 +   * Only 2nd byte range needs to be checked because 
   1.349 +   * 1st byte is checked before calling this in nsUnicodeDecoderHelper.cpp 
   1.350 +   */
   1.351 +  if(inbuflen < 2)    /* will lead to NS_OK_UDEC_MOREINPUT */
   1.352 +    return 0;
   1.353 +  else if (in[1] < 0x41)     /* 2nd byte range check */
   1.354 +  {
   1.355 +    *inscanlen = 2; 
   1.356 +    *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   1.357 +    return 1;
   1.358 +  }
   1.359 +  else
   1.360 +  {
   1.361 +    *inscanlen = 2;
   1.362 +    *out = (in[0] << 8) |  in[1];
   1.363 +    return 1;
   1.364 +  }
   1.365 +}
   1.366 +/*=================================================================================
   1.367 +
   1.368 +=================================================================================*/
   1.369 +int uScanShift(
   1.370 +               uShiftInTable    *shift,
   1.371 +               int32_t*    state,
   1.372 +               unsigned char  *in,
   1.373 +               uint16_t    *out,
   1.374 +               uint32_t     inbuflen,
   1.375 +               uint32_t*    inscanlen
   1.376 +               )
   1.377 +{
   1.378 +  int16_t i;
   1.379 +  const uShiftInCell* cell = &(shift->shiftcell[0]);
   1.380 +  int16_t itemnum = shift->numOfItem;
   1.381 +  for(i=0;i<itemnum;i++)
   1.382 +  {
   1.383 +    if( ( in[0] >=  cell[i].shiftin_Min) &&
   1.384 +      ( in[0] <=  cell[i].shiftin_Max))
   1.385 +    {
   1.386 +      if(inbuflen < cell[i].reserveLen)
   1.387 +        return 0;
   1.388 +      else
   1.389 +      {
   1.390 +        *inscanlen = cell[i].reserveLen;
   1.391 +        return (uSubScanner(cell[i].classID,in,out));
   1.392 +      }
   1.393 +    }
   1.394 +  }
   1.395 +  return 0;
   1.396 +}
   1.397 +/*=================================================================================
   1.398 +
   1.399 +=================================================================================*/
   1.400 +int uCheckAndScan2ByteGRPrefix8F(
   1.401 +                                 int32_t*    state,
   1.402 +                                 unsigned char  *in,
   1.403 +                                 uint16_t    *out,
   1.404 +                                 uint32_t     inbuflen,
   1.405 +                                 uint32_t*    inscanlen
   1.406 +                                 )
   1.407 +{
   1.408 +  if((inbuflen < 3) ||(in[0] != 0x8F)) 
   1.409 +    return 0;
   1.410 +  else if (! CHK_GR94(in[1]))  /* 2nd byte range check */
   1.411 +  {
   1.412 +    *inscanlen = 2; 
   1.413 +    *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   1.414 +    return 1;
   1.415 +  }
   1.416 +  else if (! CHK_GR94(in[2]))  /* 3rd byte range check */
   1.417 +  {
   1.418 +    *inscanlen = 3; 
   1.419 +    *out = 0xFF;  /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */
   1.420 +    return 1;
   1.421 +  }
   1.422 +  else
   1.423 +  {
   1.424 +    *inscanlen = 3;
   1.425 +    *out = (((in[1] << 8) | ( in[2]))  & 0x7F7F);
   1.426 +    return 1;
   1.427 +  }
   1.428 +}
   1.429 +/*=================================================================================
   1.430 +
   1.431 +=================================================================================*/
   1.432 +
   1.433 +/* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX()
   1.434 + * where X is 2,3,4,5,6,7 
   1.435 + */
   1.436 +#define CNS_8EAX_4BYTE(PREFIX)                    \
   1.437 +  if((inbuflen < 4) || (in[0] != 0x8E))           \
   1.438 +    return 0;                                     \
   1.439 +  else if((in[1] != (PREFIX)))                    \
   1.440 +  {                                               \
   1.441 +    *inscanlen = 2;                               \
   1.442 +    *out = 0xFF;                                  \
   1.443 +    return 1;                                     \
   1.444 +  }                                               \
   1.445 +  else if(! CHK_GR94(in[2]))                      \
   1.446 +  {                                               \
   1.447 +    *inscanlen = 3;                               \
   1.448 +    *out = 0xFF;                                  \
   1.449 +    return 1;                                     \
   1.450 +  }                                               \
   1.451 +  else if(! CHK_GR94(in[3]))                      \
   1.452 +  {                                               \
   1.453 +    *inscanlen = 4;                               \
   1.454 +    *out = 0xFF;                                  \
   1.455 +    return 1;                                     \
   1.456 +  }                                               \
   1.457 +  else                                            \
   1.458 +  {                                               \
   1.459 +    *inscanlen = 4;                               \
   1.460 +    *out = (((in[2] << 8) | ( in[3]))  & 0x7F7F); \
   1.461 +    return 1;                                     \
   1.462 +  }    
   1.463 +
   1.464 +int uCheckAndScan2ByteGRPrefix8EA2(
   1.465 +                                   int32_t*    state,
   1.466 +                                   unsigned char  *in,
   1.467 +                                   uint16_t    *out,
   1.468 +                                   uint32_t     inbuflen,
   1.469 +                                   uint32_t*    inscanlen
   1.470 +                                   )
   1.471 +{
   1.472 +  CNS_8EAX_4BYTE(0xA2)
   1.473 +}
   1.474 +
   1.475 +/*=================================================================================
   1.476 +
   1.477 +=================================================================================*/
   1.478 +int uCheckAndScan2ByteGRPrefix8EA3(
   1.479 +                                   int32_t*    state,
   1.480 +                                   unsigned char  *in,
   1.481 +                                   uint16_t    *out,
   1.482 +                                   uint32_t     inbuflen,
   1.483 +                                   uint32_t*    inscanlen
   1.484 +                                   )
   1.485 +{
   1.486 +  CNS_8EAX_4BYTE(0xA3)
   1.487 +}
   1.488 +/*=================================================================================
   1.489 +
   1.490 +=================================================================================*/
   1.491 +int uCheckAndScan2ByteGRPrefix8EA4(
   1.492 +                                   int32_t*    state,
   1.493 +                                   unsigned char  *in,
   1.494 +                                   uint16_t    *out,
   1.495 +                                   uint32_t     inbuflen,
   1.496 +                                   uint32_t*    inscanlen
   1.497 +                                   )
   1.498 +{
   1.499 +  CNS_8EAX_4BYTE(0xA4)
   1.500 +}
   1.501 +/*=================================================================================
   1.502 +
   1.503 +=================================================================================*/
   1.504 +int uCheckAndScan2ByteGRPrefix8EA5(
   1.505 +                                   int32_t*    state,
   1.506 +                                   unsigned char  *in,
   1.507 +                                   uint16_t    *out,
   1.508 +                                   uint32_t     inbuflen,
   1.509 +                                   uint32_t*    inscanlen
   1.510 +                                   )
   1.511 +{
   1.512 +  CNS_8EAX_4BYTE(0xA5)
   1.513 +}
   1.514 +/*=================================================================================
   1.515 +
   1.516 +=================================================================================*/
   1.517 +int uCheckAndScan2ByteGRPrefix8EA6(
   1.518 +                                   int32_t*    state,
   1.519 +                                   unsigned char  *in,
   1.520 +                                   uint16_t    *out,
   1.521 +                                   uint32_t     inbuflen,
   1.522 +                                   uint32_t*    inscanlen
   1.523 +                                   )
   1.524 +{
   1.525 +  CNS_8EAX_4BYTE(0xA6)
   1.526 +}
   1.527 +/*=================================================================================
   1.528 +
   1.529 +=================================================================================*/
   1.530 +int uCheckAndScan2ByteGRPrefix8EA7(
   1.531 +                                   int32_t*    state,
   1.532 +                                   unsigned char  *in,
   1.533 +                                   uint16_t    *out,
   1.534 +                                   uint32_t     inbuflen,
   1.535 +                                   uint32_t*    inscanlen
   1.536 +                                   )
   1.537 +{
   1.538 +  CNS_8EAX_4BYTE(0xA7)
   1.539 +}
   1.540 +/*=================================================================================
   1.541 +
   1.542 +=================================================================================*/
   1.543 +#define SBase 0xAC00
   1.544 +#define SCount 11172
   1.545 +#define LCount 19
   1.546 +#define VCount 21
   1.547 +#define TCount 28
   1.548 +#define NCount (VCount * TCount)
   1.549 +
   1.550 +int uCnSAlways8BytesDecomposedHangul(
   1.551 +                                     int32_t*    state,
   1.552 +                                     unsigned char  *in,
   1.553 +                                     uint16_t    *out,
   1.554 +                                     uint32_t     inbuflen,
   1.555 +                                     uint32_t*    inscanlen
   1.556 +                                     )
   1.557 +{
   1.558 +  
   1.559 +  uint16_t LIndex, VIndex, TIndex;
   1.560 +  /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */
   1.561 +  if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) ||
   1.562 +    (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6]))
   1.563 +    return 0;
   1.564 +  
   1.565 +  /* Compute LIndex  */
   1.566 +  if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant */
   1.567 +    return 0;
   1.568 +  } 
   1.569 +  else {
   1.570 +    static const uint8_t lMap[] = {
   1.571 +      /*        A1   A2   A3   A4   A5   A6   A7  */
   1.572 +      0,   1,0xff,   2,0xff,0xff,   3,
   1.573 +        /*   A8   A9   AA   AB   AC   AD   AE   AF  */
   1.574 +        4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
   1.575 +        /*   B0   B1   B2   B3   B4   B5   B6   B7  */
   1.576 +        0xff,   6,   7,   8,0xff,   9,  10,  11,
   1.577 +        /*   B8   B9   BA   BB   BC   BD   BE       */
   1.578 +        12,  13,  14,  15,  16,  17,  18     
   1.579 +    };
   1.580 +    
   1.581 +    LIndex = lMap[in[3] - 0xa1];
   1.582 +    if(0xff == (0xff & LIndex))
   1.583 +      return 0;
   1.584 +  }
   1.585 +  
   1.586 +  /* Compute VIndex  */
   1.587 +  if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel */
   1.588 +    return 0;
   1.589 +  } 
   1.590 +  else {
   1.591 +    VIndex = in[5] - 0xbf;
   1.592 +  }
   1.593 +  
   1.594 +  /* Compute TIndex  */
   1.595 +  if(0xd4 == in[7])  
   1.596 +  {
   1.597 +    TIndex = 0;
   1.598 +  } 
   1.599 +  else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant */
   1.600 +    return 0;
   1.601 +  } 
   1.602 +  else {
   1.603 +    static const uint8_t tMap[] = {
   1.604 +      /*        A1   A2   A3   A4   A5   A6   A7  */
   1.605 +      1,   2,   3,   4,   5,   6,   7,
   1.606 +        /*   A8   A9   AA   AB   AC   AD   AE   AF  */
   1.607 +        0xff,   8,   9,  10,  11,  12,  13,  14,
   1.608 +        /*   B0   B1   B2   B3   B4   B5   B6   B7  */
   1.609 +        15,  16,  17,0xff,  18,  19,  20,  21,
   1.610 +        /*   B8   B9   BA   BB   BC   BD   BE       */
   1.611 +        22,0xff,  23,  24,  25,  26,  27     
   1.612 +    };
   1.613 +    TIndex = tMap[in[7] - 0xa1];
   1.614 +    if(0xff == (0xff & TIndex))
   1.615 +      return 0;
   1.616 +  }
   1.617 +  
   1.618 +  *inscanlen = 8;
   1.619 +  /* the following line is from Unicode 2.0 page 3-13 item 5 */
   1.620 +  *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
   1.621 +  
   1.622 +  return 1;
   1.623 +}
   1.624 +/*=================================================================================
   1.625 +
   1.626 +=================================================================================*/
   1.627 +
   1.628 +int uCheckAndScanJohabHangul(
   1.629 +                             int32_t*    state,
   1.630 +                             unsigned char  *in,
   1.631 +                             uint16_t    *out,
   1.632 +                             uint32_t     inbuflen,
   1.633 +                             uint32_t*    inscanlen
   1.634 +                             )
   1.635 +{
   1.636 +/* since we don't have code to convert Johab to Unicode right now     *
   1.637 +  * make this part of code #if 0 to save space until we fully test it */
   1.638 +  if(inbuflen < 2)
   1.639 +    return 0;
   1.640 +  else {
   1.641 +  /*
   1.642 +  * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183
   1.643 +  * of "CJKV Information Processing" for details
   1.644 +    */
   1.645 +    static const uint8_t lMap[32]={ /* totaly 19  */
   1.646 +      0xff,0xff,0,   1,   2,   3,   4,   5,    /* 0-7    */
   1.647 +        6,   7,   8,   9,   10,  11,  12,  13,   /* 8-15   */
   1.648 +        14,  15,  16,  17,  18,  0xff,0xff,0xff, /* 16-23  */
   1.649 +        0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff  /* 24-31  */
   1.650 +    };
   1.651 +    static const uint8_t vMap[32]={ /* totaly 21 */
   1.652 +      0xff,0xff,0xff,0,   1,   2,   3,   4,    /* 0-7   */
   1.653 +        0xff,0xff,5,   6,   7,   8,   9,   10,   /* 8-15  */
   1.654 +        0xff,0xff,11,  12,  13,  14,  15,  16,   /* 16-23 */
   1.655 +        0xff,0xff,17,  18,  19,  20,  0xff,0xff  /* 24-31 */
   1.656 +    };
   1.657 +    static const uint8_t tMap[32]={ /* totaly 29 */
   1.658 +      0xff,0,   1,   2,   3,   4,   5,   6,    /* 0-7   */
   1.659 +        7,   8,   9,   10,  11,  12,  13,  14,   /* 8-15  */
   1.660 +        15,  16,  0xff,17,  18,  19,  20,  21,   /* 16-23 */
   1.661 +        22,  23,  24,  25,  26,  27,  0xff,0xff  /* 24-31 */
   1.662 +    };
   1.663 +    uint16_t ch = (in[0] << 8) | in[1];
   1.664 +    uint16_t LIndex, VIndex, TIndex;
   1.665 +    if(0 == (0x8000 & ch))
   1.666 +      return 0;
   1.667 +    LIndex=lMap[(ch>>10)& 0x1F];
   1.668 +    VIndex=vMap[(ch>>5) & 0x1F];
   1.669 +    TIndex=tMap[(ch>>0) & 0x1F];
   1.670 +    if((0xff==(LIndex)) || 
   1.671 +      (0xff==(VIndex)) || 
   1.672 +      (0xff==(TIndex)))
   1.673 +      return 0;
   1.674 +    /* the following line is from Unicode 2.0 page 3-13 item 5 */
   1.675 +    *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;
   1.676 +    *inscanlen = 2;
   1.677 +    return 1;
   1.678 +  }
   1.679 +}
   1.680 +int uCheckAndScanJohabSymbol(
   1.681 +                             int32_t*    state,
   1.682 +                             unsigned char  *in,
   1.683 +                             uint16_t    *out,
   1.684 +                             uint32_t     inbuflen,
   1.685 +                             uint32_t*    inscanlen
   1.686 +                             )
   1.687 +{
   1.688 +  if(inbuflen < 2)
   1.689 +    return 0;
   1.690 +  else {
   1.691 +  /*
   1.692 +  * The following code are based on the Perl code lised under
   1.693 +  * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of
   1.694 +  * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>
   1.695 +  *
   1.696 +  * sub johab2ks ($) { # Convert Johab to ISO-2022-KR
   1.697 +  *   my @johab = unpack("C*", $_[0]);
   1.698 +  *   my ($offset, $d8_off) = (0,0);
   1.699 +  *   my @out = ();
   1.700 +  *   while(($hi, $lo) = splice($johab, 0, 2)) {
   1.701 +  *     $offset = 1 if ($hi > 223 and $hi < 250);
   1.702 +  *     $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
   1.703 +  *     push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
   1.704 +  *            ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
   1.705 +  *            $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
   1.706 +  *   }
   1.707 +  *   return pack ("C*", @out);
   1.708 +  * }
   1.709 +  * additional comments from Ken Lunde
   1.710 +  * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
   1.711 +  * has three possible return values:
   1.712 +  * 0  if $hi is not equal to 216
   1.713 +  * 94 if $hi is euqal to 216 and if $lo is greater than 160
   1.714 +  * 42 if $hi is euqal to 216 and if $lo is not greater than 160
   1.715 +    */ 
   1.716 +    unsigned char hi = in[0];
   1.717 +    unsigned char lo = in[1];
   1.718 +    uint16_t offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0;
   1.719 +    uint16_t d8_off = 0;
   1.720 +    if(216 == hi) {
   1.721 +      if( lo > 160)
   1.722 +        d8_off = 94;
   1.723 +      else
   1.724 +        d8_off = 42;
   1.725 +    }
   1.726 +    
   1.727 +    *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) -
   1.728 +      (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) |
   1.729 +      (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : 
   1.730 +    128));
   1.731 +    *inscanlen = 2;
   1.732 +    return 1;
   1.733 +  }
   1.734 +}
   1.735 +int uCheckAndScan4BytesGB18030(
   1.736 +                               int32_t*    state,
   1.737 +                               unsigned char  *in,
   1.738 +                               uint16_t    *out,
   1.739 +                               uint32_t     inbuflen,
   1.740 +                               uint32_t*    inscanlen
   1.741 +                               )
   1.742 +{
   1.743 +  uint32_t  data;
   1.744 +  if(inbuflen < 4) 
   1.745 +    return 0;
   1.746 +  
   1.747 +  if((in[0] < 0x81 ) || (0xfe < in[0])) 
   1.748 +    return 0;
   1.749 +  if((in[1] < 0x30 ) || (0x39 < in[1])) 
   1.750 +    return 0;
   1.751 +  if((in[2] < 0x81 ) || (0xfe < in[2])) 
   1.752 +    return 0;
   1.753 +  if((in[3] < 0x30 ) || (0x39 < in[3])) 
   1.754 +    return 0;
   1.755 +  
   1.756 +  data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) + 
   1.757 +    (in[2] - 0x81)) * 10 ) + (in[3] - 0x30);
   1.758 +  
   1.759 +  *inscanlen = 4;
   1.760 +  *out = (data < 0x00010000) ? data : 0xFFFD;
   1.761 +  return 1;
   1.762 +}

mercurial