intl/uconv/util/ugen.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/util/ugen.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,712 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#include "unicpriv.h"
     1.9 +/*=================================================================================
    1.10 +
    1.11 +=================================================================================*/
    1.12 +typedef  int (*uSubGeneratorFunc) (uint16_t in, unsigned char* out);
    1.13 +/*=================================================================================
    1.14 +
    1.15 +=================================================================================*/
    1.16 +
    1.17 +typedef int (*uGeneratorFunc) (
    1.18 +                               int32_t*    state,
    1.19 +                               uint16_t    in,
    1.20 +                               unsigned char*  out,
    1.21 +                               uint32_t     outbuflen,
    1.22 +                               uint32_t*    outlen
    1.23 +                               );
    1.24 +
    1.25 +int uGenerate(
    1.26 +              uScanClassID scanClass,
    1.27 +              int32_t*    state,
    1.28 +              uint16_t    in,
    1.29 +              unsigned char*  out,
    1.30 +              uint32_t     outbuflen,
    1.31 +              uint32_t*    outlen
    1.32 +              );
    1.33 +
    1.34 +#define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out))
    1.35 +
    1.36 +int uCheckAndGenAlways1Byte(
    1.37 +                               int32_t*   state,
    1.38 +                               uint16_t   in,
    1.39 +                               unsigned char* out,
    1.40 +                               uint32_t    outbuflen,
    1.41 +                               uint32_t*   outlen
    1.42 +                               );
    1.43 +int uCheckAndGenAlways2Byte(
    1.44 +                            int32_t*   state,
    1.45 +                            uint16_t   in,
    1.46 +                            unsigned char* out,
    1.47 +                            uint32_t    outbuflen,
    1.48 +                            uint32_t*   outlen
    1.49 +                            );
    1.50 +int uCheckAndGenAlways2ByteShiftGR(
    1.51 +                                   int32_t*    state,
    1.52 +                                   uint16_t    in,
    1.53 +                                   unsigned char*  out,
    1.54 +                                   uint32_t     outbuflen,
    1.55 +                                   uint32_t*    outlen
    1.56 +                                   );
    1.57 +int uGenerateShift(
    1.58 +                   uShiftOutTable   *shift,
    1.59 +                   int32_t*   state,
    1.60 +                   uint16_t   in,
    1.61 +                   unsigned char* out,
    1.62 +                   uint32_t    outbuflen,
    1.63 +                   uint32_t*   outlen
    1.64 +                   );
    1.65 +int uCheckAndGen2ByteGRPrefix8F(
    1.66 +                                int32_t*   state,
    1.67 +                                uint16_t   in,
    1.68 +                                unsigned char* out,
    1.69 +                                uint32_t    outbuflen,
    1.70 +                                uint32_t*   outlen
    1.71 +                                );
    1.72 +int uCheckAndGen2ByteGRPrefix8EA2(
    1.73 +                                  int32_t*   state,
    1.74 +                                  uint16_t   in,
    1.75 +                                  unsigned char* out,
    1.76 +                                  uint32_t    outbuflen,
    1.77 +                                  uint32_t*   outlen
    1.78 +                                  );
    1.79 +
    1.80 +int uCheckAndGen2ByteGRPrefix8EA3(
    1.81 +                                  int32_t*   state,
    1.82 +                                  uint16_t   in,
    1.83 +                                  unsigned char* out,
    1.84 +                                  uint32_t    outbuflen,
    1.85 +                                  uint32_t*   outlen
    1.86 +                                  );
    1.87 +
    1.88 +int uCheckAndGen2ByteGRPrefix8EA4(
    1.89 +                                  int32_t*   state,
    1.90 +                                  uint16_t   in,
    1.91 +                                  unsigned char* out,
    1.92 +                                  uint32_t    outbuflen,
    1.93 +                                  uint32_t*   outlen
    1.94 +                                  );
    1.95 +
    1.96 +int uCheckAndGen2ByteGRPrefix8EA5(
    1.97 +                                  int32_t*   state,
    1.98 +                                  uint16_t   in,
    1.99 +                                  unsigned char* out,
   1.100 +                                  uint32_t    outbuflen,
   1.101 +                                  uint32_t*   outlen
   1.102 +                                  );
   1.103 +
   1.104 +int uCheckAndGen2ByteGRPrefix8EA6(
   1.105 +                                  int32_t*   state,
   1.106 +                                  uint16_t   in,
   1.107 +                                  unsigned char* out,
   1.108 +                                  uint32_t    outbuflen,
   1.109 +                                  uint32_t*   outlen
   1.110 +                                  );
   1.111 +
   1.112 +int uCheckAndGen2ByteGRPrefix8EA7(
   1.113 +                                  int32_t*   state,
   1.114 +                                  uint16_t   in,
   1.115 +                                  unsigned char* out,
   1.116 +                                  uint32_t    outbuflen,
   1.117 +                                  uint32_t*   outlen
   1.118 +                                  );
   1.119 +int uCnGAlways8BytesDecomposedHangul(
   1.120 +                                     int32_t*    state,
   1.121 +                                     uint16_t    in,
   1.122 +                                     unsigned char*  out,
   1.123 +                                     uint32_t     outbuflen,
   1.124 +                                     uint32_t*    outlen
   1.125 +                                     );
   1.126 +
   1.127 +int uCheckAndGenJohabHangul(
   1.128 +                            int32_t*   state,
   1.129 +                            uint16_t   in,
   1.130 +                            unsigned char* out,
   1.131 +                            uint32_t    outbuflen,
   1.132 +                            uint32_t*   outlen
   1.133 +                            );
   1.134 +
   1.135 +int uCheckAndGenJohabSymbol(
   1.136 +                            int32_t*   state,
   1.137 +                            uint16_t   in,
   1.138 +                            unsigned char* out,
   1.139 +                            uint32_t    outbuflen,
   1.140 +                            uint32_t*   outlen
   1.141 +                            );
   1.142 +
   1.143 +
   1.144 +int uCheckAndGen4BytesGB18030(
   1.145 +                              int32_t*   state,
   1.146 +                              uint16_t   in,
   1.147 +                              unsigned char* out,
   1.148 +                              uint32_t    outbuflen,
   1.149 +                              uint32_t*   outlen
   1.150 +                              );
   1.151 +
   1.152 +int uGenAlways2Byte(
   1.153 +                    uint16_t    in,
   1.154 +                    unsigned char* out
   1.155 +                    );
   1.156 +int uGenAlways2ByteShiftGR(
   1.157 +                           uint16_t     in,
   1.158 +                           unsigned char*  out
   1.159 +                           );
   1.160 +int uGenAlways1Byte(
   1.161 +                    uint16_t    in,
   1.162 +                    unsigned char* out
   1.163 +                    );
   1.164 +int uGenAlways1BytePrefix8E(
   1.165 +                            uint16_t    in,
   1.166 +                            unsigned char* out
   1.167 +                            );
   1.168 +/*=================================================================================
   1.169 +
   1.170 +=================================================================================*/
   1.171 +const uGeneratorFunc m_generator[uNumOfCharsetType] =
   1.172 +{
   1.173 +    uCheckAndGenAlways1Byte,
   1.174 +    uCheckAndGenAlways2Byte,
   1.175 +    uCheckAndGenAlways2ByteShiftGR,
   1.176 +    uCheckAndGen2ByteGRPrefix8F,
   1.177 +    uCheckAndGen2ByteGRPrefix8EA2,
   1.178 +    uCheckAndGen2ByteGRPrefix8EA3,
   1.179 +    uCheckAndGen2ByteGRPrefix8EA4,
   1.180 +    uCheckAndGen2ByteGRPrefix8EA5,
   1.181 +    uCheckAndGen2ByteGRPrefix8EA6,
   1.182 +    uCheckAndGen2ByteGRPrefix8EA7,
   1.183 +    uCnGAlways8BytesDecomposedHangul,
   1.184 +    uCheckAndGenJohabHangul,
   1.185 +    uCheckAndGenJohabSymbol,
   1.186 +    uCheckAndGen4BytesGB18030,
   1.187 +    uCheckAndGenAlways2Byte   /* place-holder for GR128 */
   1.188 +};
   1.189 +
   1.190 +/*=================================================================================
   1.191 +
   1.192 +=================================================================================*/
   1.193 +
   1.194 +const uSubGeneratorFunc m_subgenerator[uNumOfCharType] =
   1.195 +{
   1.196 +    uGenAlways1Byte,
   1.197 +    uGenAlways2Byte,
   1.198 +    uGenAlways2ByteShiftGR,
   1.199 +    uGenAlways1BytePrefix8E
   1.200 +};
   1.201 +/*=================================================================================
   1.202 +
   1.203 +=================================================================================*/
   1.204 +int uGenerate(
   1.205 +              uScanClassID scanClass,
   1.206 +              int32_t*    state,
   1.207 +              uint16_t    in,
   1.208 +              unsigned char*  out,
   1.209 +              uint32_t     outbuflen,
   1.210 +              uint32_t*    outlen
   1.211 +              )
   1.212 +{
   1.213 +    return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen);
   1.214 +}
   1.215 +/*=================================================================================
   1.216 +
   1.217 +=================================================================================*/
   1.218 +int uGenAlways1Byte(
   1.219 +                    uint16_t    in,
   1.220 +                    unsigned char* out
   1.221 +                    )
   1.222 +{
   1.223 +    out[0] = (unsigned char)in;
   1.224 +    return 1;
   1.225 +}
   1.226 +
   1.227 +/*=================================================================================
   1.228 +
   1.229 +=================================================================================*/
   1.230 +int uGenAlways2Byte(
   1.231 +                    uint16_t    in,
   1.232 +                    unsigned char* out
   1.233 +                    )
   1.234 +{
   1.235 +    out[0] = (unsigned char)((in >> 8) & 0xff);
   1.236 +    out[1] = (unsigned char)(in & 0xff);
   1.237 +    return 1;
   1.238 +}
   1.239 +/*=================================================================================
   1.240 +
   1.241 +=================================================================================*/
   1.242 +int uGenAlways2ByteShiftGR(
   1.243 +                           uint16_t     in,
   1.244 +                           unsigned char*  out
   1.245 +                           )
   1.246 +{
   1.247 +    out[0] = (unsigned char)(((in >> 8) & 0xff) | 0x80);
   1.248 +    out[1] = (unsigned char)((in & 0xff) | 0x80);
   1.249 +    return 1;
   1.250 +}
   1.251 +/*=================================================================================
   1.252 +
   1.253 +=================================================================================*/
   1.254 +int uGenAlways1BytePrefix8E(
   1.255 +                            uint16_t    in,
   1.256 +                            unsigned char* out
   1.257 +                            )
   1.258 +{
   1.259 +    out[0] = 0x8E;
   1.260 +    out[1] = (unsigned char)(in  & 0xff);
   1.261 +    return 1;
   1.262 +}
   1.263 +/*=================================================================================
   1.264 +
   1.265 +=================================================================================*/
   1.266 +int uCheckAndGenAlways1Byte(
   1.267 +                            int32_t*   state,
   1.268 +                            uint16_t   in,
   1.269 +                            unsigned char* out,
   1.270 +                            uint32_t    outbuflen,
   1.271 +                            uint32_t*   outlen
   1.272 +                            )
   1.273 +{
   1.274 +    /* Don't check inlen. The caller should ensure it is larger than 0 */
   1.275 +    /*  Oops, I don't agree. Code changed to check every time. [CATA] */
   1.276 +    if(outbuflen < 1)
   1.277 +        return 0;
   1.278 +    else
   1.279 +    {
   1.280 +        *outlen = 1;
   1.281 +        out[0] = in & 0xff;
   1.282 +        return 1;
   1.283 +    }
   1.284 +}
   1.285 +
   1.286 +/*=================================================================================
   1.287 +
   1.288 +=================================================================================*/
   1.289 +int uCheckAndGenAlways2Byte(
   1.290 +                            int32_t*   state,
   1.291 +                            uint16_t   in,
   1.292 +                            unsigned char* out,
   1.293 +                            uint32_t    outbuflen,
   1.294 +                            uint32_t*   outlen
   1.295 +                            )
   1.296 +{
   1.297 +    if(outbuflen < 2)
   1.298 +        return 0;
   1.299 +    else
   1.300 +    {
   1.301 +        *outlen = 2;
   1.302 +        out[0] = ((in >> 8 ) & 0xff);
   1.303 +        out[1] = in  & 0xff;
   1.304 +        return 1;
   1.305 +    }
   1.306 +}
   1.307 +/*=================================================================================
   1.308 +
   1.309 +=================================================================================*/
   1.310 +int uCheckAndGenAlways2ByteShiftGR(
   1.311 +                                   int32_t*    state,
   1.312 +                                   uint16_t    in,
   1.313 +                                   unsigned char*  out,
   1.314 +                                   uint32_t     outbuflen,
   1.315 +                                   uint32_t*    outlen
   1.316 +                                   )
   1.317 +{
   1.318 +    if(outbuflen < 2)
   1.319 +        return 0;
   1.320 +    else
   1.321 +    {
   1.322 +        *outlen = 2;
   1.323 +        out[0] = ((in >> 8 ) & 0xff) | 0x80;
   1.324 +        out[1] = (in  & 0xff)  | 0x80;
   1.325 +        return 1;
   1.326 +    }
   1.327 +}
   1.328 +/*=================================================================================
   1.329 +
   1.330 +=================================================================================*/
   1.331 +int uGenerateShift(
   1.332 +                   uShiftOutTable   *shift,
   1.333 +                   int32_t*   state,
   1.334 +                   uint16_t   in,
   1.335 +                   unsigned char* out,
   1.336 +                   uint32_t    outbuflen,
   1.337 +                   uint32_t*   outlen
   1.338 +                   )
   1.339 +{
   1.340 +    int16_t i;
   1.341 +    const uShiftOutCell* cell = &(shift->shiftcell[0]);
   1.342 +    int16_t itemnum = shift->numOfItem;
   1.343 +    unsigned char inH, inL;
   1.344 +    inH = (in >> 8) & 0xff;
   1.345 +    inL = (in & 0xff );
   1.346 +    for(i=0;i<itemnum;i++)
   1.347 +    {
   1.348 +        if( ( inL >=  cell[i].shiftout_MinLB) &&
   1.349 +            ( inL <=  cell[i].shiftout_MaxLB) &&
   1.350 +            ( inH >=  cell[i].shiftout_MinHB) &&
   1.351 +            ( inH <=  cell[i].shiftout_MaxHB) )
   1.352 +        {
   1.353 +            if(outbuflen < cell[i].reserveLen)
   1.354 +              {
   1.355 +                return 0;
   1.356 +              }
   1.357 +            else
   1.358 +            {
   1.359 +                *outlen = cell[i].reserveLen;
   1.360 +                return (uSubGenerator(cell[i].classID,in,out));
   1.361 +            }
   1.362 +        }
   1.363 +    }
   1.364 +    return 0;
   1.365 +}
   1.366 +/*=================================================================================
   1.367 +
   1.368 +=================================================================================*/
   1.369 +int uCheckAndGen2ByteGRPrefix8F(int32_t*   state,
   1.370 +                                uint16_t   in,
   1.371 +                                unsigned char* out,
   1.372 +                                uint32_t    outbuflen,
   1.373 +                                uint32_t*   outlen
   1.374 +                                )
   1.375 +{
   1.376 +    if(outbuflen < 3)
   1.377 +        return 0;
   1.378 +    else
   1.379 +    {
   1.380 +        *outlen = 3;
   1.381 +        out[0] = 0x8F;
   1.382 +        out[1] = ((in >> 8 ) & 0xff) | 0x80;
   1.383 +        out[2] = (in  & 0xff)  | 0x80;
   1.384 +        return 1;
   1.385 +    }
   1.386 +}
   1.387 +/*=================================================================================
   1.388 +
   1.389 +=================================================================================*/
   1.390 +int uCheckAndGen2ByteGRPrefix8EA2(int32_t*   state,
   1.391 +                                  uint16_t   in,
   1.392 +                                  unsigned char* out,
   1.393 +                                  uint32_t    outbuflen,
   1.394 +                                  uint32_t*   outlen
   1.395 +                                  )
   1.396 +{
   1.397 +    if(outbuflen < 4)
   1.398 +        return 0;
   1.399 +    else
   1.400 +    {
   1.401 +        *outlen = 4;
   1.402 +        out[0] = 0x8E;
   1.403 +        out[1] = 0xA2;
   1.404 +        out[2] = ((in >> 8 ) & 0xff) | 0x80;
   1.405 +        out[3] = (in  & 0xff)  | 0x80;
   1.406 +        return 1;
   1.407 +    }
   1.408 +}
   1.409 +
   1.410 +
   1.411 +/*=================================================================================
   1.412 +
   1.413 +=================================================================================*/
   1.414 +int uCheckAndGen2ByteGRPrefix8EA3(int32_t*   state,
   1.415 +                                  uint16_t   in,
   1.416 +                                  unsigned char* out,
   1.417 +                                  uint32_t    outbuflen,
   1.418 +                                  uint32_t*   outlen
   1.419 +                                  )
   1.420 +{
   1.421 +    if(outbuflen < 4)
   1.422 +        return 0;
   1.423 +    else
   1.424 +    {
   1.425 +        *outlen = 4;
   1.426 +        out[0] = 0x8E;
   1.427 +        out[1] = 0xA3;
   1.428 +        out[2] = ((in >> 8 ) & 0xff) | 0x80;
   1.429 +        out[3] = (in  & 0xff)  | 0x80;
   1.430 +        return 1;
   1.431 +    }
   1.432 +}
   1.433 +/*=================================================================================
   1.434 +
   1.435 +=================================================================================*/
   1.436 +int uCheckAndGen2ByteGRPrefix8EA4(int32_t*   state,
   1.437 +                                  uint16_t   in,
   1.438 +                                  unsigned char* out,
   1.439 +                                  uint32_t    outbuflen,
   1.440 +                                  uint32_t*   outlen
   1.441 +                                  )
   1.442 +{
   1.443 +    if(outbuflen < 4)
   1.444 +        return 0;
   1.445 +    else
   1.446 +    {
   1.447 +        *outlen = 4;
   1.448 +        out[0] = 0x8E;
   1.449 +        out[1] = 0xA4;
   1.450 +        out[2] = ((in >> 8 ) & 0xff) | 0x80;
   1.451 +        out[3] = (in  & 0xff)  | 0x80;
   1.452 +        return 1;
   1.453 +    }
   1.454 +}
   1.455 +/*=================================================================================
   1.456 +
   1.457 +=================================================================================*/
   1.458 +int uCheckAndGen2ByteGRPrefix8EA5(int32_t*   state,
   1.459 +                                  uint16_t   in,
   1.460 +                                  unsigned char* out,
   1.461 +                                  uint32_t    outbuflen,
   1.462 +                                  uint32_t*   outlen
   1.463 +                                  )
   1.464 +{
   1.465 +    if(outbuflen < 4)
   1.466 +        return 0;
   1.467 +    else
   1.468 +    {
   1.469 +        *outlen = 4;
   1.470 +        out[0] = 0x8E;
   1.471 +        out[1] = 0xA5;
   1.472 +        out[2] = ((in >> 8 ) & 0xff) | 0x80;
   1.473 +        out[3] = (in  & 0xff)  | 0x80;
   1.474 +        return 1;
   1.475 +    }
   1.476 +}
   1.477 +/*=================================================================================
   1.478 +
   1.479 +=================================================================================*/
   1.480 +int uCheckAndGen2ByteGRPrefix8EA6(int32_t*   state,
   1.481 +                                  uint16_t   in,
   1.482 +                                  unsigned char* out,
   1.483 +                                  uint32_t    outbuflen,
   1.484 +                                  uint32_t*   outlen
   1.485 +                                  )
   1.486 +{
   1.487 +    if(outbuflen < 4)
   1.488 +        return 0;
   1.489 +    else
   1.490 +    {
   1.491 +        *outlen = 4;
   1.492 +        out[0] = 0x8E;
   1.493 +        out[1] = 0xA6;
   1.494 +        out[2] = ((in >> 8 ) & 0xff) | 0x80;
   1.495 +        out[3] = (in  & 0xff)  | 0x80;
   1.496 +        return 1;
   1.497 +    }
   1.498 +}
   1.499 +/*=================================================================================
   1.500 +
   1.501 +=================================================================================*/
   1.502 +int uCheckAndGen2ByteGRPrefix8EA7(int32_t*   state,
   1.503 +                                  uint16_t   in,
   1.504 +                                  unsigned char* out,
   1.505 +                                  uint32_t    outbuflen,
   1.506 +                                  uint32_t*   outlen
   1.507 +                                  )
   1.508 +{
   1.509 +    if(outbuflen < 4)
   1.510 +        return 0;
   1.511 +    else
   1.512 +    {
   1.513 +        *outlen = 4;
   1.514 +        out[0] = 0x8E;
   1.515 +        out[1] = 0xA7;
   1.516 +        out[2] = ((in >> 8 ) & 0xff) | 0x80;
   1.517 +        out[3] = (in  & 0xff)  | 0x80;
   1.518 +        return 1;
   1.519 +    }
   1.520 +}
   1.521 +/*=================================================================================
   1.522 +
   1.523 +=================================================================================*/
   1.524 +#define SBase 0xAC00
   1.525 +#define LCount 19
   1.526 +#define VCount 21
   1.527 +#define TCount 28
   1.528 +#define NCount (VCount * TCount)
   1.529 +/*=================================================================================
   1.530 +
   1.531 +=================================================================================*/
   1.532 +int uCnGAlways8BytesDecomposedHangul(
   1.533 +                                     int32_t*    state,
   1.534 +                                     uint16_t    in,
   1.535 +                                     unsigned char*  out,
   1.536 +                                     uint32_t     outbuflen,
   1.537 +                                     uint32_t*    outlen
   1.538 +                                     )
   1.539 +{
   1.540 +    static const uint8_t lMap[LCount] = {
   1.541 +        0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
   1.542 +            0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
   1.543 +    };
   1.544 +    
   1.545 +    static const uint8_t tMap[TCount] = {
   1.546 +        0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa, 
   1.547 +            0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5, 
   1.548 +            0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
   1.549 +    };
   1.550 +
   1.551 +    uint16_t SIndex, LIndex, VIndex, TIndex;
   1.552 +
   1.553 +    if(outbuflen < 8)
   1.554 +        return 0;
   1.555 +
   1.556 +    /* the following line are copy from Unicode 2.0 page 3-13 */
   1.557 +    /* item 1 of Hangul Syllabel Decomposition */
   1.558 +    SIndex =  in - SBase;
   1.559 +    
   1.560 +    /* the following lines are copy from Unicode 2.0 page 3-14 */
   1.561 +    /* item 2 of Hangul Syllabel Decomposition w/ modification */
   1.562 +    LIndex = SIndex / NCount;
   1.563 +    VIndex = (SIndex % NCount) / TCount;
   1.564 +    TIndex = SIndex % TCount;
   1.565 +    
   1.566 +    /* 
   1.567 +     * A Hangul syllable not enumerated in KS X 1001 is represented
   1.568 +     * by a sequence of 8 bytes beginning with Hangul-filler
   1.569 +     * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three 
   1.570 +     * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making 
   1.571 +     * up the syllable.  ref. KS X 1001:1998 Annex 3
   1.572 +     */
   1.573 +    *outlen = 8;
   1.574 +    out[0] = out[2] = out[4] = out[6] = 0xa4;
   1.575 +    out[1] = 0xd4;
   1.576 +    out[3] = lMap[LIndex] ;
   1.577 +    out[5] = (VIndex + 0xbf);
   1.578 +    out[7] = tMap[TIndex];
   1.579 +
   1.580 +    return 1;
   1.581 +}
   1.582 +
   1.583 +int uCheckAndGenJohabHangul(
   1.584 +                            int32_t*   state,
   1.585 +                            uint16_t   in,
   1.586 +                            unsigned char* out,
   1.587 +                            uint32_t    outbuflen,
   1.588 +                            uint32_t*   outlen
   1.589 +                            )
   1.590 +{
   1.591 +    if(outbuflen < 2)
   1.592 +        return 0;
   1.593 +    else
   1.594 +    {
   1.595 +    /*
   1.596 +    See Table 4-45 (page 183) of CJKV Information Processing
   1.597 +    for detail explanation of the following table.
   1.598 +        */
   1.599 +        /*
   1.600 +        static const uint8_t lMap[LCount] = {
   1.601 +        2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
   1.602 +        };
   1.603 +        Therefore lMap[i] == i+2;
   1.604 +        */
   1.605 +        
   1.606 +        static const uint8_t vMap[VCount] = {
   1.607 +            /* no 0,1,2 */
   1.608 +            3,4,5,6,7,            /* no 8,9   */
   1.609 +                10,11,12,13,14,15,    /* no 16,17 */
   1.610 +                18,19,20,21,22,23,    /* no 24,25 */
   1.611 +                26,27,28,29
   1.612 +        };
   1.613 +        static const uint8_t tMap[TCount] = {
   1.614 +            1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
   1.615 +                19,20,21,22,23,24,25,26,27,28,29
   1.616 +        };
   1.617 +        uint16_t SIndex, LIndex, VIndex, TIndex, ch;
   1.618 +        /* the following line are copy from Unicode 2.0 page 3-13 */
   1.619 +        /* item 1 of Hangul Syllabel Decomposition */
   1.620 +        SIndex =  in - SBase;
   1.621 +        
   1.622 +        /* the following lines are copy from Unicode 2.0 page 3-14 */
   1.623 +        /* item 2 of Hangul Syllabel Decomposition w/ modification */
   1.624 +        LIndex = SIndex / NCount;
   1.625 +        VIndex = (SIndex % NCount) / TCount;
   1.626 +        TIndex = SIndex % TCount;
   1.627 +        
   1.628 +        *outlen = 2;
   1.629 +        ch = 0x8000 | 
   1.630 +            ((LIndex+2)<<10) | 
   1.631 +            (vMap[VIndex]<<5)| 
   1.632 +            tMap[TIndex];
   1.633 +        out[0] = (ch >> 8);
   1.634 +        out[1] = ch & 0x00FF;
   1.635 +#if 0
   1.636 +        printf("Johab Hangul %x %x in=%x L=%d V=%d T=%d\n", out[0], out[1], in, LIndex, VIndex, TIndex); 
   1.637 +#endif 
   1.638 +        return 1;
   1.639 +    }
   1.640 +}
   1.641 +int uCheckAndGenJohabSymbol(
   1.642 +                            int32_t*   state,
   1.643 +                            uint16_t   in,
   1.644 +                            unsigned char* out,
   1.645 +                            uint32_t    outbuflen,
   1.646 +                            uint32_t*   outlen
   1.647 +                            )
   1.648 +{
   1.649 +    if(outbuflen < 2)
   1.650 +        return 0;
   1.651 +    else
   1.652 +    {
   1.653 +    /* The following code are based on the Perl code listed under
   1.654 +    * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
   1.655 +    * in the book "CJKV Information Processing" by 
   1.656 +    * Ken Lunde <lunde@adobe.com>
   1.657 +    *
   1.658 +    * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
   1.659 +    *  my @euc = unpack("C*", $_[0]);
   1.660 +    *  my ($fe_off, $hi_off, $lo_off) = (0,0,1);
   1.661 +    *  my @out = ();
   1.662 +    *  while(($hi, $lo) = splice(@euc, 0, 2)) {
   1.663 +    *    $hi &= 127; $lo &= 127;
   1.664 +    *    $fe_off = 21 if $hi == 73;
   1.665 +    *    $fe_off = 34 if $hi == 126;
   1.666 +    *    ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
   1.667 +    *    push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
   1.668 +    *      $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));    
   1.669 +    *  }
   1.670 +    *  return pack("C*", @out);
   1.671 +        */
   1.672 +        
   1.673 +        unsigned char fe_off = 0;
   1.674 +        unsigned char hi_off = 0;
   1.675 +        unsigned char lo_off = 1;
   1.676 +        unsigned char hi = (in >> 8) & 0x7F;
   1.677 +        unsigned char lo = in & 0x7F;
   1.678 +        if(73 == hi)
   1.679 +            fe_off = 21;
   1.680 +        if(126 == hi)
   1.681 +            fe_off = 34;
   1.682 +        if( (hi < 74) || ( hi > 125) )
   1.683 +        {
   1.684 +            hi_off = 1;
   1.685 +            lo_off = 0;
   1.686 +        }
   1.687 +        *outlen = 2;
   1.688 +        out[0] =  ((hi+hi_off) >> 1) + ((hi<74) ? 200 : 187 ) - fe_off;
   1.689 +        out[1] =  lo + (((hi+lo_off) & 1) ? ((lo > 110) ? 34 : 16) : 
   1.690 +        128);
   1.691 +#if 0
   1.692 +        printf("Johab Symbol %x %x in=%x\n", out[0], out[1], in); 
   1.693 +#endif
   1.694 +        return 1;
   1.695 +    }
   1.696 +}
   1.697 +int uCheckAndGen4BytesGB18030(
   1.698 +                              int32_t*   state,
   1.699 +                              uint16_t   in,
   1.700 +                              unsigned char* out,
   1.701 +                              uint32_t    outbuflen,
   1.702 +                              uint32_t*   outlen
   1.703 +                              )
   1.704 +{
   1.705 +    if(outbuflen < 4)
   1.706 +        return 0;
   1.707 +    out[0] = (in / (10*126*10)) + 0x81;
   1.708 +    in %= (10*126*10);
   1.709 +    out[1] = (in / (10*126)) + 0x30;
   1.710 +    in %= (10*126);
   1.711 +    out[2] = (in / (10)) + 0x81;
   1.712 +    out[3] = (in % 10) + 0x30;
   1.713 +    *outlen = 4;
   1.714 +    return 1;
   1.715 +}

mercurial