intl/unicharutil/tools/ucgendat.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/unicharutil/tools/ucgendat.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1457 @@
     1.4 +/*
     1.5 + * Copyright 1996, 1997, 1998 Computing Research Labs,
     1.6 + * New Mexico State University
     1.7 + *
     1.8 + * Permission is hereby granted, free of charge, to any person obtaining a
     1.9 + * copy of this software and associated documentation files (the "Software"),
    1.10 + * to deal in the Software without restriction, including without limitation
    1.11 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    1.12 + * and/or sell copies of the Software, and to permit persons to whom the
    1.13 + * Software is furnished to do so, subject to the following conditions:
    1.14 + *
    1.15 + * The above copyright notice and this permission notice shall be included in
    1.16 + * all copies or substantial portions of the Software.
    1.17 + *
    1.18 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    1.19 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    1.20 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    1.21 + * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
    1.22 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
    1.23 + * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
    1.24 + * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    1.25 + */
    1.26 +#ifndef lint
    1.27 +#ifdef __GNUC__
    1.28 +static char rcsid[] __attribute__ ((unused)) = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
    1.29 +#else
    1.30 +static char rcsid[] = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
    1.31 +#endif
    1.32 +#endif
    1.33 +
    1.34 +#include <stdio.h>
    1.35 +#include <stdlib.h>
    1.36 +#include <string.h>
    1.37 +#ifndef WIN32
    1.38 +#include <unistd.h>
    1.39 +#endif
    1.40 +
    1.41 +#define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
    1.42 +                      ((cc) >= 'A' && (cc) <= 'F') ||\
    1.43 +                      ((cc) >= 'a' && (cc) <= 'f'))
    1.44 +
    1.45 +/*
    1.46 + * A header written to the output file with the byte-order-mark and the number
    1.47 + * of property nodes.
    1.48 + */
    1.49 +static unsigned short hdr[2] = {0xfeff, 0};
    1.50 +
    1.51 +#define NUMPROPS 49
    1.52 +#define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
    1.53 +
    1.54 +typedef struct {
    1.55 +    char *name;
    1.56 +    int len;
    1.57 +} _prop_t;
    1.58 +
    1.59 +/*
    1.60 + * List of properties expected to be found in the Unicode Character Database
    1.61 + * including some implementation specific properties.
    1.62 + *
    1.63 + * The implementation specific properties are:
    1.64 + * Cm = Composed (can be decomposed)
    1.65 + * Nb = Non-breaking
    1.66 + * Sy = Symmetric (has left and right forms)
    1.67 + * Hd = Hex digit
    1.68 + * Qm = Quote marks
    1.69 + * Mr = Mirroring
    1.70 + * Ss = Space, other
    1.71 + * Cp = Defined character
    1.72 + */
    1.73 +static _prop_t props[NUMPROPS] = {
    1.74 +    {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
    1.75 +    {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
    1.76 +    {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
    1.77 +    {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
    1.78 +    {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L",  1}, {"R",  1},
    1.79 +    {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B",  1},
    1.80 +    {"S",  1}, {"WS", 2}, {"ON", 2},
    1.81 +    {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
    1.82 +    {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}
    1.83 +};
    1.84 +
    1.85 +typedef struct {
    1.86 +    unsigned long *ranges;
    1.87 +    unsigned short used;
    1.88 +    unsigned short size;
    1.89 +} _ranges_t;
    1.90 +
    1.91 +static _ranges_t proptbl[NUMPROPS];
    1.92 +
    1.93 +/*
    1.94 + * Make sure this array is sized to be on a 4-byte boundary at compile time.
    1.95 + */
    1.96 +static unsigned short propcnt[NEEDPROPS];
    1.97 +
    1.98 +/*
    1.99 + * Array used to collect a decomposition before adding it to the decomposition
   1.100 + * table.
   1.101 + */
   1.102 +static unsigned long dectmp[64];
   1.103 +static unsigned long dectmp_size;
   1.104 +
   1.105 +typedef struct {
   1.106 +    unsigned long code;
   1.107 +    unsigned short size;
   1.108 +    unsigned short used;
   1.109 +    unsigned long *decomp;
   1.110 +} _decomp_t;
   1.111 +
   1.112 +/*
   1.113 + * List of decomposition.  Created and expanded in order as the characters are
   1.114 + * encountered.
   1.115 + */
   1.116 +static _decomp_t *decomps;
   1.117 +static unsigned long decomps_used;
   1.118 +static unsigned long decomps_size;
   1.119 +
   1.120 +/*
   1.121 + * Types and lists for handling lists of case mappings.
   1.122 + */
   1.123 +typedef struct {
   1.124 +    unsigned long key;
   1.125 +    unsigned long other1;
   1.126 +    unsigned long other2;
   1.127 +} _case_t;
   1.128 +
   1.129 +static _case_t *upper;
   1.130 +static _case_t *lower;
   1.131 +static _case_t *title;
   1.132 +static unsigned long upper_used;
   1.133 +static unsigned long upper_size;
   1.134 +static unsigned long lower_used;
   1.135 +static unsigned long lower_size;
   1.136 +static unsigned long title_used;
   1.137 +static unsigned long title_size;
   1.138 +
   1.139 +/*
   1.140 + * Array used to collect case mappings before adding them to a list.
   1.141 + */
   1.142 +static unsigned long cases[3];
   1.143 +
   1.144 +/*
   1.145 + * An array to hold ranges for combining classes.
   1.146 + */
   1.147 +static unsigned long *ccl;
   1.148 +static unsigned long ccl_used;
   1.149 +static unsigned long ccl_size;
   1.150 +
   1.151 +/*
   1.152 + * Structures for handling numbers.
   1.153 + */
   1.154 +typedef struct {
   1.155 +    unsigned long code;
   1.156 +    unsigned long idx;
   1.157 +} _codeidx_t;
   1.158 +
   1.159 +typedef struct {
   1.160 +    short numerator;
   1.161 +    short denominator;
   1.162 +} _num_t;
   1.163 +
   1.164 +/*
   1.165 + * Arrays to hold the mapping of codes to numbers.
   1.166 + */
   1.167 +static _codeidx_t *ncodes;
   1.168 +static unsigned long ncodes_used;
   1.169 +static unsigned long ncodes_size;
   1.170 +
   1.171 +static _num_t *nums;
   1.172 +static unsigned long nums_used;
   1.173 +static unsigned long nums_size;
   1.174 +
   1.175 +/*
   1.176 + * Array for holding numbers.
   1.177 + */
   1.178 +static _num_t *nums;
   1.179 +static unsigned long nums_used;
   1.180 +static unsigned long nums_size;
   1.181 +
   1.182 +static void
   1.183 +#ifdef __STDC__
   1.184 +add_range(unsigned long start, unsigned long end, char *p1, char *p2)
   1.185 +#else
   1.186 +add_range(start, end, p1, p2)
   1.187 +unsigned long start, end;
   1.188 +char *p1, *p2;
   1.189 +#endif
   1.190 +{
   1.191 +    int i, j, k, len;
   1.192 +    _ranges_t *rlp;
   1.193 +    char *name;
   1.194 +
   1.195 +    for (k = 0; k < 2; k++) {
   1.196 +        if (k == 0) {
   1.197 +            name = p1;
   1.198 +            len = 2;
   1.199 +        } else {
   1.200 +            if (p2 == 0)
   1.201 +              break;
   1.202 +
   1.203 +            name = p2;
   1.204 +            len = 1;
   1.205 +        }
   1.206 +
   1.207 +        for (i = 0; i < NUMPROPS; i++) {
   1.208 +            if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
   1.209 +              break;
   1.210 +        }
   1.211 +
   1.212 +        if (i == NUMPROPS)
   1.213 +          continue;
   1.214 +
   1.215 +        rlp = &proptbl[i];
   1.216 +
   1.217 +        /*
   1.218 +         * Resize the range list if necessary.
   1.219 +         */
   1.220 +        if (rlp->used == rlp->size) {
   1.221 +            if (rlp->size == 0)
   1.222 +              rlp->ranges = (unsigned long *)
   1.223 +                  malloc(sizeof(unsigned long) << 3);
   1.224 +            else
   1.225 +              rlp->ranges = (unsigned long *)
   1.226 +                  realloc((char *) rlp->ranges,
   1.227 +                          sizeof(unsigned long) * (rlp->size + 8));
   1.228 +            rlp->size += 8;
   1.229 +        }
   1.230 +
   1.231 +        /*
   1.232 +         * If this is the first code for this property list, just add it
   1.233 +         * and return.
   1.234 +         */
   1.235 +        if (rlp->used == 0) {
   1.236 +            rlp->ranges[0] = start;
   1.237 +            rlp->ranges[1] = end;
   1.238 +            rlp->used += 2;
   1.239 +            continue;
   1.240 +        }
   1.241 +
   1.242 +        /*
   1.243 +         * Optimize the case of adding the range to the end.
   1.244 +         */
   1.245 +        j = rlp->used - 1;
   1.246 +        if (start > rlp->ranges[j]) {
   1.247 +            j = rlp->used;
   1.248 +            rlp->ranges[j++] = start;
   1.249 +            rlp->ranges[j++] = end;
   1.250 +            rlp->used = j;
   1.251 +            continue;
   1.252 +        }
   1.253 +
   1.254 +        /*
   1.255 +         * Need to locate the insertion point.
   1.256 +         */
   1.257 +        for (i = 0;
   1.258 +             i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
   1.259 +
   1.260 +        /*
   1.261 +         * If the start value lies in the current range, then simply set the
   1.262 +         * new end point of the range to the end value passed as a parameter.
   1.263 +         */
   1.264 +        if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
   1.265 +            rlp->ranges[i + 1] = end;
   1.266 +            return;
   1.267 +        }
   1.268 +
   1.269 +        /*
   1.270 +         * Shift following values up by two.
   1.271 +         */
   1.272 +        for (j = rlp->used; j > i; j -= 2) {
   1.273 +            rlp->ranges[j] = rlp->ranges[j - 2];
   1.274 +            rlp->ranges[j + 1] = rlp->ranges[j - 1];
   1.275 +        }
   1.276 +
   1.277 +        /*
   1.278 +         * Add the new range at the insertion point.
   1.279 +         */
   1.280 +        rlp->ranges[i] = start;
   1.281 +        rlp->ranges[i + 1] = end;
   1.282 +        rlp->used += 2;
   1.283 +    }
   1.284 +}
   1.285 +
   1.286 +static void
   1.287 +#ifdef __STDC__
   1.288 +ordered_range_insert(unsigned long c, char *name, int len)
   1.289 +#else
   1.290 +ordered_range_insert(c, name, len)
   1.291 +unsigned long c;
   1.292 +char *name;
   1.293 +int len;
   1.294 +#endif
   1.295 +{
   1.296 +    int i, j;
   1.297 +    unsigned long s, e;
   1.298 +    _ranges_t *rlp;
   1.299 +
   1.300 +    if (len == 0)
   1.301 +      return;
   1.302 +
   1.303 +    for (i = 0; i < NUMPROPS; i++) {
   1.304 +        if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
   1.305 +          break;
   1.306 +    }
   1.307 +
   1.308 +    if (i == NUMPROPS)
   1.309 +      return;
   1.310 +
   1.311 +    /*
   1.312 +     * Have a match, so insert the code in order.
   1.313 +     */
   1.314 +    rlp = &proptbl[i];
   1.315 +
   1.316 +    /*
   1.317 +     * Resize the range list if necessary.
   1.318 +     */
   1.319 +    if (rlp->used == rlp->size) {
   1.320 +        if (rlp->size == 0)
   1.321 +          rlp->ranges = (unsigned long *)
   1.322 +              malloc(sizeof(unsigned long) << 3);
   1.323 +        else
   1.324 +          rlp->ranges = (unsigned long *)
   1.325 +              realloc((char *) rlp->ranges,
   1.326 +                      sizeof(unsigned long) * (rlp->size + 8));
   1.327 +        rlp->size += 8;
   1.328 +    }
   1.329 +
   1.330 +    /*
   1.331 +     * If this is the first code for this property list, just add it
   1.332 +     * and return.
   1.333 +     */
   1.334 +    if (rlp->used == 0) {
   1.335 +        rlp->ranges[0] = rlp->ranges[1] = c;
   1.336 +        rlp->used += 2;
   1.337 +        return;
   1.338 +    }
   1.339 +
   1.340 +    /*
   1.341 +     * Optimize the cases of extending the last range and adding new ranges to
   1.342 +     * the end.
   1.343 +     */
   1.344 +    j = rlp->used - 1;
   1.345 +    e = rlp->ranges[j];
   1.346 +    s = rlp->ranges[j - 1];
   1.347 +
   1.348 +    if (c == e + 1) {
   1.349 +        /*
   1.350 +         * Extend the last range.
   1.351 +         */
   1.352 +        rlp->ranges[j] = c;
   1.353 +        return;
   1.354 +    }
   1.355 +
   1.356 +    if (c > e + 1) {
   1.357 +        /*
   1.358 +         * Start another range on the end.
   1.359 +         */
   1.360 +        j = rlp->used;
   1.361 +        rlp->ranges[j] = rlp->ranges[j + 1] = c;
   1.362 +        rlp->used += 2;
   1.363 +        return;
   1.364 +    }
   1.365 +
   1.366 +    if (c >= s)
   1.367 +      /*
   1.368 +       * The code is a duplicate of a code in the last range, so just return.
   1.369 +       */
   1.370 +      return;
   1.371 +
   1.372 +    /*
   1.373 +     * The code should be inserted somewhere before the last range in the
   1.374 +     * list.  Locate the insertion point.
   1.375 +     */
   1.376 +    for (i = 0;
   1.377 +         i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
   1.378 +
   1.379 +    s = rlp->ranges[i];
   1.380 +    e = rlp->ranges[i + 1];
   1.381 +
   1.382 +    if (c == e + 1)
   1.383 +      /*
   1.384 +       * Simply extend the current range.
   1.385 +       */
   1.386 +      rlp->ranges[i + 1] = c;
   1.387 +    else if (c < s) {
   1.388 +        /*
   1.389 +         * Add a new entry before the current location.  Shift all entries
   1.390 +         * before the current one up by one to make room.
   1.391 +         */
   1.392 +        for (j = rlp->used; j > i; j -= 2) {
   1.393 +            rlp->ranges[j] = rlp->ranges[j - 2];
   1.394 +            rlp->ranges[j + 1] = rlp->ranges[j - 1];
   1.395 +        }
   1.396 +        rlp->ranges[i] = rlp->ranges[i + 1] = c;
   1.397 +
   1.398 +        rlp->used += 2;
   1.399 +    }
   1.400 +}
   1.401 +
   1.402 +static void
   1.403 +#ifdef __STDC__
   1.404 +add_decomp(unsigned long code)
   1.405 +#else
   1.406 +add_decomp(code)
   1.407 +unsigned long code;
   1.408 +#endif
   1.409 +{
   1.410 +    unsigned long i, j, size;
   1.411 +
   1.412 +    /*
   1.413 +     * Add the code to the composite property.
   1.414 +     */
   1.415 +    ordered_range_insert(code, "Cm", 2);
   1.416 +
   1.417 +    /*
   1.418 +     * Locate the insertion point for the code.
   1.419 +     */
   1.420 +    for (i = 0; i < decomps_used && code > decomps[i].code; i++) ;
   1.421 +
   1.422 +    /*
   1.423 +     * Allocate space for a new decomposition.
   1.424 +     */
   1.425 +    if (decomps_used == decomps_size) {
   1.426 +        if (decomps_size == 0)
   1.427 +          decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
   1.428 +        else
   1.429 +          decomps = (_decomp_t *)
   1.430 +              realloc((char *) decomps,
   1.431 +                      sizeof(_decomp_t) * (decomps_size + 8));
   1.432 +        (void) memset((char *) (decomps + decomps_size), 0,
   1.433 +                      sizeof(_decomp_t) << 3);
   1.434 +        decomps_size += 8;
   1.435 +    }
   1.436 +
   1.437 +    if (i < decomps_used && code != decomps[i].code) {
   1.438 +        /*
   1.439 +         * Shift the decomps up by one if the codes don't match.
   1.440 +         */
   1.441 +        for (j = decomps_used; j > i; j--)
   1.442 +          (void) memcpy((char *) &decomps[j], (char *) &decomps[j - 1],
   1.443 +                        sizeof(_decomp_t));
   1.444 +    }
   1.445 +
   1.446 +    /*
   1.447 +     * Insert or replace a decomposition.
   1.448 +     */
   1.449 +    size = dectmp_size + (4 - (dectmp_size & 3));
   1.450 +    if (decomps[i].size < size) {
   1.451 +        if (decomps[i].size == 0)
   1.452 +          decomps[i].decomp = (unsigned long *)
   1.453 +              malloc(sizeof(unsigned long) * size);
   1.454 +        else
   1.455 +          decomps[i].decomp = (unsigned long *)
   1.456 +              realloc((char *) decomps[i].decomp,
   1.457 +                      sizeof(unsigned long) * size);
   1.458 +        decomps[i].size = size;
   1.459 +    }
   1.460 +
   1.461 +    if (decomps[i].code != code)
   1.462 +      decomps_used++;
   1.463 +
   1.464 +    decomps[i].code = code;
   1.465 +    decomps[i].used = dectmp_size;
   1.466 +    (void) memcpy((char *) decomps[i].decomp, (char *) dectmp,
   1.467 +                  sizeof(unsigned long) * dectmp_size);
   1.468 +
   1.469 +}
   1.470 +
   1.471 +static void
   1.472 +#ifdef __STDC__
   1.473 +add_title(unsigned long code)
   1.474 +#else
   1.475 +add_title(code)
   1.476 +unsigned long code;
   1.477 +#endif
   1.478 +{
   1.479 +    unsigned long i, j;
   1.480 +
   1.481 +    /*
   1.482 +     * Always map the code to itself.
   1.483 +     */
   1.484 +    cases[2] = code;
   1.485 +
   1.486 +    if (title_used == title_size) {
   1.487 +        if (title_size == 0)
   1.488 +          title = (_case_t *) malloc(sizeof(_case_t) << 3);
   1.489 +        else
   1.490 +          title = (_case_t *) realloc((char *) title,
   1.491 +                                      sizeof(_case_t) * (title_size + 8));
   1.492 +        title_size += 8;
   1.493 +    }
   1.494 +
   1.495 +    /*
   1.496 +     * Locate the insertion point.
   1.497 +     */
   1.498 +    for (i = 0; i < title_used && code > title[i].key; i++) ;
   1.499 +
   1.500 +    if (i < title_used) {
   1.501 +        /*
   1.502 +         * Shift the array up by one.
   1.503 +         */
   1.504 +        for (j = title_used; j > i; j--)
   1.505 +          (void) memcpy((char *) &title[j], (char *) &title[j - 1],
   1.506 +                        sizeof(_case_t));
   1.507 +    }
   1.508 +
   1.509 +    title[i].key = cases[2];    /* Title */
   1.510 +    title[i].other1 = cases[0]; /* Upper */
   1.511 +    title[i].other2 = cases[1]; /* Lower */
   1.512 +
   1.513 +    title_used++;
   1.514 +}
   1.515 +
   1.516 +static void
   1.517 +#ifdef __STDC__
   1.518 +add_upper(unsigned long code)
   1.519 +#else
   1.520 +add_upper(code)
   1.521 +unsigned long code;
   1.522 +#endif
   1.523 +{
   1.524 +    unsigned long i, j;
   1.525 +
   1.526 +    /*
   1.527 +     * Always map the code to itself.
   1.528 +     */
   1.529 +    cases[0] = code;
   1.530 +
   1.531 +    /*
   1.532 +     * If the title case character is not present, then make it the same as
   1.533 +     * the upper case.
   1.534 +     */
   1.535 +    if (cases[2] == 0)
   1.536 +      cases[2] = code;
   1.537 +
   1.538 +    if (upper_used == upper_size) {
   1.539 +        if (upper_size == 0)
   1.540 +          upper = (_case_t *) malloc(sizeof(_case_t) << 3);
   1.541 +        else
   1.542 +          upper = (_case_t *) realloc((char *) upper,
   1.543 +                                      sizeof(_case_t) * (upper_size + 8));
   1.544 +        upper_size += 8;
   1.545 +    }
   1.546 +
   1.547 +    /*
   1.548 +     * Locate the insertion point.
   1.549 +     */
   1.550 +    for (i = 0; i < upper_used && code > upper[i].key; i++) ;
   1.551 +
   1.552 +    if (i < upper_used) {
   1.553 +        /*
   1.554 +         * Shift the array up by one.
   1.555 +         */
   1.556 +        for (j = upper_used; j > i; j--)
   1.557 +          (void) memcpy((char *) &upper[j], (char *) &upper[j - 1],
   1.558 +                        sizeof(_case_t));
   1.559 +    }
   1.560 +
   1.561 +    upper[i].key = cases[0];    /* Upper */
   1.562 +    upper[i].other1 = cases[1]; /* Lower */
   1.563 +    upper[i].other2 = cases[2]; /* Title */
   1.564 +
   1.565 +    upper_used++;
   1.566 +}
   1.567 +
   1.568 +static void
   1.569 +#ifdef __STDC__
   1.570 +add_lower(unsigned long code)
   1.571 +#else
   1.572 +add_lower(code)
   1.573 +unsigned long code;
   1.574 +#endif
   1.575 +{
   1.576 +    unsigned long i, j;
   1.577 +
   1.578 +    /*
   1.579 +     * Always map the code to itself.
   1.580 +     */
   1.581 +    cases[1] = code;
   1.582 +
   1.583 +    /*
   1.584 +     * If the title case character is empty, then make it the same as the
   1.585 +     * upper case.
   1.586 +     */
   1.587 +    if (cases[2] == 0)
   1.588 +      cases[2] = cases[0];
   1.589 +
   1.590 +    if (lower_used == lower_size) {
   1.591 +        if (lower_size == 0)
   1.592 +          lower = (_case_t *) malloc(sizeof(_case_t) << 3);
   1.593 +        else
   1.594 +          lower = (_case_t *) realloc((char *) lower,
   1.595 +                                      sizeof(_case_t) * (lower_size + 8));
   1.596 +        lower_size += 8;
   1.597 +    }
   1.598 +
   1.599 +    /*
   1.600 +     * Locate the insertion point.
   1.601 +     */
   1.602 +    for (i = 0; i < lower_used && code > lower[i].key; i++) ;
   1.603 +
   1.604 +    if (i < lower_used) {
   1.605 +        /*
   1.606 +         * Shift the array up by one.
   1.607 +         */
   1.608 +        for (j = lower_used; j > i; j--)
   1.609 +          (void) memcpy((char *) &lower[j], (char *) &lower[j - 1],
   1.610 +                        sizeof(_case_t));
   1.611 +    }
   1.612 +
   1.613 +    lower[i].key = cases[1];    /* Lower */
   1.614 +    lower[i].other1 = cases[0]; /* Upper */
   1.615 +    lower[i].other2 = cases[2]; /* Title */
   1.616 +
   1.617 +    lower_used++;
   1.618 +}
   1.619 +
   1.620 +static void
   1.621 +#ifdef __STDC__
   1.622 +ordered_ccl_insert(unsigned long c, unsigned long ccl_code)
   1.623 +#else
   1.624 +ordered_ccl_insert(c, ccl_code)
   1.625 +unsigned long c, ccl_code;
   1.626 +#endif
   1.627 +{
   1.628 +    unsigned long i, j;
   1.629 +
   1.630 +    if (ccl_used == ccl_size) {
   1.631 +        if (ccl_size == 0)
   1.632 +          ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24);
   1.633 +        else
   1.634 +          ccl = (unsigned long *)
   1.635 +              realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24));
   1.636 +        ccl_size += 24;
   1.637 +    }
   1.638 +
   1.639 +    /*
   1.640 +     * Optimize adding the first item.
   1.641 +     */
   1.642 +    if (ccl_used == 0) {
   1.643 +        ccl[0] = ccl[1] = c;
   1.644 +        ccl[2] = ccl_code;
   1.645 +        ccl_used += 3;
   1.646 +        return;
   1.647 +    }
   1.648 +
   1.649 +    /*
   1.650 +     * Handle the special case of extending the range on the end.  This
   1.651 +     * requires that the combining class codes are the same.
   1.652 +     */
   1.653 +    if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
   1.654 +        ccl[ccl_used - 2] = c;
   1.655 +        return;
   1.656 +    }
   1.657 +
   1.658 +    /*
   1.659 +     * Handle the special case of adding another range on the end.
   1.660 +     */
   1.661 +    if (c > ccl[ccl_used - 2] + 1 ||
   1.662 +        (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
   1.663 +        ccl[ccl_used++] = c;
   1.664 +        ccl[ccl_used++] = c;
   1.665 +        ccl[ccl_used++] = ccl_code;
   1.666 +        return;
   1.667 +    }
   1.668 +
   1.669 +    /*
   1.670 +     * Locate either the insertion point or range for the code.
   1.671 +     */
   1.672 +    for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
   1.673 +
   1.674 +    if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
   1.675 +        /*
   1.676 +         * Extend an existing range.
   1.677 +         */
   1.678 +        ccl[i + 1] = c;
   1.679 +        return;
   1.680 +    } else if (c < ccl[i]) {
   1.681 +        /*
   1.682 +         * Start a new range before the current location.
   1.683 +         */
   1.684 +        for (j = ccl_used; j > i; j -= 3) {
   1.685 +            ccl[j] = ccl[j - 3];
   1.686 +            ccl[j - 1] = ccl[j - 4];
   1.687 +            ccl[j - 2] = ccl[j - 5];
   1.688 +        }
   1.689 +        ccl[i] = ccl[i + 1] = c;
   1.690 +        ccl[i + 2] = ccl_code;
   1.691 +    }
   1.692 +}
   1.693 +
   1.694 +/*
   1.695 + * Adds a number if it does not already exist and returns an index value
   1.696 + * multiplied by 2.
   1.697 + */
   1.698 +static unsigned long
   1.699 +#ifdef __STDC__
   1.700 +make_number(short num, short denom)
   1.701 +#else
   1.702 +make_number(num, denom)
   1.703 +short num, denom;
   1.704 +#endif
   1.705 +{
   1.706 +    unsigned long n;
   1.707 +
   1.708 +    /*
   1.709 +     * Determine if the number already exists.
   1.710 +     */
   1.711 +    for (n = 0; n < nums_used; n++) {
   1.712 +        if (nums[n].numerator == num && nums[n].denominator == denom)
   1.713 +          return n << 1;
   1.714 +    }
   1.715 +
   1.716 +    if (nums_used == nums_size) {
   1.717 +        if (nums_size == 0)
   1.718 +          nums = (_num_t *) malloc(sizeof(_num_t) << 3);
   1.719 +        else
   1.720 +          nums = (_num_t *) realloc((char *) nums,
   1.721 +                                    sizeof(_num_t) * (nums_size + 8));
   1.722 +        nums_size += 8;
   1.723 +    }
   1.724 +
   1.725 +    n = nums_used++;
   1.726 +    nums[n].numerator = num;
   1.727 +    nums[n].denominator = denom;
   1.728 +
   1.729 +    return n << 1;
   1.730 +}
   1.731 +
   1.732 +static void
   1.733 +#ifdef __STDC__
   1.734 +add_number(unsigned long code, short num, short denom)
   1.735 +#else
   1.736 +add_number(code, num, denom)
   1.737 +unsigned long code;
   1.738 +short num, denom;
   1.739 +#endif
   1.740 +{
   1.741 +    unsigned long i, j;
   1.742 +
   1.743 +    /*
   1.744 +     * Insert the code in order.
   1.745 +     */
   1.746 +    for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
   1.747 +
   1.748 +    /*
   1.749 +     * Handle the case of the codes matching and simply replace the number
   1.750 +     * that was there before.
   1.751 +     */
   1.752 +    if (ncodes_used > 0 && code == ncodes[i].code) {
   1.753 +        ncodes[i].idx = make_number(num, denom);
   1.754 +        return;
   1.755 +    }
   1.756 +
   1.757 +    /*
   1.758 +     * Resize the array if necessary.
   1.759 +     */
   1.760 +    if (ncodes_used == ncodes_size) {
   1.761 +        if (ncodes_size == 0)
   1.762 +          ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
   1.763 +        else
   1.764 +          ncodes = (_codeidx_t *)
   1.765 +              realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
   1.766 +
   1.767 +        ncodes_size += 8;
   1.768 +    }
   1.769 +
   1.770 +    /*
   1.771 +     * Shift things around to insert the code if necessary.
   1.772 +     */
   1.773 +    if (i < ncodes_used) {
   1.774 +        for (j = ncodes_used; j > i; j--) {
   1.775 +            ncodes[j].code = ncodes[j - 1].code;
   1.776 +            ncodes[j].idx = ncodes[j - 1].idx;
   1.777 +        }
   1.778 +    }
   1.779 +    ncodes[i].code = code;
   1.780 +    ncodes[i].idx = make_number(num, denom);
   1.781 +
   1.782 +    ncodes_used++;
   1.783 +}
   1.784 +
   1.785 +/*
   1.786 + * This routine assumes that the line is a valid Unicode Character Database
   1.787 + * entry.
   1.788 + */
   1.789 +static void
   1.790 +#ifdef __STDC__
   1.791 +read_cdata(FILE *in)
   1.792 +#else
   1.793 +read_cdata(in)
   1.794 +FILE *in;
   1.795 +#endif
   1.796 +{
   1.797 +    unsigned long i, lineno, skip, code, ccl_code;
   1.798 +    short wnum, neg, number[2];
   1.799 +    char line[512], *s, *e;
   1.800 +
   1.801 +    lineno = skip = 0;
   1.802 +    while (fscanf(in, "%[^\n]\n", line) != EOF) {
   1.803 +        lineno++;
   1.804 +
   1.805 +        /*
   1.806 +         * Skip blank lines and lines that start with a '#'.
   1.807 +         */
   1.808 +        if (line[0] == 0 || line[0] == '#')
   1.809 +          continue;
   1.810 +
   1.811 +        /*
   1.812 +         * If lines need to be skipped, do it here.
   1.813 +         */
   1.814 +        if (skip) {
   1.815 +            skip--;
   1.816 +            continue;
   1.817 +        }
   1.818 +
   1.819 +        /*
   1.820 +         * Collect the code.  The code can be up to 6 hex digits in length to
   1.821 +         * allow surrogates to be specified.
   1.822 +         */
   1.823 +        for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
   1.824 +            code <<= 4;
   1.825 +            if (*s >= '0' && *s <= '9')
   1.826 +              code += *s - '0';
   1.827 +            else if (*s >= 'A' && *s <= 'F')
   1.828 +              code += (*s - 'A') + 10;
   1.829 +            else if (*s >= 'a' && *s <= 'f')
   1.830 +              code += (*s - 'a') + 10;
   1.831 +        }
   1.832 +
   1.833 +        /*
   1.834 +         * Handle the following special cases:
   1.835 +         * 1. 4E00-9FA5 CJK Ideographs.
   1.836 +         * 2. AC00-D7A3 Hangul Syllables.
   1.837 +         * 3. D800-DFFF Surrogates.
   1.838 +         * 4. E000-F8FF Private Use Area.
   1.839 +         * 5. F900-FA2D Han compatibility.
   1.840 +         */
   1.841 +        switch (code) {
   1.842 +          case 0x4e00:
   1.843 +            /*
   1.844 +             * The Han ideographs.
   1.845 +             */
   1.846 +            add_range(0x4e00, 0x9fff, "Lo", "L");
   1.847 +
   1.848 +            /*
   1.849 +             * Add the characters to the defined category.
   1.850 +             */
   1.851 +            add_range(0x4e00, 0x9fa5, "Cp", 0);
   1.852 +
   1.853 +            skip = 1;
   1.854 +            break;
   1.855 +          case 0xac00:
   1.856 +            /*
   1.857 +             * The Hangul syllables.
   1.858 +             */
   1.859 +            add_range(0xac00, 0xd7a3, "Lo", "L");
   1.860 +
   1.861 +            /*
   1.862 +             * Add the characters to the defined category.
   1.863 +             */
   1.864 +            add_range(0xac00, 0xd7a3, "Cp", 0);
   1.865 +
   1.866 +            skip = 1;
   1.867 +            break;
   1.868 +          case 0xd800:
   1.869 +            /*
   1.870 +             * Make a range of all surrogates and assume some default
   1.871 +             * properties.
   1.872 +             */
   1.873 +            add_range(0x010000, 0x10ffff, "Cs", "L");
   1.874 +            skip = 5;
   1.875 +            break;
   1.876 +          case 0xe000:
   1.877 +            /*
   1.878 +             * The Private Use area.  Add with a default set of properties.
   1.879 +             */
   1.880 +            add_range(0xe000, 0xf8ff, "Co", "L");
   1.881 +            skip = 1;
   1.882 +            break;
   1.883 +          case 0xf900:
   1.884 +            /*
   1.885 +             * The CJK compatibility area.
   1.886 +             */
   1.887 +            add_range(0xf900, 0xfaff, "Lo", "L");
   1.888 +
   1.889 +            /*
   1.890 +             * Add the characters to the defined category.
   1.891 +             */
   1.892 +            add_range(0xf900, 0xfaff, "Cp", 0);
   1.893 +
   1.894 +            skip = 1;
   1.895 +        }
   1.896 +
   1.897 +        if (skip)
   1.898 +          continue;
   1.899 +
   1.900 +        /*
   1.901 +         * Add the code to the defined category.
   1.902 +         */
   1.903 +        ordered_range_insert(code, "Cp", 2);
   1.904 +
   1.905 +        /*
   1.906 +         * Locate the first character property field.
   1.907 +         */
   1.908 +        for (i = 0; *s != 0 && i < 2; s++) {
   1.909 +            if (*s == ';')
   1.910 +              i++;
   1.911 +        }
   1.912 +        for (e = s; *e && *e != ';'; e++) ;
   1.913 +    
   1.914 +        ordered_range_insert(code, s, e - s);
   1.915 +
   1.916 +        /*
   1.917 +         * Locate the combining class code.
   1.918 +         */
   1.919 +        for (s = e; *s != 0 && i < 3; s++) {
   1.920 +            if (*s == ';')
   1.921 +              i++;
   1.922 +        }
   1.923 +
   1.924 +        /*
   1.925 +         * Convert the combining class code from decimal.
   1.926 +         */
   1.927 +        for (ccl_code = 0, e = s; *e && *e != ';'; e++)
   1.928 +          ccl_code = (ccl_code * 10) + (*e - '0');
   1.929 +
   1.930 +        /*
   1.931 +         * Add the code if it is not 0.
   1.932 +         */
   1.933 +        if (ccl_code != 0)
   1.934 +          ordered_ccl_insert(code, ccl_code);
   1.935 +
   1.936 +        /*
   1.937 +         * Locate the second character property field.
   1.938 +         */
   1.939 +        for (s = e; *s != 0 && i < 4; s++) {
   1.940 +            if (*s == ';')
   1.941 +              i++;
   1.942 +        }
   1.943 +        for (e = s; *e && *e != ';'; e++) ;
   1.944 +
   1.945 +        ordered_range_insert(code, s, e - s);
   1.946 +
   1.947 +        /*
   1.948 +         * Check for a decomposition.
   1.949 +         */
   1.950 +        s = ++e;
   1.951 +        if (*s != ';' && *s != '<') {
   1.952 +            /*
   1.953 +             * Collect the codes of the decomposition.
   1.954 +             */
   1.955 +            for (dectmp_size = 0; *s != ';'; ) {
   1.956 +                /*
   1.957 +                 * Skip all leading non-hex digits.
   1.958 +                 */
   1.959 +                while (!ishdigit(*s))
   1.960 +                  s++;
   1.961 +
   1.962 +                for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
   1.963 +                    dectmp[dectmp_size] <<= 4;
   1.964 +                    if (*s >= '0' && *s <= '9')
   1.965 +                      dectmp[dectmp_size] += *s - '0';
   1.966 +                    else if (*s >= 'A' && *s <= 'F')
   1.967 +                      dectmp[dectmp_size] += (*s - 'A') + 10;
   1.968 +                    else if (*s >= 'a' && *s <= 'f')
   1.969 +                      dectmp[dectmp_size] += (*s - 'a') + 10;
   1.970 +                }
   1.971 +                dectmp_size++;
   1.972 +            }
   1.973 +
   1.974 +            /*
   1.975 +             * If there is more than one code in the temporary decomposition
   1.976 +             * array, then add the character with its decomposition.
   1.977 +             */
   1.978 +            if (dectmp_size > 1)
   1.979 +              add_decomp(code);
   1.980 +        }
   1.981 +
   1.982 +        /*
   1.983 +         * Skip to the number field.
   1.984 +         */
   1.985 +        for (i = 0; i < 3 && *s; s++) {
   1.986 +            if (*s == ';')
   1.987 +              i++;
   1.988 +        }
   1.989 +
   1.990 +        /*
   1.991 +         * Scan the number in.
   1.992 +         */
   1.993 +        number[0] = number[1] = 0;
   1.994 +        for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
   1.995 +            if (*e == '-') {
   1.996 +                neg = 1;
   1.997 +                continue;
   1.998 +            }
   1.999 +
  1.1000 +            if (*e == '/') {
  1.1001 +                /*
  1.1002 +                 * Move to the denominator of the fraction.
  1.1003 +                 */
  1.1004 +                if (neg)
  1.1005 +                  number[wnum] *= -1;
  1.1006 +                neg = 0;
  1.1007 +                e++;
  1.1008 +                wnum++;
  1.1009 +            }
  1.1010 +            number[wnum] = (number[wnum] * 10) + (*e - '0');
  1.1011 +        }
  1.1012 +
  1.1013 +        if (e > s) {
  1.1014 +            /*
  1.1015 +             * Adjust the denominator in case of integers and add the number.
  1.1016 +             */
  1.1017 +            if (wnum == 0)
  1.1018 +              number[1] = number[0];
  1.1019 +
  1.1020 +            add_number(code, number[0], number[1]);
  1.1021 +        }
  1.1022 +
  1.1023 +        /*
  1.1024 +         * Skip to the start of the possible case mappings.
  1.1025 +         */
  1.1026 +        for (s = e, i = 0; i < 4 && *s; s++) {
  1.1027 +            if (*s == ';')
  1.1028 +              i++;
  1.1029 +        }
  1.1030 +
  1.1031 +        /*
  1.1032 +         * Collect the case mappings.
  1.1033 +         */
  1.1034 +        cases[0] = cases[1] = cases[2] = 0;
  1.1035 +        for (i = 0; i < 3; i++) {
  1.1036 +            while (ishdigit(*s)) {
  1.1037 +                cases[i] <<= 4;
  1.1038 +                if (*s >= '0' && *s <= '9')
  1.1039 +                  cases[i] += *s - '0';
  1.1040 +                else if (*s >= 'A' && *s <= 'F')
  1.1041 +                  cases[i] += (*s - 'A') + 10;
  1.1042 +                else if (*s >= 'a' && *s <= 'f')
  1.1043 +                  cases[i] += (*s - 'a') + 10;
  1.1044 +                s++;
  1.1045 +            }
  1.1046 +            if (*s == ';')
  1.1047 +              s++;
  1.1048 +        }
  1.1049 +        if (cases[0] && cases[1])
  1.1050 +          /*
  1.1051 +           * Add the upper and lower mappings for a title case character.
  1.1052 +           */
  1.1053 +          add_title(code);
  1.1054 +        else if (cases[1])
  1.1055 +          /*
  1.1056 +           * Add the lower and title case mappings for the upper case
  1.1057 +           * character.
  1.1058 +           */
  1.1059 +          add_upper(code);
  1.1060 +        else if (cases[0])
  1.1061 +          /*
  1.1062 +           * Add the upper and title case mappings for the lower case
  1.1063 +           * character.
  1.1064 +           */
  1.1065 +          add_lower(code);
  1.1066 +    }
  1.1067 +}
  1.1068 +
  1.1069 +static _decomp_t *
  1.1070 +#ifdef __STDC__
  1.1071 +find_decomp(unsigned long code)
  1.1072 +#else
  1.1073 +find_decomp(code)
  1.1074 +unsigned long code;
  1.1075 +#endif
  1.1076 +{
  1.1077 +    long l, r, m;
  1.1078 +
  1.1079 +    l = 0;
  1.1080 +    r = decomps_used - 1;
  1.1081 +    while (l <= r) {
  1.1082 +        m = (l + r) >> 1;
  1.1083 +        if (code > decomps[m].code)
  1.1084 +          l = m + 1;
  1.1085 +        else if (code < decomps[m].code)
  1.1086 +          r = m - 1;
  1.1087 +        else
  1.1088 +          return &decomps[m];
  1.1089 +    }
  1.1090 +    return 0;
  1.1091 +}
  1.1092 +
  1.1093 +static void
  1.1094 +#ifdef __STDC__
  1.1095 +decomp_it(_decomp_t *d)
  1.1096 +#else
  1.1097 +decomp_it(d)
  1.1098 +_decomp_t *d;
  1.1099 +#endif
  1.1100 +{
  1.1101 +    unsigned long i;
  1.1102 +    _decomp_t *dp;
  1.1103 +
  1.1104 +    for (i = 0; i < d->used; i++) {
  1.1105 +        if ((dp = find_decomp(d->decomp[i])) != 0)
  1.1106 +          decomp_it(dp);
  1.1107 +        else
  1.1108 +          dectmp[dectmp_size++] = d->decomp[i];
  1.1109 +    }
  1.1110 +}
  1.1111 +
  1.1112 +/*
  1.1113 + * Expand all decompositions by recursively decomposing each character
  1.1114 + * in the decomposition.
  1.1115 + */
  1.1116 +static void
  1.1117 +#ifdef __STDC__
  1.1118 +expand_decomp(void)
  1.1119 +#else
  1.1120 +expand_decomp()
  1.1121 +#endif
  1.1122 +{
  1.1123 +    unsigned long i;
  1.1124 +
  1.1125 +    for (i = 0; i < decomps_used; i++) {
  1.1126 +        dectmp_size = 0;
  1.1127 +        decomp_it(&decomps[i]);
  1.1128 +        if (dectmp_size > 0)
  1.1129 +          add_decomp(decomps[i].code);
  1.1130 +    }
  1.1131 +}
  1.1132 +
  1.1133 +static void
  1.1134 +#ifdef __STDC__
  1.1135 +write_cdata(char *opath)
  1.1136 +#else
  1.1137 +write_cdata(opath)
  1.1138 +char *opath;
  1.1139 +#endif
  1.1140 +{
  1.1141 +    FILE *out;
  1.1142 +    unsigned long i, idx, bytes, nprops;
  1.1143 +    unsigned short casecnt[2];
  1.1144 +    char path[BUFSIZ];
  1.1145 +
  1.1146 +    /*****************************************************************
  1.1147 +     *
  1.1148 +     * Generate the ctype data.
  1.1149 +     *
  1.1150 +     *****************************************************************/
  1.1151 +
  1.1152 +    /*
  1.1153 +     * Open the ctype.dat file.
  1.1154 +     */
  1.1155 +    sprintf(path, "%s/ctype.dat", opath);
  1.1156 +    if ((out = fopen(path, "wb")) == 0)
  1.1157 +      return;
  1.1158 +
  1.1159 +    /*
  1.1160 +     * Collect the offsets for the properties.  The offsets array is
  1.1161 +     * on a 4-byte boundary to keep things efficient for architectures
  1.1162 +     * that need such a thing.
  1.1163 +     */
  1.1164 +    for (i = idx = 0; i < NUMPROPS; i++) {
  1.1165 +        propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
  1.1166 +        idx += proptbl[i].used;
  1.1167 +    }
  1.1168 +
  1.1169 +    /*
  1.1170 +     * Add the sentinel index which is used by the binary search as the upper
  1.1171 +     * bound for a search.
  1.1172 +     */
  1.1173 +    propcnt[i] = idx;
  1.1174 +
  1.1175 +    /*
  1.1176 +     * Record the actual number of property lists.  This may be different than
  1.1177 +     * the number of offsets actually written because of aligning on a 4-byte
  1.1178 +     * boundary.
  1.1179 +     */
  1.1180 +    hdr[1] = NUMPROPS;
  1.1181 +
  1.1182 +    /*
  1.1183 +     * Calculate the byte count needed and pad the property counts array to a
  1.1184 +     * 4-byte boundary.
  1.1185 +     */
  1.1186 +    if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3)
  1.1187 +      bytes += 4 - (bytes & 3);
  1.1188 +    nprops = bytes / sizeof(unsigned short);
  1.1189 +    bytes += sizeof(unsigned long) * idx;
  1.1190 +        
  1.1191 +    /*
  1.1192 +     * Write the header.
  1.1193 +     */
  1.1194 +    fwrite((char *) hdr, sizeof(unsigned short), 2, out);
  1.1195 +
  1.1196 +    /*
  1.1197 +     * Write the byte count.
  1.1198 +     */
  1.1199 +    fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
  1.1200 +
  1.1201 +    /*
  1.1202 +     * Write the property list counts.
  1.1203 +     */
  1.1204 +    fwrite((char *) propcnt, sizeof(unsigned short), nprops, out);
  1.1205 +
  1.1206 +    /*
  1.1207 +     * Write the property lists.
  1.1208 +     */
  1.1209 +    for (i = 0; i < NUMPROPS; i++) {
  1.1210 +        if (proptbl[i].used > 0)
  1.1211 +          fwrite((char *) proptbl[i].ranges, sizeof(unsigned long),
  1.1212 +                 proptbl[i].used, out);
  1.1213 +    }
  1.1214 +
  1.1215 +    fclose(out);
  1.1216 +
  1.1217 +    /*****************************************************************
  1.1218 +     *
  1.1219 +     * Generate the case mapping data.
  1.1220 +     *
  1.1221 +     *****************************************************************/
  1.1222 +
  1.1223 +    /*
  1.1224 +     * Open the case.dat file.
  1.1225 +     */
  1.1226 +    sprintf(path, "%s/case.dat", opath);
  1.1227 +    if ((out = fopen(path, "wb")) == 0)
  1.1228 +      return;
  1.1229 +
  1.1230 +    /*
  1.1231 +     * Write the case mapping tables.
  1.1232 +     */
  1.1233 +    hdr[1] = upper_used + lower_used + title_used;
  1.1234 +    casecnt[0] = upper_used;
  1.1235 +    casecnt[1] = lower_used;
  1.1236 +
  1.1237 +    /*
  1.1238 +     * Write the header.
  1.1239 +     */
  1.1240 +    fwrite((char *) hdr, sizeof(unsigned short), 2, out);
  1.1241 +
  1.1242 +    /*
  1.1243 +     * Write the upper and lower case table sizes.
  1.1244 +     */
  1.1245 +    fwrite((char *) casecnt, sizeof(unsigned short), 2, out);
  1.1246 +
  1.1247 +    if (upper_used > 0)
  1.1248 +      /*
  1.1249 +       * Write the upper case table.
  1.1250 +       */
  1.1251 +      fwrite((char *) upper, sizeof(_case_t), upper_used, out);
  1.1252 +
  1.1253 +    if (lower_used > 0)
  1.1254 +      /*
  1.1255 +       * Write the lower case table.
  1.1256 +       */
  1.1257 +      fwrite((char *) lower, sizeof(_case_t), lower_used, out);
  1.1258 +
  1.1259 +    if (title_used > 0)
  1.1260 +      /*
  1.1261 +       * Write the title case table.
  1.1262 +       */
  1.1263 +      fwrite((char *) title, sizeof(_case_t), title_used, out);
  1.1264 +
  1.1265 +    fclose(out);
  1.1266 +
  1.1267 +    /*****************************************************************
  1.1268 +     *
  1.1269 +     * Generate the decomposition data.
  1.1270 +     *
  1.1271 +     *****************************************************************/
  1.1272 +
  1.1273 +    /*
  1.1274 +     * Fully expand all decompositions before generating the output file.
  1.1275 +     */
  1.1276 +    expand_decomp();
  1.1277 +
  1.1278 +    /*
  1.1279 +     * Open the decomp.dat file.
  1.1280 +     */
  1.1281 +    sprintf(path, "%s/decomp.dat", opath);
  1.1282 +    if ((out = fopen(path, "wb")) == 0)
  1.1283 +      return;
  1.1284 +
  1.1285 +    hdr[1] = decomps_used;
  1.1286 +
  1.1287 +    /*
  1.1288 +     * Write the header.
  1.1289 +     */
  1.1290 +    fwrite((char *) hdr, sizeof(unsigned short), 2, out);
  1.1291 +
  1.1292 +    /*
  1.1293 +     * Write a temporary byte count which will be calculated as the
  1.1294 +     * decompositions are written out.
  1.1295 +     */
  1.1296 +    bytes = 0;
  1.1297 +    fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
  1.1298 +
  1.1299 +    if (decomps_used) {
  1.1300 +        /*
  1.1301 +         * Write the list of decomp nodes.
  1.1302 +         */
  1.1303 +        for (i = idx = 0; i < decomps_used; i++) {
  1.1304 +            fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out);
  1.1305 +            fwrite((char *) &idx, sizeof(unsigned long), 1, out);
  1.1306 +            idx += decomps[i].used;
  1.1307 +        }
  1.1308 +
  1.1309 +        /*
  1.1310 +         * Write the sentinel index as the last decomp node.
  1.1311 +         */
  1.1312 +        fwrite((char *) &idx, sizeof(unsigned long), 1, out);
  1.1313 +
  1.1314 +        /*
  1.1315 +         * Write the decompositions themselves.
  1.1316 +         */
  1.1317 +        for (i = 0; i < decomps_used; i++)
  1.1318 +          fwrite((char *) decomps[i].decomp, sizeof(unsigned long),
  1.1319 +                 decomps[i].used, out);
  1.1320 +
  1.1321 +        /*
  1.1322 +         * Seek back to the beginning and write the byte count.
  1.1323 +         */
  1.1324 +        bytes = (sizeof(unsigned long) * idx) +
  1.1325 +            (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
  1.1326 +        fseek(out, sizeof(unsigned short) << 1, 0L);
  1.1327 +        fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
  1.1328 +
  1.1329 +        fclose(out);
  1.1330 +    }
  1.1331 +
  1.1332 +    /*****************************************************************
  1.1333 +     *
  1.1334 +     * Generate the combining class data.
  1.1335 +     *
  1.1336 +     *****************************************************************/
  1.1337 +
  1.1338 +    /*
  1.1339 +     * Open the cmbcl.dat file.
  1.1340 +     */
  1.1341 +    sprintf(path, "%s/cmbcl.dat", opath);
  1.1342 +    if ((out = fopen(path, "wb")) == 0)
  1.1343 +      return;
  1.1344 +
  1.1345 +    /*
  1.1346 +     * Set the number of ranges used.  Each range has a combining class which
  1.1347 +     * means each entry is a 3-tuple.
  1.1348 +     */
  1.1349 +    hdr[1] = ccl_used / 3;
  1.1350 +
  1.1351 +    /*
  1.1352 +     * Write the header.
  1.1353 +     */
  1.1354 +    fwrite((char *) hdr, sizeof(unsigned short), 2, out);
  1.1355 +
  1.1356 +    /*
  1.1357 +     * Write out the byte count to maintain header size.
  1.1358 +     */
  1.1359 +    bytes = ccl_used * sizeof(unsigned long);
  1.1360 +    fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
  1.1361 +
  1.1362 +    if (ccl_used > 0)
  1.1363 +      /*
  1.1364 +       * Write the combining class ranges out.
  1.1365 +       */
  1.1366 +      fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out);
  1.1367 +
  1.1368 +    fclose(out);
  1.1369 +
  1.1370 +    /*****************************************************************
  1.1371 +     *
  1.1372 +     * Generate the number data.
  1.1373 +     *
  1.1374 +     *****************************************************************/
  1.1375 +
  1.1376 +    /*
  1.1377 +     * Open the num.dat file.
  1.1378 +     */
  1.1379 +    sprintf(path, "%s/num.dat", opath);
  1.1380 +    if ((out = fopen(path, "wb")) == 0)
  1.1381 +      return;
  1.1382 +
  1.1383 +    /*
  1.1384 +     * The count part of the header will be the total number of codes that
  1.1385 +     * have numbers.
  1.1386 +     */
  1.1387 +    hdr[1] = (unsigned short) (ncodes_used << 1);
  1.1388 +    bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
  1.1389 +
  1.1390 +    /*
  1.1391 +     * Write the header.
  1.1392 +     */
  1.1393 +    fwrite((char *) hdr, sizeof(unsigned short), 2, out);
  1.1394 +
  1.1395 +    /*
  1.1396 +     * Write out the byte count to maintain header size.
  1.1397 +     */
  1.1398 +    fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
  1.1399 +
  1.1400 +    /*
  1.1401 +     * Now, if number mappings exist, write them out.
  1.1402 +     */
  1.1403 +    if (ncodes_used > 0) {
  1.1404 +        fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
  1.1405 +        fwrite((char *) nums, sizeof(_num_t), nums_used, out);
  1.1406 +    }
  1.1407 +
  1.1408 +    fclose(out);
  1.1409 +}
  1.1410 +
  1.1411 +void
  1.1412 +#ifdef __STDC__
  1.1413 +main(int argc, char *argv[])
  1.1414 +#else
  1.1415 +main(argc, argv)
  1.1416 +int argc;
  1.1417 +char *argv[];
  1.1418 +#endif
  1.1419 +{
  1.1420 +    FILE *in;
  1.1421 +    char *prog, *opath;
  1.1422 +
  1.1423 +    if ((prog = strrchr(argv[0], '/')) != 0)
  1.1424 +      prog++;
  1.1425 +    else
  1.1426 +      prog = argv[0];
  1.1427 +
  1.1428 +    opath = 0;
  1.1429 +    in = stdin;
  1.1430 +
  1.1431 +    argc--;
  1.1432 +    argv++;
  1.1433 +
  1.1434 +    while (argc > 0) {
  1.1435 +        if (argv[0][0] == '-' && argv[0][1] == 'o') {
  1.1436 +            argc--;
  1.1437 +            argv++;
  1.1438 +            opath = argv[0];
  1.1439 +        } else {
  1.1440 +            if (in != stdin)
  1.1441 +              fclose(in);
  1.1442 +            if ((in = fopen(argv[0], "rb")) == 0)
  1.1443 +              fprintf(stderr, "%s: unable to open ctype file %s\n",
  1.1444 +                      prog, argv[0]);
  1.1445 +            else {
  1.1446 +                read_cdata(in);
  1.1447 +                fclose(in);
  1.1448 +                in = 0;
  1.1449 +            }
  1.1450 +        }
  1.1451 +        argc--;
  1.1452 +        argv++;
  1.1453 +    }
  1.1454 +
  1.1455 +    if (opath == 0)
  1.1456 +      opath = ".";
  1.1457 +    write_cdata(opath);
  1.1458 +
  1.1459 +    exit(0);
  1.1460 +}

mercurial