/*
 * Copyright 1996, 1997, 1998 Computing Research Labs,
 * New Mexico State University
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef lint
#ifdef __GNUC__
static char rcsid[] __attribute__ ((unused)) = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
#else
static char rcsid[] = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
#endif
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef WIN32
#include <unistd.h>
#endif

/*
 * True when cc is an ASCII hexadecimal digit.  The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
                      ((cc) >= 'A' && (cc) <= 'F') ||\
                      ((cc) >= 'a' && (cc) <= 'f'))

/*
 * A header written to the output file with the byte-order-mark and the number
 * of property nodes.
 */
static unsigned short hdr[2] = {0xfeff, 0};

#define NUMPROPS 49
#define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))

typedef struct {
    char *name;
    int len;
} _prop_t;

/*
 * List of properties expected to be found in the Unicode Character Database
 * including some implementation specific properties.
 *
 * The implementation specific properties are:
 * Cm = Composed (can be decomposed)
 * Nb = Non-breaking
 * Sy = Symmetric (has left and right forms)
 * Hd = Hex digit
 * Qm = Quote marks
 * Mr = Mirroring
 * Ss = Space, other
 * Cp = Defined character
 */
static _prop_t props[NUMPROPS] = {
    {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
    {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
    {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
    {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
    {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1},
    {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1},
    {"S", 1}, {"WS", 2}, {"ON", 2},
    {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
    {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}
};

/*
 * A sorted list of inclusive [start, end] pairs stored flat in `ranges`.
 * `used` and `size` count array slots (two per range), not ranges.
 */
typedef struct {
    unsigned long *ranges;
    unsigned short used;
    unsigned short size;
} _ranges_t;

static _ranges_t proptbl[NUMPROPS];

/*
 * Make sure this array is sized to be on a 4-byte boundary at compile time.
 */
static unsigned short propcnt[NEEDPROPS];

/*
 * Array used to collect a decomposition before adding it to the decomposition
 * table.
 */
#define DECTMP_MAX 64
static unsigned long dectmp[DECTMP_MAX];
static unsigned long dectmp_size;

typedef struct {
    unsigned long code;
    unsigned short size;
    unsigned short used;
    unsigned long *decomp;
} _decomp_t;

/*
 * List of decomposition.  Created and expanded in order as the characters are
 * encountered.
 */
static _decomp_t *decomps;
static unsigned long decomps_used;
static unsigned long decomps_size;

/*
 * Types and lists for handling lists of case mappings.
 */
typedef struct {
    unsigned long key;
    unsigned long other1;
    unsigned long other2;
} _case_t;

static _case_t *upper;
static _case_t *lower;
static _case_t *title;
static unsigned long upper_used;
static unsigned long upper_size;
static unsigned long lower_used;
static unsigned long lower_size;
static unsigned long title_used;
static unsigned long title_size;

/*
 * Array used to collect case mappings before adding them to a list.
 * Index 0 is the upper case, 1 the lower case and 2 the title case code.
 */
static unsigned long cases[3];

/*
 * An array to hold ranges for combining classes, stored as flat
 * (start, end, class) triples sorted by start.
 */
static unsigned long *ccl;
static unsigned long ccl_used;
static unsigned long ccl_size;

/*
 * Structures for handling numbers.
 */
typedef struct {
    unsigned long code;
    unsigned long idx;
} _codeidx_t;

typedef struct {
    short numerator;
    short denominator;
} _num_t;

/*
 * Arrays to hold the mapping of codes to numbers.
 */
static _codeidx_t *ncodes;
static unsigned long ncodes_used;
static unsigned long ncodes_size;

/*
 * Array for holding numbers.
 */
static _num_t *nums;
static unsigned long nums_used;
static unsigned long nums_size;

/*
 * Resize `ptr` to hold `count` elements of `elsize` bytes.  A null `ptr`
 * allocates a fresh array.  The generator cannot produce useful output after
 * an allocation failure, so it reports the problem and exits instead of
 * dereferencing a null pointer later.
 */
static void *
grow_array(void *ptr, unsigned long count, unsigned long elsize)
{
    void *p = realloc(ptr, (size_t) count * (size_t) elsize);

    if (p == 0) {
        fprintf(stderr, "ucgendat: out of memory\n");
        exit(1);
    }
    return p;
}

/*
 * Return the value of the hexadecimal digit `c`, or -1 if it is not one.
 */
static int
hex_value(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;
    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;
    return -1;
}

/*
 * Return the index in props[] of the property whose name is exactly the
 * first `len` bytes of `name`, or -1 when there is no such property.
 */
static int
find_prop(const char *name, int len)
{
    int i;

    for (i = 0; i < NUMPROPS; i++) {
        if (props[i].len == len &&
            memcmp(props[i].name, name, (size_t) len) == 0)
            return i;
    }
    return -1;
}

/*
 * Make sure there is room for one more [start, end] pair in `rlp`.
 */
static void
reserve_range(_ranges_t *rlp)
{
    if (rlp->used == rlp->size) {
        rlp->ranges = grow_array(rlp->ranges, (unsigned long) rlp->size + 8,
                                 sizeof(unsigned long));
        rlp->size += 8;
    }
}

/*
 * Open a two-slot gap at slot `at` and store [start, end] there.  The caller
 * must already have reserved the space.
 */
static void
insert_range_at(_ranges_t *rlp, int at, unsigned long start,
                unsigned long end)
{
    memmove(&rlp->ranges[at + 2], &rlp->ranges[at],
            sizeof(unsigned long) * (size_t) (rlp->used - at));
    rlp->ranges[at] = start;
    rlp->ranges[at + 1] = end;
    rlp->used += 2;
}

/*
 * Add the inclusive range [start, end] to the property named `p1` and, when
 * `p2` is not null, to the property named `p2` as well.  Names that are not
 * in props[] are ignored.
 *
 * A range that starts inside (or directly after) an existing range extends
 * that range; it never shrinks it.  Both properties are always processed:
 * the original code returned after merging into the first list and silently
 * skipped the second.
 */
static void
add_range(unsigned long start, unsigned long end, char *p1, char *p2)
{
    char *names[2];
    int k;

    names[0] = p1;
    names[1] = p2;

    for (k = 0; k < 2; k++) {
        _ranges_t *rlp;
        int idx, i;

        if (names[k] == 0)
            continue;

        idx = find_prop(names[k], (int) strlen(names[k]));
        if (idx < 0)
            continue;

        rlp = &proptbl[idx];
        reserve_range(rlp);

        /*
         * First range for this property, or a range that lies beyond the
         * last one: append.
         */
        if (rlp->used == 0 || start > rlp->ranges[rlp->used - 1]) {
            insert_range_at(rlp, rlp->used, start, end);
            continue;
        }

        /*
         * Locate the first range that `start` falls in, touches, or
         * precedes.  One always exists because start <= the last end.
         */
        for (i = 0;
             i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;

        if (rlp->ranges[i] <= start) {
            /*
             * The start lies in (or directly after) range i: extend it.
             */
            if (end > rlp->ranges[i + 1])
                rlp->ranges[i + 1] = end;
            continue;
        }

        /*
         * The new range starts before range i.
         */
        insert_range_at(rlp, i, start, end);
    }
}

/*
 * Add the single code `c` to the property whose name is the first `len`
 * bytes of `name`, keeping the range list sorted.  Codes already covered by
 * a range are ignored, as are unknown property names and empty names.
 */
static void
ordered_range_insert(unsigned long c, char *name, int len)
{
    _ranges_t *rlp;
    unsigned long s, e;
    int idx, i;

    if (len <= 0)
        return;

    idx = find_prop(name, len);
    if (idx < 0)
        return;

    rlp = &proptbl[idx];
    reserve_range(rlp);

    if (rlp->used == 0) {
        insert_range_at(rlp, 0, c, c);
        return;
    }

    /*
     * Optimize the cases of extending the last range and adding new ranges
     * to the end, which is what sorted input produces.
     */
    s = rlp->ranges[rlp->used - 2];
    e = rlp->ranges[rlp->used - 1];

    if (c == e + 1) {
        rlp->ranges[rlp->used - 1] = c;
        return;
    }

    if (c > e + 1) {
        insert_range_at(rlp, rlp->used, c, c);
        return;
    }

    if (c >= s)
        /*
         * The code is a duplicate of a code in the last range.
         */
        return;

    /*
     * The code belongs somewhere before the last range.
     */
    for (i = 0;
         i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;

    s = rlp->ranges[i];
    e = rlp->ranges[i + 1];

    if (c == e + 1)
        rlp->ranges[i + 1] = c;
    else if (c < s)
        insert_range_at(rlp, i, c, c);
}

/*
 * Store the decomposition currently held in dectmp[0 .. dectmp_size) for
 * `code`, replacing any decomposition already recorded for it, and add the
 * code to the "Cm" property.
 *
 * When a new entry is inserted in the middle of the table the freed slot is
 * cleared first.  The original code left it holding a copy of the entry that
 * had just been shifted up, so the new entry shared (and overwrote or
 * reallocated) its neighbour's decomposition buffer.
 */
static void
add_decomp(unsigned long code)
{
    unsigned long i, size;
    _decomp_t *d;

    ordered_range_insert(code, "Cm", 2);

    /*
     * Locate the insertion point for the code.
     */
    for (i = 0; i < decomps_used && code > decomps[i].code; i++) ;

    if (decomps_used == decomps_size) {
        decomps = grow_array(decomps, decomps_size + 8, sizeof(_decomp_t));
        memset(decomps + decomps_size, 0, sizeof(_decomp_t) * 8);
        decomps_size += 8;
    }

    if (i == decomps_used || decomps[i].code != code) {
        /*
         * New entry: shift the following entries up and start from an
         * empty slot.
         */
        memmove(&decomps[i + 1], &decomps[i],
                sizeof(_decomp_t) * (decomps_used - i));
        memset(&decomps[i], 0, sizeof(_decomp_t));
        decomps_used++;
    }

    d = &decomps[i];

    /*
     * The buffer is sized up to the next multiple of four codes.
     */
    size = dectmp_size + (4 - (dectmp_size & 3));
    if (d->size < size) {
        d->decomp = grow_array(d->decomp, size, sizeof(unsigned long));
        d->size = (unsigned short) size;
    }

    d->code = code;
    d->used = (unsigned short) dectmp_size;
    memcpy(d->decomp, dectmp, sizeof(unsigned long) * dectmp_size);
}

/*
 * Insert (key, other1, other2) into a case table kept sorted by key.  An
 * entry with an equal key is inserted before the existing one; duplicates
 * are not merged.
 */
static void
case_insert(_case_t **tbl, unsigned long *used, unsigned long *size,
            unsigned long key, unsigned long other1, unsigned long other2)
{
    _case_t *t;
    unsigned long i;

    if (*used == *size) {
        *tbl = grow_array(*tbl, *size + 8, sizeof(_case_t));
        *size += 8;
    }
    t = *tbl;

    for (i = 0; i < *used && key > t[i].key; i++) ;

    memmove(&t[i + 1], &t[i], sizeof(_case_t) * (*used - i));

    t[i].key = key;
    t[i].other1 = other1;
    t[i].other2 = other2;
    (*used)++;
}

/*
 * Record a title case character.  cases[0] and cases[1] must already hold
 * its upper and lower case mappings.
 */
static void
add_title(unsigned long code)
{
    /*
     * Always map the code to itself.
     */
    cases[2] = code;

    case_insert(&title, &title_used, &title_size,
                cases[2],       /* Title */
                cases[0],       /* Upper */
                cases[1]);      /* Lower */
}

/*
 * Record an upper case character.  cases[1] must already hold its lower case
 * mapping; cases[2] holds the title case mapping or 0.
 */
static void
add_upper(unsigned long code)
{
    /*
     * Always map the code to itself.
     */
    cases[0] = code;

    /*
     * If the title case character is not present, then make it the same as
     * the upper case.
     */
    if (cases[2] == 0)
        cases[2] = code;

    case_insert(&upper, &upper_used, &upper_size,
                cases[0],       /* Upper */
                cases[1],       /* Lower */
                cases[2]);      /* Title */
}

/*
 * Record a lower case character.  cases[0] must already hold its upper case
 * mapping; cases[2] holds the title case mapping or 0.
 */
static void
add_lower(unsigned long code)
{
    /*
     * Always map the code to itself.
     */
    cases[1] = code;

    /*
     * If the title case character is empty, then make it the same as the
     * upper case.
     */
    if (cases[2] == 0)
        cases[2] = cases[0];

    case_insert(&lower, &lower_used, &lower_size,
                cases[1],       /* Lower */
                cases[0],       /* Upper */
                cases[2]);      /* Title */
}

/*
 * Record that code `c` has combining class `ccl_code`.  Consecutive codes
 * with the same class are merged into one range.  A code that is already
 * covered by a range is left alone.
 *
 * The original insertion path shifted the triples with the wrong indices
 * (reading before the start of the array when inserting at the front) and
 * never counted the new triple in ccl_used.
 */
static void
ordered_ccl_insert(unsigned long c, unsigned long ccl_code)
{
    unsigned long i;

    if (ccl_used == ccl_size) {
        ccl = grow_array(ccl, ccl_size + 24, sizeof(unsigned long));
        ccl_size += 24;
    }

    /*
     * Handle the special case of extending the range on the end.  This
     * requires that the combining class codes are the same.
     */
    if (ccl_used > 0 &&
        ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
        ccl[ccl_used - 2] = c;
        return;
    }

    /*
     * Handle the first item and the special case of adding another range
     * on the end.
     */
    if (ccl_used == 0 || c > ccl[ccl_used - 2]) {
        ccl[ccl_used++] = c;
        ccl[ccl_used++] = c;
        ccl[ccl_used++] = ccl_code;
        return;
    }

    /*
     * Locate the first range that `c` falls in, touches, or precedes.  One
     * always exists because c <= the last end.
     */
    for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;

    if (c == ccl[i + 1] + 1) {
        /*
         * `c` directly follows range i.  It cannot be the last range,
         * because that case was handled above.
         */
        if (c >= ccl[i + 3])
            /* Already the first code of the next range. */
            return;

        if (ccl_code == ccl[i + 2]) {
            ccl[i + 1] = c;
            return;
        }

        /*
         * Different class: the code needs its own range after range i.
         */
        i += 3;
    }

    if (c >= ccl[i])
        /* Already covered by range i. */
        return;

    /*
     * Start a new range before the current location.
     */
    memmove(&ccl[i + 3], &ccl[i], sizeof(unsigned long) * (ccl_used - i));
    ccl[i] = ccl[i + 1] = c;
    ccl[i + 2] = ccl_code;
    ccl_used += 3;
}

/*
 * Adds a number if it does not already exist and returns an index value
 * multiplied by 2.
 */
static unsigned long
make_number(short num, short denom)
{
    unsigned long n;

    /*
     * Determine if the number already exists.
     */
    for (n = 0; n < nums_used; n++) {
        if (nums[n].numerator == num && nums[n].denominator == denom)
            return n << 1;
    }

    if (nums_used == nums_size) {
        nums = grow_array(nums, nums_size + 8, sizeof(_num_t));
        nums_size += 8;
    }

    n = nums_used++;
    nums[n].numerator = num;
    nums[n].denominator = denom;

    return n << 1;
}

/*
 * Associate `code` with the number num/denom, keeping ncodes sorted by code.
 * A code that is already present has its number replaced.
 */
static void
add_number(unsigned long code, short num, short denom)
{
    unsigned long i;

    /*
     * Insert the code in order.
     */
    for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;

    /*
     * Handle the case of the codes matching and simply replace the number
     * that was there before.  The bound check matters: when the code is
     * larger than every stored code, ncodes[i] is not a valid entry.
     */
    if (i < ncodes_used && code == ncodes[i].code) {
        ncodes[i].idx = make_number(num, denom);
        return;
    }

    if (ncodes_used == ncodes_size) {
        ncodes = grow_array(ncodes, ncodes_size + 8, sizeof(_codeidx_t));
        ncodes_size += 8;
    }

    memmove(&ncodes[i + 1], &ncodes[i],
            sizeof(_codeidx_t) * (ncodes_used - i));

    ncodes[i].code = code;
    ncodes[i].idx = make_number(num, denom);

    ncodes_used++;
}

/*
 * Read the next line of `in` into buf[0 .. size), without the line ending
 * and without leading blanks.  The remainder of a line that does not fit is
 * discarded.  Returns 0 at end of file and 1 otherwise.
 */
static int
read_line(FILE *in, char *buf, int size)
{
    size_t len;
    char *p;

    if (fgets(buf, size, in) == 0)
        return 0;

    len = strlen(buf);
    if (len > 0 && buf[len - 1] != '\n') {
        int ch;

        while ((ch = getc(in)) != EOF && ch != '\n') ;
    }

    while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r'))
        buf[--len] = 0;

    for (p = buf; *p == ' ' || *p == '\t'; p++) ;
    if (p != buf)
        memmove(buf, p, strlen(p) + 1);

    return 1;
}

/*
 * Advance past `n` semicolons, stopping early at the end of the string.
 */
static char *
skip_fields(char *s, int n)
{
    while (n > 0 && *s != 0) {
        if (*s++ == ';')
            n--;
    }
    return s;
}

/*
 * Return a pointer to the semicolon (or terminating NUL) that ends the
 * field starting at `s`.
 */
static char *
field_end(char *s)
{
    while (*s != 0 && *s != ';')
        s++;
    return s;
}

/*
 * Read Unicode Character Database entries from `in` and fill the property,
 * combining class, decomposition, number and case tables.
 *
 * Lines are expected to be valid semicolon-separated UnicodeData entries.
 * Blank lines and lines starting with '#' are skipped.  Malformed lines are
 * tolerated in the sense that no scan runs past the end of the line buffer.
 */
static void
read_cdata(FILE *in)
{
    unsigned long i, skip, code, ccl_code;
    short wnum, neg, number[2];
    char line[512], *s, *e;
    int v;

    skip = 0;
    while (read_line(in, line, (int) sizeof(line))) {
        /*
         * Skip blank lines and lines that start with a '#'.
         */
        if (line[0] == 0 || line[0] == '#')
            continue;

        /*
         * If lines need to be skipped, do it here.
         */
        if (skip) {
            skip--;
            continue;
        }

        /*
         * Collect the code.  The code can be up to 6 hex digits in length
         * to allow surrogates to be specified.
         */
        for (s = line, i = code = 0;
             *s != 0 && *s != ';' && i < 6; i++, s++) {
            code <<= 4;
            if ((v = hex_value(*s)) >= 0)
                code += (unsigned long) v;
        }

        /*
         * Handle the following special cases:
         * 1. 4E00-9FA5 CJK Ideographs.
         * 2. AC00-D7A3 Hangul Syllables.
         * 3. D800-DFFF Surrogates.
         * 4. E000-F8FF Private Use Area.
         * 5. F900-FA2D Han compatibility.
         *
         * Each case sets `skip` to the number of following entries that
         * belong to the same block and must not be processed individually.
         */
        switch (code) {
          case 0x4e00:
            /*
             * The Han ideographs.
             */
            add_range(0x4e00, 0x9fff, "Lo", "L");

            /*
             * Add the characters to the defined category.
             */
            add_range(0x4e00, 0x9fa5, "Cp", 0);

            skip = 1;
            break;
          case 0xac00:
            /*
             * The Hangul syllables.
             */
            add_range(0xac00, 0xd7a3, "Lo", "L");

            /*
             * Add the characters to the defined category.
             */
            add_range(0xac00, 0xd7a3, "Cp", 0);

            skip = 1;
            break;
          case 0xd800:
            /*
             * Make a range of all surrogates and assume some default
             * properties.
             */
            add_range(0x010000, 0x10ffff, "Cs", "L");
            skip = 5;
            break;
          case 0xe000:
            /*
             * The Private Use area.  Add with a default set of properties.
             */
            add_range(0xe000, 0xf8ff, "Co", "L");
            skip = 1;
            break;
          case 0xf900:
            /*
             * The CJK compatibility area.
             */
            add_range(0xf900, 0xfaff, "Lo", "L");

            /*
             * Add the characters to the defined category.
             */
            add_range(0xf900, 0xfaff, "Cp", 0);

            skip = 1;
            break;
          default:
            break;
        }

        if (skip)
            continue;

        /*
         * Add the code to the defined category.
         */
        ordered_range_insert(code, "Cp", 2);

        /*
         * Locate the first character property field.
         */
        s = skip_fields(s, 2);
        e = field_end(s);
        ordered_range_insert(code, s, (int) (e - s));

        /*
         * Locate the combining class code and convert it from decimal.
         */
        s = skip_fields(e, 1);
        for (ccl_code = 0, e = s; *e != 0 && *e != ';'; e++)
            ccl_code = (ccl_code * 10) + (unsigned long) (*e - '0');

        /*
         * Add the code if it is not 0.
         */
        if (ccl_code != 0)
            ordered_ccl_insert(code, ccl_code);

        /*
         * Locate the second character property field.
         */
        s = skip_fields(e, 1);
        e = field_end(s);
        ordered_range_insert(code, s, (int) (e - s));

        /*
         * Check for a decomposition.  Decompositions that start with a
         * '<' tag are not collected.
         */
        s = (*e == ';') ? e + 1 : e;
        if (*s != 0 && *s != ';' && *s != '<') {
            /*
             * Collect the codes of the decomposition.
             */
            dectmp_size = 0;
            while (*s != 0 && *s != ';') {
                if (!ishdigit(*s)) {
                    s++;
                    continue;
                }

                if (dectmp_size == DECTMP_MAX) {
                    /* No room left: ignore the rest of the field. */
                    s = field_end(s);
                    break;
                }

                dectmp[dectmp_size] = 0;
                while ((v = hex_value(*s)) >= 0) {
                    dectmp[dectmp_size] =
                        (dectmp[dectmp_size] << 4) + (unsigned long) v;
                    s++;
                }
                dectmp_size++;
            }

            /*
             * If there is more than one code in the temporary decomposition
             * array, then add the character with its decomposition.
             */
            if (dectmp_size > 1)
                add_decomp(code);
        }

        /*
         * Skip to the number field.
         */
        s = skip_fields(s, 3);

        /*
         * Scan the number in.  It is either an integer or a fraction
         * written as numerator/denominator, optionally negative.
         */
        number[0] = number[1] = 0;
        neg = wnum = 0;
        for (e = s; *e != 0 && *e != ';'; e++) {
            if (*e == '-') {
                neg = 1;
                continue;
            }

            if (*e == '/') {
                /*
                 * Move to the denominator of the fraction.
                 */
                wnum = 1;
                continue;
            }

            if (*e >= '0' && *e <= '9')
                number[wnum] = (short) ((number[wnum] * 10) + (*e - '0'));
        }

        /*
         * The sign belongs to the numerator, for integers as well as for
         * fractions.
         */
        if (neg)
            number[0] = (short) -number[0];

        if (e > s) {
            /*
             * Adjust the denominator in case of integers and add the number.
             *
             * NOTE(review): integers are stored with the denominator equal
             * to the numerator, not 1.  This is kept as is because the
             * reader of the generated data is not visible here -- confirm
             * which convention it expects.
             */
            if (wnum == 0)
                number[1] = number[0];

            add_number(code, number[0], number[1]);
        }

        /*
         * Skip to the start of the possible case mappings.
         */
        s = skip_fields(e, 4);

        /*
         * Collect the case mappings: upper, lower, title.
         */
        cases[0] = cases[1] = cases[2] = 0;
        for (i = 0; i < 3; i++) {
            while ((v = hex_value(*s)) >= 0) {
                cases[i] = (cases[i] << 4) + (unsigned long) v;
                s++;
            }
            if (*s == ';')
                s++;
        }

        if (cases[0] && cases[1])
            /*
             * Add the upper and lower mappings for a title case character.
             */
            add_title(code);
        else if (cases[1])
            /*
             * Add the lower and title case mappings for the upper case
             * character.
             */
            add_upper(code);
        else if (cases[0])
            /*
             * Add the upper and title case mappings for the lower case
             * character.
             */
            add_lower(code);
    }
}
1.1064 + */ 1.1065 + add_lower(code); 1.1066 + } 1.1067 +} 1.1068 + 1.1069 +static _decomp_t * 1.1070 +#ifdef __STDC__ 1.1071 +find_decomp(unsigned long code) 1.1072 +#else 1.1073 +find_decomp(code) 1.1074 +unsigned long code; 1.1075 +#endif 1.1076 +{ 1.1077 + long l, r, m; 1.1078 + 1.1079 + l = 0; 1.1080 + r = decomps_used - 1; 1.1081 + while (l <= r) { 1.1082 + m = (l + r) >> 1; 1.1083 + if (code > decomps[m].code) 1.1084 + l = m + 1; 1.1085 + else if (code < decomps[m].code) 1.1086 + r = m - 1; 1.1087 + else 1.1088 + return &decomps[m]; 1.1089 + } 1.1090 + return 0; 1.1091 +} 1.1092 + 1.1093 +static void 1.1094 +#ifdef __STDC__ 1.1095 +decomp_it(_decomp_t *d) 1.1096 +#else 1.1097 +decomp_it(d) 1.1098 +_decomp_t *d; 1.1099 +#endif 1.1100 +{ 1.1101 + unsigned long i; 1.1102 + _decomp_t *dp; 1.1103 + 1.1104 + for (i = 0; i < d->used; i++) { 1.1105 + if ((dp = find_decomp(d->decomp[i])) != 0) 1.1106 + decomp_it(dp); 1.1107 + else 1.1108 + dectmp[dectmp_size++] = d->decomp[i]; 1.1109 + } 1.1110 +} 1.1111 + 1.1112 +/* 1.1113 + * Expand all decompositions by recursively decomposing each character 1.1114 + * in the decomposition. 1.1115 + */ 1.1116 +static void 1.1117 +#ifdef __STDC__ 1.1118 +expand_decomp(void) 1.1119 +#else 1.1120 +expand_decomp() 1.1121 +#endif 1.1122 +{ 1.1123 + unsigned long i; 1.1124 + 1.1125 + for (i = 0; i < decomps_used; i++) { 1.1126 + dectmp_size = 0; 1.1127 + decomp_it(&decomps[i]); 1.1128 + if (dectmp_size > 0) 1.1129 + add_decomp(decomps[i].code); 1.1130 + } 1.1131 +} 1.1132 + 1.1133 +static void 1.1134 +#ifdef __STDC__ 1.1135 +write_cdata(char *opath) 1.1136 +#else 1.1137 +write_cdata(opath) 1.1138 +char *opath; 1.1139 +#endif 1.1140 +{ 1.1141 + FILE *out; 1.1142 + unsigned long i, idx, bytes, nprops; 1.1143 + unsigned short casecnt[2]; 1.1144 + char path[BUFSIZ]; 1.1145 + 1.1146 + /***************************************************************** 1.1147 + * 1.1148 + * Generate the ctype data. 
1.1149 + * 1.1150 + *****************************************************************/ 1.1151 + 1.1152 + /* 1.1153 + * Open the ctype.dat file. 1.1154 + */ 1.1155 + sprintf(path, "%s/ctype.dat", opath); 1.1156 + if ((out = fopen(path, "wb")) == 0) 1.1157 + return; 1.1158 + 1.1159 + /* 1.1160 + * Collect the offsets for the properties. The offsets array is 1.1161 + * on a 4-byte boundary to keep things efficient for architectures 1.1162 + * that need such a thing. 1.1163 + */ 1.1164 + for (i = idx = 0; i < NUMPROPS; i++) { 1.1165 + propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff; 1.1166 + idx += proptbl[i].used; 1.1167 + } 1.1168 + 1.1169 + /* 1.1170 + * Add the sentinel index which is used by the binary search as the upper 1.1171 + * bound for a search. 1.1172 + */ 1.1173 + propcnt[i] = idx; 1.1174 + 1.1175 + /* 1.1176 + * Record the actual number of property lists. This may be different than 1.1177 + * the number of offsets actually written because of aligning on a 4-byte 1.1178 + * boundary. 1.1179 + */ 1.1180 + hdr[1] = NUMPROPS; 1.1181 + 1.1182 + /* 1.1183 + * Calculate the byte count needed and pad the property counts array to a 1.1184 + * 4-byte boundary. 1.1185 + */ 1.1186 + if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3) 1.1187 + bytes += 4 - (bytes & 3); 1.1188 + nprops = bytes / sizeof(unsigned short); 1.1189 + bytes += sizeof(unsigned long) * idx; 1.1190 + 1.1191 + /* 1.1192 + * Write the header. 1.1193 + */ 1.1194 + fwrite((char *) hdr, sizeof(unsigned short), 2, out); 1.1195 + 1.1196 + /* 1.1197 + * Write the byte count. 1.1198 + */ 1.1199 + fwrite((char *) &bytes, sizeof(unsigned long), 1, out); 1.1200 + 1.1201 + /* 1.1202 + * Write the property list counts. 1.1203 + */ 1.1204 + fwrite((char *) propcnt, sizeof(unsigned short), nprops, out); 1.1205 + 1.1206 + /* 1.1207 + * Write the property lists. 
1.1208 + */ 1.1209 + for (i = 0; i < NUMPROPS; i++) { 1.1210 + if (proptbl[i].used > 0) 1.1211 + fwrite((char *) proptbl[i].ranges, sizeof(unsigned long), 1.1212 + proptbl[i].used, out); 1.1213 + } 1.1214 + 1.1215 + fclose(out); 1.1216 + 1.1217 + /***************************************************************** 1.1218 + * 1.1219 + * Generate the case mapping data. 1.1220 + * 1.1221 + *****************************************************************/ 1.1222 + 1.1223 + /* 1.1224 + * Open the case.dat file. 1.1225 + */ 1.1226 + sprintf(path, "%s/case.dat", opath); 1.1227 + if ((out = fopen(path, "wb")) == 0) 1.1228 + return; 1.1229 + 1.1230 + /* 1.1231 + * Write the case mapping tables. 1.1232 + */ 1.1233 + hdr[1] = upper_used + lower_used + title_used; 1.1234 + casecnt[0] = upper_used; 1.1235 + casecnt[1] = lower_used; 1.1236 + 1.1237 + /* 1.1238 + * Write the header. 1.1239 + */ 1.1240 + fwrite((char *) hdr, sizeof(unsigned short), 2, out); 1.1241 + 1.1242 + /* 1.1243 + * Write the upper and lower case table sizes. 1.1244 + */ 1.1245 + fwrite((char *) casecnt, sizeof(unsigned short), 2, out); 1.1246 + 1.1247 + if (upper_used > 0) 1.1248 + /* 1.1249 + * Write the upper case table. 1.1250 + */ 1.1251 + fwrite((char *) upper, sizeof(_case_t), upper_used, out); 1.1252 + 1.1253 + if (lower_used > 0) 1.1254 + /* 1.1255 + * Write the lower case table. 1.1256 + */ 1.1257 + fwrite((char *) lower, sizeof(_case_t), lower_used, out); 1.1258 + 1.1259 + if (title_used > 0) 1.1260 + /* 1.1261 + * Write the title case table. 1.1262 + */ 1.1263 + fwrite((char *) title, sizeof(_case_t), title_used, out); 1.1264 + 1.1265 + fclose(out); 1.1266 + 1.1267 + /***************************************************************** 1.1268 + * 1.1269 + * Generate the decomposition data. 1.1270 + * 1.1271 + *****************************************************************/ 1.1272 + 1.1273 + /* 1.1274 + * Fully expand all decompositions before generating the output file. 
1.1275 + */ 1.1276 + expand_decomp(); 1.1277 + 1.1278 + /* 1.1279 + * Open the decomp.dat file. 1.1280 + */ 1.1281 + sprintf(path, "%s/decomp.dat", opath); 1.1282 + if ((out = fopen(path, "wb")) == 0) 1.1283 + return; 1.1284 + 1.1285 + hdr[1] = decomps_used; 1.1286 + 1.1287 + /* 1.1288 + * Write the header. 1.1289 + */ 1.1290 + fwrite((char *) hdr, sizeof(unsigned short), 2, out); 1.1291 + 1.1292 + /* 1.1293 + * Write a temporary byte count which will be calculated as the 1.1294 + * decompositions are written out. 1.1295 + */ 1.1296 + bytes = 0; 1.1297 + fwrite((char *) &bytes, sizeof(unsigned long), 1, out); 1.1298 + 1.1299 + if (decomps_used) { 1.1300 + /* 1.1301 + * Write the list of decomp nodes. 1.1302 + */ 1.1303 + for (i = idx = 0; i < decomps_used; i++) { 1.1304 + fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out); 1.1305 + fwrite((char *) &idx, sizeof(unsigned long), 1, out); 1.1306 + idx += decomps[i].used; 1.1307 + } 1.1308 + 1.1309 + /* 1.1310 + * Write the sentinel index as the last decomp node. 1.1311 + */ 1.1312 + fwrite((char *) &idx, sizeof(unsigned long), 1, out); 1.1313 + 1.1314 + /* 1.1315 + * Write the decompositions themselves. 1.1316 + */ 1.1317 + for (i = 0; i < decomps_used; i++) 1.1318 + fwrite((char *) decomps[i].decomp, sizeof(unsigned long), 1.1319 + decomps[i].used, out); 1.1320 + 1.1321 + /* 1.1322 + * Seek back to the beginning and write the byte count. 1.1323 + */ 1.1324 + bytes = (sizeof(unsigned long) * idx) + 1.1325 + (sizeof(unsigned long) * ((hdr[1] << 1) + 1)); 1.1326 + fseek(out, sizeof(unsigned short) << 1, 0L); 1.1327 + fwrite((char *) &bytes, sizeof(unsigned long), 1, out); 1.1328 + 1.1329 + fclose(out); 1.1330 + } 1.1331 + 1.1332 + /***************************************************************** 1.1333 + * 1.1334 + * Generate the combining class data. 1.1335 + * 1.1336 + *****************************************************************/ 1.1337 + 1.1338 + /* 1.1339 + * Open the cmbcl.dat file. 
1.1340 + */ 1.1341 + sprintf(path, "%s/cmbcl.dat", opath); 1.1342 + if ((out = fopen(path, "wb")) == 0) 1.1343 + return; 1.1344 + 1.1345 + /* 1.1346 + * Set the number of ranges used. Each range has a combining class which 1.1347 + * means each entry is a 3-tuple. 1.1348 + */ 1.1349 + hdr[1] = ccl_used / 3; 1.1350 + 1.1351 + /* 1.1352 + * Write the header. 1.1353 + */ 1.1354 + fwrite((char *) hdr, sizeof(unsigned short), 2, out); 1.1355 + 1.1356 + /* 1.1357 + * Write out the byte count to maintain header size. 1.1358 + */ 1.1359 + bytes = ccl_used * sizeof(unsigned long); 1.1360 + fwrite((char *) &bytes, sizeof(unsigned long), 1, out); 1.1361 + 1.1362 + if (ccl_used > 0) 1.1363 + /* 1.1364 + * Write the combining class ranges out. 1.1365 + */ 1.1366 + fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out); 1.1367 + 1.1368 + fclose(out); 1.1369 + 1.1370 + /***************************************************************** 1.1371 + * 1.1372 + * Generate the number data. 1.1373 + * 1.1374 + *****************************************************************/ 1.1375 + 1.1376 + /* 1.1377 + * Open the num.dat file. 1.1378 + */ 1.1379 + sprintf(path, "%s/num.dat", opath); 1.1380 + if ((out = fopen(path, "wb")) == 0) 1.1381 + return; 1.1382 + 1.1383 + /* 1.1384 + * The count part of the header will be the total number of codes that 1.1385 + * have numbers. 1.1386 + */ 1.1387 + hdr[1] = (unsigned short) (ncodes_used << 1); 1.1388 + bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t)); 1.1389 + 1.1390 + /* 1.1391 + * Write the header. 1.1392 + */ 1.1393 + fwrite((char *) hdr, sizeof(unsigned short), 2, out); 1.1394 + 1.1395 + /* 1.1396 + * Write out the byte count to maintain header size. 1.1397 + */ 1.1398 + fwrite((char *) &bytes, sizeof(unsigned long), 1, out); 1.1399 + 1.1400 + /* 1.1401 + * Now, if number mappings exist, write them out. 
1.1402 + */ 1.1403 + if (ncodes_used > 0) { 1.1404 + fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out); 1.1405 + fwrite((char *) nums, sizeof(_num_t), nums_used, out); 1.1406 + } 1.1407 + 1.1408 + fclose(out); 1.1409 +} 1.1410 + 1.1411 +void 1.1412 +#ifdef __STDC__ 1.1413 +main(int argc, char *argv[]) 1.1414 +#else 1.1415 +main(argc, argv) 1.1416 +int argc; 1.1417 +char *argv[]; 1.1418 +#endif 1.1419 +{ 1.1420 + FILE *in; 1.1421 + char *prog, *opath; 1.1422 + 1.1423 + if ((prog = strrchr(argv[0], '/')) != 0) 1.1424 + prog++; 1.1425 + else 1.1426 + prog = argv[0]; 1.1427 + 1.1428 + opath = 0; 1.1429 + in = stdin; 1.1430 + 1.1431 + argc--; 1.1432 + argv++; 1.1433 + 1.1434 + while (argc > 0) { 1.1435 + if (argv[0][0] == '-' && argv[0][1] == 'o') { 1.1436 + argc--; 1.1437 + argv++; 1.1438 + opath = argv[0]; 1.1439 + } else { 1.1440 + if (in != stdin) 1.1441 + fclose(in); 1.1442 + if ((in = fopen(argv[0], "rb")) == 0) 1.1443 + fprintf(stderr, "%s: unable to open ctype file %s\n", 1.1444 + prog, argv[0]); 1.1445 + else { 1.1446 + read_cdata(in); 1.1447 + fclose(in); 1.1448 + in = 0; 1.1449 + } 1.1450 + } 1.1451 + argc--; 1.1452 + argv++; 1.1453 + } 1.1454 + 1.1455 + if (opath == 0) 1.1456 + opath = "."; 1.1457 + write_cdata(opath); 1.1458 + 1.1459 + exit(0); 1.1460 +}