intl/unicharutil/tools/ucgendat.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 * Copyright 1996, 1997, 1998 Computing Research Labs,
michael@0 3 * New Mexico State University
michael@0 4 *
michael@0 5 * Permission is hereby granted, free of charge, to any person obtaining a
michael@0 6 * copy of this software and associated documentation files (the "Software"),
michael@0 7 * to deal in the Software without restriction, including without limitation
michael@0 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
michael@0 9 * and/or sell copies of the Software, and to permit persons to whom the
michael@0 10 * Software is furnished to do so, subject to the following conditions:
michael@0 11 *
michael@0 12 * The above copyright notice and this permission notice shall be included in
michael@0 13 * all copies or substantial portions of the Software.
michael@0 14 *
michael@0 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
michael@0 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
michael@0 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
michael@0 18 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
michael@0 19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
michael@0 20 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
michael@0 21 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
michael@0 22 */
/*
 * RCS identification string.  It is compiled out when `lint' is defined;
 * when building with GCC it additionally carries the `unused' attribute.
 */
#ifndef lint
#ifdef __GNUC__
static char rcsid[] __attribute__ ((unused)) = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
#else
static char rcsid[] = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
#endif
#endif
michael@0 30
michael@0 31 #include <stdio.h>
michael@0 32 #include <stdlib.h>
michael@0 33 #include <string.h>
michael@0 34 #ifndef WIN32
michael@0 35 #include <unistd.h>
michael@0 36 #endif
michael@0 37
/*
 * Evaluates to non-zero when cc is a hexadecimal digit character
 * ('0'-'9', 'A'-'F' or 'a'-'f').  The argument is expanded several times, so
 * it must not have side effects.
 */
#define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
                      ((cc) >= 'A' && (cc) <= 'F') ||\
                      ((cc) >= 'a' && (cc) <= 'f'))
michael@0 41
/*
 * A header written to the output file: hdr[0] is the byte-order-mark
 * (0xfeff) and hdr[1], initially 0, is the number of property nodes.
 */
static unsigned short hdr[2] = {0xfeff, 0};
michael@0 47
/*
 * NUMPROPS is the number of entries in props[] and proptbl[].
 *
 * NEEDPROPS is NUMPROPS rounded up to the next multiple of 4; it is used to
 * size propcnt[].  Note that the formula adds a full 4 when NUMPROPS is
 * already a multiple of 4.
 */
#define NUMPROPS 49
#define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
michael@0 50
/*
 * A property name together with its length in characters.
 */
typedef struct {
    char *name;     /* Property name, e.g. "Mn" or "L". */
    int len;        /* Number of characters in name (1 or 2 in props[]). */
} _prop_t;

/*
 * List of properties expected to be found in the Unicode Character Database
 * including some implementation specific properties.  The position of a
 * name in this table is also the index of its range list in proptbl[].
 *
 * The implementation specific properties are:
 * Cm = Composed (can be decomposed)
 * Nb = Non-breaking
 * Sy = Symmetric (has left and right forms)
 * Hd = Hex digit
 * Qm = Quote marks
 * Mr = Mirroring
 * Ss = Space, other
 * Cp = Defined character
 */
static _prop_t props[NUMPROPS] = {
    {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
    {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
    {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
    {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
    {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1},
    {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1},
    {"S", 1}, {"WS", 2}, {"ON", 2},
    {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
    {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}
};
michael@0 81
/*
 * A growable list of code ranges.  The ranges array is flat: element 2n is
 * the start of range n and element 2n+1 is its (inclusive) end.
 */
typedef struct {
    unsigned long *ranges;  /* Start/end pairs. */
    unsigned short used;    /* Array elements in use (two per range). */
    unsigned short size;    /* Array elements allocated. */
} _ranges_t;

/*
 * One range list per property, indexed like props[].
 */
static _ranges_t proptbl[NUMPROPS];

/*
 * Make sure this array is sized to be on a 4-byte boundary at compile time.
 */
static unsigned short propcnt[NEEDPROPS];
michael@0 94
/*
 * Array used to collect a decomposition before adding it to the decomposition
 * table.  dectmp_size is the number of codes currently held in dectmp.
 */
static unsigned long dectmp[64];
static unsigned long dectmp_size;

/*
 * The decomposition of a single character.
 */
typedef struct {
    unsigned long code;     /* The character that decomposes. */
    unsigned short size;    /* Elements allocated in decomp. */
    unsigned short used;    /* Elements of decomp in use. */
    unsigned long *decomp;  /* The codes the character decomposes into. */
} _decomp_t;

/*
 * List of decomposition. Created and expanded in order as the characters are
 * encountered.  decomps_used is the number of entries in use and
 * decomps_size the number of entries allocated.
 */
static _decomp_t *decomps;
static unsigned long decomps_used;
static unsigned long decomps_size;
michael@0 116
/*
 * Types and lists for handling lists of case mappings.
 *
 * Each entry is keyed by a character in one case; other1 and other2 hold
 * its mappings in the two remaining cases.  Which case goes into which
 * field depends on the list (see add_upper(), add_lower() and add_title()).
 */
typedef struct {
    unsigned long key;
    unsigned long other1;
    unsigned long other2;
} _case_t;

/*
 * The three case mapping lists, each with its count of entries in use
 * (*_used) and entries allocated (*_size).
 */
static _case_t *upper;
static _case_t *lower;
static _case_t *title;
static unsigned long upper_used;
static unsigned long upper_size;
static unsigned long lower_used;
static unsigned long lower_size;
static unsigned long title_used;
static unsigned long title_size;

/*
 * Array used to collect case mappings before adding them to a list:
 * cases[0] is the upper case, cases[1] the lower case and cases[2] the
 * title case code.
 */
static unsigned long cases[3];

/*
 * An array to hold ranges for combining classes.  It is a flat array of
 * triples: range start, range end and combining class.  ccl_used and
 * ccl_size count array elements (three per range).
 */
static unsigned long *ccl;
static unsigned long ccl_used;
static unsigned long ccl_size;
michael@0 147
/*
 * Structures for handling numbers.
 */

/*
 * Maps a character code to an entry of the nums array.  idx holds the value
 * returned by make_number(), i.e. the entry's index multiplied by 2.
 */
typedef struct {
    unsigned long code;
    unsigned long idx;
} _codeidx_t;

/*
 * A number stored as a fraction.
 */
typedef struct {
    short numerator;
    short denominator;
} _num_t;

/*
 * Array holding the mapping of codes to numbers, with the number of entries
 * in use and the number of entries allocated.
 */
static _codeidx_t *ncodes;
static unsigned long ncodes_used;
static unsigned long ncodes_size;

/*
 * Array for holding the distinct numbers, with the number of entries in use
 * and the number of entries allocated.  (These three variables used to be
 * declared twice; the redundant second set of declarations was removed.)
 */
static _num_t *nums;
static unsigned long nums_used;
static unsigned long nums_size;
michael@0 178
michael@0 179 static void
michael@0 180 #ifdef __STDC__
michael@0 181 add_range(unsigned long start, unsigned long end, char *p1, char *p2)
michael@0 182 #else
michael@0 183 add_range(start, end, p1, p2)
michael@0 184 unsigned long start, end;
michael@0 185 char *p1, *p2;
michael@0 186 #endif
michael@0 187 {
michael@0 188 int i, j, k, len;
michael@0 189 _ranges_t *rlp;
michael@0 190 char *name;
michael@0 191
michael@0 192 for (k = 0; k < 2; k++) {
michael@0 193 if (k == 0) {
michael@0 194 name = p1;
michael@0 195 len = 2;
michael@0 196 } else {
michael@0 197 if (p2 == 0)
michael@0 198 break;
michael@0 199
michael@0 200 name = p2;
michael@0 201 len = 1;
michael@0 202 }
michael@0 203
michael@0 204 for (i = 0; i < NUMPROPS; i++) {
michael@0 205 if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
michael@0 206 break;
michael@0 207 }
michael@0 208
michael@0 209 if (i == NUMPROPS)
michael@0 210 continue;
michael@0 211
michael@0 212 rlp = &proptbl[i];
michael@0 213
michael@0 214 /*
michael@0 215 * Resize the range list if necessary.
michael@0 216 */
michael@0 217 if (rlp->used == rlp->size) {
michael@0 218 if (rlp->size == 0)
michael@0 219 rlp->ranges = (unsigned long *)
michael@0 220 malloc(sizeof(unsigned long) << 3);
michael@0 221 else
michael@0 222 rlp->ranges = (unsigned long *)
michael@0 223 realloc((char *) rlp->ranges,
michael@0 224 sizeof(unsigned long) * (rlp->size + 8));
michael@0 225 rlp->size += 8;
michael@0 226 }
michael@0 227
michael@0 228 /*
michael@0 229 * If this is the first code for this property list, just add it
michael@0 230 * and return.
michael@0 231 */
michael@0 232 if (rlp->used == 0) {
michael@0 233 rlp->ranges[0] = start;
michael@0 234 rlp->ranges[1] = end;
michael@0 235 rlp->used += 2;
michael@0 236 continue;
michael@0 237 }
michael@0 238
michael@0 239 /*
michael@0 240 * Optimize the case of adding the range to the end.
michael@0 241 */
michael@0 242 j = rlp->used - 1;
michael@0 243 if (start > rlp->ranges[j]) {
michael@0 244 j = rlp->used;
michael@0 245 rlp->ranges[j++] = start;
michael@0 246 rlp->ranges[j++] = end;
michael@0 247 rlp->used = j;
michael@0 248 continue;
michael@0 249 }
michael@0 250
michael@0 251 /*
michael@0 252 * Need to locate the insertion point.
michael@0 253 */
michael@0 254 for (i = 0;
michael@0 255 i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
michael@0 256
michael@0 257 /*
michael@0 258 * If the start value lies in the current range, then simply set the
michael@0 259 * new end point of the range to the end value passed as a parameter.
michael@0 260 */
michael@0 261 if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
michael@0 262 rlp->ranges[i + 1] = end;
michael@0 263 return;
michael@0 264 }
michael@0 265
michael@0 266 /*
michael@0 267 * Shift following values up by two.
michael@0 268 */
michael@0 269 for (j = rlp->used; j > i; j -= 2) {
michael@0 270 rlp->ranges[j] = rlp->ranges[j - 2];
michael@0 271 rlp->ranges[j + 1] = rlp->ranges[j - 1];
michael@0 272 }
michael@0 273
michael@0 274 /*
michael@0 275 * Add the new range at the insertion point.
michael@0 276 */
michael@0 277 rlp->ranges[i] = start;
michael@0 278 rlp->ranges[i + 1] = end;
michael@0 279 rlp->used += 2;
michael@0 280 }
michael@0 281 }
michael@0 282
michael@0 283 static void
michael@0 284 #ifdef __STDC__
michael@0 285 ordered_range_insert(unsigned long c, char *name, int len)
michael@0 286 #else
michael@0 287 ordered_range_insert(c, name, len)
michael@0 288 unsigned long c;
michael@0 289 char *name;
michael@0 290 int len;
michael@0 291 #endif
michael@0 292 {
michael@0 293 int i, j;
michael@0 294 unsigned long s, e;
michael@0 295 _ranges_t *rlp;
michael@0 296
michael@0 297 if (len == 0)
michael@0 298 return;
michael@0 299
michael@0 300 for (i = 0; i < NUMPROPS; i++) {
michael@0 301 if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
michael@0 302 break;
michael@0 303 }
michael@0 304
michael@0 305 if (i == NUMPROPS)
michael@0 306 return;
michael@0 307
michael@0 308 /*
michael@0 309 * Have a match, so insert the code in order.
michael@0 310 */
michael@0 311 rlp = &proptbl[i];
michael@0 312
michael@0 313 /*
michael@0 314 * Resize the range list if necessary.
michael@0 315 */
michael@0 316 if (rlp->used == rlp->size) {
michael@0 317 if (rlp->size == 0)
michael@0 318 rlp->ranges = (unsigned long *)
michael@0 319 malloc(sizeof(unsigned long) << 3);
michael@0 320 else
michael@0 321 rlp->ranges = (unsigned long *)
michael@0 322 realloc((char *) rlp->ranges,
michael@0 323 sizeof(unsigned long) * (rlp->size + 8));
michael@0 324 rlp->size += 8;
michael@0 325 }
michael@0 326
michael@0 327 /*
michael@0 328 * If this is the first code for this property list, just add it
michael@0 329 * and return.
michael@0 330 */
michael@0 331 if (rlp->used == 0) {
michael@0 332 rlp->ranges[0] = rlp->ranges[1] = c;
michael@0 333 rlp->used += 2;
michael@0 334 return;
michael@0 335 }
michael@0 336
michael@0 337 /*
michael@0 338 * Optimize the cases of extending the last range and adding new ranges to
michael@0 339 * the end.
michael@0 340 */
michael@0 341 j = rlp->used - 1;
michael@0 342 e = rlp->ranges[j];
michael@0 343 s = rlp->ranges[j - 1];
michael@0 344
michael@0 345 if (c == e + 1) {
michael@0 346 /*
michael@0 347 * Extend the last range.
michael@0 348 */
michael@0 349 rlp->ranges[j] = c;
michael@0 350 return;
michael@0 351 }
michael@0 352
michael@0 353 if (c > e + 1) {
michael@0 354 /*
michael@0 355 * Start another range on the end.
michael@0 356 */
michael@0 357 j = rlp->used;
michael@0 358 rlp->ranges[j] = rlp->ranges[j + 1] = c;
michael@0 359 rlp->used += 2;
michael@0 360 return;
michael@0 361 }
michael@0 362
michael@0 363 if (c >= s)
michael@0 364 /*
michael@0 365 * The code is a duplicate of a code in the last range, so just return.
michael@0 366 */
michael@0 367 return;
michael@0 368
michael@0 369 /*
michael@0 370 * The code should be inserted somewhere before the last range in the
michael@0 371 * list. Locate the insertion point.
michael@0 372 */
michael@0 373 for (i = 0;
michael@0 374 i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
michael@0 375
michael@0 376 s = rlp->ranges[i];
michael@0 377 e = rlp->ranges[i + 1];
michael@0 378
michael@0 379 if (c == e + 1)
michael@0 380 /*
michael@0 381 * Simply extend the current range.
michael@0 382 */
michael@0 383 rlp->ranges[i + 1] = c;
michael@0 384 else if (c < s) {
michael@0 385 /*
michael@0 386 * Add a new entry before the current location. Shift all entries
michael@0 387 * before the current one up by one to make room.
michael@0 388 */
michael@0 389 for (j = rlp->used; j > i; j -= 2) {
michael@0 390 rlp->ranges[j] = rlp->ranges[j - 2];
michael@0 391 rlp->ranges[j + 1] = rlp->ranges[j - 1];
michael@0 392 }
michael@0 393 rlp->ranges[i] = rlp->ranges[i + 1] = c;
michael@0 394
michael@0 395 rlp->used += 2;
michael@0 396 }
michael@0 397 }
michael@0 398
michael@0 399 static void
michael@0 400 #ifdef __STDC__
michael@0 401 add_decomp(unsigned long code)
michael@0 402 #else
michael@0 403 add_decomp(code)
michael@0 404 unsigned long code;
michael@0 405 #endif
michael@0 406 {
michael@0 407 unsigned long i, j, size;
michael@0 408
michael@0 409 /*
michael@0 410 * Add the code to the composite property.
michael@0 411 */
michael@0 412 ordered_range_insert(code, "Cm", 2);
michael@0 413
michael@0 414 /*
michael@0 415 * Locate the insertion point for the code.
michael@0 416 */
michael@0 417 for (i = 0; i < decomps_used && code > decomps[i].code; i++) ;
michael@0 418
michael@0 419 /*
michael@0 420 * Allocate space for a new decomposition.
michael@0 421 */
michael@0 422 if (decomps_used == decomps_size) {
michael@0 423 if (decomps_size == 0)
michael@0 424 decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
michael@0 425 else
michael@0 426 decomps = (_decomp_t *)
michael@0 427 realloc((char *) decomps,
michael@0 428 sizeof(_decomp_t) * (decomps_size + 8));
michael@0 429 (void) memset((char *) (decomps + decomps_size), 0,
michael@0 430 sizeof(_decomp_t) << 3);
michael@0 431 decomps_size += 8;
michael@0 432 }
michael@0 433
michael@0 434 if (i < decomps_used && code != decomps[i].code) {
michael@0 435 /*
michael@0 436 * Shift the decomps up by one if the codes don't match.
michael@0 437 */
michael@0 438 for (j = decomps_used; j > i; j--)
michael@0 439 (void) memcpy((char *) &decomps[j], (char *) &decomps[j - 1],
michael@0 440 sizeof(_decomp_t));
michael@0 441 }
michael@0 442
michael@0 443 /*
michael@0 444 * Insert or replace a decomposition.
michael@0 445 */
michael@0 446 size = dectmp_size + (4 - (dectmp_size & 3));
michael@0 447 if (decomps[i].size < size) {
michael@0 448 if (decomps[i].size == 0)
michael@0 449 decomps[i].decomp = (unsigned long *)
michael@0 450 malloc(sizeof(unsigned long) * size);
michael@0 451 else
michael@0 452 decomps[i].decomp = (unsigned long *)
michael@0 453 realloc((char *) decomps[i].decomp,
michael@0 454 sizeof(unsigned long) * size);
michael@0 455 decomps[i].size = size;
michael@0 456 }
michael@0 457
michael@0 458 if (decomps[i].code != code)
michael@0 459 decomps_used++;
michael@0 460
michael@0 461 decomps[i].code = code;
michael@0 462 decomps[i].used = dectmp_size;
michael@0 463 (void) memcpy((char *) decomps[i].decomp, (char *) dectmp,
michael@0 464 sizeof(unsigned long) * dectmp_size);
michael@0 465
michael@0 466 }
michael@0 467
michael@0 468 static void
michael@0 469 #ifdef __STDC__
michael@0 470 add_title(unsigned long code)
michael@0 471 #else
michael@0 472 add_title(code)
michael@0 473 unsigned long code;
michael@0 474 #endif
michael@0 475 {
michael@0 476 unsigned long i, j;
michael@0 477
michael@0 478 /*
michael@0 479 * Always map the code to itself.
michael@0 480 */
michael@0 481 cases[2] = code;
michael@0 482
michael@0 483 if (title_used == title_size) {
michael@0 484 if (title_size == 0)
michael@0 485 title = (_case_t *) malloc(sizeof(_case_t) << 3);
michael@0 486 else
michael@0 487 title = (_case_t *) realloc((char *) title,
michael@0 488 sizeof(_case_t) * (title_size + 8));
michael@0 489 title_size += 8;
michael@0 490 }
michael@0 491
michael@0 492 /*
michael@0 493 * Locate the insertion point.
michael@0 494 */
michael@0 495 for (i = 0; i < title_used && code > title[i].key; i++) ;
michael@0 496
michael@0 497 if (i < title_used) {
michael@0 498 /*
michael@0 499 * Shift the array up by one.
michael@0 500 */
michael@0 501 for (j = title_used; j > i; j--)
michael@0 502 (void) memcpy((char *) &title[j], (char *) &title[j - 1],
michael@0 503 sizeof(_case_t));
michael@0 504 }
michael@0 505
michael@0 506 title[i].key = cases[2]; /* Title */
michael@0 507 title[i].other1 = cases[0]; /* Upper */
michael@0 508 title[i].other2 = cases[1]; /* Lower */
michael@0 509
michael@0 510 title_used++;
michael@0 511 }
michael@0 512
michael@0 513 static void
michael@0 514 #ifdef __STDC__
michael@0 515 add_upper(unsigned long code)
michael@0 516 #else
michael@0 517 add_upper(code)
michael@0 518 unsigned long code;
michael@0 519 #endif
michael@0 520 {
michael@0 521 unsigned long i, j;
michael@0 522
michael@0 523 /*
michael@0 524 * Always map the code to itself.
michael@0 525 */
michael@0 526 cases[0] = code;
michael@0 527
michael@0 528 /*
michael@0 529 * If the title case character is not present, then make it the same as
michael@0 530 * the upper case.
michael@0 531 */
michael@0 532 if (cases[2] == 0)
michael@0 533 cases[2] = code;
michael@0 534
michael@0 535 if (upper_used == upper_size) {
michael@0 536 if (upper_size == 0)
michael@0 537 upper = (_case_t *) malloc(sizeof(_case_t) << 3);
michael@0 538 else
michael@0 539 upper = (_case_t *) realloc((char *) upper,
michael@0 540 sizeof(_case_t) * (upper_size + 8));
michael@0 541 upper_size += 8;
michael@0 542 }
michael@0 543
michael@0 544 /*
michael@0 545 * Locate the insertion point.
michael@0 546 */
michael@0 547 for (i = 0; i < upper_used && code > upper[i].key; i++) ;
michael@0 548
michael@0 549 if (i < upper_used) {
michael@0 550 /*
michael@0 551 * Shift the array up by one.
michael@0 552 */
michael@0 553 for (j = upper_used; j > i; j--)
michael@0 554 (void) memcpy((char *) &upper[j], (char *) &upper[j - 1],
michael@0 555 sizeof(_case_t));
michael@0 556 }
michael@0 557
michael@0 558 upper[i].key = cases[0]; /* Upper */
michael@0 559 upper[i].other1 = cases[1]; /* Lower */
michael@0 560 upper[i].other2 = cases[2]; /* Title */
michael@0 561
michael@0 562 upper_used++;
michael@0 563 }
michael@0 564
michael@0 565 static void
michael@0 566 #ifdef __STDC__
michael@0 567 add_lower(unsigned long code)
michael@0 568 #else
michael@0 569 add_lower(code)
michael@0 570 unsigned long code;
michael@0 571 #endif
michael@0 572 {
michael@0 573 unsigned long i, j;
michael@0 574
michael@0 575 /*
michael@0 576 * Always map the code to itself.
michael@0 577 */
michael@0 578 cases[1] = code;
michael@0 579
michael@0 580 /*
michael@0 581 * If the title case character is empty, then make it the same as the
michael@0 582 * upper case.
michael@0 583 */
michael@0 584 if (cases[2] == 0)
michael@0 585 cases[2] = cases[0];
michael@0 586
michael@0 587 if (lower_used == lower_size) {
michael@0 588 if (lower_size == 0)
michael@0 589 lower = (_case_t *) malloc(sizeof(_case_t) << 3);
michael@0 590 else
michael@0 591 lower = (_case_t *) realloc((char *) lower,
michael@0 592 sizeof(_case_t) * (lower_size + 8));
michael@0 593 lower_size += 8;
michael@0 594 }
michael@0 595
michael@0 596 /*
michael@0 597 * Locate the insertion point.
michael@0 598 */
michael@0 599 for (i = 0; i < lower_used && code > lower[i].key; i++) ;
michael@0 600
michael@0 601 if (i < lower_used) {
michael@0 602 /*
michael@0 603 * Shift the array up by one.
michael@0 604 */
michael@0 605 for (j = lower_used; j > i; j--)
michael@0 606 (void) memcpy((char *) &lower[j], (char *) &lower[j - 1],
michael@0 607 sizeof(_case_t));
michael@0 608 }
michael@0 609
michael@0 610 lower[i].key = cases[1]; /* Lower */
michael@0 611 lower[i].other1 = cases[0]; /* Upper */
michael@0 612 lower[i].other2 = cases[2]; /* Title */
michael@0 613
michael@0 614 lower_used++;
michael@0 615 }
michael@0 616
michael@0 617 static void
michael@0 618 #ifdef __STDC__
michael@0 619 ordered_ccl_insert(unsigned long c, unsigned long ccl_code)
michael@0 620 #else
michael@0 621 ordered_ccl_insert(c, ccl_code)
michael@0 622 unsigned long c, ccl_code;
michael@0 623 #endif
michael@0 624 {
michael@0 625 unsigned long i, j;
michael@0 626
michael@0 627 if (ccl_used == ccl_size) {
michael@0 628 if (ccl_size == 0)
michael@0 629 ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24);
michael@0 630 else
michael@0 631 ccl = (unsigned long *)
michael@0 632 realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24));
michael@0 633 ccl_size += 24;
michael@0 634 }
michael@0 635
michael@0 636 /*
michael@0 637 * Optimize adding the first item.
michael@0 638 */
michael@0 639 if (ccl_used == 0) {
michael@0 640 ccl[0] = ccl[1] = c;
michael@0 641 ccl[2] = ccl_code;
michael@0 642 ccl_used += 3;
michael@0 643 return;
michael@0 644 }
michael@0 645
michael@0 646 /*
michael@0 647 * Handle the special case of extending the range on the end. This
michael@0 648 * requires that the combining class codes are the same.
michael@0 649 */
michael@0 650 if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
michael@0 651 ccl[ccl_used - 2] = c;
michael@0 652 return;
michael@0 653 }
michael@0 654
michael@0 655 /*
michael@0 656 * Handle the special case of adding another range on the end.
michael@0 657 */
michael@0 658 if (c > ccl[ccl_used - 2] + 1 ||
michael@0 659 (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
michael@0 660 ccl[ccl_used++] = c;
michael@0 661 ccl[ccl_used++] = c;
michael@0 662 ccl[ccl_used++] = ccl_code;
michael@0 663 return;
michael@0 664 }
michael@0 665
michael@0 666 /*
michael@0 667 * Locate either the insertion point or range for the code.
michael@0 668 */
michael@0 669 for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
michael@0 670
michael@0 671 if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
michael@0 672 /*
michael@0 673 * Extend an existing range.
michael@0 674 */
michael@0 675 ccl[i + 1] = c;
michael@0 676 return;
michael@0 677 } else if (c < ccl[i]) {
michael@0 678 /*
michael@0 679 * Start a new range before the current location.
michael@0 680 */
michael@0 681 for (j = ccl_used; j > i; j -= 3) {
michael@0 682 ccl[j] = ccl[j - 3];
michael@0 683 ccl[j - 1] = ccl[j - 4];
michael@0 684 ccl[j - 2] = ccl[j - 5];
michael@0 685 }
michael@0 686 ccl[i] = ccl[i + 1] = c;
michael@0 687 ccl[i + 2] = ccl_code;
michael@0 688 }
michael@0 689 }
michael@0 690
michael@0 691 /*
michael@0 692 * Adds a number if it does not already exist and returns an index value
michael@0 693 * multiplied by 2.
michael@0 694 */
michael@0 695 static unsigned long
michael@0 696 #ifdef __STDC__
michael@0 697 make_number(short num, short denom)
michael@0 698 #else
michael@0 699 make_number(num, denom)
michael@0 700 short num, denom;
michael@0 701 #endif
michael@0 702 {
michael@0 703 unsigned long n;
michael@0 704
michael@0 705 /*
michael@0 706 * Determine if the number already exists.
michael@0 707 */
michael@0 708 for (n = 0; n < nums_used; n++) {
michael@0 709 if (nums[n].numerator == num && nums[n].denominator == denom)
michael@0 710 return n << 1;
michael@0 711 }
michael@0 712
michael@0 713 if (nums_used == nums_size) {
michael@0 714 if (nums_size == 0)
michael@0 715 nums = (_num_t *) malloc(sizeof(_num_t) << 3);
michael@0 716 else
michael@0 717 nums = (_num_t *) realloc((char *) nums,
michael@0 718 sizeof(_num_t) * (nums_size + 8));
michael@0 719 nums_size += 8;
michael@0 720 }
michael@0 721
michael@0 722 n = nums_used++;
michael@0 723 nums[n].numerator = num;
michael@0 724 nums[n].denominator = denom;
michael@0 725
michael@0 726 return n << 1;
michael@0 727 }
michael@0 728
michael@0 729 static void
michael@0 730 #ifdef __STDC__
michael@0 731 add_number(unsigned long code, short num, short denom)
michael@0 732 #else
michael@0 733 add_number(code, num, denom)
michael@0 734 unsigned long code;
michael@0 735 short num, denom;
michael@0 736 #endif
michael@0 737 {
michael@0 738 unsigned long i, j;
michael@0 739
michael@0 740 /*
michael@0 741 * Insert the code in order.
michael@0 742 */
michael@0 743 for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
michael@0 744
michael@0 745 /*
michael@0 746 * Handle the case of the codes matching and simply replace the number
michael@0 747 * that was there before.
michael@0 748 */
michael@0 749 if (ncodes_used > 0 && code == ncodes[i].code) {
michael@0 750 ncodes[i].idx = make_number(num, denom);
michael@0 751 return;
michael@0 752 }
michael@0 753
michael@0 754 /*
michael@0 755 * Resize the array if necessary.
michael@0 756 */
michael@0 757 if (ncodes_used == ncodes_size) {
michael@0 758 if (ncodes_size == 0)
michael@0 759 ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
michael@0 760 else
michael@0 761 ncodes = (_codeidx_t *)
michael@0 762 realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
michael@0 763
michael@0 764 ncodes_size += 8;
michael@0 765 }
michael@0 766
michael@0 767 /*
michael@0 768 * Shift things around to insert the code if necessary.
michael@0 769 */
michael@0 770 if (i < ncodes_used) {
michael@0 771 for (j = ncodes_used; j > i; j--) {
michael@0 772 ncodes[j].code = ncodes[j - 1].code;
michael@0 773 ncodes[j].idx = ncodes[j - 1].idx;
michael@0 774 }
michael@0 775 }
michael@0 776 ncodes[i].code = code;
michael@0 777 ncodes[i].idx = make_number(num, denom);
michael@0 778
michael@0 779 ncodes_used++;
michael@0 780 }
michael@0 781
michael@0 782 /*
michael@0 783 * This routine assumes that the line is a valid Unicode Character Database
michael@0 784 * entry.
michael@0 785 */
michael@0 786 static void
michael@0 787 #ifdef __STDC__
michael@0 788 read_cdata(FILE *in)
michael@0 789 #else
michael@0 790 read_cdata(in)
michael@0 791 FILE *in;
michael@0 792 #endif
michael@0 793 {
michael@0 794 unsigned long i, lineno, skip, code, ccl_code;
michael@0 795 short wnum, neg, number[2];
michael@0 796 char line[512], *s, *e;
michael@0 797
michael@0 798 lineno = skip = 0;
michael@0 799 while (fscanf(in, "%[^\n]\n", line) != EOF) {
michael@0 800 lineno++;
michael@0 801
michael@0 802 /*
michael@0 803 * Skip blank lines and lines that start with a '#'.
michael@0 804 */
michael@0 805 if (line[0] == 0 || line[0] == '#')
michael@0 806 continue;
michael@0 807
michael@0 808 /*
michael@0 809 * If lines need to be skipped, do it here.
michael@0 810 */
michael@0 811 if (skip) {
michael@0 812 skip--;
michael@0 813 continue;
michael@0 814 }
michael@0 815
michael@0 816 /*
michael@0 817 * Collect the code. The code can be up to 6 hex digits in length to
michael@0 818 * allow surrogates to be specified.
michael@0 819 */
michael@0 820 for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
michael@0 821 code <<= 4;
michael@0 822 if (*s >= '0' && *s <= '9')
michael@0 823 code += *s - '0';
michael@0 824 else if (*s >= 'A' && *s <= 'F')
michael@0 825 code += (*s - 'A') + 10;
michael@0 826 else if (*s >= 'a' && *s <= 'f')
michael@0 827 code += (*s - 'a') + 10;
michael@0 828 }
michael@0 829
michael@0 830 /*
michael@0 831 * Handle the following special cases:
michael@0 832 * 1. 4E00-9FA5 CJK Ideographs.
michael@0 833 * 2. AC00-D7A3 Hangul Syllables.
michael@0 834 * 3. D800-DFFF Surrogates.
michael@0 835 * 4. E000-F8FF Private Use Area.
michael@0 836 * 5. F900-FA2D Han compatibility.
michael@0 837 */
michael@0 838 switch (code) {
michael@0 839 case 0x4e00:
michael@0 840 /*
michael@0 841 * The Han ideographs.
michael@0 842 */
michael@0 843 add_range(0x4e00, 0x9fff, "Lo", "L");
michael@0 844
michael@0 845 /*
michael@0 846 * Add the characters to the defined category.
michael@0 847 */
michael@0 848 add_range(0x4e00, 0x9fa5, "Cp", 0);
michael@0 849
michael@0 850 skip = 1;
michael@0 851 break;
michael@0 852 case 0xac00:
michael@0 853 /*
michael@0 854 * The Hangul syllables.
michael@0 855 */
michael@0 856 add_range(0xac00, 0xd7a3, "Lo", "L");
michael@0 857
michael@0 858 /*
michael@0 859 * Add the characters to the defined category.
michael@0 860 */
michael@0 861 add_range(0xac00, 0xd7a3, "Cp", 0);
michael@0 862
michael@0 863 skip = 1;
michael@0 864 break;
michael@0 865 case 0xd800:
michael@0 866 /*
michael@0 867 * Make a range of all surrogates and assume some default
michael@0 868 * properties.
michael@0 869 */
michael@0 870 add_range(0x010000, 0x10ffff, "Cs", "L");
michael@0 871 skip = 5;
michael@0 872 break;
michael@0 873 case 0xe000:
michael@0 874 /*
michael@0 875 * The Private Use area. Add with a default set of properties.
michael@0 876 */
michael@0 877 add_range(0xe000, 0xf8ff, "Co", "L");
michael@0 878 skip = 1;
michael@0 879 break;
michael@0 880 case 0xf900:
michael@0 881 /*
michael@0 882 * The CJK compatibility area.
michael@0 883 */
michael@0 884 add_range(0xf900, 0xfaff, "Lo", "L");
michael@0 885
michael@0 886 /*
michael@0 887 * Add the characters to the defined category.
michael@0 888 */
michael@0 889 add_range(0xf900, 0xfaff, "Cp", 0);
michael@0 890
michael@0 891 skip = 1;
michael@0 892 }
michael@0 893
michael@0 894 if (skip)
michael@0 895 continue;
michael@0 896
michael@0 897 /*
michael@0 898 * Add the code to the defined category.
michael@0 899 */
michael@0 900 ordered_range_insert(code, "Cp", 2);
michael@0 901
michael@0 902 /*
michael@0 903 * Locate the first character property field.
michael@0 904 */
michael@0 905 for (i = 0; *s != 0 && i < 2; s++) {
michael@0 906 if (*s == ';')
michael@0 907 i++;
michael@0 908 }
michael@0 909 for (e = s; *e && *e != ';'; e++) ;
michael@0 910
michael@0 911 ordered_range_insert(code, s, e - s);
michael@0 912
michael@0 913 /*
michael@0 914 * Locate the combining class code.
michael@0 915 */
michael@0 916 for (s = e; *s != 0 && i < 3; s++) {
michael@0 917 if (*s == ';')
michael@0 918 i++;
michael@0 919 }
michael@0 920
michael@0 921 /*
michael@0 922 * Convert the combining class code from decimal.
michael@0 923 */
michael@0 924 for (ccl_code = 0, e = s; *e && *e != ';'; e++)
michael@0 925 ccl_code = (ccl_code * 10) + (*e - '0');
michael@0 926
michael@0 927 /*
michael@0 928 * Add the code if it not 0.
michael@0 929 */
michael@0 930 if (ccl_code != 0)
michael@0 931 ordered_ccl_insert(code, ccl_code);
michael@0 932
michael@0 933 /*
michael@0 934 * Locate the second character property field.
michael@0 935 */
michael@0 936 for (s = e; *s != 0 && i < 4; s++) {
michael@0 937 if (*s == ';')
michael@0 938 i++;
michael@0 939 }
michael@0 940 for (e = s; *e && *e != ';'; e++) ;
michael@0 941
michael@0 942 ordered_range_insert(code, s, e - s);
michael@0 943
michael@0 944 /*
michael@0 945 * Check for a decomposition.
michael@0 946 */
michael@0 947 s = ++e;
michael@0 948 if (*s != ';' && *s != '<') {
michael@0 949 /*
michael@0 950 * Collect the codes of the decomposition.
michael@0 951 */
michael@0 952 for (dectmp_size = 0; *s != ';'; ) {
michael@0 953 /*
michael@0 954 * Skip all leading non-hex digits.
michael@0 955 */
michael@0 956 while (!ishdigit(*s))
michael@0 957 s++;
michael@0 958
michael@0 959 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
michael@0 960 dectmp[dectmp_size] <<= 4;
michael@0 961 if (*s >= '0' && *s <= '9')
michael@0 962 dectmp[dectmp_size] += *s - '0';
michael@0 963 else if (*s >= 'A' && *s <= 'F')
michael@0 964 dectmp[dectmp_size] += (*s - 'A') + 10;
michael@0 965 else if (*s >= 'a' && *s <= 'f')
michael@0 966 dectmp[dectmp_size] += (*s - 'a') + 10;
michael@0 967 }
michael@0 968 dectmp_size++;
michael@0 969 }
michael@0 970
michael@0 971 /*
michael@0 972 * If there is more than one code in the temporary decomposition
michael@0 973 * array, then add the character with its decomposition.
michael@0 974 */
michael@0 975 if (dectmp_size > 1)
michael@0 976 add_decomp(code);
michael@0 977 }
michael@0 978
michael@0 979 /*
michael@0 980 * Skip to the number field.
michael@0 981 */
michael@0 982 for (i = 0; i < 3 && *s; s++) {
michael@0 983 if (*s == ';')
michael@0 984 i++;
michael@0 985 }
michael@0 986
michael@0 987 /*
michael@0 988 * Scan the number in.
michael@0 989 */
michael@0 990 number[0] = number[1] = 0;
michael@0 991 for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
michael@0 992 if (*e == '-') {
michael@0 993 neg = 1;
michael@0 994 continue;
michael@0 995 }
michael@0 996
michael@0 997 if (*e == '/') {
michael@0 998 /*
michael@0 999 * Move to the denominator of the fraction.
michael@0 1000 */
michael@0 1001 if (neg)
michael@0 1002 number[wnum] *= -1;
michael@0 1003 neg = 0;
michael@0 1004 e++;
michael@0 1005 wnum++;
michael@0 1006 }
michael@0 1007 number[wnum] = (number[wnum] * 10) + (*e - '0');
michael@0 1008 }
michael@0 1009
michael@0 1010 if (e > s) {
michael@0 1011 /*
michael@0 1012 * Adjust the denominator in case of integers and add the number.
michael@0 1013 */
michael@0 1014 if (wnum == 0)
michael@0 1015 number[1] = number[0];
michael@0 1016
michael@0 1017 add_number(code, number[0], number[1]);
michael@0 1018 }
michael@0 1019
michael@0 1020 /*
michael@0 1021 * Skip to the start of the possible case mappings.
michael@0 1022 */
michael@0 1023 for (s = e, i = 0; i < 4 && *s; s++) {
michael@0 1024 if (*s == ';')
michael@0 1025 i++;
michael@0 1026 }
michael@0 1027
michael@0 1028 /*
michael@0 1029 * Collect the case mappings.
michael@0 1030 */
michael@0 1031 cases[0] = cases[1] = cases[2] = 0;
michael@0 1032 for (i = 0; i < 3; i++) {
michael@0 1033 while (ishdigit(*s)) {
michael@0 1034 cases[i] <<= 4;
michael@0 1035 if (*s >= '0' && *s <= '9')
michael@0 1036 cases[i] += *s - '0';
michael@0 1037 else if (*s >= 'A' && *s <= 'F')
michael@0 1038 cases[i] += (*s - 'A') + 10;
michael@0 1039 else if (*s >= 'a' && *s <= 'f')
michael@0 1040 cases[i] += (*s - 'a') + 10;
michael@0 1041 s++;
michael@0 1042 }
michael@0 1043 if (*s == ';')
michael@0 1044 s++;
michael@0 1045 }
michael@0 1046 if (cases[0] && cases[1])
michael@0 1047 /*
michael@0 1048 * Add the upper and lower mappings for a title case character.
michael@0 1049 */
michael@0 1050 add_title(code);
michael@0 1051 else if (cases[1])
michael@0 1052 /*
michael@0 1053 * Add the lower and title case mappings for the upper case
michael@0 1054 * character.
michael@0 1055 */
michael@0 1056 add_upper(code);
michael@0 1057 else if (cases[0])
michael@0 1058 /*
michael@0 1059 * Add the upper and title case mappings for the lower case
michael@0 1060 * character.
michael@0 1061 */
michael@0 1062 add_lower(code);
michael@0 1063 }
michael@0 1064 }
michael@0 1065
michael@0 1066 static _decomp_t *
michael@0 1067 #ifdef __STDC__
michael@0 1068 find_decomp(unsigned long code)
michael@0 1069 #else
michael@0 1070 find_decomp(code)
michael@0 1071 unsigned long code;
michael@0 1072 #endif
michael@0 1073 {
michael@0 1074 long l, r, m;
michael@0 1075
michael@0 1076 l = 0;
michael@0 1077 r = decomps_used - 1;
michael@0 1078 while (l <= r) {
michael@0 1079 m = (l + r) >> 1;
michael@0 1080 if (code > decomps[m].code)
michael@0 1081 l = m + 1;
michael@0 1082 else if (code < decomps[m].code)
michael@0 1083 r = m - 1;
michael@0 1084 else
michael@0 1085 return &decomps[m];
michael@0 1086 }
michael@0 1087 return 0;
michael@0 1088 }
michael@0 1089
michael@0 1090 static void
michael@0 1091 #ifdef __STDC__
michael@0 1092 decomp_it(_decomp_t *d)
michael@0 1093 #else
michael@0 1094 decomp_it(d)
michael@0 1095 _decomp_t *d;
michael@0 1096 #endif
michael@0 1097 {
michael@0 1098 unsigned long i;
michael@0 1099 _decomp_t *dp;
michael@0 1100
michael@0 1101 for (i = 0; i < d->used; i++) {
michael@0 1102 if ((dp = find_decomp(d->decomp[i])) != 0)
michael@0 1103 decomp_it(dp);
michael@0 1104 else
michael@0 1105 dectmp[dectmp_size++] = d->decomp[i];
michael@0 1106 }
michael@0 1107 }
michael@0 1108
michael@0 1109 /*
michael@0 1110 * Expand all decompositions by recursively decomposing each character
michael@0 1111 * in the decomposition.
michael@0 1112 */
michael@0 1113 static void
michael@0 1114 #ifdef __STDC__
michael@0 1115 expand_decomp(void)
michael@0 1116 #else
michael@0 1117 expand_decomp()
michael@0 1118 #endif
michael@0 1119 {
michael@0 1120 unsigned long i;
michael@0 1121
michael@0 1122 for (i = 0; i < decomps_used; i++) {
michael@0 1123 dectmp_size = 0;
michael@0 1124 decomp_it(&decomps[i]);
michael@0 1125 if (dectmp_size > 0)
michael@0 1126 add_decomp(decomps[i].code);
michael@0 1127 }
michael@0 1128 }
michael@0 1129
michael@0 1130 static void
michael@0 1131 #ifdef __STDC__
michael@0 1132 write_cdata(char *opath)
michael@0 1133 #else
michael@0 1134 write_cdata(opath)
michael@0 1135 char *opath;
michael@0 1136 #endif
michael@0 1137 {
michael@0 1138 FILE *out;
michael@0 1139 unsigned long i, idx, bytes, nprops;
michael@0 1140 unsigned short casecnt[2];
michael@0 1141 char path[BUFSIZ];
michael@0 1142
michael@0 1143 /*****************************************************************
michael@0 1144 *
michael@0 1145 * Generate the ctype data.
michael@0 1146 *
michael@0 1147 *****************************************************************/
michael@0 1148
michael@0 1149 /*
michael@0 1150 * Open the ctype.dat file.
michael@0 1151 */
michael@0 1152 sprintf(path, "%s/ctype.dat", opath);
michael@0 1153 if ((out = fopen(path, "wb")) == 0)
michael@0 1154 return;
michael@0 1155
michael@0 1156 /*
michael@0 1157 * Collect the offsets for the properties. The offsets array is
michael@0 1158 * on a 4-byte boundary to keep things efficient for architectures
michael@0 1159 * that need such a thing.
michael@0 1160 */
michael@0 1161 for (i = idx = 0; i < NUMPROPS; i++) {
michael@0 1162 propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
michael@0 1163 idx += proptbl[i].used;
michael@0 1164 }
michael@0 1165
michael@0 1166 /*
michael@0 1167 * Add the sentinel index which is used by the binary search as the upper
michael@0 1168 * bound for a search.
michael@0 1169 */
michael@0 1170 propcnt[i] = idx;
michael@0 1171
michael@0 1172 /*
michael@0 1173 * Record the actual number of property lists. This may be different than
michael@0 1174 * the number of offsets actually written because of aligning on a 4-byte
michael@0 1175 * boundary.
michael@0 1176 */
michael@0 1177 hdr[1] = NUMPROPS;
michael@0 1178
michael@0 1179 /*
michael@0 1180 * Calculate the byte count needed and pad the property counts array to a
michael@0 1181 * 4-byte boundary.
michael@0 1182 */
michael@0 1183 if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3)
michael@0 1184 bytes += 4 - (bytes & 3);
michael@0 1185 nprops = bytes / sizeof(unsigned short);
michael@0 1186 bytes += sizeof(unsigned long) * idx;
michael@0 1187
michael@0 1188 /*
michael@0 1189 * Write the header.
michael@0 1190 */
michael@0 1191 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
michael@0 1192
michael@0 1193 /*
michael@0 1194 * Write the byte count.
michael@0 1195 */
michael@0 1196 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
michael@0 1197
michael@0 1198 /*
michael@0 1199 * Write the property list counts.
michael@0 1200 */
michael@0 1201 fwrite((char *) propcnt, sizeof(unsigned short), nprops, out);
michael@0 1202
michael@0 1203 /*
michael@0 1204 * Write the property lists.
michael@0 1205 */
michael@0 1206 for (i = 0; i < NUMPROPS; i++) {
michael@0 1207 if (proptbl[i].used > 0)
michael@0 1208 fwrite((char *) proptbl[i].ranges, sizeof(unsigned long),
michael@0 1209 proptbl[i].used, out);
michael@0 1210 }
michael@0 1211
michael@0 1212 fclose(out);
michael@0 1213
michael@0 1214 /*****************************************************************
michael@0 1215 *
michael@0 1216 * Generate the case mapping data.
michael@0 1217 *
michael@0 1218 *****************************************************************/
michael@0 1219
michael@0 1220 /*
michael@0 1221 * Open the case.dat file.
michael@0 1222 */
michael@0 1223 sprintf(path, "%s/case.dat", opath);
michael@0 1224 if ((out = fopen(path, "wb")) == 0)
michael@0 1225 return;
michael@0 1226
michael@0 1227 /*
michael@0 1228 * Write the case mapping tables.
michael@0 1229 */
michael@0 1230 hdr[1] = upper_used + lower_used + title_used;
michael@0 1231 casecnt[0] = upper_used;
michael@0 1232 casecnt[1] = lower_used;
michael@0 1233
michael@0 1234 /*
michael@0 1235 * Write the header.
michael@0 1236 */
michael@0 1237 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
michael@0 1238
michael@0 1239 /*
michael@0 1240 * Write the upper and lower case table sizes.
michael@0 1241 */
michael@0 1242 fwrite((char *) casecnt, sizeof(unsigned short), 2, out);
michael@0 1243
michael@0 1244 if (upper_used > 0)
michael@0 1245 /*
michael@0 1246 * Write the upper case table.
michael@0 1247 */
michael@0 1248 fwrite((char *) upper, sizeof(_case_t), upper_used, out);
michael@0 1249
michael@0 1250 if (lower_used > 0)
michael@0 1251 /*
michael@0 1252 * Write the lower case table.
michael@0 1253 */
michael@0 1254 fwrite((char *) lower, sizeof(_case_t), lower_used, out);
michael@0 1255
michael@0 1256 if (title_used > 0)
michael@0 1257 /*
michael@0 1258 * Write the title case table.
michael@0 1259 */
michael@0 1260 fwrite((char *) title, sizeof(_case_t), title_used, out);
michael@0 1261
michael@0 1262 fclose(out);
michael@0 1263
michael@0 1264 /*****************************************************************
michael@0 1265 *
michael@0 1266 * Generate the decomposition data.
michael@0 1267 *
michael@0 1268 *****************************************************************/
michael@0 1269
michael@0 1270 /*
michael@0 1271 * Fully expand all decompositions before generating the output file.
michael@0 1272 */
michael@0 1273 expand_decomp();
michael@0 1274
michael@0 1275 /*
michael@0 1276 * Open the decomp.dat file.
michael@0 1277 */
michael@0 1278 sprintf(path, "%s/decomp.dat", opath);
michael@0 1279 if ((out = fopen(path, "wb")) == 0)
michael@0 1280 return;
michael@0 1281
michael@0 1282 hdr[1] = decomps_used;
michael@0 1283
michael@0 1284 /*
michael@0 1285 * Write the header.
michael@0 1286 */
michael@0 1287 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
michael@0 1288
michael@0 1289 /*
michael@0 1290 * Write a temporary byte count which will be calculated as the
michael@0 1291 * decompositions are written out.
michael@0 1292 */
michael@0 1293 bytes = 0;
michael@0 1294 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
michael@0 1295
michael@0 1296 if (decomps_used) {
michael@0 1297 /*
michael@0 1298 * Write the list of decomp nodes.
michael@0 1299 */
michael@0 1300 for (i = idx = 0; i < decomps_used; i++) {
michael@0 1301 fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out);
michael@0 1302 fwrite((char *) &idx, sizeof(unsigned long), 1, out);
michael@0 1303 idx += decomps[i].used;
michael@0 1304 }
michael@0 1305
michael@0 1306 /*
michael@0 1307 * Write the sentinel index as the last decomp node.
michael@0 1308 */
michael@0 1309 fwrite((char *) &idx, sizeof(unsigned long), 1, out);
michael@0 1310
michael@0 1311 /*
michael@0 1312 * Write the decompositions themselves.
michael@0 1313 */
michael@0 1314 for (i = 0; i < decomps_used; i++)
michael@0 1315 fwrite((char *) decomps[i].decomp, sizeof(unsigned long),
michael@0 1316 decomps[i].used, out);
michael@0 1317
michael@0 1318 /*
michael@0 1319 * Seek back to the beginning and write the byte count.
michael@0 1320 */
michael@0 1321 bytes = (sizeof(unsigned long) * idx) +
michael@0 1322 (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
michael@0 1323 fseek(out, sizeof(unsigned short) << 1, 0L);
michael@0 1324 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
michael@0 1325
michael@0 1326 fclose(out);
michael@0 1327 }
michael@0 1328
michael@0 1329 /*****************************************************************
michael@0 1330 *
michael@0 1331 * Generate the combining class data.
michael@0 1332 *
michael@0 1333 *****************************************************************/
michael@0 1334
michael@0 1335 /*
michael@0 1336 * Open the cmbcl.dat file.
michael@0 1337 */
michael@0 1338 sprintf(path, "%s/cmbcl.dat", opath);
michael@0 1339 if ((out = fopen(path, "wb")) == 0)
michael@0 1340 return;
michael@0 1341
michael@0 1342 /*
michael@0 1343 * Set the number of ranges used. Each range has a combining class which
michael@0 1344 * means each entry is a 3-tuple.
michael@0 1345 */
michael@0 1346 hdr[1] = ccl_used / 3;
michael@0 1347
michael@0 1348 /*
michael@0 1349 * Write the header.
michael@0 1350 */
michael@0 1351 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
michael@0 1352
michael@0 1353 /*
michael@0 1354 * Write out the byte count to maintain header size.
michael@0 1355 */
michael@0 1356 bytes = ccl_used * sizeof(unsigned long);
michael@0 1357 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
michael@0 1358
michael@0 1359 if (ccl_used > 0)
michael@0 1360 /*
michael@0 1361 * Write the combining class ranges out.
michael@0 1362 */
michael@0 1363 fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out);
michael@0 1364
michael@0 1365 fclose(out);
michael@0 1366
michael@0 1367 /*****************************************************************
michael@0 1368 *
michael@0 1369 * Generate the number data.
michael@0 1370 *
michael@0 1371 *****************************************************************/
michael@0 1372
michael@0 1373 /*
michael@0 1374 * Open the num.dat file.
michael@0 1375 */
michael@0 1376 sprintf(path, "%s/num.dat", opath);
michael@0 1377 if ((out = fopen(path, "wb")) == 0)
michael@0 1378 return;
michael@0 1379
michael@0 1380 /*
michael@0 1381 * The count part of the header will be the total number of codes that
michael@0 1382 * have numbers.
michael@0 1383 */
michael@0 1384 hdr[1] = (unsigned short) (ncodes_used << 1);
michael@0 1385 bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
michael@0 1386
michael@0 1387 /*
michael@0 1388 * Write the header.
michael@0 1389 */
michael@0 1390 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
michael@0 1391
michael@0 1392 /*
michael@0 1393 * Write out the byte count to maintain header size.
michael@0 1394 */
michael@0 1395 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
michael@0 1396
michael@0 1397 /*
michael@0 1398 * Now, if number mappings exist, write them out.
michael@0 1399 */
michael@0 1400 if (ncodes_used > 0) {
michael@0 1401 fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
michael@0 1402 fwrite((char *) nums, sizeof(_num_t), nums_used, out);
michael@0 1403 }
michael@0 1404
michael@0 1405 fclose(out);
michael@0 1406 }
michael@0 1407
/*
 * Program entry point.
 *
 * Arguments are processed left to right:
 *   -o <dir>   directory the output files are written to
 *   <file>     input file opened in binary mode and passed to read_cdata()
 *
 * An input file that cannot be opened is reported on stderr and skipped.
 * After all arguments are consumed the collected data is written with
 * write_cdata(); the output directory defaults to "." when no -o option
 * supplied one.  Always returns 0.
 */
int
#ifdef __STDC__
main(int argc, char *argv[])
#else
main(argc, argv)
int argc;
char *argv[];
#endif
{
    FILE *in;
    char *prog, *opath;

    /*
     * Use the last path component of argv[0] in messages.
     */
    if ((prog = strrchr(argv[0], '/')) != 0)
        prog++;
    else
        prog = argv[0];

    opath = 0;

    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-' && argv[0][1] == 'o') {
            /*
             * The output directory is the argument following -o.
             */
            argc--;
            argv++;
            if (argc > 0)
                opath = argv[0];
            else
                fprintf(stderr, "%s: option -o requires a directory\n",
                        prog);
        } else if ((in = fopen(argv[0], "rb")) == 0) {
            fprintf(stderr, "%s: unable to open ctype file %s\n",
                    prog, argv[0]);
        } else {
            /*
             * Each input stream is opened, read and closed here, so there
             * is never a stale or null stream left over to close when the
             * next file argument is handled.
             */
            read_cdata(in);
            fclose(in);
        }
        argc--;
        argv++;
    }

    if (opath == 0)
        opath = ".";
    write_cdata(opath);

    return 0;
}

mercurial