|
1 /* |
|
2 * Copyright 1996, 1997, 1998 Computing Research Labs, |
|
3 * New Mexico State University |
|
4 * |
|
5 * Permission is hereby granted, free of charge, to any person obtaining a |
|
6 * copy of this software and associated documentation files (the "Software"), |
|
7 * to deal in the Software without restriction, including without limitation |
|
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
9 * and/or sell copies of the Software, and to permit persons to whom the |
|
10 * Software is furnished to do so, subject to the following conditions: |
|
11 * |
|
12 * The above copyright notice and this permission notice shall be included in |
|
13 * all copies or substantial portions of the Software. |
|
14 * |
|
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
18 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY |
|
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT |
|
20 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR |
|
21 * THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
22 */ |
|
/*
 * RCS identification string.  It is compiled in unless "lint" is defined;
 * under GCC the variable is tagged as unused because nothing in this file
 * refers to it.
 */
#if !defined(lint)
#  if defined(__GNUC__)
static char rcsid[] __attribute__ ((unused)) = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
#  else
static char rcsid[] = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $";
#  endif
#endif
|
30 |
|
31 #include <stdio.h> |
|
32 #include <stdlib.h> |
|
33 #include <string.h> |
|
34 #ifndef WIN32 |
|
35 #include <unistd.h> |
|
36 #endif |
|
37 |
|
/*
 * Non-zero when cc is an ASCII hexadecimal digit (0-9, a-f or A-F).  The
 * argument is evaluated more than once, so it must be free of side effects.
 */
#define ishdigit(cc) (('0' <= (cc) && (cc) <= '9') ||\
                      ('a' <= (cc) && (cc) <= 'f') ||\
                      ('A' <= (cc) && (cc) <= 'F'))
|
41 |
|
/*
 * Two-entry header placed at the front of each output file.  The first entry
 * is the byte-order-mark; the second is a count filled in by the writer just
 * before the file is emitted.
 */
static unsigned short hdr[2] = {0xfeff, 0};

/*
 * NUMPROPS is the number of property lists.  NEEDPROPS is the next multiple
 * of four that is strictly greater than NUMPROPS.
 */
#define NUMPROPS 49
#define NEEDPROPS ((NUMPROPS + 4) & ~3)

/*
 * A property name together with its length in characters.
 */
typedef struct {
    char *name;
    int len;
} _prop_t;

/*
 * Property names expected in the Unicode Character Database, followed by a
 * number of implementation specific ones.  The position of a name in this
 * table is the index of its range list in proptbl[].
 *
 * The implementation specific properties are:
 *   Cm = Composed (can be decomposed)
 *   Nb = Non-breaking
 *   Sy = Symmetric (has left and right forms)
 *   Hd = Hex digit
 *   Qm = Quote marks
 *   Mr = Mirroring
 *   Ss = Space, other
 *   Cp = Defined character
 */
static _prop_t props[NUMPROPS] = {
    {"Mn", 2}, {"Mc", 2}, {"Me", 2},
    {"Nd", 2}, {"Nl", 2}, {"No", 2},
    {"Zs", 2}, {"Zl", 2}, {"Zp", 2},
    {"Cc", 2}, {"Cf", 2}, {"Cs", 2}, {"Co", 2}, {"Cn", 2},
    {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2}, {"Lo", 2},
    {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
    {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2},
    {"L", 1}, {"R", 1},
    {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2},
    {"B", 1}, {"S", 1},
    {"WS", 2}, {"ON", 2},
    {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2},
    {"Qm", 2}, {"Mr", 2}, {"Ss", 2}, {"Cp", 2},
    {"Pi", 2}, {"Pf", 2}
};

/*
 * A growable list of code ranges stored as consecutive (start, end) pairs.
 * "used" counts the array entries holding data and "size" the entries
 * allocated.
 */
typedef struct {
    unsigned long *ranges;
    unsigned short used;
    unsigned short size;
} _ranges_t;

/*
 * One range list per entry of props[].
 */
static _ranges_t proptbl[NUMPROPS];

/*
 * Offsets of the property lists as written to the output file.  The array is
 * sized with NEEDPROPS so that it ends on a 4-byte boundary at compile time.
 */
static unsigned short propcnt[NEEDPROPS];
|
94 |
|
/*
 * Scratch array used to collect a decomposition before it is added to the
 * decomposition table.  dectmp_size is the number of entries in use.
 */
static unsigned long dectmp[64];
static unsigned long dectmp_size;

/*
 * A character code and its decomposition.  "used" is the number of codes in
 * the decomposition and "size" the number of entries allocated for it.
 */
typedef struct {
    unsigned long code;
    unsigned short size;
    unsigned short used;
    unsigned long *decomp;
} _decomp_t;

/*
 * List of decompositions, kept sorted by code.  Created and expanded as the
 * characters are encountered.
 */
static _decomp_t *decomps;
static unsigned long decomps_used;
static unsigned long decomps_size;

/*
 * Types and lists for handling lists of case mappings.  Each entry is keyed
 * by a character and carries its two other case forms.
 */
typedef struct {
    unsigned long key;
    unsigned long other1;
    unsigned long other2;
} _case_t;

static _case_t *upper;
static _case_t *lower;
static _case_t *title;
static unsigned long upper_used;
static unsigned long upper_size;
static unsigned long lower_used;
static unsigned long lower_size;
static unsigned long title_used;
static unsigned long title_size;

/*
 * Array used to collect case mappings before adding them to a list:
 * cases[0] is the upper, cases[1] the lower and cases[2] the title case form.
 */
static unsigned long cases[3];

/*
 * An array to hold ranges for combining classes, stored as consecutive
 * (start, end, class) triples.
 */
static unsigned long *ccl;
static unsigned long ccl_used;
static unsigned long ccl_size;

/*
 * Structures for handling numbers.
 */
typedef struct {
    unsigned long code;
    unsigned long idx;
} _codeidx_t;

typedef struct {
    short numerator;
    short denominator;
} _num_t;

/*
 * Array mapping character codes to entries of the number table, kept sorted
 * by code.
 */
static _codeidx_t *ncodes;
static unsigned long ncodes_used;
static unsigned long ncodes_size;

/*
 * Array for holding the distinct numbers.  (These three variables used to be
 * declared twice; a single declaration is sufficient.)
 */
static _num_t *nums;
static unsigned long nums_used;
static unsigned long nums_size;
|
178 |
|
179 static void |
|
180 #ifdef __STDC__ |
|
181 add_range(unsigned long start, unsigned long end, char *p1, char *p2) |
|
182 #else |
|
183 add_range(start, end, p1, p2) |
|
184 unsigned long start, end; |
|
185 char *p1, *p2; |
|
186 #endif |
|
187 { |
|
188 int i, j, k, len; |
|
189 _ranges_t *rlp; |
|
190 char *name; |
|
191 |
|
192 for (k = 0; k < 2; k++) { |
|
193 if (k == 0) { |
|
194 name = p1; |
|
195 len = 2; |
|
196 } else { |
|
197 if (p2 == 0) |
|
198 break; |
|
199 |
|
200 name = p2; |
|
201 len = 1; |
|
202 } |
|
203 |
|
204 for (i = 0; i < NUMPROPS; i++) { |
|
205 if (props[i].len == len && memcmp(props[i].name, name, len) == 0) |
|
206 break; |
|
207 } |
|
208 |
|
209 if (i == NUMPROPS) |
|
210 continue; |
|
211 |
|
212 rlp = &proptbl[i]; |
|
213 |
|
214 /* |
|
215 * Resize the range list if necessary. |
|
216 */ |
|
217 if (rlp->used == rlp->size) { |
|
218 if (rlp->size == 0) |
|
219 rlp->ranges = (unsigned long *) |
|
220 malloc(sizeof(unsigned long) << 3); |
|
221 else |
|
222 rlp->ranges = (unsigned long *) |
|
223 realloc((char *) rlp->ranges, |
|
224 sizeof(unsigned long) * (rlp->size + 8)); |
|
225 rlp->size += 8; |
|
226 } |
|
227 |
|
228 /* |
|
229 * If this is the first code for this property list, just add it |
|
230 * and return. |
|
231 */ |
|
232 if (rlp->used == 0) { |
|
233 rlp->ranges[0] = start; |
|
234 rlp->ranges[1] = end; |
|
235 rlp->used += 2; |
|
236 continue; |
|
237 } |
|
238 |
|
239 /* |
|
240 * Optimize the case of adding the range to the end. |
|
241 */ |
|
242 j = rlp->used - 1; |
|
243 if (start > rlp->ranges[j]) { |
|
244 j = rlp->used; |
|
245 rlp->ranges[j++] = start; |
|
246 rlp->ranges[j++] = end; |
|
247 rlp->used = j; |
|
248 continue; |
|
249 } |
|
250 |
|
251 /* |
|
252 * Need to locate the insertion point. |
|
253 */ |
|
254 for (i = 0; |
|
255 i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ; |
|
256 |
|
257 /* |
|
258 * If the start value lies in the current range, then simply set the |
|
259 * new end point of the range to the end value passed as a parameter. |
|
260 */ |
|
261 if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) { |
|
262 rlp->ranges[i + 1] = end; |
|
263 return; |
|
264 } |
|
265 |
|
266 /* |
|
267 * Shift following values up by two. |
|
268 */ |
|
269 for (j = rlp->used; j > i; j -= 2) { |
|
270 rlp->ranges[j] = rlp->ranges[j - 2]; |
|
271 rlp->ranges[j + 1] = rlp->ranges[j - 1]; |
|
272 } |
|
273 |
|
274 /* |
|
275 * Add the new range at the insertion point. |
|
276 */ |
|
277 rlp->ranges[i] = start; |
|
278 rlp->ranges[i + 1] = end; |
|
279 rlp->used += 2; |
|
280 } |
|
281 } |
|
282 |
|
283 static void |
|
284 #ifdef __STDC__ |
|
285 ordered_range_insert(unsigned long c, char *name, int len) |
|
286 #else |
|
287 ordered_range_insert(c, name, len) |
|
288 unsigned long c; |
|
289 char *name; |
|
290 int len; |
|
291 #endif |
|
292 { |
|
293 int i, j; |
|
294 unsigned long s, e; |
|
295 _ranges_t *rlp; |
|
296 |
|
297 if (len == 0) |
|
298 return; |
|
299 |
|
300 for (i = 0; i < NUMPROPS; i++) { |
|
301 if (props[i].len == len && memcmp(props[i].name, name, len) == 0) |
|
302 break; |
|
303 } |
|
304 |
|
305 if (i == NUMPROPS) |
|
306 return; |
|
307 |
|
308 /* |
|
309 * Have a match, so insert the code in order. |
|
310 */ |
|
311 rlp = &proptbl[i]; |
|
312 |
|
313 /* |
|
314 * Resize the range list if necessary. |
|
315 */ |
|
316 if (rlp->used == rlp->size) { |
|
317 if (rlp->size == 0) |
|
318 rlp->ranges = (unsigned long *) |
|
319 malloc(sizeof(unsigned long) << 3); |
|
320 else |
|
321 rlp->ranges = (unsigned long *) |
|
322 realloc((char *) rlp->ranges, |
|
323 sizeof(unsigned long) * (rlp->size + 8)); |
|
324 rlp->size += 8; |
|
325 } |
|
326 |
|
327 /* |
|
328 * If this is the first code for this property list, just add it |
|
329 * and return. |
|
330 */ |
|
331 if (rlp->used == 0) { |
|
332 rlp->ranges[0] = rlp->ranges[1] = c; |
|
333 rlp->used += 2; |
|
334 return; |
|
335 } |
|
336 |
|
337 /* |
|
338 * Optimize the cases of extending the last range and adding new ranges to |
|
339 * the end. |
|
340 */ |
|
341 j = rlp->used - 1; |
|
342 e = rlp->ranges[j]; |
|
343 s = rlp->ranges[j - 1]; |
|
344 |
|
345 if (c == e + 1) { |
|
346 /* |
|
347 * Extend the last range. |
|
348 */ |
|
349 rlp->ranges[j] = c; |
|
350 return; |
|
351 } |
|
352 |
|
353 if (c > e + 1) { |
|
354 /* |
|
355 * Start another range on the end. |
|
356 */ |
|
357 j = rlp->used; |
|
358 rlp->ranges[j] = rlp->ranges[j + 1] = c; |
|
359 rlp->used += 2; |
|
360 return; |
|
361 } |
|
362 |
|
363 if (c >= s) |
|
364 /* |
|
365 * The code is a duplicate of a code in the last range, so just return. |
|
366 */ |
|
367 return; |
|
368 |
|
369 /* |
|
370 * The code should be inserted somewhere before the last range in the |
|
371 * list. Locate the insertion point. |
|
372 */ |
|
373 for (i = 0; |
|
374 i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ; |
|
375 |
|
376 s = rlp->ranges[i]; |
|
377 e = rlp->ranges[i + 1]; |
|
378 |
|
379 if (c == e + 1) |
|
380 /* |
|
381 * Simply extend the current range. |
|
382 */ |
|
383 rlp->ranges[i + 1] = c; |
|
384 else if (c < s) { |
|
385 /* |
|
386 * Add a new entry before the current location. Shift all entries |
|
387 * before the current one up by one to make room. |
|
388 */ |
|
389 for (j = rlp->used; j > i; j -= 2) { |
|
390 rlp->ranges[j] = rlp->ranges[j - 2]; |
|
391 rlp->ranges[j + 1] = rlp->ranges[j - 1]; |
|
392 } |
|
393 rlp->ranges[i] = rlp->ranges[i + 1] = c; |
|
394 |
|
395 rlp->used += 2; |
|
396 } |
|
397 } |
|
398 |
|
399 static void |
|
400 #ifdef __STDC__ |
|
401 add_decomp(unsigned long code) |
|
402 #else |
|
403 add_decomp(code) |
|
404 unsigned long code; |
|
405 #endif |
|
406 { |
|
407 unsigned long i, j, size; |
|
408 |
|
409 /* |
|
410 * Add the code to the composite property. |
|
411 */ |
|
412 ordered_range_insert(code, "Cm", 2); |
|
413 |
|
414 /* |
|
415 * Locate the insertion point for the code. |
|
416 */ |
|
417 for (i = 0; i < decomps_used && code > decomps[i].code; i++) ; |
|
418 |
|
419 /* |
|
420 * Allocate space for a new decomposition. |
|
421 */ |
|
422 if (decomps_used == decomps_size) { |
|
423 if (decomps_size == 0) |
|
424 decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3); |
|
425 else |
|
426 decomps = (_decomp_t *) |
|
427 realloc((char *) decomps, |
|
428 sizeof(_decomp_t) * (decomps_size + 8)); |
|
429 (void) memset((char *) (decomps + decomps_size), 0, |
|
430 sizeof(_decomp_t) << 3); |
|
431 decomps_size += 8; |
|
432 } |
|
433 |
|
434 if (i < decomps_used && code != decomps[i].code) { |
|
435 /* |
|
436 * Shift the decomps up by one if the codes don't match. |
|
437 */ |
|
438 for (j = decomps_used; j > i; j--) |
|
439 (void) memcpy((char *) &decomps[j], (char *) &decomps[j - 1], |
|
440 sizeof(_decomp_t)); |
|
441 } |
|
442 |
|
443 /* |
|
444 * Insert or replace a decomposition. |
|
445 */ |
|
446 size = dectmp_size + (4 - (dectmp_size & 3)); |
|
447 if (decomps[i].size < size) { |
|
448 if (decomps[i].size == 0) |
|
449 decomps[i].decomp = (unsigned long *) |
|
450 malloc(sizeof(unsigned long) * size); |
|
451 else |
|
452 decomps[i].decomp = (unsigned long *) |
|
453 realloc((char *) decomps[i].decomp, |
|
454 sizeof(unsigned long) * size); |
|
455 decomps[i].size = size; |
|
456 } |
|
457 |
|
458 if (decomps[i].code != code) |
|
459 decomps_used++; |
|
460 |
|
461 decomps[i].code = code; |
|
462 decomps[i].used = dectmp_size; |
|
463 (void) memcpy((char *) decomps[i].decomp, (char *) dectmp, |
|
464 sizeof(unsigned long) * dectmp_size); |
|
465 |
|
466 } |
|
467 |
|
468 static void |
|
469 #ifdef __STDC__ |
|
470 add_title(unsigned long code) |
|
471 #else |
|
472 add_title(code) |
|
473 unsigned long code; |
|
474 #endif |
|
475 { |
|
476 unsigned long i, j; |
|
477 |
|
478 /* |
|
479 * Always map the code to itself. |
|
480 */ |
|
481 cases[2] = code; |
|
482 |
|
483 if (title_used == title_size) { |
|
484 if (title_size == 0) |
|
485 title = (_case_t *) malloc(sizeof(_case_t) << 3); |
|
486 else |
|
487 title = (_case_t *) realloc((char *) title, |
|
488 sizeof(_case_t) * (title_size + 8)); |
|
489 title_size += 8; |
|
490 } |
|
491 |
|
492 /* |
|
493 * Locate the insertion point. |
|
494 */ |
|
495 for (i = 0; i < title_used && code > title[i].key; i++) ; |
|
496 |
|
497 if (i < title_used) { |
|
498 /* |
|
499 * Shift the array up by one. |
|
500 */ |
|
501 for (j = title_used; j > i; j--) |
|
502 (void) memcpy((char *) &title[j], (char *) &title[j - 1], |
|
503 sizeof(_case_t)); |
|
504 } |
|
505 |
|
506 title[i].key = cases[2]; /* Title */ |
|
507 title[i].other1 = cases[0]; /* Upper */ |
|
508 title[i].other2 = cases[1]; /* Lower */ |
|
509 |
|
510 title_used++; |
|
511 } |
|
512 |
|
513 static void |
|
514 #ifdef __STDC__ |
|
515 add_upper(unsigned long code) |
|
516 #else |
|
517 add_upper(code) |
|
518 unsigned long code; |
|
519 #endif |
|
520 { |
|
521 unsigned long i, j; |
|
522 |
|
523 /* |
|
524 * Always map the code to itself. |
|
525 */ |
|
526 cases[0] = code; |
|
527 |
|
528 /* |
|
529 * If the title case character is not present, then make it the same as |
|
530 * the upper case. |
|
531 */ |
|
532 if (cases[2] == 0) |
|
533 cases[2] = code; |
|
534 |
|
535 if (upper_used == upper_size) { |
|
536 if (upper_size == 0) |
|
537 upper = (_case_t *) malloc(sizeof(_case_t) << 3); |
|
538 else |
|
539 upper = (_case_t *) realloc((char *) upper, |
|
540 sizeof(_case_t) * (upper_size + 8)); |
|
541 upper_size += 8; |
|
542 } |
|
543 |
|
544 /* |
|
545 * Locate the insertion point. |
|
546 */ |
|
547 for (i = 0; i < upper_used && code > upper[i].key; i++) ; |
|
548 |
|
549 if (i < upper_used) { |
|
550 /* |
|
551 * Shift the array up by one. |
|
552 */ |
|
553 for (j = upper_used; j > i; j--) |
|
554 (void) memcpy((char *) &upper[j], (char *) &upper[j - 1], |
|
555 sizeof(_case_t)); |
|
556 } |
|
557 |
|
558 upper[i].key = cases[0]; /* Upper */ |
|
559 upper[i].other1 = cases[1]; /* Lower */ |
|
560 upper[i].other2 = cases[2]; /* Title */ |
|
561 |
|
562 upper_used++; |
|
563 } |
|
564 |
|
565 static void |
|
566 #ifdef __STDC__ |
|
567 add_lower(unsigned long code) |
|
568 #else |
|
569 add_lower(code) |
|
570 unsigned long code; |
|
571 #endif |
|
572 { |
|
573 unsigned long i, j; |
|
574 |
|
575 /* |
|
576 * Always map the code to itself. |
|
577 */ |
|
578 cases[1] = code; |
|
579 |
|
580 /* |
|
581 * If the title case character is empty, then make it the same as the |
|
582 * upper case. |
|
583 */ |
|
584 if (cases[2] == 0) |
|
585 cases[2] = cases[0]; |
|
586 |
|
587 if (lower_used == lower_size) { |
|
588 if (lower_size == 0) |
|
589 lower = (_case_t *) malloc(sizeof(_case_t) << 3); |
|
590 else |
|
591 lower = (_case_t *) realloc((char *) lower, |
|
592 sizeof(_case_t) * (lower_size + 8)); |
|
593 lower_size += 8; |
|
594 } |
|
595 |
|
596 /* |
|
597 * Locate the insertion point. |
|
598 */ |
|
599 for (i = 0; i < lower_used && code > lower[i].key; i++) ; |
|
600 |
|
601 if (i < lower_used) { |
|
602 /* |
|
603 * Shift the array up by one. |
|
604 */ |
|
605 for (j = lower_used; j > i; j--) |
|
606 (void) memcpy((char *) &lower[j], (char *) &lower[j - 1], |
|
607 sizeof(_case_t)); |
|
608 } |
|
609 |
|
610 lower[i].key = cases[1]; /* Lower */ |
|
611 lower[i].other1 = cases[0]; /* Upper */ |
|
612 lower[i].other2 = cases[2]; /* Title */ |
|
613 |
|
614 lower_used++; |
|
615 } |
|
616 |
|
617 static void |
|
618 #ifdef __STDC__ |
|
619 ordered_ccl_insert(unsigned long c, unsigned long ccl_code) |
|
620 #else |
|
621 ordered_ccl_insert(c, ccl_code) |
|
622 unsigned long c, ccl_code; |
|
623 #endif |
|
624 { |
|
625 unsigned long i, j; |
|
626 |
|
627 if (ccl_used == ccl_size) { |
|
628 if (ccl_size == 0) |
|
629 ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24); |
|
630 else |
|
631 ccl = (unsigned long *) |
|
632 realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24)); |
|
633 ccl_size += 24; |
|
634 } |
|
635 |
|
636 /* |
|
637 * Optimize adding the first item. |
|
638 */ |
|
639 if (ccl_used == 0) { |
|
640 ccl[0] = ccl[1] = c; |
|
641 ccl[2] = ccl_code; |
|
642 ccl_used += 3; |
|
643 return; |
|
644 } |
|
645 |
|
646 /* |
|
647 * Handle the special case of extending the range on the end. This |
|
648 * requires that the combining class codes are the same. |
|
649 */ |
|
650 if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) { |
|
651 ccl[ccl_used - 2] = c; |
|
652 return; |
|
653 } |
|
654 |
|
655 /* |
|
656 * Handle the special case of adding another range on the end. |
|
657 */ |
|
658 if (c > ccl[ccl_used - 2] + 1 || |
|
659 (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) { |
|
660 ccl[ccl_used++] = c; |
|
661 ccl[ccl_used++] = c; |
|
662 ccl[ccl_used++] = ccl_code; |
|
663 return; |
|
664 } |
|
665 |
|
666 /* |
|
667 * Locate either the insertion point or range for the code. |
|
668 */ |
|
669 for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ; |
|
670 |
|
671 if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) { |
|
672 /* |
|
673 * Extend an existing range. |
|
674 */ |
|
675 ccl[i + 1] = c; |
|
676 return; |
|
677 } else if (c < ccl[i]) { |
|
678 /* |
|
679 * Start a new range before the current location. |
|
680 */ |
|
681 for (j = ccl_used; j > i; j -= 3) { |
|
682 ccl[j] = ccl[j - 3]; |
|
683 ccl[j - 1] = ccl[j - 4]; |
|
684 ccl[j - 2] = ccl[j - 5]; |
|
685 } |
|
686 ccl[i] = ccl[i + 1] = c; |
|
687 ccl[i + 2] = ccl_code; |
|
688 } |
|
689 } |
|
690 |
|
691 /* |
|
692 * Adds a number if it does not already exist and returns an index value |
|
693 * multiplied by 2. |
|
694 */ |
|
695 static unsigned long |
|
696 #ifdef __STDC__ |
|
697 make_number(short num, short denom) |
|
698 #else |
|
699 make_number(num, denom) |
|
700 short num, denom; |
|
701 #endif |
|
702 { |
|
703 unsigned long n; |
|
704 |
|
705 /* |
|
706 * Determine if the number already exists. |
|
707 */ |
|
708 for (n = 0; n < nums_used; n++) { |
|
709 if (nums[n].numerator == num && nums[n].denominator == denom) |
|
710 return n << 1; |
|
711 } |
|
712 |
|
713 if (nums_used == nums_size) { |
|
714 if (nums_size == 0) |
|
715 nums = (_num_t *) malloc(sizeof(_num_t) << 3); |
|
716 else |
|
717 nums = (_num_t *) realloc((char *) nums, |
|
718 sizeof(_num_t) * (nums_size + 8)); |
|
719 nums_size += 8; |
|
720 } |
|
721 |
|
722 n = nums_used++; |
|
723 nums[n].numerator = num; |
|
724 nums[n].denominator = denom; |
|
725 |
|
726 return n << 1; |
|
727 } |
|
728 |
|
729 static void |
|
730 #ifdef __STDC__ |
|
731 add_number(unsigned long code, short num, short denom) |
|
732 #else |
|
733 add_number(code, num, denom) |
|
734 unsigned long code; |
|
735 short num, denom; |
|
736 #endif |
|
737 { |
|
738 unsigned long i, j; |
|
739 |
|
740 /* |
|
741 * Insert the code in order. |
|
742 */ |
|
743 for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ; |
|
744 |
|
745 /* |
|
746 * Handle the case of the codes matching and simply replace the number |
|
747 * that was there before. |
|
748 */ |
|
749 if (ncodes_used > 0 && code == ncodes[i].code) { |
|
750 ncodes[i].idx = make_number(num, denom); |
|
751 return; |
|
752 } |
|
753 |
|
754 /* |
|
755 * Resize the array if necessary. |
|
756 */ |
|
757 if (ncodes_used == ncodes_size) { |
|
758 if (ncodes_size == 0) |
|
759 ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3); |
|
760 else |
|
761 ncodes = (_codeidx_t *) |
|
762 realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8)); |
|
763 |
|
764 ncodes_size += 8; |
|
765 } |
|
766 |
|
767 /* |
|
768 * Shift things around to insert the code if necessary. |
|
769 */ |
|
770 if (i < ncodes_used) { |
|
771 for (j = ncodes_used; j > i; j--) { |
|
772 ncodes[j].code = ncodes[j - 1].code; |
|
773 ncodes[j].idx = ncodes[j - 1].idx; |
|
774 } |
|
775 } |
|
776 ncodes[i].code = code; |
|
777 ncodes[i].idx = make_number(num, denom); |
|
778 |
|
779 ncodes_used++; |
|
780 } |
|
781 |
|
782 /* |
|
783 * This routine assumes that the line is a valid Unicode Character Database |
|
784 * entry. |
|
785 */ |
|
786 static void |
|
787 #ifdef __STDC__ |
|
788 read_cdata(FILE *in) |
|
789 #else |
|
790 read_cdata(in) |
|
791 FILE *in; |
|
792 #endif |
|
793 { |
|
794 unsigned long i, lineno, skip, code, ccl_code; |
|
795 short wnum, neg, number[2]; |
|
796 char line[512], *s, *e; |
|
797 |
|
798 lineno = skip = 0; |
|
799 while (fscanf(in, "%[^\n]\n", line) != EOF) { |
|
800 lineno++; |
|
801 |
|
802 /* |
|
803 * Skip blank lines and lines that start with a '#'. |
|
804 */ |
|
805 if (line[0] == 0 || line[0] == '#') |
|
806 continue; |
|
807 |
|
808 /* |
|
809 * If lines need to be skipped, do it here. |
|
810 */ |
|
811 if (skip) { |
|
812 skip--; |
|
813 continue; |
|
814 } |
|
815 |
|
816 /* |
|
817 * Collect the code. The code can be up to 6 hex digits in length to |
|
818 * allow surrogates to be specified. |
|
819 */ |
|
820 for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) { |
|
821 code <<= 4; |
|
822 if (*s >= '0' && *s <= '9') |
|
823 code += *s - '0'; |
|
824 else if (*s >= 'A' && *s <= 'F') |
|
825 code += (*s - 'A') + 10; |
|
826 else if (*s >= 'a' && *s <= 'f') |
|
827 code += (*s - 'a') + 10; |
|
828 } |
|
829 |
|
830 /* |
|
831 * Handle the following special cases: |
|
832 * 1. 4E00-9FA5 CJK Ideographs. |
|
833 * 2. AC00-D7A3 Hangul Syllables. |
|
834 * 3. D800-DFFF Surrogates. |
|
835 * 4. E000-F8FF Private Use Area. |
|
836 * 5. F900-FA2D Han compatibility. |
|
837 */ |
|
838 switch (code) { |
|
839 case 0x4e00: |
|
840 /* |
|
841 * The Han ideographs. |
|
842 */ |
|
843 add_range(0x4e00, 0x9fff, "Lo", "L"); |
|
844 |
|
845 /* |
|
846 * Add the characters to the defined category. |
|
847 */ |
|
848 add_range(0x4e00, 0x9fa5, "Cp", 0); |
|
849 |
|
850 skip = 1; |
|
851 break; |
|
852 case 0xac00: |
|
853 /* |
|
854 * The Hangul syllables. |
|
855 */ |
|
856 add_range(0xac00, 0xd7a3, "Lo", "L"); |
|
857 |
|
858 /* |
|
859 * Add the characters to the defined category. |
|
860 */ |
|
861 add_range(0xac00, 0xd7a3, "Cp", 0); |
|
862 |
|
863 skip = 1; |
|
864 break; |
|
865 case 0xd800: |
|
866 /* |
|
867 * Make a range of all surrogates and assume some default |
|
868 * properties. |
|
869 */ |
|
870 add_range(0x010000, 0x10ffff, "Cs", "L"); |
|
871 skip = 5; |
|
872 break; |
|
873 case 0xe000: |
|
874 /* |
|
875 * The Private Use area. Add with a default set of properties. |
|
876 */ |
|
877 add_range(0xe000, 0xf8ff, "Co", "L"); |
|
878 skip = 1; |
|
879 break; |
|
880 case 0xf900: |
|
881 /* |
|
882 * The CJK compatibility area. |
|
883 */ |
|
884 add_range(0xf900, 0xfaff, "Lo", "L"); |
|
885 |
|
886 /* |
|
887 * Add the characters to the defined category. |
|
888 */ |
|
889 add_range(0xf900, 0xfaff, "Cp", 0); |
|
890 |
|
891 skip = 1; |
|
892 } |
|
893 |
|
894 if (skip) |
|
895 continue; |
|
896 |
|
897 /* |
|
898 * Add the code to the defined category. |
|
899 */ |
|
900 ordered_range_insert(code, "Cp", 2); |
|
901 |
|
902 /* |
|
903 * Locate the first character property field. |
|
904 */ |
|
905 for (i = 0; *s != 0 && i < 2; s++) { |
|
906 if (*s == ';') |
|
907 i++; |
|
908 } |
|
909 for (e = s; *e && *e != ';'; e++) ; |
|
910 |
|
911 ordered_range_insert(code, s, e - s); |
|
912 |
|
913 /* |
|
914 * Locate the combining class code. |
|
915 */ |
|
916 for (s = e; *s != 0 && i < 3; s++) { |
|
917 if (*s == ';') |
|
918 i++; |
|
919 } |
|
920 |
|
921 /* |
|
922 * Convert the combining class code from decimal. |
|
923 */ |
|
924 for (ccl_code = 0, e = s; *e && *e != ';'; e++) |
|
925 ccl_code = (ccl_code * 10) + (*e - '0'); |
|
926 |
|
927 /* |
|
928 * Add the code if it not 0. |
|
929 */ |
|
930 if (ccl_code != 0) |
|
931 ordered_ccl_insert(code, ccl_code); |
|
932 |
|
933 /* |
|
934 * Locate the second character property field. |
|
935 */ |
|
936 for (s = e; *s != 0 && i < 4; s++) { |
|
937 if (*s == ';') |
|
938 i++; |
|
939 } |
|
940 for (e = s; *e && *e != ';'; e++) ; |
|
941 |
|
942 ordered_range_insert(code, s, e - s); |
|
943 |
|
944 /* |
|
945 * Check for a decomposition. |
|
946 */ |
|
947 s = ++e; |
|
948 if (*s != ';' && *s != '<') { |
|
949 /* |
|
950 * Collect the codes of the decomposition. |
|
951 */ |
|
952 for (dectmp_size = 0; *s != ';'; ) { |
|
953 /* |
|
954 * Skip all leading non-hex digits. |
|
955 */ |
|
956 while (!ishdigit(*s)) |
|
957 s++; |
|
958 |
|
959 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) { |
|
960 dectmp[dectmp_size] <<= 4; |
|
961 if (*s >= '0' && *s <= '9') |
|
962 dectmp[dectmp_size] += *s - '0'; |
|
963 else if (*s >= 'A' && *s <= 'F') |
|
964 dectmp[dectmp_size] += (*s - 'A') + 10; |
|
965 else if (*s >= 'a' && *s <= 'f') |
|
966 dectmp[dectmp_size] += (*s - 'a') + 10; |
|
967 } |
|
968 dectmp_size++; |
|
969 } |
|
970 |
|
971 /* |
|
972 * If there is more than one code in the temporary decomposition |
|
973 * array, then add the character with its decomposition. |
|
974 */ |
|
975 if (dectmp_size > 1) |
|
976 add_decomp(code); |
|
977 } |
|
978 |
|
979 /* |
|
980 * Skip to the number field. |
|
981 */ |
|
982 for (i = 0; i < 3 && *s; s++) { |
|
983 if (*s == ';') |
|
984 i++; |
|
985 } |
|
986 |
|
987 /* |
|
988 * Scan the number in. |
|
989 */ |
|
990 number[0] = number[1] = 0; |
|
991 for (e = s, neg = wnum = 0; *e && *e != ';'; e++) { |
|
992 if (*e == '-') { |
|
993 neg = 1; |
|
994 continue; |
|
995 } |
|
996 |
|
997 if (*e == '/') { |
|
998 /* |
|
999 * Move the the denominator of the fraction. |
|
1000 */ |
|
1001 if (neg) |
|
1002 number[wnum] *= -1; |
|
1003 neg = 0; |
|
1004 e++; |
|
1005 wnum++; |
|
1006 } |
|
1007 number[wnum] = (number[wnum] * 10) + (*e - '0'); |
|
1008 } |
|
1009 |
|
1010 if (e > s) { |
|
1011 /* |
|
1012 * Adjust the denominator in case of integers and add the number. |
|
1013 */ |
|
1014 if (wnum == 0) |
|
1015 number[1] = number[0]; |
|
1016 |
|
1017 add_number(code, number[0], number[1]); |
|
1018 } |
|
1019 |
|
1020 /* |
|
1021 * Skip to the start of the possible case mappings. |
|
1022 */ |
|
1023 for (s = e, i = 0; i < 4 && *s; s++) { |
|
1024 if (*s == ';') |
|
1025 i++; |
|
1026 } |
|
1027 |
|
1028 /* |
|
1029 * Collect the case mappings. |
|
1030 */ |
|
1031 cases[0] = cases[1] = cases[2] = 0; |
|
1032 for (i = 0; i < 3; i++) { |
|
1033 while (ishdigit(*s)) { |
|
1034 cases[i] <<= 4; |
|
1035 if (*s >= '0' && *s <= '9') |
|
1036 cases[i] += *s - '0'; |
|
1037 else if (*s >= 'A' && *s <= 'F') |
|
1038 cases[i] += (*s - 'A') + 10; |
|
1039 else if (*s >= 'a' && *s <= 'f') |
|
1040 cases[i] += (*s - 'a') + 10; |
|
1041 s++; |
|
1042 } |
|
1043 if (*s == ';') |
|
1044 s++; |
|
1045 } |
|
1046 if (cases[0] && cases[1]) |
|
1047 /* |
|
1048 * Add the upper and lower mappings for a title case character. |
|
1049 */ |
|
1050 add_title(code); |
|
1051 else if (cases[1]) |
|
1052 /* |
|
1053 * Add the lower and title case mappings for the upper case |
|
1054 * character. |
|
1055 */ |
|
1056 add_upper(code); |
|
1057 else if (cases[0]) |
|
1058 /* |
|
1059 * Add the upper and title case mappings for the lower case |
|
1060 * character. |
|
1061 */ |
|
1062 add_lower(code); |
|
1063 } |
|
1064 } |
|
1065 |
|
1066 static _decomp_t * |
|
1067 #ifdef __STDC__ |
|
1068 find_decomp(unsigned long code) |
|
1069 #else |
|
1070 find_decomp(code) |
|
1071 unsigned long code; |
|
1072 #endif |
|
1073 { |
|
1074 long l, r, m; |
|
1075 |
|
1076 l = 0; |
|
1077 r = decomps_used - 1; |
|
1078 while (l <= r) { |
|
1079 m = (l + r) >> 1; |
|
1080 if (code > decomps[m].code) |
|
1081 l = m + 1; |
|
1082 else if (code < decomps[m].code) |
|
1083 r = m - 1; |
|
1084 else |
|
1085 return &decomps[m]; |
|
1086 } |
|
1087 return 0; |
|
1088 } |
|
1089 |
|
1090 static void |
|
1091 #ifdef __STDC__ |
|
1092 decomp_it(_decomp_t *d) |
|
1093 #else |
|
1094 decomp_it(d) |
|
1095 _decomp_t *d; |
|
1096 #endif |
|
1097 { |
|
1098 unsigned long i; |
|
1099 _decomp_t *dp; |
|
1100 |
|
1101 for (i = 0; i < d->used; i++) { |
|
1102 if ((dp = find_decomp(d->decomp[i])) != 0) |
|
1103 decomp_it(dp); |
|
1104 else |
|
1105 dectmp[dectmp_size++] = d->decomp[i]; |
|
1106 } |
|
1107 } |
|
1108 |
|
1109 /* |
|
1110 * Expand all decompositions by recursively decomposing each character |
|
1111 * in the decomposition. |
|
1112 */ |
|
1113 static void |
|
1114 #ifdef __STDC__ |
|
1115 expand_decomp(void) |
|
1116 #else |
|
1117 expand_decomp() |
|
1118 #endif |
|
1119 { |
|
1120 unsigned long i; |
|
1121 |
|
1122 for (i = 0; i < decomps_used; i++) { |
|
1123 dectmp_size = 0; |
|
1124 decomp_it(&decomps[i]); |
|
1125 if (dectmp_size > 0) |
|
1126 add_decomp(decomps[i].code); |
|
1127 } |
|
1128 } |
|
1129 |
|
1130 static void |
|
1131 #ifdef __STDC__ |
|
1132 write_cdata(char *opath) |
|
1133 #else |
|
1134 write_cdata(opath) |
|
1135 char *opath; |
|
1136 #endif |
|
1137 { |
|
1138 FILE *out; |
|
1139 unsigned long i, idx, bytes, nprops; |
|
1140 unsigned short casecnt[2]; |
|
1141 char path[BUFSIZ]; |
|
1142 |
|
1143 /***************************************************************** |
|
1144 * |
|
1145 * Generate the ctype data. |
|
1146 * |
|
1147 *****************************************************************/ |
|
1148 |
|
1149 /* |
|
1150 * Open the ctype.dat file. |
|
1151 */ |
|
1152 sprintf(path, "%s/ctype.dat", opath); |
|
1153 if ((out = fopen(path, "wb")) == 0) |
|
1154 return; |
|
1155 |
|
1156 /* |
|
1157 * Collect the offsets for the properties. The offsets array is |
|
1158 * on a 4-byte boundary to keep things efficient for architectures |
|
1159 * that need such a thing. |
|
1160 */ |
|
1161 for (i = idx = 0; i < NUMPROPS; i++) { |
|
1162 propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff; |
|
1163 idx += proptbl[i].used; |
|
1164 } |
|
1165 |
|
1166 /* |
|
1167 * Add the sentinel index which is used by the binary search as the upper |
|
1168 * bound for a search. |
|
1169 */ |
|
1170 propcnt[i] = idx; |
|
1171 |
|
1172 /* |
|
1173 * Record the actual number of property lists. This may be different than |
|
1174 * the number of offsets actually written because of aligning on a 4-byte |
|
1175 * boundary. |
|
1176 */ |
|
1177 hdr[1] = NUMPROPS; |
|
1178 |
|
1179 /* |
|
1180 * Calculate the byte count needed and pad the property counts array to a |
|
1181 * 4-byte boundary. |
|
1182 */ |
|
1183 if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3) |
|
1184 bytes += 4 - (bytes & 3); |
|
1185 nprops = bytes / sizeof(unsigned short); |
|
1186 bytes += sizeof(unsigned long) * idx; |
|
1187 |
|
1188 /* |
|
1189 * Write the header. |
|
1190 */ |
|
1191 fwrite((char *) hdr, sizeof(unsigned short), 2, out); |
|
1192 |
|
1193 /* |
|
1194 * Write the byte count. |
|
1195 */ |
|
1196 fwrite((char *) &bytes, sizeof(unsigned long), 1, out); |
|
1197 |
|
1198 /* |
|
1199 * Write the property list counts. |
|
1200 */ |
|
1201 fwrite((char *) propcnt, sizeof(unsigned short), nprops, out); |
|
1202 |
|
1203 /* |
|
1204 * Write the property lists. |
|
1205 */ |
|
1206 for (i = 0; i < NUMPROPS; i++) { |
|
1207 if (proptbl[i].used > 0) |
|
1208 fwrite((char *) proptbl[i].ranges, sizeof(unsigned long), |
|
1209 proptbl[i].used, out); |
|
1210 } |
|
1211 |
|
1212 fclose(out); |
|
1213 |
|
1214 /***************************************************************** |
|
1215 * |
|
1216 * Generate the case mapping data. |
|
1217 * |
|
1218 *****************************************************************/ |
|
1219 |
|
1220 /* |
|
1221 * Open the case.dat file. |
|
1222 */ |
|
1223 sprintf(path, "%s/case.dat", opath); |
|
1224 if ((out = fopen(path, "wb")) == 0) |
|
1225 return; |
|
1226 |
|
1227 /* |
|
1228 * Write the case mapping tables. |
|
1229 */ |
|
1230 hdr[1] = upper_used + lower_used + title_used; |
|
1231 casecnt[0] = upper_used; |
|
1232 casecnt[1] = lower_used; |
|
1233 |
|
1234 /* |
|
1235 * Write the header. |
|
1236 */ |
|
1237 fwrite((char *) hdr, sizeof(unsigned short), 2, out); |
|
1238 |
|
1239 /* |
|
1240 * Write the upper and lower case table sizes. |
|
1241 */ |
|
1242 fwrite((char *) casecnt, sizeof(unsigned short), 2, out); |
|
1243 |
|
1244 if (upper_used > 0) |
|
1245 /* |
|
1246 * Write the upper case table. |
|
1247 */ |
|
1248 fwrite((char *) upper, sizeof(_case_t), upper_used, out); |
|
1249 |
|
1250 if (lower_used > 0) |
|
1251 /* |
|
1252 * Write the lower case table. |
|
1253 */ |
|
1254 fwrite((char *) lower, sizeof(_case_t), lower_used, out); |
|
1255 |
|
1256 if (title_used > 0) |
|
1257 /* |
|
1258 * Write the title case table. |
|
1259 */ |
|
1260 fwrite((char *) title, sizeof(_case_t), title_used, out); |
|
1261 |
|
1262 fclose(out); |
|
1263 |
|
1264 /***************************************************************** |
|
1265 * |
|
1266 * Generate the decomposition data. |
|
1267 * |
|
1268 *****************************************************************/ |
|
1269 |
|
1270 /* |
|
1271 * Fully expand all decompositions before generating the output file. |
|
1272 */ |
|
1273 expand_decomp(); |
|
1274 |
|
1275 /* |
|
1276 * Open the decomp.dat file. |
|
1277 */ |
|
1278 sprintf(path, "%s/decomp.dat", opath); |
|
1279 if ((out = fopen(path, "wb")) == 0) |
|
1280 return; |
|
1281 |
|
1282 hdr[1] = decomps_used; |
|
1283 |
|
1284 /* |
|
1285 * Write the header. |
|
1286 */ |
|
1287 fwrite((char *) hdr, sizeof(unsigned short), 2, out); |
|
1288 |
|
1289 /* |
|
1290 * Write a temporary byte count which will be calculated as the |
|
1291 * decompositions are written out. |
|
1292 */ |
|
1293 bytes = 0; |
|
1294 fwrite((char *) &bytes, sizeof(unsigned long), 1, out); |
|
1295 |
|
1296 if (decomps_used) { |
|
1297 /* |
|
1298 * Write the list of decomp nodes. |
|
1299 */ |
|
1300 for (i = idx = 0; i < decomps_used; i++) { |
|
1301 fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out); |
|
1302 fwrite((char *) &idx, sizeof(unsigned long), 1, out); |
|
1303 idx += decomps[i].used; |
|
1304 } |
|
1305 |
|
1306 /* |
|
1307 * Write the sentinel index as the last decomp node. |
|
1308 */ |
|
1309 fwrite((char *) &idx, sizeof(unsigned long), 1, out); |
|
1310 |
|
1311 /* |
|
1312 * Write the decompositions themselves. |
|
1313 */ |
|
1314 for (i = 0; i < decomps_used; i++) |
|
1315 fwrite((char *) decomps[i].decomp, sizeof(unsigned long), |
|
1316 decomps[i].used, out); |
|
1317 |
|
1318 /* |
|
1319 * Seek back to the beginning and write the byte count. |
|
1320 */ |
|
1321 bytes = (sizeof(unsigned long) * idx) + |
|
1322 (sizeof(unsigned long) * ((hdr[1] << 1) + 1)); |
|
1323 fseek(out, sizeof(unsigned short) << 1, 0L); |
|
1324 fwrite((char *) &bytes, sizeof(unsigned long), 1, out); |
|
1325 |
|
1326 fclose(out); |
|
1327 } |
|
1328 |
|
1329 /***************************************************************** |
|
1330 * |
|
1331 * Generate the combining class data. |
|
1332 * |
|
1333 *****************************************************************/ |
|
1334 |
|
1335 /* |
|
1336 * Open the cmbcl.dat file. |
|
1337 */ |
|
1338 sprintf(path, "%s/cmbcl.dat", opath); |
|
1339 if ((out = fopen(path, "wb")) == 0) |
|
1340 return; |
|
1341 |
|
1342 /* |
|
1343 * Set the number of ranges used. Each range has a combining class which |
|
1344 * means each entry is a 3-tuple. |
|
1345 */ |
|
1346 hdr[1] = ccl_used / 3; |
|
1347 |
|
1348 /* |
|
1349 * Write the header. |
|
1350 */ |
|
1351 fwrite((char *) hdr, sizeof(unsigned short), 2, out); |
|
1352 |
|
1353 /* |
|
1354 * Write out the byte count to maintain header size. |
|
1355 */ |
|
1356 bytes = ccl_used * sizeof(unsigned long); |
|
1357 fwrite((char *) &bytes, sizeof(unsigned long), 1, out); |
|
1358 |
|
1359 if (ccl_used > 0) |
|
1360 /* |
|
1361 * Write the combining class ranges out. |
|
1362 */ |
|
1363 fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out); |
|
1364 |
|
1365 fclose(out); |
|
1366 |
|
1367 /***************************************************************** |
|
1368 * |
|
1369 * Generate the number data. |
|
1370 * |
|
1371 *****************************************************************/ |
|
1372 |
|
1373 /* |
|
1374 * Open the num.dat file. |
|
1375 */ |
|
1376 sprintf(path, "%s/num.dat", opath); |
|
1377 if ((out = fopen(path, "wb")) == 0) |
|
1378 return; |
|
1379 |
|
1380 /* |
|
1381 * The count part of the header will be the total number of codes that |
|
1382 * have numbers. |
|
1383 */ |
|
1384 hdr[1] = (unsigned short) (ncodes_used << 1); |
|
1385 bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t)); |
|
1386 |
|
1387 /* |
|
1388 * Write the header. |
|
1389 */ |
|
1390 fwrite((char *) hdr, sizeof(unsigned short), 2, out); |
|
1391 |
|
1392 /* |
|
1393 * Write out the byte count to maintain header size. |
|
1394 */ |
|
1395 fwrite((char *) &bytes, sizeof(unsigned long), 1, out); |
|
1396 |
|
1397 /* |
|
1398 * Now, if number mappings exist, write them out. |
|
1399 */ |
|
1400 if (ncodes_used > 0) { |
|
1401 fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out); |
|
1402 fwrite((char *) nums, sizeof(_num_t), nums_used, out); |
|
1403 } |
|
1404 |
|
1405 fclose(out); |
|
1406 } |
|
1407 |
|
/*
 * Program entry point.
 *
 * Arguments are processed left to right.  An argument starting with "-o"
 * makes the following argument the output directory; every other argument
 * is opened as an input file and passed to read_cdata().  After all
 * arguments are handled, write_cdata() is called with the output directory
 * (the current directory "." when none was given).
 *
 * Input files that cannot be opened are reported on stderr and skipped.
 * The exit status is always 0.
 */
int
#ifdef __STDC__
main(int argc, char *argv[])
#else
main(argc, argv)
int argc;
char *argv[];
#endif
{
    FILE *in;
    char *prog, *opath;

    /*
     * Use the last path component of argv[0] in diagnostics.  argv[0] may
     * be missing when argc is 0, so fall back to a fixed name.
     */
    prog = "ucgendat";
    if (argc > 0 && argv[0] != 0) {
        if ((prog = strrchr(argv[0], '/')) != 0)
            prog++;
        else
            prog = argv[0];
    }

    opath = 0;

    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-' && argv[0][1] == 'o') {
            /*
             * The output directory is the next argument.  When "-o" is the
             * last argument, argv[0] is the terminating null pointer and
             * the default directory ends up being used.
             */
            argc--;
            argv++;
            opath = argv[0];
            if (argc <= 0)
                fprintf(stderr, "%s: option -o requires an argument\n", prog);
        } else {
            /*
             * Each input file gets its own stream which is closed as soon
             * as it has been read.  No handle from a previous iteration is
             * touched, so several input files can be given safely.
             */
            if ((in = fopen(argv[0], "rb")) == 0)
                fprintf(stderr, "%s: unable to open ctype file %s\n",
                        prog, argv[0]);
            else {
                read_cdata(in);
                fclose(in);
            }
        }
        argc--;
        argv++;
    }

    if (opath == 0)
        opath = ".";
    write_cdata(opath);

    return 0;
}