1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/unicharutil/src/ucdata.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1162 @@ 1.4 +/* 1.5 + * Copyright 1996, 1997, 1998 Computing Research Labs, 1.6 + * New Mexico State University 1.7 + * 1.8 + * Permission is hereby granted, free of charge, to any person obtaining a 1.9 + * copy of this software and associated documentation files (the "Software"), 1.10 + * to deal in the Software without restriction, including without limitation 1.11 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 1.12 + * and/or sell copies of the Software, and to permit persons to whom the 1.13 + * Software is furnished to do so, subject to the following conditions: 1.14 + * 1.15 + * The above copyright notice and this permission notice shall be included in 1.16 + * all copies or substantial portions of the Software. 1.17 + * 1.18 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1.19 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1.20 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1.21 + * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY 1.22 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 1.23 + * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 1.24 + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 1.25 + */ 1.26 +#ifndef lint 1.27 +#ifdef __GNUC__ 1.28 +static char rcsid[] __attribute__ ((unused)) = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $"; 1.29 +#else 1.30 +static char rcsid[] = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $"; 1.31 +#endif 1.32 +#endif 1.33 + 1.34 +#include <stdio.h> 1.35 +#include <stdlib.h> 1.36 +#include <string.h> 1.37 +#ifndef WIN32 1.38 +#include <unistd.h> 1.39 +#endif 1.40 + 1.41 +#include "ucdata.h" 1.42 + 1.43 +/************************************************************************** 1.44 + * 1.45 + * Miscellaneous types, data, and support functions. 1.46 + * 1.47 + **************************************************************************/ 1.48 + 1.49 +typedef struct { 1.50 + unsigned short bom; 1.51 + unsigned short cnt; 1.52 + union { 1.53 + unsigned long bytes; 1.54 + unsigned short len[2]; 1.55 + } size; 1.56 +} _ucheader_t; 1.57 + 1.58 +/* 1.59 + * A simple array of 32-bit masks for lookup. 1.60 + */ 1.61 +static unsigned long masks32[32] = { 1.62 + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 1.63 + 0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800, 1.64 + 0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000, 1.65 + 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000, 1.66 + 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 1.67 + 0x40000000, 0x80000000 1.68 +}; 1.69 + 1.70 +#define endian_short(cc) (((cc) >> 8) | (((cc) & 0xff) << 8)) 1.71 +#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\ 1.72 + ((((cc) >> 16) & 0xff) << 8)|((cc) >> 24)) 1.73 + 1.74 +static FILE * 1.75 +#ifdef __STDC__ 1.76 +_ucopenfile(char *paths, char *filename, char *mode) 1.77 +#else 1.78 +_ucopenfile(paths, filename, mode) 1.79 +char *paths, *filename, *mode; 1.80 +#endif 1.81 +{ 1.82 + FILE *f; 1.83 + char *fp, *dp, *pp, path[BUFSIZ]; 1.84 + 1.85 + if (filename == 0 || *filename == 0) 1.86 + return 0; 1.87 + 1.88 + dp = paths; 1.89 + while (dp && *dp) { 1.90 + pp = path; 1.91 + while (*dp && *dp != ':') 1.92 + *pp++ = *dp++; 1.93 + *pp++ = '/'; 1.94 + 1.95 + fp = filename; 1.96 + while (*fp) 1.97 + *pp++ = *fp++; 1.98 + *pp = 0; 1.99 + 1.100 + if ((f = fopen(path, mode)) != 0) 1.101 + return f; 1.102 + 1.103 + if (*dp == ':') 1.104 + dp++; 1.105 + } 1.106 + 1.107 + return 0; 1.108 +} 1.109 + 1.110 +/************************************************************************** 1.111 + * 1.112 + * Support for the character properties. 1.113 + * 1.114 + **************************************************************************/ 1.115 + 1.116 +static unsigned long _ucprop_size; 1.117 +static unsigned short *_ucprop_offsets; 1.118 +static unsigned long *_ucprop_ranges; 1.119 + 1.120 +static void 1.121 +#ifdef __STDC__ 1.122 +_ucprop_load(char *paths, int reload) 1.123 +#else 1.124 +_ucprop_load(paths, reload) 1.125 +char *paths; 1.126 +int reload; 1.127 +#endif 1.128 +{ 1.129 + FILE *in; 1.130 + unsigned long size, i; 1.131 + _ucheader_t hdr; 1.132 + 1.133 + if (_ucprop_size > 0) { 1.134 + if (!reload) 1.135 + /* 1.136 + * The character properties have already been loaded. 1.137 + */ 1.138 + return; 1.139 + 1.140 + /* 1.141 + * Unload the current character property data in preparation for 1.142 + * loading a new copy. Only the first array has to be deallocated 1.143 + * because all the memory for the arrays is allocated as a single 1.144 + * block. 1.145 + */ 1.146 + free((char *) _ucprop_offsets); 1.147 + _ucprop_size = 0; 1.148 + } 1.149 + 1.150 + if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0) 1.151 + return; 1.152 + 1.153 + /* 1.154 + * Load the header. 1.155 + */ 1.156 + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); 1.157 + 1.158 + if (hdr.bom == 0xfffe) { 1.159 + hdr.cnt = endian_short(hdr.cnt); 1.160 + hdr.size.bytes = endian_long(hdr.size.bytes); 1.161 + } 1.162 + 1.163 + if ((_ucprop_size = hdr.cnt) == 0) { 1.164 + fclose(in); 1.165 + return; 1.166 + } 1.167 + 1.168 + /* 1.169 + * Allocate all the storage needed for the lookup table. 1.170 + */ 1.171 + _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes); 1.172 + 1.173 + /* 1.174 + * Calculate the offset into the storage for the ranges. The offsets 1.175 + * array is on a 4-byte boundary and one larger than the value provided in 1.176 + * the header count field. This means the offset to the ranges must be 1.177 + * calculated after aligning the count to a 4-byte boundary. 1.178 + */ 1.179 + if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3) 1.180 + size += 4 - (size & 3); 1.181 + size >>= 1; 1.182 + _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size); 1.183 + 1.184 + /* 1.185 + * Load the offset array. 1.186 + */ 1.187 + fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in); 1.188 + 1.189 + /* 1.190 + * Do an endian swap if necessary. Don't forget there is an extra node on 1.191 + * the end with the final index. 1.192 + */ 1.193 + if (hdr.bom == 0xfffe) { 1.194 + for (i = 0; i <= _ucprop_size; i++) 1.195 + _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]); 1.196 + } 1.197 + 1.198 + /* 1.199 + * Load the ranges. The number of elements is in the last array position 1.200 + * of the offsets. 1.201 + */ 1.202 + fread((char *) _ucprop_ranges, sizeof(unsigned long), 1.203 + _ucprop_offsets[_ucprop_size], in); 1.204 + 1.205 + fclose(in); 1.206 + 1.207 + /* 1.208 + * Do an endian swap if necessary. 1.209 + */ 1.210 + if (hdr.bom == 0xfffe) { 1.211 + for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++) 1.212 + _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]); 1.213 + } 1.214 +} 1.215 + 1.216 +static void 1.217 +#ifdef __STDC__ 1.218 +_ucprop_unload(void) 1.219 +#else 1.220 +_ucprop_unload() 1.221 +#endif 1.222 +{ 1.223 + if (_ucprop_size == 0) 1.224 + return; 1.225 + 1.226 + /* 1.227 + * Only need to free the offsets because the memory is allocated as a 1.228 + * single block. 1.229 + */ 1.230 + free((char *) _ucprop_offsets); 1.231 + _ucprop_size = 0; 1.232 +} 1.233 + 1.234 +static int 1.235 +#ifdef __STDC__ 1.236 +_ucprop_lookup(unsigned long code, unsigned long n) 1.237 +#else 1.238 +_ucprop_lookup(code, n) 1.239 +unsigned long code, n; 1.240 +#endif 1.241 +{ 1.242 + long l, r, m; 1.243 + 1.244 + /* 1.245 + * There is an extra node on the end of the offsets to allow this routine 1.246 + * to work right. If the index is 0xffff, then there are no nodes for the 1.247 + * property. 1.248 + */ 1.249 + if ((l = _ucprop_offsets[n]) == 0xffff) 1.250 + return 0; 1.251 + 1.252 + /* 1.253 + * Locate the next offset that is not 0xffff. The sentinel at the end of 1.254 + * the array is the max index value. 1.255 + */ 1.256 + for (m = 1; 1.257 + n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ; 1.258 + 1.259 + r = _ucprop_offsets[n + m] - 1; 1.260 + 1.261 + while (l <= r) { 1.262 + /* 1.263 + * Determine a "mid" point and adjust to make sure the mid point is at 1.264 + * the beginning of a range pair. 1.265 + */ 1.266 + m = (l + r) >> 1; 1.267 + m -= (m & 1); 1.268 + if (code > _ucprop_ranges[m + 1]) 1.269 + l = m + 2; 1.270 + else if (code < _ucprop_ranges[m]) 1.271 + r = m - 2; 1.272 + else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) 1.273 + return 1; 1.274 + } 1.275 + return 0; 1.276 +} 1.277 + 1.278 +int 1.279 +#ifdef __STDC__ 1.280 +ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2) 1.281 +#else 1.282 +ucisprop(code, mask1, mask2) 1.283 +unsigned long code, mask1, mask2; 1.284 +#endif 1.285 +{ 1.286 + unsigned long i; 1.287 + 1.288 + if (mask1 == 0 && mask2 == 0) 1.289 + return 0; 1.290 + 1.291 + for (i = 0; mask1 && i < 32; i++) { 1.292 + if ((mask1 & masks32[i]) && _ucprop_lookup(code, i)) 1.293 + return 1; 1.294 + } 1.295 + 1.296 + for (i = 32; mask2 && i < _ucprop_size; i++) { 1.297 + if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i)) 1.298 + return 1; 1.299 + } 1.300 + 1.301 + return 0; 1.302 +} 1.303 + 1.304 +/************************************************************************** 1.305 + * 1.306 + * Support for case mapping. 1.307 + * 1.308 + **************************************************************************/ 1.309 + 1.310 +static unsigned long _uccase_size; 1.311 +static unsigned short _uccase_len[2]; 1.312 +static unsigned long *_uccase_map; 1.313 + 1.314 +static void 1.315 +#ifdef __STDC__ 1.316 +_uccase_load(char *paths, int reload) 1.317 +#else 1.318 +_uccase_load(paths, reload) 1.319 +char *paths; 1.320 +int reload; 1.321 +#endif 1.322 +{ 1.323 + FILE *in; 1.324 + unsigned long i; 1.325 + _ucheader_t hdr; 1.326 + 1.327 + if (_uccase_size > 0) { 1.328 + if (!reload) 1.329 + /* 1.330 + * The case mappings have already been loaded. 1.331 + */ 1.332 + return; 1.333 + 1.334 + free((char *) _uccase_map); 1.335 + _uccase_size = 0; 1.336 + } 1.337 + 1.338 + if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0) 1.339 + return; 1.340 + 1.341 + /* 1.342 + * Load the header. 1.343 + */ 1.344 + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); 1.345 + 1.346 + if (hdr.bom == 0xfffe) { 1.347 + hdr.cnt = endian_short(hdr.cnt); 1.348 + hdr.size.len[0] = endian_short(hdr.size.len[0]); 1.349 + hdr.size.len[1] = endian_short(hdr.size.len[1]); 1.350 + } 1.351 + 1.352 + /* 1.353 + * Set the node count and lengths of the upper and lower case mapping 1.354 + * tables. 1.355 + */ 1.356 + _uccase_size = hdr.cnt * 3; 1.357 + _uccase_len[0] = hdr.size.len[0] * 3; 1.358 + _uccase_len[1] = hdr.size.len[1] * 3; 1.359 + 1.360 + _uccase_map = (unsigned long *) 1.361 + malloc(_uccase_size * sizeof(unsigned long)); 1.362 + 1.363 + /* 1.364 + * Load the case mapping table. 1.365 + */ 1.366 + fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in); 1.367 + 1.368 + /* 1.369 + * Do an endian swap if necessary. 1.370 + */ 1.371 + if (hdr.bom == 0xfffe) { 1.372 + for (i = 0; i < _uccase_size; i++) 1.373 + _uccase_map[i] = endian_long(_uccase_map[i]); 1.374 + } 1.375 +} 1.376 + 1.377 +static void 1.378 +#ifdef __STDC__ 1.379 +_uccase_unload(void) 1.380 +#else 1.381 +_uccase_unload() 1.382 +#endif 1.383 +{ 1.384 + if (_uccase_size == 0) 1.385 + return; 1.386 + 1.387 + free((char *) _uccase_map); 1.388 + _uccase_size = 0; 1.389 +} 1.390 + 1.391 +static unsigned long 1.392 +#ifdef __STDC__ 1.393 +_uccase_lookup(unsigned long code, long l, long r, int field) 1.394 +#else 1.395 +_uccase_lookup(code, l, r, field) 1.396 +unsigned long code; 1.397 +long l, r; 1.398 +int field; 1.399 +#endif 1.400 +{ 1.401 + long m; 1.402 + 1.403 + /* 1.404 + * Do the binary search. 1.405 + */ 1.406 + while (l <= r) { 1.407 + /* 1.408 + * Determine a "mid" point and adjust to make sure the mid point is at 1.409 + * the beginning of a case mapping triple. 1.410 + */ 1.411 + m = (l + r) >> 1; 1.412 + m -= (m % 3); 1.413 + if (code > _uccase_map[m]) 1.414 + l = m + 3; 1.415 + else if (code < _uccase_map[m]) 1.416 + r = m - 3; 1.417 + else if (code == _uccase_map[m]) 1.418 + return _uccase_map[m + field]; 1.419 + } 1.420 + 1.421 + return code; 1.422 +} 1.423 + 1.424 +unsigned long 1.425 +#ifdef __STDC__ 1.426 +uctoupper(unsigned long code) 1.427 +#else 1.428 +uctoupper(code) 1.429 +unsigned long code; 1.430 +#endif 1.431 +{ 1.432 + int field; 1.433 + long l, r; 1.434 + 1.435 + if (ucisupper(code)) 1.436 + return code; 1.437 + 1.438 + if (ucislower(code)) { 1.439 + /* 1.440 + * The character is lower case. 1.441 + */ 1.442 + field = 1; 1.443 + l = _uccase_len[0]; 1.444 + r = (l + _uccase_len[1]) - 1; 1.445 + } else { 1.446 + /* 1.447 + * The character is title case. 1.448 + */ 1.449 + field = 2; 1.450 + l = _uccase_len[0] + _uccase_len[1]; 1.451 + r = _uccase_size - 1; 1.452 + } 1.453 + return _uccase_lookup(code, l, r, field); 1.454 +} 1.455 + 1.456 +unsigned long 1.457 +#ifdef __STDC__ 1.458 +uctolower(unsigned long code) 1.459 +#else 1.460 +uctolower(code) 1.461 +unsigned long code; 1.462 +#endif 1.463 +{ 1.464 + int field; 1.465 + long l, r; 1.466 + 1.467 + if (ucislower(code)) 1.468 + return code; 1.469 + 1.470 + if (ucisupper(code)) { 1.471 + /* 1.472 + * The character is upper case. 1.473 + */ 1.474 + field = 1; 1.475 + l = 0; 1.476 + r = _uccase_len[0] - 1; 1.477 + } else { 1.478 + /* 1.479 + * The character is title case. 1.480 + */ 1.481 + field = 2; 1.482 + l = _uccase_len[0] + _uccase_len[1]; 1.483 + r = _uccase_size - 1; 1.484 + } 1.485 + return _uccase_lookup(code, l, r, field); 1.486 +} 1.487 + 1.488 +unsigned long 1.489 +#ifdef __STDC__ 1.490 +uctotitle(unsigned long code) 1.491 +#else 1.492 +uctotitle(code) 1.493 +unsigned long code; 1.494 +#endif 1.495 +{ 1.496 + int field; 1.497 + long l, r; 1.498 + 1.499 + if (ucistitle(code)) 1.500 + return code; 1.501 + 1.502 + /* 1.503 + * The offset will always be the same for converting to title case. 1.504 + */ 1.505 + field = 2; 1.506 + 1.507 + if (ucisupper(code)) { 1.508 + /* 1.509 + * The character is upper case. 1.510 + */ 1.511 + l = 0; 1.512 + r = _uccase_len[0] - 1; 1.513 + } else { 1.514 + /* 1.515 + * The character is lower case. 1.516 + */ 1.517 + l = _uccase_len[0]; 1.518 + r = (l + _uccase_len[1]) - 1; 1.519 + } 1.520 + return _uccase_lookup(code, l, r, field); 1.521 +} 1.522 + 1.523 +/************************************************************************** 1.524 + * 1.525 + * Support for decompositions. 1.526 + * 1.527 + **************************************************************************/ 1.528 + 1.529 +static unsigned long _ucdcmp_size; 1.530 +static unsigned long *_ucdcmp_nodes; 1.531 +static unsigned long *_ucdcmp_decomp; 1.532 + 1.533 +static void 1.534 +#ifdef __STDC__ 1.535 +_ucdcmp_load(char *paths, int reload) 1.536 +#else 1.537 +_ucdcmp_load(paths, reload) 1.538 +char *paths; 1.539 +int reload; 1.540 +#endif 1.541 +{ 1.542 + FILE *in; 1.543 + unsigned long size, i; 1.544 + _ucheader_t hdr; 1.545 + 1.546 + if (_ucdcmp_size > 0) { 1.547 + if (!reload) 1.548 + /* 1.549 + * The decompositions have already been loaded. 1.550 + */ 1.551 + return; 1.552 + 1.553 + free((char *) _ucdcmp_nodes); 1.554 + _ucdcmp_size = 0; 1.555 + } 1.556 + 1.557 + if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0) 1.558 + return; 1.559 + 1.560 + /* 1.561 + * Load the header. 1.562 + */ 1.563 + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); 1.564 + 1.565 + if (hdr.bom == 0xfffe) { 1.566 + hdr.cnt = endian_short(hdr.cnt); 1.567 + hdr.size.bytes = endian_long(hdr.size.bytes); 1.568 + } 1.569 + 1.570 + _ucdcmp_size = hdr.cnt << 1; 1.571 + _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes); 1.572 + _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1); 1.573 + 1.574 + /* 1.575 + * Read the decomposition data in. 1.576 + */ 1.577 + size = hdr.size.bytes / sizeof(unsigned long); 1.578 + fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in); 1.579 + 1.580 + /* 1.581 + * Do an endian swap if necessary. 1.582 + */ 1.583 + if (hdr.bom == 0xfffe) { 1.584 + for (i = 0; i < size; i++) 1.585 + _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); 1.586 + } 1.587 +} 1.588 + 1.589 +static void 1.590 +#ifdef __STDC__ 1.591 +_ucdcmp_unload(void) 1.592 +#else 1.593 +_ucdcmp_unload() 1.594 +#endif 1.595 +{ 1.596 + if (_ucdcmp_size == 0) 1.597 + return; 1.598 + 1.599 + /* 1.600 + * Only need to free the offsets because the memory is allocated as a 1.601 + * single block. 1.602 + */ 1.603 + free((char *) _ucdcmp_nodes); 1.604 + _ucdcmp_size = 0; 1.605 +} 1.606 + 1.607 +int 1.608 +#ifdef __STDC__ 1.609 +ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) 1.610 +#else 1.611 +ucdecomp(code, num, decomp) 1.612 +unsigned long code, *num, **decomp; 1.613 +#endif 1.614 +{ 1.615 + long l, r, m; 1.616 + 1.617 + l = 0; 1.618 + r = _ucdcmp_nodes[_ucdcmp_size] - 1; 1.619 + 1.620 + while (l <= r) { 1.621 + /* 1.622 + * Determine a "mid" point and adjust to make sure the mid point is at 1.623 + * the beginning of a code+offset pair. 1.624 + */ 1.625 + m = (l + r) >> 1; 1.626 + m -= (m & 1); 1.627 + if (code > _ucdcmp_nodes[m]) 1.628 + l = m + 2; 1.629 + else if (code < _ucdcmp_nodes[m]) 1.630 + r = m - 2; 1.631 + else if (code == _ucdcmp_nodes[m]) { 1.632 + *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1]; 1.633 + *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; 1.634 + return 1; 1.635 + } 1.636 + } 1.637 + return 0; 1.638 +} 1.639 + 1.640 +int 1.641 +#ifdef __STDC__ 1.642 +ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[]) 1.643 +#else 1.644 +ucdecomp_hangul(code, num, decomp) 1.645 +unsigned long code, *num, decomp[]; 1.646 +#endif 1.647 +{ 1.648 + if (!ucishangul(code)) 1.649 + return 0; 1.650 + 1.651 + code -= 0xac00; 1.652 + decomp[0] = 0x1100 + (unsigned long) (code / 588); 1.653 + decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28); 1.654 + decomp[2] = 0x11a7 + (unsigned long) (code % 28); 1.655 + *num = (decomp[2] != 0x11a7) ? 3 : 2; 1.656 + 1.657 + return 1; 1.658 +} 1.659 + 1.660 +/************************************************************************** 1.661 + * 1.662 + * Support for combining classes. 1.663 + * 1.664 + **************************************************************************/ 1.665 + 1.666 +static unsigned long _uccmcl_size; 1.667 +static unsigned long *_uccmcl_nodes; 1.668 + 1.669 +static void 1.670 +#ifdef __STDC__ 1.671 +_uccmcl_load(char *paths, int reload) 1.672 +#else 1.673 +_uccmcl_load(paths, reload) 1.674 +char *paths; 1.675 +int reload; 1.676 +#endif 1.677 +{ 1.678 + FILE *in; 1.679 + unsigned long i; 1.680 + _ucheader_t hdr; 1.681 + 1.682 + if (_uccmcl_size > 0) { 1.683 + if (!reload) 1.684 + /* 1.685 + * The combining classes have already been loaded. 1.686 + */ 1.687 + return; 1.688 + 1.689 + free((char *) _uccmcl_nodes); 1.690 + _uccmcl_size = 0; 1.691 + } 1.692 + 1.693 + if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0) 1.694 + return; 1.695 + 1.696 + /* 1.697 + * Load the header. 1.698 + */ 1.699 + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); 1.700 + 1.701 + if (hdr.bom == 0xfffe) { 1.702 + hdr.cnt = endian_short(hdr.cnt); 1.703 + hdr.size.bytes = endian_long(hdr.size.bytes); 1.704 + } 1.705 + 1.706 + _uccmcl_size = hdr.cnt * 3; 1.707 + _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes); 1.708 + 1.709 + /* 1.710 + * Read the combining classes in. 1.711 + */ 1.712 + fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in); 1.713 + 1.714 + /* 1.715 + * Do an endian swap if necessary. 1.716 + */ 1.717 + if (hdr.bom == 0xfffe) { 1.718 + for (i = 0; i < _uccmcl_size; i++) 1.719 + _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); 1.720 + } 1.721 +} 1.722 + 1.723 +static void 1.724 +#ifdef __STDC__ 1.725 +_uccmcl_unload(void) 1.726 +#else 1.727 +_uccmcl_unload() 1.728 +#endif 1.729 +{ 1.730 + if (_uccmcl_size == 0) 1.731 + return; 1.732 + 1.733 + free((char *) _uccmcl_nodes); 1.734 + _uccmcl_size = 0; 1.735 +} 1.736 + 1.737 +unsigned long 1.738 +#ifdef __STDC__ 1.739 +uccombining_class(unsigned long code) 1.740 +#else 1.741 +uccombining_class(code) 1.742 +unsigned long code; 1.743 +#endif 1.744 +{ 1.745 + long l, r, m; 1.746 + 1.747 + l = 0; 1.748 + r = _uccmcl_size - 1; 1.749 + 1.750 + while (l <= r) { 1.751 + m = (l + r) >> 1; 1.752 + m -= (m % 3); 1.753 + if (code > _uccmcl_nodes[m + 1]) 1.754 + l = m + 3; 1.755 + else if (code < _uccmcl_nodes[m]) 1.756 + r = m - 3; 1.757 + else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1]) 1.758 + return _uccmcl_nodes[m + 2]; 1.759 + } 1.760 + return 0; 1.761 +} 1.762 + 1.763 +/************************************************************************** 1.764 + * 1.765 + * Support for numeric values. 1.766 + * 1.767 + **************************************************************************/ 1.768 + 1.769 +static unsigned long *_ucnum_nodes; 1.770 +static unsigned long _ucnum_size; 1.771 +static short *_ucnum_vals; 1.772 + 1.773 +static void 1.774 +#ifdef __STDC__ 1.775 +_ucnumb_load(char *paths, int reload) 1.776 +#else 1.777 +_ucnumb_load(paths, reload) 1.778 +char *paths; 1.779 +int reload; 1.780 +#endif 1.781 +{ 1.782 + FILE *in; 1.783 + unsigned long size, i; 1.784 + _ucheader_t hdr; 1.785 + 1.786 + if (_ucnum_size > 0) { 1.787 + if (!reload) 1.788 + /* 1.789 + * The numbers have already been loaded. 1.790 + */ 1.791 + return; 1.792 + 1.793 + free((char *) _ucnum_nodes); 1.794 + _ucnum_size = 0; 1.795 + } 1.796 + 1.797 + if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0) 1.798 + return; 1.799 + 1.800 + /* 1.801 + * Load the header. 1.802 + */ 1.803 + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); 1.804 + 1.805 + if (hdr.bom == 0xfffe) { 1.806 + hdr.cnt = endian_short(hdr.cnt); 1.807 + hdr.size.bytes = endian_long(hdr.size.bytes); 1.808 + } 1.809 + 1.810 + _ucnum_size = hdr.cnt; 1.811 + _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes); 1.812 + _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size); 1.813 + 1.814 + /* 1.815 + * Read the combining classes in. 1.816 + */ 1.817 + fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in); 1.818 + 1.819 + /* 1.820 + * Do an endian swap if necessary. 1.821 + */ 1.822 + if (hdr.bom == 0xfffe) { 1.823 + for (i = 0; i < _ucnum_size; i++) 1.824 + _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]); 1.825 + 1.826 + /* 1.827 + * Determine the number of values that have to be adjusted. 1.828 + */ 1.829 + size = (hdr.size.bytes - 1.830 + (_ucnum_size * (sizeof(unsigned long) << 1))) / 1.831 + sizeof(short); 1.832 + 1.833 + for (i = 0; i < size; i++) 1.834 + _ucnum_vals[i] = endian_short(_ucnum_vals[i]); 1.835 + } 1.836 +} 1.837 + 1.838 +static void 1.839 +#ifdef __STDC__ 1.840 +_ucnumb_unload(void) 1.841 +#else 1.842 +_ucnumb_unload() 1.843 +#endif 1.844 +{ 1.845 + if (_ucnum_size == 0) 1.846 + return; 1.847 + 1.848 + free((char *) _ucnum_nodes); 1.849 + _ucnum_size = 0; 1.850 +} 1.851 + 1.852 +int 1.853 +#ifdef __STDC__ 1.854 +ucnumber_lookup(unsigned long code, struct ucnumber *num) 1.855 +#else 1.856 +ucnumber_lookup(code, num) 1.857 +unsigned long code; 1.858 +struct ucnumber *num; 1.859 +#endif 1.860 +{ 1.861 + long l, r, m; 1.862 + short *vp; 1.863 + 1.864 + l = 0; 1.865 + r = _ucnum_size - 1; 1.866 + while (l <= r) { 1.867 + /* 1.868 + * Determine a "mid" point and adjust to make sure the mid point is at 1.869 + * the beginning of a code+offset pair. 1.870 + */ 1.871 + m = (l + r) >> 1; 1.872 + m -= (m & 1); 1.873 + if (code > _ucnum_nodes[m]) 1.874 + l = m + 2; 1.875 + else if (code < _ucnum_nodes[m]) 1.876 + r = m - 2; 1.877 + else { 1.878 + vp = _ucnum_vals + _ucnum_nodes[m + 1]; 1.879 + num->numerator = (int) *vp++; 1.880 + num->denominator = (int) *vp; 1.881 + return 1; 1.882 + } 1.883 + } 1.884 + return 0; 1.885 +} 1.886 + 1.887 +int 1.888 +#ifdef __STDC__ 1.889 +ucdigit_lookup(unsigned long code, int *digit) 1.890 +#else 1.891 +ucdigit_lookup(code, digit) 1.892 +unsigned long code; 1.893 +int *digit; 1.894 +#endif 1.895 +{ 1.896 + long l, r, m; 1.897 + short *vp; 1.898 + 1.899 + l = 0; 1.900 + r = _ucnum_size - 1; 1.901 + while (l <= r) { 1.902 + /* 1.903 + * Determine a "mid" point and adjust to make sure the mid point is at 1.904 + * the beginning of a code+offset pair. 1.905 + */ 1.906 + m = (l + r) >> 1; 1.907 + m -= (m & 1); 1.908 + if (code > _ucnum_nodes[m]) 1.909 + l = m + 2; 1.910 + else if (code < _ucnum_nodes[m]) 1.911 + r = m - 2; 1.912 + else { 1.913 + vp = _ucnum_vals + _ucnum_nodes[m + 1]; 1.914 + if (*vp == *(vp + 1)) { 1.915 + *digit = *vp; 1.916 + return 1; 1.917 + } 1.918 + return 0; 1.919 + } 1.920 + } 1.921 + return 0; 1.922 +} 1.923 + 1.924 +struct ucnumber 1.925 +#ifdef __STDC__ 1.926 +ucgetnumber(unsigned long code) 1.927 +#else 1.928 +ucgetnumber(code) 1.929 +unsigned long code; 1.930 +#endif 1.931 +{ 1.932 + struct ucnumber num; 1.933 + 1.934 + /* 1.935 + * Initialize with some arbitrary value, because the caller simply cannot 1.936 + * tell for sure if the code is a number without calling the ucisnumber() 1.937 + * macro before calling this function. 1.938 + */ 1.939 + num.numerator = num.denominator = -111; 1.940 + 1.941 + (void) ucnumber_lookup(code, &num); 1.942 + 1.943 + return num; 1.944 +} 1.945 + 1.946 +int 1.947 +#ifdef __STDC__ 1.948 +ucgetdigit(unsigned long code) 1.949 +#else 1.950 +ucgetdigit(code) 1.951 +unsigned long code; 1.952 +#endif 1.953 +{ 1.954 + int dig; 1.955 + 1.956 + /* 1.957 + * Initialize with some arbitrary value, because the caller simply cannot 1.958 + * tell for sure if the code is a number without calling the ucisdigit() 1.959 + * macro before calling this function. 1.960 + */ 1.961 + dig = -111; 1.962 + 1.963 + (void) ucdigit_lookup(code, &dig); 1.964 + 1.965 + return dig; 1.966 +} 1.967 + 1.968 +/************************************************************************** 1.969 + * 1.970 + * Setup and cleanup routines. 1.971 + * 1.972 + **************************************************************************/ 1.973 + 1.974 +void 1.975 +#ifdef __STDC__ 1.976 +ucdata_load(char *paths, int masks) 1.977 +#else 1.978 +ucdata_load(paths, masks) 1.979 +char *paths; 1.980 +int masks; 1.981 +#endif 1.982 +{ 1.983 + if (masks & UCDATA_CTYPE) 1.984 + _ucprop_load(paths, 0); 1.985 + if (masks & UCDATA_CASE) 1.986 + _uccase_load(paths, 0); 1.987 + if (masks & UCDATA_DECOMP) 1.988 + _ucdcmp_load(paths, 0); 1.989 + if (masks & UCDATA_CMBCL) 1.990 + _uccmcl_load(paths, 0); 1.991 + if (masks & UCDATA_NUM) 1.992 + _ucnumb_load(paths, 0); 1.993 +} 1.994 + 1.995 +void 1.996 +#ifdef __STDC__ 1.997 +ucdata_unload(int masks) 1.998 +#else 1.999 +ucdata_unload(masks) 1.1000 +int masks; 1.1001 +#endif 1.1002 +{ 1.1003 + if (masks & UCDATA_CTYPE) 1.1004 + _ucprop_unload(); 1.1005 + if (masks & UCDATA_CASE) 1.1006 + _uccase_unload(); 1.1007 + if (masks & UCDATA_DECOMP) 1.1008 + _ucdcmp_unload(); 1.1009 + if (masks & UCDATA_CMBCL) 1.1010 + _uccmcl_unload(); 1.1011 + if (masks & UCDATA_NUM) 1.1012 + _ucnumb_unload(); 1.1013 +} 1.1014 + 1.1015 +void 1.1016 +#ifdef __STDC__ 1.1017 +ucdata_reload(char *paths, int masks) 1.1018 +#else 1.1019 +ucdata_reload(paths, masks) 1.1020 +char *paths; 1.1021 +int masks; 1.1022 +#endif 1.1023 +{ 1.1024 + if (masks & UCDATA_CTYPE) 1.1025 + _ucprop_load(paths, 1); 1.1026 + if (masks & UCDATA_CASE) 1.1027 + _uccase_load(paths, 1); 1.1028 + if (masks & UCDATA_DECOMP) 1.1029 + _ucdcmp_load(paths, 1); 1.1030 + if (masks & UCDATA_CMBCL) 1.1031 + _uccmcl_load(paths, 1); 1.1032 + if (masks & UCDATA_NUM) 1.1033 + _ucnumb_load(paths, 1); 1.1034 +} 1.1035 + 1.1036 +#ifdef TEST 1.1037 + 1.1038 +void 1.1039 +#ifdef __STDC__ 1.1040 +main(void) 1.1041 +#else 1.1042 +main() 1.1043 +#endif 1.1044 +{ 1.1045 + int dig; 1.1046 + unsigned long i, lo, *dec; 1.1047 + struct ucnumber num; 1.1048 + 1.1049 + ucdata_setup("."); 1.1050 + 1.1051 + if (ucisweak(0x30)) 1.1052 + printf("WEAK\n"); 1.1053 + else 1.1054 + printf("NOT WEAK\n"); 1.1055 + 1.1056 + printf("LOWER 0x%04lX\n", uctolower(0xff3a)); 1.1057 + printf("UPPER 0x%04lX\n", uctoupper(0xff5a)); 1.1058 + 1.1059 + if (ucisalpha(0x1d5)) 1.1060 + printf("ALPHA\n"); 1.1061 + else 1.1062 + printf("NOT ALPHA\n"); 1.1063 + 1.1064 + if (ucisupper(0x1d5)) { 1.1065 + printf("UPPER\n"); 1.1066 + lo = uctolower(0x1d5); 1.1067 + printf("0x%04lx\n", lo); 1.1068 + lo = uctotitle(0x1d5); 1.1069 + printf("0x%04lx\n", lo); 1.1070 + } else 1.1071 + printf("NOT UPPER\n"); 1.1072 + 1.1073 + if (ucistitle(0x1d5)) 1.1074 + printf("TITLE\n"); 1.1075 + else 1.1076 + printf("NOT TITLE\n"); 1.1077 + 1.1078 + if (uciscomposite(0x1d5)) 1.1079 + printf("COMPOSITE\n"); 1.1080 + else 1.1081 + printf("NOT COMPOSITE\n"); 1.1082 + 1.1083 + if (ucdecomp(0x1d5, &lo, &dec)) { 1.1084 + for (i = 0; i < lo; i++) 1.1085 + printf("0x%04lx ", dec[i]); 1.1086 + putchar('\n'); 1.1087 + } 1.1088 + 1.1089 + if ((lo = uccombining_class(0x41)) != 0) 1.1090 + printf("0x41 CCL %ld\n", lo); 1.1091 + 1.1092 + if (ucisxdigit(0xfeff)) 1.1093 + printf("0xFEFF HEX DIGIT\n"); 1.1094 + else 1.1095 + printf("0xFEFF NOT HEX DIGIT\n"); 1.1096 + 1.1097 + if (ucisdefined(0x10000)) 1.1098 + printf("0x10000 DEFINED\n"); 1.1099 + else 1.1100 + printf("0x10000 NOT DEFINED\n"); 1.1101 + 1.1102 + if (ucnumber_lookup(0x30, &num)) { 1.1103 + if (num.numerator != num.denominator) 1.1104 + printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); 1.1105 + else 1.1106 + printf("UCNUMBER: 0x30 = %d\n", num.numerator); 1.1107 + } else 1.1108 + printf("UCNUMBER: 0x30 NOT A NUMBER\n"); 1.1109 + 1.1110 + if (ucnumber_lookup(0xbc, &num)) { 1.1111 + if (num.numerator != num.denominator) 1.1112 + printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); 1.1113 + else 1.1114 + printf("UCNUMBER: 0xbc = %d\n", num.numerator); 1.1115 + } else 1.1116 + printf("UCNUMBER: 0xbc NOT A NUMBER\n"); 1.1117 + 1.1118 + 1.1119 + if (ucnumber_lookup(0xff19, &num)) { 1.1120 + if (num.numerator != num.denominator) 1.1121 + printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); 1.1122 + else 1.1123 + printf("UCNUMBER: 0xff19 = %d\n", num.numerator); 1.1124 + } else 1.1125 + printf("UCNUMBER: 0xff19 NOT A NUMBER\n"); 1.1126 + 1.1127 + if (ucnumber_lookup(0x4e00, &num)) { 1.1128 + if (num.numerator != num.denominator) 1.1129 + printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator); 1.1130 + else 1.1131 + printf("UCNUMBER: 0x4e00 = %d\n", num.numerator); 1.1132 + } else 1.1133 + printf("UCNUMBER: 0x4e00 NOT A NUMBER\n"); 1.1134 + 1.1135 + if (ucdigit_lookup(0x06f9, &dig)) 1.1136 + printf("UCDIGIT: 0x6f9 = %d\n", dig); 1.1137 + else 1.1138 + printf("UCDIGIT: 0x6f9 NOT A NUMBER\n"); 1.1139 + 1.1140 + dig = ucgetdigit(0x0969); 1.1141 + printf("UCGETDIGIT: 0x969 = %d\n", dig); 1.1142 + 1.1143 + num = ucgetnumber(0x30); 1.1144 + if (num.numerator != num.denominator) 1.1145 + printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); 1.1146 + else 1.1147 + printf("UCGETNUMBER: 0x30 = %d\n", num.numerator); 1.1148 + 1.1149 + num = ucgetnumber(0xbc); 1.1150 + if (num.numerator != num.denominator) 1.1151 + printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); 1.1152 + else 1.1153 + printf("UCGETNUMBER: 0xbc = %d\n", num.numerator); 1.1154 + 1.1155 + num = ucgetnumber(0xff19); 1.1156 + if (num.numerator != num.denominator) 1.1157 + printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); 1.1158 + else 1.1159 + printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator); 1.1160 + 1.1161 + ucdata_cleanup(); 1.1162 + exit(0); 1.1163 +} 1.1164 + 1.1165 +#endif /* TEST */