Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright 1996, 1997, 1998 Computing Research Labs, |
michael@0 | 3 | * New Mexico State University |
michael@0 | 4 | * |
michael@0 | 5 | * Permission is hereby granted, free of charge, to any person obtaining a |
michael@0 | 6 | * copy of this software and associated documentation files (the "Software"), |
michael@0 | 7 | * to deal in the Software without restriction, including without limitation |
michael@0 | 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
michael@0 | 9 | * and/or sell copies of the Software, and to permit persons to whom the |
michael@0 | 10 | * Software is furnished to do so, subject to the following conditions: |
michael@0 | 11 | * |
michael@0 | 12 | * The above copyright notice and this permission notice shall be included in |
michael@0 | 13 | * all copies or substantial portions of the Software. |
michael@0 | 14 | * |
michael@0 | 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
michael@0 | 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
michael@0 | 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
michael@0 | 18 | * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY |
michael@0 | 19 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT |
michael@0 | 20 | * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR |
michael@0 | 21 | * THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
michael@0 | 22 | */ |
/*
 * Revision-control identification string embedded in the object file.
 * It is omitted entirely when `lint' is defined.  Under GCC the variable is
 * tagged as unused so that the compiler does not warn about it.
 */
#ifndef lint
static char rcsid[]
#ifdef __GNUC__
    __attribute__ ((unused))
#endif
    = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $";
#endif
michael@0 | 30 | |
michael@0 | 31 | #include <stdio.h> |
michael@0 | 32 | #include <stdlib.h> |
michael@0 | 33 | #include <string.h> |
michael@0 | 34 | #ifndef WIN32 |
michael@0 | 35 | #include <unistd.h> |
michael@0 | 36 | #endif |
michael@0 | 37 | |
michael@0 | 38 | #include "ucdata.h" |
michael@0 | 39 | |
michael@0 | 40 | /************************************************************************** |
michael@0 | 41 | * |
michael@0 | 42 | * Miscellaneous types, data, and support functions. |
michael@0 | 43 | * |
michael@0 | 44 | **************************************************************************/ |
michael@0 | 45 | |
/*
 * Header read verbatim (with a single fread of sizeof(_ucheader_t)) from the
 * start of every data file handled by the loaders in this file.
 *
 *   bom   Byte-order mark.  The loaders compare it with 0xfffe and, on a
 *         match, byte-swap the rest of the header and the data.
 *   cnt   Element count; each loader scales it in its own way.
 *   size  Either the byte size of the data block that follows (`bytes',
 *         used as the malloc size by most loaders) or two 16-bit lengths
 *         (`len', used by the case-mapping loader).
 *
 * NOTE(review): `bytes' is an unsigned long overlaid on two unsigned shorts,
 * which only lines up when long is 32 bits wide.  On platforms with a 64-bit
 * long the in-memory layout presumably no longer matches the file; confirm
 * against the data generator before relying on this there.
 */
typedef struct {
    unsigned short bom;
    unsigned short cnt;
    union {
        unsigned long bytes;
        unsigned short len[2];
    } size;
} _ucheader_t;
michael@0 | 54 | |
/*
 * Single-bit lookup table: masks32[i] has only bit i set, for i in 0..31.
 */
static unsigned long masks32[32] = {
    0x00000001, 0x00000002, 0x00000004, 0x00000008,
    0x00000010, 0x00000020, 0x00000040, 0x00000080,
    0x00000100, 0x00000200, 0x00000400, 0x00000800,
    0x00001000, 0x00002000, 0x00004000, 0x00008000,
    0x00010000, 0x00020000, 0x00040000, 0x00080000,
    0x00100000, 0x00200000, 0x00400000, 0x00800000,
    0x01000000, 0x02000000, 0x04000000, 0x08000000,
    0x10000000, 0x20000000, 0x40000000, 0x80000000
};
michael@0 | 66 | |
/*
 * Byte-order reversal helpers.
 *
 *   endian_short(cc)  swaps the two low-order bytes of a 16-bit quantity.
 *   endian_long(cc)   reverses the four low-order bytes of a 32-bit quantity.
 *
 * Every byte is masked after shifting.  Without the mask on the top byte a
 * negative (sign-extended) short, or an unsigned long wider than 32 bits,
 * would drag extra high bits into the result.
 *
 * The argument is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\
                         ((((cc) >> 16) & 0xff) << 8)|(((cc) >> 24) & 0xff))
michael@0 | 70 | |
/*
 * Search a list of directories for a file and open the first one that
 * can be opened.
 *
 *   paths     Colon-separated list of directories; may be a null pointer or
 *             empty, in which case nothing is tried.
 *   filename  Name appended to each directory after a '/'.
 *   mode      Mode string handed to fopen().
 *
 * Returns the open stream, or 0 if `filename' is null/empty or no candidate
 * could be opened.
 *
 * Each candidate is assembled in a fixed BUFSIZ buffer.  A candidate that
 * would not fit (directory + '/' + filename + terminator) is skipped instead
 * of being written past the end of the buffer.
 */
static FILE *
#ifdef __STDC__
_ucopenfile(char *paths, char *filename, char *mode)
#else
_ucopenfile(paths, filename, mode)
char *paths, *filename, *mode;
#endif
{
    FILE *f;
    char *dp, path[BUFSIZ];
    size_t dirlen, namelen;

    if (filename == 0 || *filename == 0)
        return 0;

    namelen = strlen(filename);

    dp = paths;
    while (dp && *dp) {
        /*
         * Measure the current directory component (up to ':' or the end).
         */
        for (dirlen = 0; dp[dirlen] && dp[dirlen] != ':'; dirlen++)
            ;

        /*
         * Only build and try the candidate when it fits in the buffer,
         * including the '/' separator and the terminating NUL.
         */
        if (namelen + 2 <= sizeof(path) &&
            dirlen <= sizeof(path) - namelen - 2) {
            memcpy(path, dp, dirlen);
            path[dirlen] = '/';
            memcpy(path + dirlen + 1, filename, namelen + 1);

            if ((f = fopen(path, mode)) != 0)
                return f;
        }

        /*
         * Advance to the next component, stepping over the separator.
         */
        dp += dirlen;
        if (*dp == ':')
            dp++;
    }

    return 0;
}
michael@0 | 106 | |
michael@0 | 107 | /************************************************************************** |
michael@0 | 108 | * |
michael@0 | 109 | * Support for the character properties. |
michael@0 | 110 | * |
michael@0 | 111 | **************************************************************************/ |
michael@0 | 112 | |
/*
 * State for the character property tables.
 *
 * _ucprop_size     Count taken from the data file header; zero means
 *                  "nothing loaded".
 * _ucprop_offsets  Start of the single allocated block; holds one offset per
 *                  property plus one extra trailing entry.
 * _ucprop_ranges   Range table; points into the same block as the offsets,
 *                  so it must never be freed on its own.
 */
static unsigned long _ucprop_size = 0;
static unsigned short *_ucprop_offsets = 0;
static unsigned long *_ucprop_ranges = 0;
michael@0 | 116 | |
michael@0 | 117 | static void |
michael@0 | 118 | #ifdef __STDC__ |
michael@0 | 119 | _ucprop_load(char *paths, int reload) |
michael@0 | 120 | #else |
michael@0 | 121 | _ucprop_load(paths, reload) |
michael@0 | 122 | char *paths; |
michael@0 | 123 | int reload; |
michael@0 | 124 | #endif |
michael@0 | 125 | { |
michael@0 | 126 | FILE *in; |
michael@0 | 127 | unsigned long size, i; |
michael@0 | 128 | _ucheader_t hdr; |
michael@0 | 129 | |
michael@0 | 130 | if (_ucprop_size > 0) { |
michael@0 | 131 | if (!reload) |
michael@0 | 132 | /* |
michael@0 | 133 | * The character properties have already been loaded. |
michael@0 | 134 | */ |
michael@0 | 135 | return; |
michael@0 | 136 | |
michael@0 | 137 | /* |
michael@0 | 138 | * Unload the current character property data in preparation for |
michael@0 | 139 | * loading a new copy. Only the first array has to be deallocated |
michael@0 | 140 | * because all the memory for the arrays is allocated as a single |
michael@0 | 141 | * block. |
michael@0 | 142 | */ |
michael@0 | 143 | free((char *) _ucprop_offsets); |
michael@0 | 144 | _ucprop_size = 0; |
michael@0 | 145 | } |
michael@0 | 146 | |
michael@0 | 147 | if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0) |
michael@0 | 148 | return; |
michael@0 | 149 | |
michael@0 | 150 | /* |
michael@0 | 151 | * Load the header. |
michael@0 | 152 | */ |
michael@0 | 153 | fread((char *) &hdr, sizeof(_ucheader_t), 1, in); |
michael@0 | 154 | |
michael@0 | 155 | if (hdr.bom == 0xfffe) { |
michael@0 | 156 | hdr.cnt = endian_short(hdr.cnt); |
michael@0 | 157 | hdr.size.bytes = endian_long(hdr.size.bytes); |
michael@0 | 158 | } |
michael@0 | 159 | |
michael@0 | 160 | if ((_ucprop_size = hdr.cnt) == 0) { |
michael@0 | 161 | fclose(in); |
michael@0 | 162 | return; |
michael@0 | 163 | } |
michael@0 | 164 | |
michael@0 | 165 | /* |
michael@0 | 166 | * Allocate all the storage needed for the lookup table. |
michael@0 | 167 | */ |
michael@0 | 168 | _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes); |
michael@0 | 169 | |
michael@0 | 170 | /* |
michael@0 | 171 | * Calculate the offset into the storage for the ranges. The offsets |
michael@0 | 172 | * array is on a 4-byte boundary and one larger than the value provided in |
michael@0 | 173 | * the header count field. This means the offset to the ranges must be |
michael@0 | 174 | * calculated after aligning the count to a 4-byte boundary. |
michael@0 | 175 | */ |
michael@0 | 176 | if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3) |
michael@0 | 177 | size += 4 - (size & 3); |
michael@0 | 178 | size >>= 1; |
michael@0 | 179 | _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size); |
michael@0 | 180 | |
michael@0 | 181 | /* |
michael@0 | 182 | * Load the offset array. |
michael@0 | 183 | */ |
michael@0 | 184 | fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in); |
michael@0 | 185 | |
michael@0 | 186 | /* |
michael@0 | 187 | * Do an endian swap if necessary. Don't forget there is an extra node on |
michael@0 | 188 | * the end with the final index. |
michael@0 | 189 | */ |
michael@0 | 190 | if (hdr.bom == 0xfffe) { |
michael@0 | 191 | for (i = 0; i <= _ucprop_size; i++) |
michael@0 | 192 | _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]); |
michael@0 | 193 | } |
michael@0 | 194 | |
michael@0 | 195 | /* |
michael@0 | 196 | * Load the ranges. The number of elements is in the last array position |
michael@0 | 197 | * of the offsets. |
michael@0 | 198 | */ |
michael@0 | 199 | fread((char *) _ucprop_ranges, sizeof(unsigned long), |
michael@0 | 200 | _ucprop_offsets[_ucprop_size], in); |
michael@0 | 201 | |
michael@0 | 202 | fclose(in); |
michael@0 | 203 | |
michael@0 | 204 | /* |
michael@0 | 205 | * Do an endian swap if necessary. |
michael@0 | 206 | */ |
michael@0 | 207 | if (hdr.bom == 0xfffe) { |
michael@0 | 208 | for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++) |
michael@0 | 209 | _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]); |
michael@0 | 210 | } |
michael@0 | 211 | } |
michael@0 | 212 | |
michael@0 | 213 | static void |
michael@0 | 214 | #ifdef __STDC__ |
michael@0 | 215 | _ucprop_unload(void) |
michael@0 | 216 | #else |
michael@0 | 217 | _ucprop_unload() |
michael@0 | 218 | #endif |
michael@0 | 219 | { |
michael@0 | 220 | if (_ucprop_size == 0) |
michael@0 | 221 | return; |
michael@0 | 222 | |
michael@0 | 223 | /* |
michael@0 | 224 | * Only need to free the offsets because the memory is allocated as a |
michael@0 | 225 | * single block. |
michael@0 | 226 | */ |
michael@0 | 227 | free((char *) _ucprop_offsets); |
michael@0 | 228 | _ucprop_size = 0; |
michael@0 | 229 | } |
michael@0 | 230 | |
michael@0 | 231 | static int |
michael@0 | 232 | #ifdef __STDC__ |
michael@0 | 233 | _ucprop_lookup(unsigned long code, unsigned long n) |
michael@0 | 234 | #else |
michael@0 | 235 | _ucprop_lookup(code, n) |
michael@0 | 236 | unsigned long code, n; |
michael@0 | 237 | #endif |
michael@0 | 238 | { |
michael@0 | 239 | long l, r, m; |
michael@0 | 240 | |
michael@0 | 241 | /* |
michael@0 | 242 | * There is an extra node on the end of the offsets to allow this routine |
michael@0 | 243 | * to work right. If the index is 0xffff, then there are no nodes for the |
michael@0 | 244 | * property. |
michael@0 | 245 | */ |
michael@0 | 246 | if ((l = _ucprop_offsets[n]) == 0xffff) |
michael@0 | 247 | return 0; |
michael@0 | 248 | |
michael@0 | 249 | /* |
michael@0 | 250 | * Locate the next offset that is not 0xffff. The sentinel at the end of |
michael@0 | 251 | * the array is the max index value. |
michael@0 | 252 | */ |
michael@0 | 253 | for (m = 1; |
michael@0 | 254 | n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ; |
michael@0 | 255 | |
michael@0 | 256 | r = _ucprop_offsets[n + m] - 1; |
michael@0 | 257 | |
michael@0 | 258 | while (l <= r) { |
michael@0 | 259 | /* |
michael@0 | 260 | * Determine a "mid" point and adjust to make sure the mid point is at |
michael@0 | 261 | * the beginning of a range pair. |
michael@0 | 262 | */ |
michael@0 | 263 | m = (l + r) >> 1; |
michael@0 | 264 | m -= (m & 1); |
michael@0 | 265 | if (code > _ucprop_ranges[m + 1]) |
michael@0 | 266 | l = m + 2; |
michael@0 | 267 | else if (code < _ucprop_ranges[m]) |
michael@0 | 268 | r = m - 2; |
michael@0 | 269 | else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) |
michael@0 | 270 | return 1; |
michael@0 | 271 | } |
michael@0 | 272 | return 0; |
michael@0 | 273 | } |
michael@0 | 274 | |
michael@0 | 275 | int |
michael@0 | 276 | #ifdef __STDC__ |
michael@0 | 277 | ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2) |
michael@0 | 278 | #else |
michael@0 | 279 | ucisprop(code, mask1, mask2) |
michael@0 | 280 | unsigned long code, mask1, mask2; |
michael@0 | 281 | #endif |
michael@0 | 282 | { |
michael@0 | 283 | unsigned long i; |
michael@0 | 284 | |
michael@0 | 285 | if (mask1 == 0 && mask2 == 0) |
michael@0 | 286 | return 0; |
michael@0 | 287 | |
michael@0 | 288 | for (i = 0; mask1 && i < 32; i++) { |
michael@0 | 289 | if ((mask1 & masks32[i]) && _ucprop_lookup(code, i)) |
michael@0 | 290 | return 1; |
michael@0 | 291 | } |
michael@0 | 292 | |
michael@0 | 293 | for (i = 32; mask2 && i < _ucprop_size; i++) { |
michael@0 | 294 | if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i)) |
michael@0 | 295 | return 1; |
michael@0 | 296 | } |
michael@0 | 297 | |
michael@0 | 298 | return 0; |
michael@0 | 299 | } |
michael@0 | 300 | |
michael@0 | 301 | /************************************************************************** |
michael@0 | 302 | * |
michael@0 | 303 | * Support for case mapping. |
michael@0 | 304 | * |
michael@0 | 305 | **************************************************************************/ |
michael@0 | 306 | |
/*
 * State for the case mapping table.
 *
 * _uccase_size  Number of unsigned longs in the map (three per node); zero
 *               means "nothing loaded".
 * _uccase_len   Lengths, in unsigned longs, of the first two sections of
 *               the map (upper case, then lower case).
 * _uccase_map   The mapping table itself, stored as triples.
 */
static unsigned long _uccase_size = 0;
static unsigned short _uccase_len[2] = { 0, 0 };
static unsigned long *_uccase_map = 0;
michael@0 | 310 | |
michael@0 | 311 | static void |
michael@0 | 312 | #ifdef __STDC__ |
michael@0 | 313 | _uccase_load(char *paths, int reload) |
michael@0 | 314 | #else |
michael@0 | 315 | _uccase_load(paths, reload) |
michael@0 | 316 | char *paths; |
michael@0 | 317 | int reload; |
michael@0 | 318 | #endif |
michael@0 | 319 | { |
michael@0 | 320 | FILE *in; |
michael@0 | 321 | unsigned long i; |
michael@0 | 322 | _ucheader_t hdr; |
michael@0 | 323 | |
michael@0 | 324 | if (_uccase_size > 0) { |
michael@0 | 325 | if (!reload) |
michael@0 | 326 | /* |
michael@0 | 327 | * The case mappings have already been loaded. |
michael@0 | 328 | */ |
michael@0 | 329 | return; |
michael@0 | 330 | |
michael@0 | 331 | free((char *) _uccase_map); |
michael@0 | 332 | _uccase_size = 0; |
michael@0 | 333 | } |
michael@0 | 334 | |
michael@0 | 335 | if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0) |
michael@0 | 336 | return; |
michael@0 | 337 | |
michael@0 | 338 | /* |
michael@0 | 339 | * Load the header. |
michael@0 | 340 | */ |
michael@0 | 341 | fread((char *) &hdr, sizeof(_ucheader_t), 1, in); |
michael@0 | 342 | |
michael@0 | 343 | if (hdr.bom == 0xfffe) { |
michael@0 | 344 | hdr.cnt = endian_short(hdr.cnt); |
michael@0 | 345 | hdr.size.len[0] = endian_short(hdr.size.len[0]); |
michael@0 | 346 | hdr.size.len[1] = endian_short(hdr.size.len[1]); |
michael@0 | 347 | } |
michael@0 | 348 | |
michael@0 | 349 | /* |
michael@0 | 350 | * Set the node count and lengths of the upper and lower case mapping |
michael@0 | 351 | * tables. |
michael@0 | 352 | */ |
michael@0 | 353 | _uccase_size = hdr.cnt * 3; |
michael@0 | 354 | _uccase_len[0] = hdr.size.len[0] * 3; |
michael@0 | 355 | _uccase_len[1] = hdr.size.len[1] * 3; |
michael@0 | 356 | |
michael@0 | 357 | _uccase_map = (unsigned long *) |
michael@0 | 358 | malloc(_uccase_size * sizeof(unsigned long)); |
michael@0 | 359 | |
michael@0 | 360 | /* |
michael@0 | 361 | * Load the case mapping table. |
michael@0 | 362 | */ |
michael@0 | 363 | fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in); |
michael@0 | 364 | |
michael@0 | 365 | /* |
michael@0 | 366 | * Do an endian swap if necessary. |
michael@0 | 367 | */ |
michael@0 | 368 | if (hdr.bom == 0xfffe) { |
michael@0 | 369 | for (i = 0; i < _uccase_size; i++) |
michael@0 | 370 | _uccase_map[i] = endian_long(_uccase_map[i]); |
michael@0 | 371 | } |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | static void |
michael@0 | 375 | #ifdef __STDC__ |
michael@0 | 376 | _uccase_unload(void) |
michael@0 | 377 | #else |
michael@0 | 378 | _uccase_unload() |
michael@0 | 379 | #endif |
michael@0 | 380 | { |
michael@0 | 381 | if (_uccase_size == 0) |
michael@0 | 382 | return; |
michael@0 | 383 | |
michael@0 | 384 | free((char *) _uccase_map); |
michael@0 | 385 | _uccase_size = 0; |
michael@0 | 386 | } |
michael@0 | 387 | |
michael@0 | 388 | static unsigned long |
michael@0 | 389 | #ifdef __STDC__ |
michael@0 | 390 | _uccase_lookup(unsigned long code, long l, long r, int field) |
michael@0 | 391 | #else |
michael@0 | 392 | _uccase_lookup(code, l, r, field) |
michael@0 | 393 | unsigned long code; |
michael@0 | 394 | long l, r; |
michael@0 | 395 | int field; |
michael@0 | 396 | #endif |
michael@0 | 397 | { |
michael@0 | 398 | long m; |
michael@0 | 399 | |
michael@0 | 400 | /* |
michael@0 | 401 | * Do the binary search. |
michael@0 | 402 | */ |
michael@0 | 403 | while (l <= r) { |
michael@0 | 404 | /* |
michael@0 | 405 | * Determine a "mid" point and adjust to make sure the mid point is at |
michael@0 | 406 | * the beginning of a case mapping triple. |
michael@0 | 407 | */ |
michael@0 | 408 | m = (l + r) >> 1; |
michael@0 | 409 | m -= (m % 3); |
michael@0 | 410 | if (code > _uccase_map[m]) |
michael@0 | 411 | l = m + 3; |
michael@0 | 412 | else if (code < _uccase_map[m]) |
michael@0 | 413 | r = m - 3; |
michael@0 | 414 | else if (code == _uccase_map[m]) |
michael@0 | 415 | return _uccase_map[m + field]; |
michael@0 | 416 | } |
michael@0 | 417 | |
michael@0 | 418 | return code; |
michael@0 | 419 | } |
michael@0 | 420 | |
michael@0 | 421 | unsigned long |
michael@0 | 422 | #ifdef __STDC__ |
michael@0 | 423 | uctoupper(unsigned long code) |
michael@0 | 424 | #else |
michael@0 | 425 | uctoupper(code) |
michael@0 | 426 | unsigned long code; |
michael@0 | 427 | #endif |
michael@0 | 428 | { |
michael@0 | 429 | int field; |
michael@0 | 430 | long l, r; |
michael@0 | 431 | |
michael@0 | 432 | if (ucisupper(code)) |
michael@0 | 433 | return code; |
michael@0 | 434 | |
michael@0 | 435 | if (ucislower(code)) { |
michael@0 | 436 | /* |
michael@0 | 437 | * The character is lower case. |
michael@0 | 438 | */ |
michael@0 | 439 | field = 1; |
michael@0 | 440 | l = _uccase_len[0]; |
michael@0 | 441 | r = (l + _uccase_len[1]) - 1; |
michael@0 | 442 | } else { |
michael@0 | 443 | /* |
michael@0 | 444 | * The character is title case. |
michael@0 | 445 | */ |
michael@0 | 446 | field = 2; |
michael@0 | 447 | l = _uccase_len[0] + _uccase_len[1]; |
michael@0 | 448 | r = _uccase_size - 1; |
michael@0 | 449 | } |
michael@0 | 450 | return _uccase_lookup(code, l, r, field); |
michael@0 | 451 | } |
michael@0 | 452 | |
michael@0 | 453 | unsigned long |
michael@0 | 454 | #ifdef __STDC__ |
michael@0 | 455 | uctolower(unsigned long code) |
michael@0 | 456 | #else |
michael@0 | 457 | uctolower(code) |
michael@0 | 458 | unsigned long code; |
michael@0 | 459 | #endif |
michael@0 | 460 | { |
michael@0 | 461 | int field; |
michael@0 | 462 | long l, r; |
michael@0 | 463 | |
michael@0 | 464 | if (ucislower(code)) |
michael@0 | 465 | return code; |
michael@0 | 466 | |
michael@0 | 467 | if (ucisupper(code)) { |
michael@0 | 468 | /* |
michael@0 | 469 | * The character is upper case. |
michael@0 | 470 | */ |
michael@0 | 471 | field = 1; |
michael@0 | 472 | l = 0; |
michael@0 | 473 | r = _uccase_len[0] - 1; |
michael@0 | 474 | } else { |
michael@0 | 475 | /* |
michael@0 | 476 | * The character is title case. |
michael@0 | 477 | */ |
michael@0 | 478 | field = 2; |
michael@0 | 479 | l = _uccase_len[0] + _uccase_len[1]; |
michael@0 | 480 | r = _uccase_size - 1; |
michael@0 | 481 | } |
michael@0 | 482 | return _uccase_lookup(code, l, r, field); |
michael@0 | 483 | } |
michael@0 | 484 | |
michael@0 | 485 | unsigned long |
michael@0 | 486 | #ifdef __STDC__ |
michael@0 | 487 | uctotitle(unsigned long code) |
michael@0 | 488 | #else |
michael@0 | 489 | uctotitle(code) |
michael@0 | 490 | unsigned long code; |
michael@0 | 491 | #endif |
michael@0 | 492 | { |
michael@0 | 493 | int field; |
michael@0 | 494 | long l, r; |
michael@0 | 495 | |
michael@0 | 496 | if (ucistitle(code)) |
michael@0 | 497 | return code; |
michael@0 | 498 | |
michael@0 | 499 | /* |
michael@0 | 500 | * The offset will always be the same for converting to title case. |
michael@0 | 501 | */ |
michael@0 | 502 | field = 2; |
michael@0 | 503 | |
michael@0 | 504 | if (ucisupper(code)) { |
michael@0 | 505 | /* |
michael@0 | 506 | * The character is upper case. |
michael@0 | 507 | */ |
michael@0 | 508 | l = 0; |
michael@0 | 509 | r = _uccase_len[0] - 1; |
michael@0 | 510 | } else { |
michael@0 | 511 | /* |
michael@0 | 512 | * The character is lower case. |
michael@0 | 513 | */ |
michael@0 | 514 | l = _uccase_len[0]; |
michael@0 | 515 | r = (l + _uccase_len[1]) - 1; |
michael@0 | 516 | } |
michael@0 | 517 | return _uccase_lookup(code, l, r, field); |
michael@0 | 518 | } |
michael@0 | 519 | |
michael@0 | 520 | /************************************************************************** |
michael@0 | 521 | * |
michael@0 | 522 | * Support for decompositions. |
michael@0 | 523 | * |
michael@0 | 524 | **************************************************************************/ |
michael@0 | 525 | |
/*
 * State for the decomposition tables.
 *
 * _ucdcmp_size    Twice the header count (two unsigned longs per node).
 * _ucdcmp_nodes   Start of the single allocated block: the node table
 *                 followed by one extra entry.
 * _ucdcmp_decomp  Decomposition data; points into the same block, just past
 *                 the node table and its extra entry.
 */
static unsigned long _ucdcmp_size = 0;
static unsigned long *_ucdcmp_nodes = 0;
static unsigned long *_ucdcmp_decomp = 0;
michael@0 | 529 | |
michael@0 | 530 | static void |
michael@0 | 531 | #ifdef __STDC__ |
michael@0 | 532 | _ucdcmp_load(char *paths, int reload) |
michael@0 | 533 | #else |
michael@0 | 534 | _ucdcmp_load(paths, reload) |
michael@0 | 535 | char *paths; |
michael@0 | 536 | int reload; |
michael@0 | 537 | #endif |
michael@0 | 538 | { |
michael@0 | 539 | FILE *in; |
michael@0 | 540 | unsigned long size, i; |
michael@0 | 541 | _ucheader_t hdr; |
michael@0 | 542 | |
michael@0 | 543 | if (_ucdcmp_size > 0) { |
michael@0 | 544 | if (!reload) |
michael@0 | 545 | /* |
michael@0 | 546 | * The decompositions have already been loaded. |
michael@0 | 547 | */ |
michael@0 | 548 | return; |
michael@0 | 549 | |
michael@0 | 550 | free((char *) _ucdcmp_nodes); |
michael@0 | 551 | _ucdcmp_size = 0; |
michael@0 | 552 | } |
michael@0 | 553 | |
michael@0 | 554 | if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0) |
michael@0 | 555 | return; |
michael@0 | 556 | |
michael@0 | 557 | /* |
michael@0 | 558 | * Load the header. |
michael@0 | 559 | */ |
michael@0 | 560 | fread((char *) &hdr, sizeof(_ucheader_t), 1, in); |
michael@0 | 561 | |
michael@0 | 562 | if (hdr.bom == 0xfffe) { |
michael@0 | 563 | hdr.cnt = endian_short(hdr.cnt); |
michael@0 | 564 | hdr.size.bytes = endian_long(hdr.size.bytes); |
michael@0 | 565 | } |
michael@0 | 566 | |
michael@0 | 567 | _ucdcmp_size = hdr.cnt << 1; |
michael@0 | 568 | _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes); |
michael@0 | 569 | _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1); |
michael@0 | 570 | |
michael@0 | 571 | /* |
michael@0 | 572 | * Read the decomposition data in. |
michael@0 | 573 | */ |
michael@0 | 574 | size = hdr.size.bytes / sizeof(unsigned long); |
michael@0 | 575 | fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in); |
michael@0 | 576 | |
michael@0 | 577 | /* |
michael@0 | 578 | * Do an endian swap if necessary. |
michael@0 | 579 | */ |
michael@0 | 580 | if (hdr.bom == 0xfffe) { |
michael@0 | 581 | for (i = 0; i < size; i++) |
michael@0 | 582 | _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); |
michael@0 | 583 | } |
michael@0 | 584 | } |
michael@0 | 585 | |
michael@0 | 586 | static void |
michael@0 | 587 | #ifdef __STDC__ |
michael@0 | 588 | _ucdcmp_unload(void) |
michael@0 | 589 | #else |
michael@0 | 590 | _ucdcmp_unload() |
michael@0 | 591 | #endif |
michael@0 | 592 | { |
michael@0 | 593 | if (_ucdcmp_size == 0) |
michael@0 | 594 | return; |
michael@0 | 595 | |
michael@0 | 596 | /* |
michael@0 | 597 | * Only need to free the offsets because the memory is allocated as a |
michael@0 | 598 | * single block. |
michael@0 | 599 | */ |
michael@0 | 600 | free((char *) _ucdcmp_nodes); |
michael@0 | 601 | _ucdcmp_size = 0; |
michael@0 | 602 | } |
michael@0 | 603 | |
michael@0 | 604 | int |
michael@0 | 605 | #ifdef __STDC__ |
michael@0 | 606 | ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) |
michael@0 | 607 | #else |
michael@0 | 608 | ucdecomp(code, num, decomp) |
michael@0 | 609 | unsigned long code, *num, **decomp; |
michael@0 | 610 | #endif |
michael@0 | 611 | { |
michael@0 | 612 | long l, r, m; |
michael@0 | 613 | |
michael@0 | 614 | l = 0; |
michael@0 | 615 | r = _ucdcmp_nodes[_ucdcmp_size] - 1; |
michael@0 | 616 | |
michael@0 | 617 | while (l <= r) { |
michael@0 | 618 | /* |
michael@0 | 619 | * Determine a "mid" point and adjust to make sure the mid point is at |
michael@0 | 620 | * the beginning of a code+offset pair. |
michael@0 | 621 | */ |
michael@0 | 622 | m = (l + r) >> 1; |
michael@0 | 623 | m -= (m & 1); |
michael@0 | 624 | if (code > _ucdcmp_nodes[m]) |
michael@0 | 625 | l = m + 2; |
michael@0 | 626 | else if (code < _ucdcmp_nodes[m]) |
michael@0 | 627 | r = m - 2; |
michael@0 | 628 | else if (code == _ucdcmp_nodes[m]) { |
michael@0 | 629 | *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1]; |
michael@0 | 630 | *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; |
michael@0 | 631 | return 1; |
michael@0 | 632 | } |
michael@0 | 633 | } |
michael@0 | 634 | return 0; |
michael@0 | 635 | } |
michael@0 | 636 | |
/*
 * Decompose a Hangul syllable arithmetically.
 *
 *   code    Character code to decompose.
 *   num     Receives 3 when the third component is present, otherwise 2.
 *   decomp  Caller-supplied array with room for three elements; all three
 *           are written even when `num' is set to 2.
 *
 * Returns 0 without touching the outputs when ucishangul() rejects the
 * code, 1 otherwise.
 */
int
#ifdef __STDC__
ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
#else
ucdecomp_hangul(code, num, decomp)
unsigned long code, *num, decomp[];
#endif
{
    unsigned long syllable, tail;

    if (!ucishangul(code))
        return 0;

    /*
     * Index of the syllable relative to 0xac00.  Each leading component
     * covers 588 syllables and each middle component covers 28.
     */
    syllable = code - 0xac00;
    tail = syllable % 28;

    decomp[0] = 0x1100 + (unsigned long) (syllable / 588);
    decomp[1] = 0x1161 + (unsigned long) ((syllable % 588) / 28);
    decomp[2] = 0x11a7 + (unsigned long) tail;

    /*
     * A zero tail index leaves decomp[2] at 0x11a7, meaning there is no
     * third component.
     */
    *num = (tail != 0) ? 3 : 2;

    return 1;
}
michael@0 | 656 | |
michael@0 | 657 | /************************************************************************** |
michael@0 | 658 | * |
michael@0 | 659 | * Support for combining classes. |
michael@0 | 660 | * |
michael@0 | 661 | **************************************************************************/ |
michael@0 | 662 | |
/*
 * State for the combining class table.
 *
 * _uccmcl_size   Number of unsigned longs in the table (three per node);
 *                zero means "nothing loaded".
 * _uccmcl_nodes  The table itself, stored as triples.
 */
static unsigned long _uccmcl_size = 0;
static unsigned long *_uccmcl_nodes = 0;
michael@0 | 665 | |
michael@0 | 666 | static void |
michael@0 | 667 | #ifdef __STDC__ |
michael@0 | 668 | _uccmcl_load(char *paths, int reload) |
michael@0 | 669 | #else |
michael@0 | 670 | _uccmcl_load(paths, reload) |
michael@0 | 671 | char *paths; |
michael@0 | 672 | int reload; |
michael@0 | 673 | #endif |
michael@0 | 674 | { |
michael@0 | 675 | FILE *in; |
michael@0 | 676 | unsigned long i; |
michael@0 | 677 | _ucheader_t hdr; |
michael@0 | 678 | |
michael@0 | 679 | if (_uccmcl_size > 0) { |
michael@0 | 680 | if (!reload) |
michael@0 | 681 | /* |
michael@0 | 682 | * The combining classes have already been loaded. |
michael@0 | 683 | */ |
michael@0 | 684 | return; |
michael@0 | 685 | |
michael@0 | 686 | free((char *) _uccmcl_nodes); |
michael@0 | 687 | _uccmcl_size = 0; |
michael@0 | 688 | } |
michael@0 | 689 | |
michael@0 | 690 | if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0) |
michael@0 | 691 | return; |
michael@0 | 692 | |
michael@0 | 693 | /* |
michael@0 | 694 | * Load the header. |
michael@0 | 695 | */ |
michael@0 | 696 | fread((char *) &hdr, sizeof(_ucheader_t), 1, in); |
michael@0 | 697 | |
michael@0 | 698 | if (hdr.bom == 0xfffe) { |
michael@0 | 699 | hdr.cnt = endian_short(hdr.cnt); |
michael@0 | 700 | hdr.size.bytes = endian_long(hdr.size.bytes); |
michael@0 | 701 | } |
michael@0 | 702 | |
michael@0 | 703 | _uccmcl_size = hdr.cnt * 3; |
michael@0 | 704 | _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes); |
michael@0 | 705 | |
michael@0 | 706 | /* |
michael@0 | 707 | * Read the combining classes in. |
michael@0 | 708 | */ |
michael@0 | 709 | fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in); |
michael@0 | 710 | |
michael@0 | 711 | /* |
michael@0 | 712 | * Do an endian swap if necessary. |
michael@0 | 713 | */ |
michael@0 | 714 | if (hdr.bom == 0xfffe) { |
michael@0 | 715 | for (i = 0; i < _uccmcl_size; i++) |
michael@0 | 716 | _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); |
michael@0 | 717 | } |
michael@0 | 718 | } |
michael@0 | 719 | |
michael@0 | 720 | static void |
michael@0 | 721 | #ifdef __STDC__ |
michael@0 | 722 | _uccmcl_unload(void) |
michael@0 | 723 | #else |
michael@0 | 724 | _uccmcl_unload() |
michael@0 | 725 | #endif |
michael@0 | 726 | { |
michael@0 | 727 | if (_uccmcl_size == 0) |
michael@0 | 728 | return; |
michael@0 | 729 | |
michael@0 | 730 | free((char *) _uccmcl_nodes); |
michael@0 | 731 | _uccmcl_size = 0; |
michael@0 | 732 | } |
michael@0 | 733 | |
michael@0 | 734 | unsigned long |
michael@0 | 735 | #ifdef __STDC__ |
michael@0 | 736 | uccombining_class(unsigned long code) |
michael@0 | 737 | #else |
michael@0 | 738 | uccombining_class(code) |
michael@0 | 739 | unsigned long code; |
michael@0 | 740 | #endif |
michael@0 | 741 | { |
michael@0 | 742 | long l, r, m; |
michael@0 | 743 | |
michael@0 | 744 | l = 0; |
michael@0 | 745 | r = _uccmcl_size - 1; |
michael@0 | 746 | |
michael@0 | 747 | while (l <= r) { |
michael@0 | 748 | m = (l + r) >> 1; |
michael@0 | 749 | m -= (m % 3); |
michael@0 | 750 | if (code > _uccmcl_nodes[m + 1]) |
michael@0 | 751 | l = m + 3; |
michael@0 | 752 | else if (code < _uccmcl_nodes[m]) |
michael@0 | 753 | r = m - 3; |
michael@0 | 754 | else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1]) |
michael@0 | 755 | return _uccmcl_nodes[m + 2]; |
michael@0 | 756 | } |
michael@0 | 757 | return 0; |
michael@0 | 758 | } |
michael@0 | 759 | |
michael@0 | 760 | /************************************************************************** |
michael@0 | 761 | * |
michael@0 | 762 | * Support for numeric values. |
michael@0 | 763 | * |
michael@0 | 764 | **************************************************************************/ |
michael@0 | 765 | |
/*
 * State for the numeric value tables.
 *
 * _ucnum_size   Count taken from the data file header; it is the number of
 *               unsigned longs in the node table.
 * _ucnum_nodes  Start of the single allocated block (the node table).
 * _ucnum_vals   16-bit values; points into the same block, immediately
 *               after the node table.
 */
static unsigned long _ucnum_size = 0;
static unsigned long *_ucnum_nodes = 0;
static short *_ucnum_vals = 0;
michael@0 | 769 | |
michael@0 | 770 | static void |
michael@0 | 771 | #ifdef __STDC__ |
michael@0 | 772 | _ucnumb_load(char *paths, int reload) |
michael@0 | 773 | #else |
michael@0 | 774 | _ucnumb_load(paths, reload) |
michael@0 | 775 | char *paths; |
michael@0 | 776 | int reload; |
michael@0 | 777 | #endif |
michael@0 | 778 | { |
michael@0 | 779 | FILE *in; |
michael@0 | 780 | unsigned long size, i; |
michael@0 | 781 | _ucheader_t hdr; |
michael@0 | 782 | |
michael@0 | 783 | if (_ucnum_size > 0) { |
michael@0 | 784 | if (!reload) |
michael@0 | 785 | /* |
michael@0 | 786 | * The numbers have already been loaded. |
michael@0 | 787 | */ |
michael@0 | 788 | return; |
michael@0 | 789 | |
michael@0 | 790 | free((char *) _ucnum_nodes); |
michael@0 | 791 | _ucnum_size = 0; |
michael@0 | 792 | } |
michael@0 | 793 | |
michael@0 | 794 | if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0) |
michael@0 | 795 | return; |
michael@0 | 796 | |
michael@0 | 797 | /* |
michael@0 | 798 | * Load the header. |
michael@0 | 799 | */ |
michael@0 | 800 | fread((char *) &hdr, sizeof(_ucheader_t), 1, in); |
michael@0 | 801 | |
michael@0 | 802 | if (hdr.bom == 0xfffe) { |
michael@0 | 803 | hdr.cnt = endian_short(hdr.cnt); |
michael@0 | 804 | hdr.size.bytes = endian_long(hdr.size.bytes); |
michael@0 | 805 | } |
michael@0 | 806 | |
michael@0 | 807 | _ucnum_size = hdr.cnt; |
michael@0 | 808 | _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes); |
michael@0 | 809 | _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size); |
michael@0 | 810 | |
michael@0 | 811 | /* |
michael@0 | 812 | * Read the combining classes in. |
michael@0 | 813 | */ |
michael@0 | 814 | fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in); |
michael@0 | 815 | |
michael@0 | 816 | /* |
michael@0 | 817 | * Do an endian swap if necessary. |
michael@0 | 818 | */ |
michael@0 | 819 | if (hdr.bom == 0xfffe) { |
michael@0 | 820 | for (i = 0; i < _ucnum_size; i++) |
michael@0 | 821 | _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]); |
michael@0 | 822 | |
michael@0 | 823 | /* |
michael@0 | 824 | * Determine the number of values that have to be adjusted. |
michael@0 | 825 | */ |
michael@0 | 826 | size = (hdr.size.bytes - |
michael@0 | 827 | (_ucnum_size * (sizeof(unsigned long) << 1))) / |
michael@0 | 828 | sizeof(short); |
michael@0 | 829 | |
michael@0 | 830 | for (i = 0; i < size; i++) |
michael@0 | 831 | _ucnum_vals[i] = endian_short(_ucnum_vals[i]); |
michael@0 | 832 | } |
michael@0 | 833 | } |
michael@0 | 834 | |
michael@0 | 835 | static void |
michael@0 | 836 | #ifdef __STDC__ |
michael@0 | 837 | _ucnumb_unload(void) |
michael@0 | 838 | #else |
michael@0 | 839 | _ucnumb_unload() |
michael@0 | 840 | #endif |
michael@0 | 841 | { |
michael@0 | 842 | if (_ucnum_size == 0) |
michael@0 | 843 | return; |
michael@0 | 844 | |
michael@0 | 845 | free((char *) _ucnum_nodes); |
michael@0 | 846 | _ucnum_size = 0; |
michael@0 | 847 | } |
michael@0 | 848 | |
michael@0 | 849 | int |
michael@0 | 850 | #ifdef __STDC__ |
michael@0 | 851 | ucnumber_lookup(unsigned long code, struct ucnumber *num) |
michael@0 | 852 | #else |
michael@0 | 853 | ucnumber_lookup(code, num) |
michael@0 | 854 | unsigned long code; |
michael@0 | 855 | struct ucnumber *num; |
michael@0 | 856 | #endif |
michael@0 | 857 | { |
michael@0 | 858 | long l, r, m; |
michael@0 | 859 | short *vp; |
michael@0 | 860 | |
michael@0 | 861 | l = 0; |
michael@0 | 862 | r = _ucnum_size - 1; |
michael@0 | 863 | while (l <= r) { |
michael@0 | 864 | /* |
michael@0 | 865 | * Determine a "mid" point and adjust to make sure the mid point is at |
michael@0 | 866 | * the beginning of a code+offset pair. |
michael@0 | 867 | */ |
michael@0 | 868 | m = (l + r) >> 1; |
michael@0 | 869 | m -= (m & 1); |
michael@0 | 870 | if (code > _ucnum_nodes[m]) |
michael@0 | 871 | l = m + 2; |
michael@0 | 872 | else if (code < _ucnum_nodes[m]) |
michael@0 | 873 | r = m - 2; |
michael@0 | 874 | else { |
michael@0 | 875 | vp = _ucnum_vals + _ucnum_nodes[m + 1]; |
michael@0 | 876 | num->numerator = (int) *vp++; |
michael@0 | 877 | num->denominator = (int) *vp; |
michael@0 | 878 | return 1; |
michael@0 | 879 | } |
michael@0 | 880 | } |
michael@0 | 881 | return 0; |
michael@0 | 882 | } |
michael@0 | 883 | |
michael@0 | 884 | int |
michael@0 | 885 | #ifdef __STDC__ |
michael@0 | 886 | ucdigit_lookup(unsigned long code, int *digit) |
michael@0 | 887 | #else |
michael@0 | 888 | ucdigit_lookup(code, digit) |
michael@0 | 889 | unsigned long code; |
michael@0 | 890 | int *digit; |
michael@0 | 891 | #endif |
michael@0 | 892 | { |
michael@0 | 893 | long l, r, m; |
michael@0 | 894 | short *vp; |
michael@0 | 895 | |
michael@0 | 896 | l = 0; |
michael@0 | 897 | r = _ucnum_size - 1; |
michael@0 | 898 | while (l <= r) { |
michael@0 | 899 | /* |
michael@0 | 900 | * Determine a "mid" point and adjust to make sure the mid point is at |
michael@0 | 901 | * the beginning of a code+offset pair. |
michael@0 | 902 | */ |
michael@0 | 903 | m = (l + r) >> 1; |
michael@0 | 904 | m -= (m & 1); |
michael@0 | 905 | if (code > _ucnum_nodes[m]) |
michael@0 | 906 | l = m + 2; |
michael@0 | 907 | else if (code < _ucnum_nodes[m]) |
michael@0 | 908 | r = m - 2; |
michael@0 | 909 | else { |
michael@0 | 910 | vp = _ucnum_vals + _ucnum_nodes[m + 1]; |
michael@0 | 911 | if (*vp == *(vp + 1)) { |
michael@0 | 912 | *digit = *vp; |
michael@0 | 913 | return 1; |
michael@0 | 914 | } |
michael@0 | 915 | return 0; |
michael@0 | 916 | } |
michael@0 | 917 | } |
michael@0 | 918 | return 0; |
michael@0 | 919 | } |
michael@0 | 920 | |
michael@0 | 921 | struct ucnumber |
michael@0 | 922 | #ifdef __STDC__ |
michael@0 | 923 | ucgetnumber(unsigned long code) |
michael@0 | 924 | #else |
michael@0 | 925 | ucgetnumber(code) |
michael@0 | 926 | unsigned long code; |
michael@0 | 927 | #endif |
michael@0 | 928 | { |
michael@0 | 929 | struct ucnumber num; |
michael@0 | 930 | |
michael@0 | 931 | /* |
michael@0 | 932 | * Initialize with some arbitrary value, because the caller simply cannot |
michael@0 | 933 | * tell for sure if the code is a number without calling the ucisnumber() |
michael@0 | 934 | * macro before calling this function. |
michael@0 | 935 | */ |
michael@0 | 936 | num.numerator = num.denominator = -111; |
michael@0 | 937 | |
michael@0 | 938 | (void) ucnumber_lookup(code, &num); |
michael@0 | 939 | |
michael@0 | 940 | return num; |
michael@0 | 941 | } |
michael@0 | 942 | |
/*
 * Convenience wrapper around ucdigit_lookup() that returns the digit
 * directly.
 *
 * There is no failure indication: when the lookup fails, the arbitrary
 * placeholder -111 is returned.  Callers that need to know for sure should
 * test the code with the ucisdigit() macro first.
 */
int
#ifdef __STDC__
ucgetdigit(unsigned long code)
#else
ucgetdigit(code)
unsigned long code;
#endif
{
    /* Placeholder that survives only if the lookup below fails. */
    int value = -111;

    (void) ucdigit_lookup(code, &value);
    return value;
}
michael@0 | 964 | |
michael@0 | 965 | /************************************************************************** |
michael@0 | 966 | * |
michael@0 | 967 | * Setup and cleanup routines. |
michael@0 | 968 | * |
michael@0 | 969 | **************************************************************************/ |
michael@0 | 970 | |
michael@0 | 971 | void |
michael@0 | 972 | #ifdef __STDC__ |
michael@0 | 973 | ucdata_load(char *paths, int masks) |
michael@0 | 974 | #else |
michael@0 | 975 | ucdata_load(paths, masks) |
michael@0 | 976 | char *paths; |
michael@0 | 977 | int masks; |
michael@0 | 978 | #endif |
michael@0 | 979 | { |
michael@0 | 980 | if (masks & UCDATA_CTYPE) |
michael@0 | 981 | _ucprop_load(paths, 0); |
michael@0 | 982 | if (masks & UCDATA_CASE) |
michael@0 | 983 | _uccase_load(paths, 0); |
michael@0 | 984 | if (masks & UCDATA_DECOMP) |
michael@0 | 985 | _ucdcmp_load(paths, 0); |
michael@0 | 986 | if (masks & UCDATA_CMBCL) |
michael@0 | 987 | _uccmcl_load(paths, 0); |
michael@0 | 988 | if (masks & UCDATA_NUM) |
michael@0 | 989 | _ucnumb_load(paths, 0); |
michael@0 | 990 | } |
michael@0 | 991 | |
michael@0 | 992 | void |
michael@0 | 993 | #ifdef __STDC__ |
michael@0 | 994 | ucdata_unload(int masks) |
michael@0 | 995 | #else |
michael@0 | 996 | ucdata_unload(masks) |
michael@0 | 997 | int masks; |
michael@0 | 998 | #endif |
michael@0 | 999 | { |
michael@0 | 1000 | if (masks & UCDATA_CTYPE) |
michael@0 | 1001 | _ucprop_unload(); |
michael@0 | 1002 | if (masks & UCDATA_CASE) |
michael@0 | 1003 | _uccase_unload(); |
michael@0 | 1004 | if (masks & UCDATA_DECOMP) |
michael@0 | 1005 | _ucdcmp_unload(); |
michael@0 | 1006 | if (masks & UCDATA_CMBCL) |
michael@0 | 1007 | _uccmcl_unload(); |
michael@0 | 1008 | if (masks & UCDATA_NUM) |
michael@0 | 1009 | _ucnumb_unload(); |
michael@0 | 1010 | } |
michael@0 | 1011 | |
michael@0 | 1012 | void |
michael@0 | 1013 | #ifdef __STDC__ |
michael@0 | 1014 | ucdata_reload(char *paths, int masks) |
michael@0 | 1015 | #else |
michael@0 | 1016 | ucdata_reload(paths, masks) |
michael@0 | 1017 | char *paths; |
michael@0 | 1018 | int masks; |
michael@0 | 1019 | #endif |
michael@0 | 1020 | { |
michael@0 | 1021 | if (masks & UCDATA_CTYPE) |
michael@0 | 1022 | _ucprop_load(paths, 1); |
michael@0 | 1023 | if (masks & UCDATA_CASE) |
michael@0 | 1024 | _uccase_load(paths, 1); |
michael@0 | 1025 | if (masks & UCDATA_DECOMP) |
michael@0 | 1026 | _ucdcmp_load(paths, 1); |
michael@0 | 1027 | if (masks & UCDATA_CMBCL) |
michael@0 | 1028 | _uccmcl_load(paths, 1); |
michael@0 | 1029 | if (masks & UCDATA_NUM) |
michael@0 | 1030 | _ucnumb_load(paths, 1); |
michael@0 | 1031 | } |
michael@0 | 1032 | |
michael@0 | 1033 | #ifdef TEST |
michael@0 | 1034 | |
michael@0 | 1035 | void |
michael@0 | 1036 | #ifdef __STDC__ |
michael@0 | 1037 | main(void) |
michael@0 | 1038 | #else |
michael@0 | 1039 | main() |
michael@0 | 1040 | #endif |
michael@0 | 1041 | { |
michael@0 | 1042 | int dig; |
michael@0 | 1043 | unsigned long i, lo, *dec; |
michael@0 | 1044 | struct ucnumber num; |
michael@0 | 1045 | |
michael@0 | 1046 | ucdata_setup("."); |
michael@0 | 1047 | |
michael@0 | 1048 | if (ucisweak(0x30)) |
michael@0 | 1049 | printf("WEAK\n"); |
michael@0 | 1050 | else |
michael@0 | 1051 | printf("NOT WEAK\n"); |
michael@0 | 1052 | |
michael@0 | 1053 | printf("LOWER 0x%04lX\n", uctolower(0xff3a)); |
michael@0 | 1054 | printf("UPPER 0x%04lX\n", uctoupper(0xff5a)); |
michael@0 | 1055 | |
michael@0 | 1056 | if (ucisalpha(0x1d5)) |
michael@0 | 1057 | printf("ALPHA\n"); |
michael@0 | 1058 | else |
michael@0 | 1059 | printf("NOT ALPHA\n"); |
michael@0 | 1060 | |
michael@0 | 1061 | if (ucisupper(0x1d5)) { |
michael@0 | 1062 | printf("UPPER\n"); |
michael@0 | 1063 | lo = uctolower(0x1d5); |
michael@0 | 1064 | printf("0x%04lx\n", lo); |
michael@0 | 1065 | lo = uctotitle(0x1d5); |
michael@0 | 1066 | printf("0x%04lx\n", lo); |
michael@0 | 1067 | } else |
michael@0 | 1068 | printf("NOT UPPER\n"); |
michael@0 | 1069 | |
michael@0 | 1070 | if (ucistitle(0x1d5)) |
michael@0 | 1071 | printf("TITLE\n"); |
michael@0 | 1072 | else |
michael@0 | 1073 | printf("NOT TITLE\n"); |
michael@0 | 1074 | |
michael@0 | 1075 | if (uciscomposite(0x1d5)) |
michael@0 | 1076 | printf("COMPOSITE\n"); |
michael@0 | 1077 | else |
michael@0 | 1078 | printf("NOT COMPOSITE\n"); |
michael@0 | 1079 | |
michael@0 | 1080 | if (ucdecomp(0x1d5, &lo, &dec)) { |
michael@0 | 1081 | for (i = 0; i < lo; i++) |
michael@0 | 1082 | printf("0x%04lx ", dec[i]); |
michael@0 | 1083 | putchar('\n'); |
michael@0 | 1084 | } |
michael@0 | 1085 | |
michael@0 | 1086 | if ((lo = uccombining_class(0x41)) != 0) |
michael@0 | 1087 | printf("0x41 CCL %ld\n", lo); |
michael@0 | 1088 | |
michael@0 | 1089 | if (ucisxdigit(0xfeff)) |
michael@0 | 1090 | printf("0xFEFF HEX DIGIT\n"); |
michael@0 | 1091 | else |
michael@0 | 1092 | printf("0xFEFF NOT HEX DIGIT\n"); |
michael@0 | 1093 | |
michael@0 | 1094 | if (ucisdefined(0x10000)) |
michael@0 | 1095 | printf("0x10000 DEFINED\n"); |
michael@0 | 1096 | else |
michael@0 | 1097 | printf("0x10000 NOT DEFINED\n"); |
michael@0 | 1098 | |
michael@0 | 1099 | if (ucnumber_lookup(0x30, &num)) { |
michael@0 | 1100 | if (num.numerator != num.denominator) |
michael@0 | 1101 | printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1102 | else |
michael@0 | 1103 | printf("UCNUMBER: 0x30 = %d\n", num.numerator); |
michael@0 | 1104 | } else |
michael@0 | 1105 | printf("UCNUMBER: 0x30 NOT A NUMBER\n"); |
michael@0 | 1106 | |
michael@0 | 1107 | if (ucnumber_lookup(0xbc, &num)) { |
michael@0 | 1108 | if (num.numerator != num.denominator) |
michael@0 | 1109 | printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1110 | else |
michael@0 | 1111 | printf("UCNUMBER: 0xbc = %d\n", num.numerator); |
michael@0 | 1112 | } else |
michael@0 | 1113 | printf("UCNUMBER: 0xbc NOT A NUMBER\n"); |
michael@0 | 1114 | |
michael@0 | 1115 | |
michael@0 | 1116 | if (ucnumber_lookup(0xff19, &num)) { |
michael@0 | 1117 | if (num.numerator != num.denominator) |
michael@0 | 1118 | printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1119 | else |
michael@0 | 1120 | printf("UCNUMBER: 0xff19 = %d\n", num.numerator); |
michael@0 | 1121 | } else |
michael@0 | 1122 | printf("UCNUMBER: 0xff19 NOT A NUMBER\n"); |
michael@0 | 1123 | |
michael@0 | 1124 | if (ucnumber_lookup(0x4e00, &num)) { |
michael@0 | 1125 | if (num.numerator != num.denominator) |
michael@0 | 1126 | printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1127 | else |
michael@0 | 1128 | printf("UCNUMBER: 0x4e00 = %d\n", num.numerator); |
michael@0 | 1129 | } else |
michael@0 | 1130 | printf("UCNUMBER: 0x4e00 NOT A NUMBER\n"); |
michael@0 | 1131 | |
michael@0 | 1132 | if (ucdigit_lookup(0x06f9, &dig)) |
michael@0 | 1133 | printf("UCDIGIT: 0x6f9 = %d\n", dig); |
michael@0 | 1134 | else |
michael@0 | 1135 | printf("UCDIGIT: 0x6f9 NOT A NUMBER\n"); |
michael@0 | 1136 | |
michael@0 | 1137 | dig = ucgetdigit(0x0969); |
michael@0 | 1138 | printf("UCGETDIGIT: 0x969 = %d\n", dig); |
michael@0 | 1139 | |
michael@0 | 1140 | num = ucgetnumber(0x30); |
michael@0 | 1141 | if (num.numerator != num.denominator) |
michael@0 | 1142 | printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1143 | else |
michael@0 | 1144 | printf("UCGETNUMBER: 0x30 = %d\n", num.numerator); |
michael@0 | 1145 | |
michael@0 | 1146 | num = ucgetnumber(0xbc); |
michael@0 | 1147 | if (num.numerator != num.denominator) |
michael@0 | 1148 | printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1149 | else |
michael@0 | 1150 | printf("UCGETNUMBER: 0xbc = %d\n", num.numerator); |
michael@0 | 1151 | |
michael@0 | 1152 | num = ucgetnumber(0xff19); |
michael@0 | 1153 | if (num.numerator != num.denominator) |
michael@0 | 1154 | printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); |
michael@0 | 1155 | else |
michael@0 | 1156 | printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator); |
michael@0 | 1157 | |
michael@0 | 1158 | ucdata_cleanup(); |
michael@0 | 1159 | exit(0); |
michael@0 | 1160 | } |
michael@0 | 1161 | |
michael@0 | 1162 | #endif /* TEST */ |