michael@0: /* michael@0: * Copyright 1996, 1997, 1998 Computing Research Labs, michael@0: * New Mexico State University michael@0: * michael@0: * Permission is hereby granted, free of charge, to any person obtaining a michael@0: * copy of this software and associated documentation files (the "Software"), michael@0: * to deal in the Software without restriction, including without limitation michael@0: * the rights to use, copy, modify, merge, publish, distribute, sublicense, michael@0: * and/or sell copies of the Software, and to permit persons to whom the michael@0: * Software is furnished to do so, subject to the following conditions: michael@0: * michael@0: * The above copyright notice and this permission notice shall be included in michael@0: * all copies or substantial portions of the Software. michael@0: * michael@0: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR michael@0: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, michael@0: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL michael@0: * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY michael@0: * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT michael@0: * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR michael@0: * THE USE OR OTHER DEALINGS IN THE SOFTWARE. michael@0: */ michael@0: #ifndef lint michael@0: #ifdef __GNUC__ michael@0: static char rcsid[] __attribute__ ((unused)) = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $"; michael@0: #else michael@0: static char rcsid[] = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $"; michael@0: #endif michael@0: #endif michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #ifndef WIN32 michael@0: #include michael@0: #endif michael@0: michael@0: #include "ucdata.h" michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Miscellaneous types, data, and support functions. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: typedef struct { michael@0: unsigned short bom; michael@0: unsigned short cnt; michael@0: union { michael@0: unsigned long bytes; michael@0: unsigned short len[2]; michael@0: } size; michael@0: } _ucheader_t; michael@0: michael@0: /* michael@0: * A simple array of 32-bit masks for lookup. michael@0: */ michael@0: static unsigned long masks32[32] = { michael@0: 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, michael@0: 0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800, michael@0: 0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000, michael@0: 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000, michael@0: 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, michael@0: 0x40000000, 0x80000000 michael@0: }; michael@0: michael@0: #define endian_short(cc) (((cc) >> 8) | (((cc) & 0xff) << 8)) michael@0: #define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\ michael@0: ((((cc) >> 16) & 0xff) << 8)|((cc) >> 24)) michael@0: michael@0: static FILE * michael@0: #ifdef __STDC__ michael@0: _ucopenfile(char *paths, char *filename, char *mode) michael@0: #else michael@0: _ucopenfile(paths, filename, mode) michael@0: char *paths, *filename, *mode; michael@0: #endif michael@0: { michael@0: FILE *f; michael@0: char *fp, *dp, *pp, path[BUFSIZ]; michael@0: michael@0: if (filename == 0 || *filename == 0) michael@0: return 0; michael@0: michael@0: dp = paths; michael@0: while (dp && *dp) { michael@0: pp = path; michael@0: while (*dp && *dp != ':') michael@0: *pp++ = *dp++; michael@0: *pp++ = '/'; michael@0: michael@0: fp = filename; michael@0: while (*fp) michael@0: *pp++ = *fp++; michael@0: *pp = 0; michael@0: michael@0: if ((f = fopen(path, mode)) != 0) michael@0: return f; michael@0: michael@0: if (*dp == ':') michael@0: dp++; michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Support for the character properties. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: static unsigned long _ucprop_size; michael@0: static unsigned short *_ucprop_offsets; michael@0: static unsigned long *_ucprop_ranges; michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _ucprop_load(char *paths, int reload) michael@0: #else michael@0: _ucprop_load(paths, reload) michael@0: char *paths; michael@0: int reload; michael@0: #endif michael@0: { michael@0: FILE *in; michael@0: unsigned long size, i; michael@0: _ucheader_t hdr; michael@0: michael@0: if (_ucprop_size > 0) { michael@0: if (!reload) michael@0: /* michael@0: * The character properties have already been loaded. michael@0: */ michael@0: return; michael@0: michael@0: /* michael@0: * Unload the current character property data in preparation for michael@0: * loading a new copy. Only the first array has to be deallocated michael@0: * because all the memory for the arrays is allocated as a single michael@0: * block. michael@0: */ michael@0: free((char *) _ucprop_offsets); michael@0: _ucprop_size = 0; michael@0: } michael@0: michael@0: if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Load the header. michael@0: */ michael@0: fread((char *) &hdr, sizeof(_ucheader_t), 1, in); michael@0: michael@0: if (hdr.bom == 0xfffe) { michael@0: hdr.cnt = endian_short(hdr.cnt); michael@0: hdr.size.bytes = endian_long(hdr.size.bytes); michael@0: } michael@0: michael@0: if ((_ucprop_size = hdr.cnt) == 0) { michael@0: fclose(in); michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Allocate all the storage needed for the lookup table. michael@0: */ michael@0: _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes); michael@0: michael@0: /* michael@0: * Calculate the offset into the storage for the ranges. The offsets michael@0: * array is on a 4-byte boundary and one larger than the value provided in michael@0: * the header count field. This means the offset to the ranges must be michael@0: * calculated after aligning the count to a 4-byte boundary. michael@0: */ michael@0: if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3) michael@0: size += 4 - (size & 3); michael@0: size >>= 1; michael@0: _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size); michael@0: michael@0: /* michael@0: * Load the offset array. michael@0: */ michael@0: fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in); michael@0: michael@0: /* michael@0: * Do an endian swap if necessary. Don't forget there is an extra node on michael@0: * the end with the final index. michael@0: */ michael@0: if (hdr.bom == 0xfffe) { michael@0: for (i = 0; i <= _ucprop_size; i++) michael@0: _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]); michael@0: } michael@0: michael@0: /* michael@0: * Load the ranges. The number of elements is in the last array position michael@0: * of the offsets. michael@0: */ michael@0: fread((char *) _ucprop_ranges, sizeof(unsigned long), michael@0: _ucprop_offsets[_ucprop_size], in); michael@0: michael@0: fclose(in); michael@0: michael@0: /* michael@0: * Do an endian swap if necessary. michael@0: */ michael@0: if (hdr.bom == 0xfffe) { michael@0: for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++) michael@0: _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _ucprop_unload(void) michael@0: #else michael@0: _ucprop_unload() michael@0: #endif michael@0: { michael@0: if (_ucprop_size == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Only need to free the offsets because the memory is allocated as a michael@0: * single block. michael@0: */ michael@0: free((char *) _ucprop_offsets); michael@0: _ucprop_size = 0; michael@0: } michael@0: michael@0: static int michael@0: #ifdef __STDC__ michael@0: _ucprop_lookup(unsigned long code, unsigned long n) michael@0: #else michael@0: _ucprop_lookup(code, n) michael@0: unsigned long code, n; michael@0: #endif michael@0: { michael@0: long l, r, m; michael@0: michael@0: /* michael@0: * There is an extra node on the end of the offsets to allow this routine michael@0: * to work right. If the index is 0xffff, then there are no nodes for the michael@0: * property. michael@0: */ michael@0: if ((l = _ucprop_offsets[n]) == 0xffff) michael@0: return 0; michael@0: michael@0: /* michael@0: * Locate the next offset that is not 0xffff. The sentinel at the end of michael@0: * the array is the max index value. michael@0: */ michael@0: for (m = 1; michael@0: n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ; michael@0: michael@0: r = _ucprop_offsets[n + m] - 1; michael@0: michael@0: while (l <= r) { michael@0: /* michael@0: * Determine a "mid" point and adjust to make sure the mid point is at michael@0: * the beginning of a range pair. michael@0: */ michael@0: m = (l + r) >> 1; michael@0: m -= (m & 1); michael@0: if (code > _ucprop_ranges[m + 1]) michael@0: l = m + 2; michael@0: else if (code < _ucprop_ranges[m]) michael@0: r = m - 2; michael@0: else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) michael@0: return 1; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int michael@0: #ifdef __STDC__ michael@0: ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2) michael@0: #else michael@0: ucisprop(code, mask1, mask2) michael@0: unsigned long code, mask1, mask2; michael@0: #endif michael@0: { michael@0: unsigned long i; michael@0: michael@0: if (mask1 == 0 && mask2 == 0) michael@0: return 0; michael@0: michael@0: for (i = 0; mask1 && i < 32; i++) { michael@0: if ((mask1 & masks32[i]) && _ucprop_lookup(code, i)) michael@0: return 1; michael@0: } michael@0: michael@0: for (i = 32; mask2 && i < _ucprop_size; i++) { michael@0: if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i)) michael@0: return 1; michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Support for case mapping. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: static unsigned long _uccase_size; michael@0: static unsigned short _uccase_len[2]; michael@0: static unsigned long *_uccase_map; michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _uccase_load(char *paths, int reload) michael@0: #else michael@0: _uccase_load(paths, reload) michael@0: char *paths; michael@0: int reload; michael@0: #endif michael@0: { michael@0: FILE *in; michael@0: unsigned long i; michael@0: _ucheader_t hdr; michael@0: michael@0: if (_uccase_size > 0) { michael@0: if (!reload) michael@0: /* michael@0: * The case mappings have already been loaded. michael@0: */ michael@0: return; michael@0: michael@0: free((char *) _uccase_map); michael@0: _uccase_size = 0; michael@0: } michael@0: michael@0: if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Load the header. michael@0: */ michael@0: fread((char *) &hdr, sizeof(_ucheader_t), 1, in); michael@0: michael@0: if (hdr.bom == 0xfffe) { michael@0: hdr.cnt = endian_short(hdr.cnt); michael@0: hdr.size.len[0] = endian_short(hdr.size.len[0]); michael@0: hdr.size.len[1] = endian_short(hdr.size.len[1]); michael@0: } michael@0: michael@0: /* michael@0: * Set the node count and lengths of the upper and lower case mapping michael@0: * tables. michael@0: */ michael@0: _uccase_size = hdr.cnt * 3; michael@0: _uccase_len[0] = hdr.size.len[0] * 3; michael@0: _uccase_len[1] = hdr.size.len[1] * 3; michael@0: michael@0: _uccase_map = (unsigned long *) michael@0: malloc(_uccase_size * sizeof(unsigned long)); michael@0: michael@0: /* michael@0: * Load the case mapping table. michael@0: */ michael@0: fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in); michael@0: michael@0: /* michael@0: * Do an endian swap if necessary. michael@0: */ michael@0: if (hdr.bom == 0xfffe) { michael@0: for (i = 0; i < _uccase_size; i++) michael@0: _uccase_map[i] = endian_long(_uccase_map[i]); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _uccase_unload(void) michael@0: #else michael@0: _uccase_unload() michael@0: #endif michael@0: { michael@0: if (_uccase_size == 0) michael@0: return; michael@0: michael@0: free((char *) _uccase_map); michael@0: _uccase_size = 0; michael@0: } michael@0: michael@0: static unsigned long michael@0: #ifdef __STDC__ michael@0: _uccase_lookup(unsigned long code, long l, long r, int field) michael@0: #else michael@0: _uccase_lookup(code, l, r, field) michael@0: unsigned long code; michael@0: long l, r; michael@0: int field; michael@0: #endif michael@0: { michael@0: long m; michael@0: michael@0: /* michael@0: * Do the binary search. michael@0: */ michael@0: while (l <= r) { michael@0: /* michael@0: * Determine a "mid" point and adjust to make sure the mid point is at michael@0: * the beginning of a case mapping triple. michael@0: */ michael@0: m = (l + r) >> 1; michael@0: m -= (m % 3); michael@0: if (code > _uccase_map[m]) michael@0: l = m + 3; michael@0: else if (code < _uccase_map[m]) michael@0: r = m - 3; michael@0: else if (code == _uccase_map[m]) michael@0: return _uccase_map[m + field]; michael@0: } michael@0: michael@0: return code; michael@0: } michael@0: michael@0: unsigned long michael@0: #ifdef __STDC__ michael@0: uctoupper(unsigned long code) michael@0: #else michael@0: uctoupper(code) michael@0: unsigned long code; michael@0: #endif michael@0: { michael@0: int field; michael@0: long l, r; michael@0: michael@0: if (ucisupper(code)) michael@0: return code; michael@0: michael@0: if (ucislower(code)) { michael@0: /* michael@0: * The character is lower case. michael@0: */ michael@0: field = 1; michael@0: l = _uccase_len[0]; michael@0: r = (l + _uccase_len[1]) - 1; michael@0: } else { michael@0: /* michael@0: * The character is title case. michael@0: */ michael@0: field = 2; michael@0: l = _uccase_len[0] + _uccase_len[1]; michael@0: r = _uccase_size - 1; michael@0: } michael@0: return _uccase_lookup(code, l, r, field); michael@0: } michael@0: michael@0: unsigned long michael@0: #ifdef __STDC__ michael@0: uctolower(unsigned long code) michael@0: #else michael@0: uctolower(code) michael@0: unsigned long code; michael@0: #endif michael@0: { michael@0: int field; michael@0: long l, r; michael@0: michael@0: if (ucislower(code)) michael@0: return code; michael@0: michael@0: if (ucisupper(code)) { michael@0: /* michael@0: * The character is upper case. michael@0: */ michael@0: field = 1; michael@0: l = 0; michael@0: r = _uccase_len[0] - 1; michael@0: } else { michael@0: /* michael@0: * The character is title case. michael@0: */ michael@0: field = 2; michael@0: l = _uccase_len[0] + _uccase_len[1]; michael@0: r = _uccase_size - 1; michael@0: } michael@0: return _uccase_lookup(code, l, r, field); michael@0: } michael@0: michael@0: unsigned long michael@0: #ifdef __STDC__ michael@0: uctotitle(unsigned long code) michael@0: #else michael@0: uctotitle(code) michael@0: unsigned long code; michael@0: #endif michael@0: { michael@0: int field; michael@0: long l, r; michael@0: michael@0: if (ucistitle(code)) michael@0: return code; michael@0: michael@0: /* michael@0: * The offset will always be the same for converting to title case. michael@0: */ michael@0: field = 2; michael@0: michael@0: if (ucisupper(code)) { michael@0: /* michael@0: * The character is upper case. michael@0: */ michael@0: l = 0; michael@0: r = _uccase_len[0] - 1; michael@0: } else { michael@0: /* michael@0: * The character is lower case. michael@0: */ michael@0: l = _uccase_len[0]; michael@0: r = (l + _uccase_len[1]) - 1; michael@0: } michael@0: return _uccase_lookup(code, l, r, field); michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Support for decompositions. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: static unsigned long _ucdcmp_size; michael@0: static unsigned long *_ucdcmp_nodes; michael@0: static unsigned long *_ucdcmp_decomp; michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _ucdcmp_load(char *paths, int reload) michael@0: #else michael@0: _ucdcmp_load(paths, reload) michael@0: char *paths; michael@0: int reload; michael@0: #endif michael@0: { michael@0: FILE *in; michael@0: unsigned long size, i; michael@0: _ucheader_t hdr; michael@0: michael@0: if (_ucdcmp_size > 0) { michael@0: if (!reload) michael@0: /* michael@0: * The decompositions have already been loaded. michael@0: */ michael@0: return; michael@0: michael@0: free((char *) _ucdcmp_nodes); michael@0: _ucdcmp_size = 0; michael@0: } michael@0: michael@0: if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Load the header. michael@0: */ michael@0: fread((char *) &hdr, sizeof(_ucheader_t), 1, in); michael@0: michael@0: if (hdr.bom == 0xfffe) { michael@0: hdr.cnt = endian_short(hdr.cnt); michael@0: hdr.size.bytes = endian_long(hdr.size.bytes); michael@0: } michael@0: michael@0: _ucdcmp_size = hdr.cnt << 1; michael@0: _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes); michael@0: _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1); michael@0: michael@0: /* michael@0: * Read the decomposition data in. michael@0: */ michael@0: size = hdr.size.bytes / sizeof(unsigned long); michael@0: fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in); michael@0: michael@0: /* michael@0: * Do an endian swap if necessary. michael@0: */ michael@0: if (hdr.bom == 0xfffe) { michael@0: for (i = 0; i < size; i++) michael@0: _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _ucdcmp_unload(void) michael@0: #else michael@0: _ucdcmp_unload() michael@0: #endif michael@0: { michael@0: if (_ucdcmp_size == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Only need to free the offsets because the memory is allocated as a michael@0: * single block. michael@0: */ michael@0: free((char *) _ucdcmp_nodes); michael@0: _ucdcmp_size = 0; michael@0: } michael@0: michael@0: int michael@0: #ifdef __STDC__ michael@0: ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) michael@0: #else michael@0: ucdecomp(code, num, decomp) michael@0: unsigned long code, *num, **decomp; michael@0: #endif michael@0: { michael@0: long l, r, m; michael@0: michael@0: l = 0; michael@0: r = _ucdcmp_nodes[_ucdcmp_size] - 1; michael@0: michael@0: while (l <= r) { michael@0: /* michael@0: * Determine a "mid" point and adjust to make sure the mid point is at michael@0: * the beginning of a code+offset pair. michael@0: */ michael@0: m = (l + r) >> 1; michael@0: m -= (m & 1); michael@0: if (code > _ucdcmp_nodes[m]) michael@0: l = m + 2; michael@0: else if (code < _ucdcmp_nodes[m]) michael@0: r = m - 2; michael@0: else if (code == _ucdcmp_nodes[m]) { michael@0: *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1]; michael@0: *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; michael@0: return 1; michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int michael@0: #ifdef __STDC__ michael@0: ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[]) michael@0: #else michael@0: ucdecomp_hangul(code, num, decomp) michael@0: unsigned long code, *num, decomp[]; michael@0: #endif michael@0: { michael@0: if (!ucishangul(code)) michael@0: return 0; michael@0: michael@0: code -= 0xac00; michael@0: decomp[0] = 0x1100 + (unsigned long) (code / 588); michael@0: decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28); michael@0: decomp[2] = 0x11a7 + (unsigned long) (code % 28); michael@0: *num = (decomp[2] != 0x11a7) ? 3 : 2; michael@0: michael@0: return 1; michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Support for combining classes. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: static unsigned long _uccmcl_size; michael@0: static unsigned long *_uccmcl_nodes; michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _uccmcl_load(char *paths, int reload) michael@0: #else michael@0: _uccmcl_load(paths, reload) michael@0: char *paths; michael@0: int reload; michael@0: #endif michael@0: { michael@0: FILE *in; michael@0: unsigned long i; michael@0: _ucheader_t hdr; michael@0: michael@0: if (_uccmcl_size > 0) { michael@0: if (!reload) michael@0: /* michael@0: * The combining classes have already been loaded. michael@0: */ michael@0: return; michael@0: michael@0: free((char *) _uccmcl_nodes); michael@0: _uccmcl_size = 0; michael@0: } michael@0: michael@0: if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Load the header. michael@0: */ michael@0: fread((char *) &hdr, sizeof(_ucheader_t), 1, in); michael@0: michael@0: if (hdr.bom == 0xfffe) { michael@0: hdr.cnt = endian_short(hdr.cnt); michael@0: hdr.size.bytes = endian_long(hdr.size.bytes); michael@0: } michael@0: michael@0: _uccmcl_size = hdr.cnt * 3; michael@0: _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes); michael@0: michael@0: /* michael@0: * Read the combining classes in. michael@0: */ michael@0: fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in); michael@0: michael@0: /* michael@0: * Do an endian swap if necessary. michael@0: */ michael@0: if (hdr.bom == 0xfffe) { michael@0: for (i = 0; i < _uccmcl_size; i++) michael@0: _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _uccmcl_unload(void) michael@0: #else michael@0: _uccmcl_unload() michael@0: #endif michael@0: { michael@0: if (_uccmcl_size == 0) michael@0: return; michael@0: michael@0: free((char *) _uccmcl_nodes); michael@0: _uccmcl_size = 0; michael@0: } michael@0: michael@0: unsigned long michael@0: #ifdef __STDC__ michael@0: uccombining_class(unsigned long code) michael@0: #else michael@0: uccombining_class(code) michael@0: unsigned long code; michael@0: #endif michael@0: { michael@0: long l, r, m; michael@0: michael@0: l = 0; michael@0: r = _uccmcl_size - 1; michael@0: michael@0: while (l <= r) { michael@0: m = (l + r) >> 1; michael@0: m -= (m % 3); michael@0: if (code > _uccmcl_nodes[m + 1]) michael@0: l = m + 3; michael@0: else if (code < _uccmcl_nodes[m]) michael@0: r = m - 3; michael@0: else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1]) michael@0: return _uccmcl_nodes[m + 2]; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Support for numeric values. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: static unsigned long *_ucnum_nodes; michael@0: static unsigned long _ucnum_size; michael@0: static short *_ucnum_vals; michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _ucnumb_load(char *paths, int reload) michael@0: #else michael@0: _ucnumb_load(paths, reload) michael@0: char *paths; michael@0: int reload; michael@0: #endif michael@0: { michael@0: FILE *in; michael@0: unsigned long size, i; michael@0: _ucheader_t hdr; michael@0: michael@0: if (_ucnum_size > 0) { michael@0: if (!reload) michael@0: /* michael@0: * The numbers have already been loaded. michael@0: */ michael@0: return; michael@0: michael@0: free((char *) _ucnum_nodes); michael@0: _ucnum_size = 0; michael@0: } michael@0: michael@0: if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0) michael@0: return; michael@0: michael@0: /* michael@0: * Load the header. michael@0: */ michael@0: fread((char *) &hdr, sizeof(_ucheader_t), 1, in); michael@0: michael@0: if (hdr.bom == 0xfffe) { michael@0: hdr.cnt = endian_short(hdr.cnt); michael@0: hdr.size.bytes = endian_long(hdr.size.bytes); michael@0: } michael@0: michael@0: _ucnum_size = hdr.cnt; michael@0: _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes); michael@0: _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size); michael@0: michael@0: /* michael@0: * Read the combining classes in. michael@0: */ michael@0: fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in); michael@0: michael@0: /* michael@0: * Do an endian swap if necessary. michael@0: */ michael@0: if (hdr.bom == 0xfffe) { michael@0: for (i = 0; i < _ucnum_size; i++) michael@0: _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]); michael@0: michael@0: /* michael@0: * Determine the number of values that have to be adjusted. michael@0: */ michael@0: size = (hdr.size.bytes - michael@0: (_ucnum_size * (sizeof(unsigned long) << 1))) / michael@0: sizeof(short); michael@0: michael@0: for (i = 0; i < size; i++) michael@0: _ucnum_vals[i] = endian_short(_ucnum_vals[i]); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: #ifdef __STDC__ michael@0: _ucnumb_unload(void) michael@0: #else michael@0: _ucnumb_unload() michael@0: #endif michael@0: { michael@0: if (_ucnum_size == 0) michael@0: return; michael@0: michael@0: free((char *) _ucnum_nodes); michael@0: _ucnum_size = 0; michael@0: } michael@0: michael@0: int michael@0: #ifdef __STDC__ michael@0: ucnumber_lookup(unsigned long code, struct ucnumber *num) michael@0: #else michael@0: ucnumber_lookup(code, num) michael@0: unsigned long code; michael@0: struct ucnumber *num; michael@0: #endif michael@0: { michael@0: long l, r, m; michael@0: short *vp; michael@0: michael@0: l = 0; michael@0: r = _ucnum_size - 1; michael@0: while (l <= r) { michael@0: /* michael@0: * Determine a "mid" point and adjust to make sure the mid point is at michael@0: * the beginning of a code+offset pair. michael@0: */ michael@0: m = (l + r) >> 1; michael@0: m -= (m & 1); michael@0: if (code > _ucnum_nodes[m]) michael@0: l = m + 2; michael@0: else if (code < _ucnum_nodes[m]) michael@0: r = m - 2; michael@0: else { michael@0: vp = _ucnum_vals + _ucnum_nodes[m + 1]; michael@0: num->numerator = (int) *vp++; michael@0: num->denominator = (int) *vp; michael@0: return 1; michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int michael@0: #ifdef __STDC__ michael@0: ucdigit_lookup(unsigned long code, int *digit) michael@0: #else michael@0: ucdigit_lookup(code, digit) michael@0: unsigned long code; michael@0: int *digit; michael@0: #endif michael@0: { michael@0: long l, r, m; michael@0: short *vp; michael@0: michael@0: l = 0; michael@0: r = _ucnum_size - 1; michael@0: while (l <= r) { michael@0: /* michael@0: * Determine a "mid" point and adjust to make sure the mid point is at michael@0: * the beginning of a code+offset pair. michael@0: */ michael@0: m = (l + r) >> 1; michael@0: m -= (m & 1); michael@0: if (code > _ucnum_nodes[m]) michael@0: l = m + 2; michael@0: else if (code < _ucnum_nodes[m]) michael@0: r = m - 2; michael@0: else { michael@0: vp = _ucnum_vals + _ucnum_nodes[m + 1]; michael@0: if (*vp == *(vp + 1)) { michael@0: *digit = *vp; michael@0: return 1; michael@0: } michael@0: return 0; michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: struct ucnumber michael@0: #ifdef __STDC__ michael@0: ucgetnumber(unsigned long code) michael@0: #else michael@0: ucgetnumber(code) michael@0: unsigned long code; michael@0: #endif michael@0: { michael@0: struct ucnumber num; michael@0: michael@0: /* michael@0: * Initialize with some arbitrary value, because the caller simply cannot michael@0: * tell for sure if the code is a number without calling the ucisnumber() michael@0: * macro before calling this function. michael@0: */ michael@0: num.numerator = num.denominator = -111; michael@0: michael@0: (void) ucnumber_lookup(code, &num); michael@0: michael@0: return num; michael@0: } michael@0: michael@0: int michael@0: #ifdef __STDC__ michael@0: ucgetdigit(unsigned long code) michael@0: #else michael@0: ucgetdigit(code) michael@0: unsigned long code; michael@0: #endif michael@0: { michael@0: int dig; michael@0: michael@0: /* michael@0: * Initialize with some arbitrary value, because the caller simply cannot michael@0: * tell for sure if the code is a number without calling the ucisdigit() michael@0: * macro before calling this function. michael@0: */ michael@0: dig = -111; michael@0: michael@0: (void) ucdigit_lookup(code, &dig); michael@0: michael@0: return dig; michael@0: } michael@0: michael@0: /************************************************************************** michael@0: * michael@0: * Setup and cleanup routines. michael@0: * michael@0: **************************************************************************/ michael@0: michael@0: void michael@0: #ifdef __STDC__ michael@0: ucdata_load(char *paths, int masks) michael@0: #else michael@0: ucdata_load(paths, masks) michael@0: char *paths; michael@0: int masks; michael@0: #endif michael@0: { michael@0: if (masks & UCDATA_CTYPE) michael@0: _ucprop_load(paths, 0); michael@0: if (masks & UCDATA_CASE) michael@0: _uccase_load(paths, 0); michael@0: if (masks & UCDATA_DECOMP) michael@0: _ucdcmp_load(paths, 0); michael@0: if (masks & UCDATA_CMBCL) michael@0: _uccmcl_load(paths, 0); michael@0: if (masks & UCDATA_NUM) michael@0: _ucnumb_load(paths, 0); michael@0: } michael@0: michael@0: void michael@0: #ifdef __STDC__ michael@0: ucdata_unload(int masks) michael@0: #else michael@0: ucdata_unload(masks) michael@0: int masks; michael@0: #endif michael@0: { michael@0: if (masks & UCDATA_CTYPE) michael@0: _ucprop_unload(); michael@0: if (masks & UCDATA_CASE) michael@0: _uccase_unload(); michael@0: if (masks & UCDATA_DECOMP) michael@0: _ucdcmp_unload(); michael@0: if (masks & UCDATA_CMBCL) michael@0: _uccmcl_unload(); michael@0: if (masks & UCDATA_NUM) michael@0: _ucnumb_unload(); michael@0: } michael@0: michael@0: void michael@0: #ifdef __STDC__ michael@0: ucdata_reload(char *paths, int masks) michael@0: #else michael@0: ucdata_reload(paths, masks) michael@0: char *paths; michael@0: int masks; michael@0: #endif michael@0: { michael@0: if (masks & UCDATA_CTYPE) michael@0: _ucprop_load(paths, 1); michael@0: if (masks & UCDATA_CASE) michael@0: _uccase_load(paths, 1); michael@0: if (masks & UCDATA_DECOMP) michael@0: _ucdcmp_load(paths, 1); michael@0: if (masks & UCDATA_CMBCL) michael@0: _uccmcl_load(paths, 1); michael@0: if (masks & UCDATA_NUM) michael@0: _ucnumb_load(paths, 1); michael@0: } michael@0: michael@0: #ifdef TEST michael@0: michael@0: void michael@0: #ifdef __STDC__ michael@0: main(void) michael@0: #else michael@0: main() michael@0: #endif michael@0: { michael@0: int dig; michael@0: unsigned long i, lo, *dec; michael@0: struct ucnumber num; michael@0: michael@0: ucdata_setup("."); michael@0: michael@0: if (ucisweak(0x30)) michael@0: printf("WEAK\n"); michael@0: else michael@0: printf("NOT WEAK\n"); michael@0: michael@0: printf("LOWER 0x%04lX\n", uctolower(0xff3a)); michael@0: printf("UPPER 0x%04lX\n", uctoupper(0xff5a)); michael@0: michael@0: if (ucisalpha(0x1d5)) michael@0: printf("ALPHA\n"); michael@0: else michael@0: printf("NOT ALPHA\n"); michael@0: michael@0: if (ucisupper(0x1d5)) { michael@0: printf("UPPER\n"); michael@0: lo = uctolower(0x1d5); michael@0: printf("0x%04lx\n", lo); michael@0: lo = uctotitle(0x1d5); michael@0: printf("0x%04lx\n", lo); michael@0: } else michael@0: printf("NOT UPPER\n"); michael@0: michael@0: if (ucistitle(0x1d5)) michael@0: printf("TITLE\n"); michael@0: else michael@0: printf("NOT TITLE\n"); michael@0: michael@0: if (uciscomposite(0x1d5)) michael@0: printf("COMPOSITE\n"); michael@0: else michael@0: printf("NOT COMPOSITE\n"); michael@0: michael@0: if (ucdecomp(0x1d5, &lo, &dec)) { michael@0: for (i = 0; i < lo; i++) michael@0: printf("0x%04lx ", dec[i]); michael@0: putchar('\n'); michael@0: } michael@0: michael@0: if ((lo = uccombining_class(0x41)) != 0) michael@0: printf("0x41 CCL %ld\n", lo); michael@0: michael@0: if (ucisxdigit(0xfeff)) michael@0: printf("0xFEFF HEX DIGIT\n"); michael@0: else michael@0: printf("0xFEFF NOT HEX DIGIT\n"); michael@0: michael@0: if (ucisdefined(0x10000)) michael@0: printf("0x10000 DEFINED\n"); michael@0: else michael@0: printf("0x10000 NOT DEFINED\n"); michael@0: michael@0: if (ucnumber_lookup(0x30, &num)) { michael@0: if (num.numerator != num.denominator) michael@0: printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCNUMBER: 0x30 = %d\n", num.numerator); michael@0: } else michael@0: printf("UCNUMBER: 0x30 NOT A NUMBER\n"); michael@0: michael@0: if (ucnumber_lookup(0xbc, &num)) { michael@0: if (num.numerator != num.denominator) michael@0: printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCNUMBER: 0xbc = %d\n", num.numerator); michael@0: } else michael@0: printf("UCNUMBER: 0xbc NOT A NUMBER\n"); michael@0: michael@0: michael@0: if (ucnumber_lookup(0xff19, &num)) { michael@0: if (num.numerator != num.denominator) michael@0: printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCNUMBER: 0xff19 = %d\n", num.numerator); michael@0: } else michael@0: printf("UCNUMBER: 0xff19 NOT A NUMBER\n"); michael@0: michael@0: if (ucnumber_lookup(0x4e00, &num)) { michael@0: if (num.numerator != num.denominator) michael@0: printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCNUMBER: 0x4e00 = %d\n", num.numerator); michael@0: } else michael@0: printf("UCNUMBER: 0x4e00 NOT A NUMBER\n"); michael@0: michael@0: if (ucdigit_lookup(0x06f9, &dig)) michael@0: printf("UCDIGIT: 0x6f9 = %d\n", dig); michael@0: else michael@0: printf("UCDIGIT: 0x6f9 NOT A NUMBER\n"); michael@0: michael@0: dig = ucgetdigit(0x0969); michael@0: printf("UCGETDIGIT: 0x969 = %d\n", dig); michael@0: michael@0: num = ucgetnumber(0x30); michael@0: if (num.numerator != num.denominator) michael@0: printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCGETNUMBER: 0x30 = %d\n", num.numerator); michael@0: michael@0: num = ucgetnumber(0xbc); michael@0: if (num.numerator != num.denominator) michael@0: printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCGETNUMBER: 0xbc = %d\n", num.numerator); michael@0: michael@0: num = ucgetnumber(0xff19); michael@0: if (num.numerator != num.denominator) michael@0: printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); michael@0: else michael@0: printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator); michael@0: michael@0: ucdata_cleanup(); michael@0: exit(0); michael@0: } michael@0: michael@0: #endif /* TEST */