1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/spellcheck/hunspell/src/hashmgr.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,982 @@ 1.4 +/******* BEGIN LICENSE BLOCK ******* 1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 1.6 + * 1.7 + * The contents of this file are subject to the Mozilla Public License Version 1.8 + * 1.1 (the "License"); you may not use this file except in compliance with 1.9 + * the License. You may obtain a copy of the License at 1.10 + * http://www.mozilla.org/MPL/ 1.11 + * 1.12 + * Software distributed under the License is distributed on an "AS IS" basis, 1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 1.14 + * for the specific language governing rights and limitations under the 1.15 + * License. 1.16 + * 1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) 1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers 1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. 1.20 + * 1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) 1.22 + * David Einstein (deinst@world.std.com) 1.23 + * László Németh (nemethl@gyorsposta.hu) 1.24 + * Caolan McNamara (caolanm@redhat.com) 1.25 + * Davide Prina 1.26 + * Giuseppe Modugno 1.27 + * Gianluca Turconi 1.28 + * Simon Brouwer 1.29 + * Noll Janos 1.30 + * Biro Arpad 1.31 + * Goldman Eleonora 1.32 + * Sarlos Tamas 1.33 + * Bencsath Boldizsar 1.34 + * Halacsy Peter 1.35 + * Dvornik Laszlo 1.36 + * Gefferth Andras 1.37 + * Nagy Viktor 1.38 + * Varga Daniel 1.39 + * Chris Halls 1.40 + * Rene Engelhard 1.41 + * Bram Moolenaar 1.42 + * Dafydd Jones 1.43 + * Harri Pitkanen 1.44 + * Andras Timar 1.45 + * Tor Lillqvist 1.46 + * 1.47 + * Alternatively, the contents of this file may be used under the terms of 1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or 1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead 1.51 + * of those above. If you wish to allow use of your version of this file only 1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to 1.53 + * use your version of this file under the terms of the MPL, indicate your 1.54 + * decision by deleting the provisions above and replace them with the notice 1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete 1.56 + * the provisions above, a recipient may use your version of this file under 1.57 + * the terms of any one of the MPL, the GPL or the LGPL. 1.58 + * 1.59 + ******* END LICENSE BLOCK *******/ 1.60 + 1.61 +#include <stdlib.h> 1.62 +#include <string.h> 1.63 +#include <stdio.h> 1.64 +#include <ctype.h> 1.65 + 1.66 +#include "hashmgr.hxx" 1.67 +#include "csutil.hxx" 1.68 +#include "atypes.hxx" 1.69 + 1.70 +// build a hash table from a munched word list 1.71 + 1.72 +HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) 1.73 +{ 1.74 + tablesize = 0; 1.75 + tableptr = NULL; 1.76 + flag_mode = FLAG_CHAR; 1.77 + complexprefixes = 0; 1.78 + utf8 = 0; 1.79 + langnum = 0; 1.80 + lang = NULL; 1.81 + enc = NULL; 1.82 + csconv = 0; 1.83 + ignorechars = NULL; 1.84 + ignorechars_utf16 = NULL; 1.85 + ignorechars_utf16_len = 0; 1.86 + numaliasf = 0; 1.87 + aliasf = NULL; 1.88 + numaliasm = 0; 1.89 + aliasm = NULL; 1.90 + forbiddenword = FORBIDDENWORD; // forbidden word signing flag 1.91 + load_config(apath, key); 1.92 + int ec = load_tables(tpath, key); 1.93 + if (ec) { 1.94 + /* error condition - what should we do here */ 1.95 + HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); 1.96 + if (tableptr) { 1.97 + free(tableptr); 1.98 + tableptr = NULL; 1.99 + } 1.100 + tablesize = 0; 1.101 + } 1.102 +} 1.103 + 1.104 + 1.105 +HashMgr::~HashMgr() 1.106 +{ 1.107 + if (tableptr) { 1.108 + // now pass through hash table freeing up everything 1.109 + // go through column by column of the table 1.110 + for (int i=0; i < tablesize; i++) { 1.111 + struct hentry * pt = tableptr[i]; 1.112 + struct hentry * nt = NULL; 1.113 + while(pt) { 1.114 + nt = pt->next; 1.115 + if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr); 1.116 + free(pt); 1.117 + pt = nt; 1.118 + } 1.119 + } 1.120 + free(tableptr); 1.121 + } 1.122 + tablesize = 0; 1.123 + 1.124 + if (aliasf) { 1.125 + for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); 1.126 + free(aliasf); 1.127 + aliasf = NULL; 1.128 + if (aliasflen) { 1.129 + free(aliasflen); 1.130 + aliasflen = NULL; 1.131 + } 1.132 + } 1.133 + if (aliasm) { 1.134 + for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); 1.135 + free(aliasm); 1.136 + aliasm = NULL; 1.137 + } 1.138 + 1.139 +#ifndef OPENOFFICEORG 1.140 +#ifndef MOZILLA_CLIENT 1.141 + if (utf8) free_utf_tbl(); 1.142 +#endif 1.143 +#endif 1.144 + 1.145 + if (enc) free(enc); 1.146 + if (lang) free(lang); 1.147 + 1.148 + if (ignorechars) free(ignorechars); 1.149 + if (ignorechars_utf16) free(ignorechars_utf16); 1.150 + 1.151 +#ifdef MOZILLA_CLIENT 1.152 + delete [] csconv; 1.153 +#endif 1.154 +} 1.155 + 1.156 +// lookup a root word in the hashtable 1.157 + 1.158 +struct hentry * HashMgr::lookup(const char *word) const 1.159 +{ 1.160 + struct hentry * dp; 1.161 + if (tableptr) { 1.162 + dp = tableptr[hash(word)]; 1.163 + if (!dp) return NULL; 1.164 + for ( ; dp != NULL; dp = dp->next) { 1.165 + if (strcmp(word, dp->word) == 0) return dp; 1.166 + } 1.167 + } 1.168 + return NULL; 1.169 +} 1.170 + 1.171 +// add a word to the hash table (private) 1.172 +int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, 1.173 + int al, const char * desc, bool onlyupcase) 1.174 +{ 1.175 + bool upcasehomonym = false; 1.176 + int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; 1.177 + // variable-length hash record with word and optional fields 1.178 + struct hentry* hp = 1.179 + (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); 1.180 + if (!hp) return 1; 1.181 + char * hpw = hp->word; 1.182 + strcpy(hpw, word); 1.183 + if (ignorechars != NULL) { 1.184 + if (utf8) { 1.185 + remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); 1.186 + } else { 1.187 + remove_ignored_chars(hpw, ignorechars); 1.188 + } 1.189 + } 1.190 + if (complexprefixes) { 1.191 + if (utf8) reverseword_utf(hpw); else reverseword(hpw); 1.192 + } 1.193 + 1.194 + int i = hash(hpw); 1.195 + 1.196 + hp->blen = (unsigned char) wbl; 1.197 + hp->clen = (unsigned char) wcl; 1.198 + hp->alen = (short) al; 1.199 + hp->astr = aff; 1.200 + hp->next = NULL; 1.201 + hp->next_homonym = NULL; 1.202 + 1.203 + // store the description string or its pointer 1.204 + if (desc) { 1.205 + hp->var = H_OPT; 1.206 + if (aliasm) { 1.207 + hp->var += H_OPT_ALIASM; 1.208 + store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); 1.209 + } else { 1.210 + strcpy(hpw + wbl + 1, desc); 1.211 + if (complexprefixes) { 1.212 + if (utf8) reverseword_utf(HENTRY_DATA(hp)); 1.213 + else reverseword(HENTRY_DATA(hp)); 1.214 + } 1.215 + } 1.216 + if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON; 1.217 + } else hp->var = 0; 1.218 + 1.219 + struct hentry * dp = tableptr[i]; 1.220 + if (!dp) { 1.221 + tableptr[i] = hp; 1.222 + return 0; 1.223 + } 1.224 + while (dp->next != NULL) { 1.225 + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { 1.226 + // remove hidden onlyupcase homonym 1.227 + if (!onlyupcase) { 1.228 + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { 1.229 + free(dp->astr); 1.230 + dp->astr = hp->astr; 1.231 + dp->alen = hp->alen; 1.232 + free(hp); 1.233 + return 0; 1.234 + } else { 1.235 + dp->next_homonym = hp; 1.236 + } 1.237 + } else { 1.238 + upcasehomonym = true; 1.239 + } 1.240 + } 1.241 + dp=dp->next; 1.242 + } 1.243 + if (strcmp(hp->word, dp->word) == 0) { 1.244 + // remove hidden onlyupcase homonym 1.245 + if (!onlyupcase) { 1.246 + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { 1.247 + free(dp->astr); 1.248 + dp->astr = hp->astr; 1.249 + dp->alen = hp->alen; 1.250 + free(hp); 1.251 + return 0; 1.252 + } else { 1.253 + dp->next_homonym = hp; 1.254 + } 1.255 + } else { 1.256 + upcasehomonym = true; 1.257 + } 1.258 + } 1.259 + if (!upcasehomonym) { 1.260 + dp->next = hp; 1.261 + } else { 1.262 + // remove hidden onlyupcase homonym 1.263 + if (hp->astr) free(hp->astr); 1.264 + free(hp); 1.265 + } 1.266 + return 0; 1.267 +} 1.268 + 1.269 +int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, 1.270 + unsigned short * flags, int al, char * dp, int captype) 1.271 +{ 1.272 + // add inner capitalized forms to handle the following allcap forms: 1.273 + // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG 1.274 + // Allcaps with suffixes: CIA's -> CIA'S 1.275 + if (((captype == HUHCAP) || (captype == HUHINITCAP) || 1.276 + ((captype == ALLCAP) && (flags != NULL))) && 1.277 + !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { 1.278 + unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1)); 1.279 + if (!flags2) return 1; 1.280 + if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); 1.281 + flags2[al] = ONLYUPCASEFLAG; 1.282 + if (utf8) { 1.283 + char st[BUFSIZE]; 1.284 + w_char w[BUFSIZE]; 1.285 + int wlen = u8_u16(w, BUFSIZE, word); 1.286 + mkallsmall_utf(w, wlen, langnum); 1.287 + mkallcap_utf(w, 1, langnum); 1.288 + u16_u8(st, BUFSIZE, w, wlen); 1.289 + return add_word(st,wbl,wcl,flags2,al+1,dp, true); 1.290 + } else { 1.291 + mkallsmall(word, csconv); 1.292 + mkinitcap(word, csconv); 1.293 + return add_word(word,wbl,wcl,flags2,al+1,dp, true); 1.294 + } 1.295 + } 1.296 + return 0; 1.297 +} 1.298 + 1.299 +// detect captype and modify word length for UTF-8 encoding 1.300 +int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) { 1.301 + int len; 1.302 + if (utf8) { 1.303 + w_char dest_utf[BUFSIZE]; 1.304 + len = u8_u16(dest_utf, BUFSIZE, word); 1.305 + *captype = get_captype_utf8(dest_utf, len, langnum); 1.306 + } else { 1.307 + len = wbl; 1.308 + *captype = get_captype((char *) word, len, csconv); 1.309 + } 1.310 + return len; 1.311 +} 1.312 + 1.313 +// remove word (personal dictionary function for standalone applications) 1.314 +int HashMgr::remove(const char * word) 1.315 +{ 1.316 + struct hentry * dp = lookup(word); 1.317 + while (dp) { 1.318 + if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { 1.319 + unsigned short * flags = 1.320 + (unsigned short *) malloc(sizeof(short) * (dp->alen + 1)); 1.321 + if (!flags) return 1; 1.322 + for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i]; 1.323 + flags[dp->alen] = forbiddenword; 1.324 + dp->astr = flags; 1.325 + dp->alen++; 1.326 + flag_qsort(flags, 0, dp->alen); 1.327 + } 1.328 + dp = dp->next_homonym; 1.329 + } 1.330 + return 0; 1.331 +} 1.332 + 1.333 +/* remove forbidden flag to add a personal word to the hash */ 1.334 +int HashMgr::remove_forbidden_flag(const char * word) { 1.335 + struct hentry * dp = lookup(word); 1.336 + if (!dp) return 1; 1.337 + while (dp) { 1.338 + if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { 1.339 + if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic. 1.340 + else { 1.341 + unsigned short * flags2 = 1.342 + (unsigned short *) malloc(sizeof(short) * (dp->alen - 1)); 1.343 + if (!flags2) return 1; 1.344 + int i, j = 0; 1.345 + for (i = 0; i < dp->alen; i++) { 1.346 + if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i]; 1.347 + } 1.348 + dp->alen--; 1.349 + dp->astr = flags2; // XXX allowed forbidden words 1.350 + } 1.351 + } 1.352 + dp = dp->next_homonym; 1.353 + } 1.354 + return 0; 1.355 +} 1.356 + 1.357 +// add a custom dic. word to the hash table (public) 1.358 +int HashMgr::add(const char * word) 1.359 +{ 1.360 + unsigned short * flags = NULL; 1.361 + int al = 0; 1.362 + if (remove_forbidden_flag(word)) { 1.363 + int captype; 1.364 + int wbl = strlen(word); 1.365 + int wcl = get_clen_and_captype(word, wbl, &captype); 1.366 + add_word(word, wbl, wcl, flags, al, NULL, false); 1.367 + return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype); 1.368 + } 1.369 + return 0; 1.370 +} 1.371 + 1.372 +int HashMgr::add_with_affix(const char * word, const char * example) 1.373 +{ 1.374 + // detect captype and modify word length for UTF-8 encoding 1.375 + struct hentry * dp = lookup(example); 1.376 + remove_forbidden_flag(word); 1.377 + if (dp && dp->astr) { 1.378 + int captype; 1.379 + int wbl = strlen(word); 1.380 + int wcl = get_clen_and_captype(word, wbl, &captype); 1.381 + if (aliasf) { 1.382 + add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); 1.383 + } else { 1.384 + unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short)); 1.385 + if (flags) { 1.386 + memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short)); 1.387 + add_word(word, wbl, wcl, flags, dp->alen, NULL, false); 1.388 + } else return 1; 1.389 + } 1.390 + return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype); 1.391 + } 1.392 + return 1; 1.393 +} 1.394 + 1.395 +// walk the hash table entry by entry - null at end 1.396 +// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); 1.397 +struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const 1.398 +{ 1.399 + if (hp && hp->next != NULL) return hp->next; 1.400 + for (col++; col < tablesize; col++) { 1.401 + if (tableptr[col]) return tableptr[col]; 1.402 + } 1.403 + // null at end and reset to start 1.404 + col = -1; 1.405 + return NULL; 1.406 +} 1.407 + 1.408 +// load a munched word list and build a hash table on the fly 1.409 +int HashMgr::load_tables(const char * tpath, const char * key) 1.410 +{ 1.411 + int al; 1.412 + char * ap; 1.413 + char * dp; 1.414 + char * dp2; 1.415 + unsigned short * flags; 1.416 + char * ts; 1.417 + 1.418 + // open dictionary file 1.419 + FileMgr * dict = new FileMgr(tpath, key); 1.420 + if (dict == NULL) return 1; 1.421 + 1.422 + // first read the first line of file to get hash table size */ 1.423 + if (!(ts = dict->getline())) { 1.424 + HUNSPELL_WARNING(stderr, "error: empty dic file\n"); 1.425 + delete dict; 1.426 + return 2; 1.427 + } 1.428 + mychomp(ts); 1.429 + 1.430 + /* remove byte order mark */ 1.431 + if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) { 1.432 + memmove(ts, ts+3, strlen(ts+3)+1); 1.433 + // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions 1.434 + } 1.435 + 1.436 + tablesize = atoi(ts); 1.437 + if (tablesize == 0) { 1.438 + HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n"); 1.439 + delete dict; 1.440 + return 4; 1.441 + } 1.442 + tablesize = tablesize + 5 + USERWORD; 1.443 + if ((tablesize %2) == 0) tablesize++; 1.444 + 1.445 + // allocate the hash table 1.446 + tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); 1.447 + if (! tableptr) { 1.448 + delete dict; 1.449 + return 3; 1.450 + } 1.451 + for (int i=0; i<tablesize; i++) tableptr[i] = NULL; 1.452 + 1.453 + // loop through all words on much list and add to hash 1.454 + // table and create word and affix strings 1.455 + 1.456 + while ((ts = dict->getline())) { 1.457 + mychomp(ts); 1.458 + // split each line into word and morphological description 1.459 + dp = ts; 1.460 + while ((dp = strchr(dp, ':'))) { 1.461 + if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { 1.462 + for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); 1.463 + if (dp < ts) { // missing word 1.464 + dp = NULL; 1.465 + } else { 1.466 + *(dp + 1) = '\0'; 1.467 + dp = dp + 2; 1.468 + } 1.469 + break; 1.470 + } 1.471 + dp++; 1.472 + } 1.473 + 1.474 + // tabulator is the old morphological field separator 1.475 + dp2 = strchr(ts, '\t'); 1.476 + if (dp2 && (!dp || dp2 < dp)) { 1.477 + *dp2 = '\0'; 1.478 + dp = dp2 + 1; 1.479 + } 1.480 + 1.481 + // split each line into word and affix char strings 1.482 + // "\/" signs slash in words (not affix separator) 1.483 + // "/" at beginning of the line is word character (not affix separator) 1.484 + ap = strchr(ts,'/'); 1.485 + while (ap) { 1.486 + if (ap == ts) { 1.487 + ap++; 1.488 + continue; 1.489 + } else if (*(ap - 1) != '\\') break; 1.490 + // replace "\/" with "/" 1.491 + for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); 1.492 + ap = strchr(ap,'/'); 1.493 + } 1.494 + 1.495 + if (ap) { 1.496 + *ap = '\0'; 1.497 + if (aliasf) { 1.498 + int index = atoi(ap + 1); 1.499 + al = get_aliasf(index, &flags, dict); 1.500 + if (!al) { 1.501 + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum()); 1.502 + *ap = '\0'; 1.503 + } 1.504 + } else { 1.505 + al = decode_flags(&flags, ap + 1, dict); 1.506 + if (al == -1) { 1.507 + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); 1.508 + delete dict; 1.509 + return 6; 1.510 + } 1.511 + flag_qsort(flags, 0, al); 1.512 + } 1.513 + } else { 1.514 + al = 0; 1.515 + ap = NULL; 1.516 + flags = NULL; 1.517 + } 1.518 + 1.519 + int captype; 1.520 + int wbl = strlen(ts); 1.521 + int wcl = get_clen_and_captype(ts, wbl, &captype); 1.522 + // add the word and its index plus its capitalized form optionally 1.523 + if (add_word(ts,wbl,wcl,flags,al,dp, false) || 1.524 + add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { 1.525 + delete dict; 1.526 + return 5; 1.527 + } 1.528 + } 1.529 + 1.530 + delete dict; 1.531 + return 0; 1.532 +} 1.533 + 1.534 +// the hash function is a simple load and rotate 1.535 +// algorithm borrowed 1.536 + 1.537 +int HashMgr::hash(const char * word) const 1.538 +{ 1.539 + long hv = 0; 1.540 + for (int i=0; i < 4 && *word != 0; i++) 1.541 + hv = (hv << 8) | (*word++); 1.542 + while (*word != 0) { 1.543 + ROTATE(hv,ROTATE_LEN); 1.544 + hv ^= (*word++); 1.545 + } 1.546 + return (unsigned long) hv % tablesize; 1.547 +} 1.548 + 1.549 +int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) { 1.550 + int len; 1.551 + if (*flags == '\0') { 1.552 + *result = NULL; 1.553 + return 0; 1.554 + } 1.555 + switch (flag_mode) { 1.556 + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) 1.557 + len = strlen(flags); 1.558 + if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum()); 1.559 + len /= 2; 1.560 + *result = (unsigned short *) malloc(len * sizeof(short)); 1.561 + if (!*result) return -1; 1.562 + for (int i = 0; i < len; i++) { 1.563 + (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1]; 1.564 + } 1.565 + break; 1.566 + } 1.567 + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233) 1.568 + int i; 1.569 + len = 1; 1.570 + char * src = flags; 1.571 + unsigned short * dest; 1.572 + char * p; 1.573 + for (p = flags; *p; p++) { 1.574 + if (*p == ',') len++; 1.575 + } 1.576 + *result = (unsigned short *) malloc(len * sizeof(short)); 1.577 + if (!*result) return -1; 1.578 + dest = *result; 1.579 + for (p = flags; *p; p++) { 1.580 + if (*p == ',') { 1.581 + i = atoi(src); 1.582 + if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n", 1.583 + af->getlinenum(), i, DEFAULTFLAGS - 1); 1.584 + *dest = (unsigned short) i; 1.585 + if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum()); 1.586 + src = p + 1; 1.587 + dest++; 1.588 + } 1.589 + } 1.590 + i = atoi(src); 1.591 + if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n", 1.592 + af->getlinenum(), i, DEFAULTFLAGS - 1); 1.593 + *dest = (unsigned short) i; 1.594 + if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum()); 1.595 + break; 1.596 + } 1.597 + case FLAG_UNI: { // UTF-8 characters 1.598 + w_char w[BUFSIZE/2]; 1.599 + len = u8_u16(w, BUFSIZE/2, flags); 1.600 + *result = (unsigned short *) malloc(len * sizeof(short)); 1.601 + if (!*result) return -1; 1.602 + memcpy(*result, w, len * sizeof(short)); 1.603 + break; 1.604 + } 1.605 + default: { // Ispell's one-character flags (erfg -> e r f g) 1.606 + unsigned short * dest; 1.607 + len = strlen(flags); 1.608 + *result = (unsigned short *) malloc(len * sizeof(short)); 1.609 + if (!*result) return -1; 1.610 + dest = *result; 1.611 + for (unsigned char * p = (unsigned char *) flags; *p; p++) { 1.612 + *dest = (unsigned short) *p; 1.613 + dest++; 1.614 + } 1.615 + } 1.616 + } 1.617 + return len; 1.618 +} 1.619 + 1.620 +unsigned short HashMgr::decode_flag(const char * f) { 1.621 + unsigned short s = 0; 1.622 + int i; 1.623 + switch (flag_mode) { 1.624 + case FLAG_LONG: 1.625 + s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; 1.626 + break; 1.627 + case FLAG_NUM: 1.628 + i = atoi(f); 1.629 + if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1); 1.630 + s = (unsigned short) i; 1.631 + break; 1.632 + case FLAG_UNI: 1.633 + u8_u16((w_char *) &s, 1, f); 1.634 + break; 1.635 + default: 1.636 + s = (unsigned short) *((unsigned char *)f); 1.637 + } 1.638 + if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); 1.639 + return s; 1.640 +} 1.641 + 1.642 +char * HashMgr::encode_flag(unsigned short f) { 1.643 + unsigned char ch[10]; 1.644 + if (f==0) return mystrdup("(NULL)"); 1.645 + if (flag_mode == FLAG_LONG) { 1.646 + ch[0] = (unsigned char) (f >> 8); 1.647 + ch[1] = (unsigned char) (f - ((f >> 8) << 8)); 1.648 + ch[2] = '\0'; 1.649 + } else if (flag_mode == FLAG_NUM) { 1.650 + sprintf((char *) ch, "%d", f); 1.651 + } else if (flag_mode == FLAG_UNI) { 1.652 + u16_u8((char *) &ch, 10, (w_char *) &f, 1); 1.653 + } else { 1.654 + ch[0] = (unsigned char) (f); 1.655 + ch[1] = '\0'; 1.656 + } 1.657 + return mystrdup((char *) ch); 1.658 +} 1.659 + 1.660 +// read in aff file and set flag mode 1.661 +int HashMgr::load_config(const char * affpath, const char * key) 1.662 +{ 1.663 + char * line; // io buffers 1.664 + int firstline = 1; 1.665 + 1.666 + // open the affix file 1.667 + FileMgr * afflst = new FileMgr(affpath, key); 1.668 + if (!afflst) { 1.669 + HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath); 1.670 + return 1; 1.671 + } 1.672 + 1.673 + // read in each line ignoring any that do not 1.674 + // start with a known line type indicator 1.675 + 1.676 + while ((line = afflst->getline())) { 1.677 + mychomp(line); 1.678 + 1.679 + /* remove byte order mark */ 1.680 + if (firstline) { 1.681 + firstline = 0; 1.682 + if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1); 1.683 + } 1.684 + 1.685 + /* parse in the try string */ 1.686 + if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { 1.687 + if (flag_mode != FLAG_CHAR) { 1.688 + HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum()); 1.689 + } 1.690 + if (strstr(line, "long")) flag_mode = FLAG_LONG; 1.691 + if (strstr(line, "num")) flag_mode = FLAG_NUM; 1.692 + if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; 1.693 + if (flag_mode == FLAG_CHAR) { 1.694 + HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum()); 1.695 + } 1.696 + } 1.697 + if (strncmp(line,"FORBIDDENWORD",13) == 0) { 1.698 + char * st = NULL; 1.699 + if (parse_string(line, &st, afflst->getlinenum())) { 1.700 + delete afflst; 1.701 + return 1; 1.702 + } 1.703 + forbiddenword = decode_flag(st); 1.704 + free(st); 1.705 + } 1.706 + if (strncmp(line, "SET", 3) == 0) { 1.707 + if (parse_string(line, &enc, afflst->getlinenum())) { 1.708 + delete afflst; 1.709 + return 1; 1.710 + } 1.711 + if (strcmp(enc, "UTF-8") == 0) { 1.712 + utf8 = 1; 1.713 +#ifndef OPENOFFICEORG 1.714 +#ifndef MOZILLA_CLIENT 1.715 + initialize_utf_tbl(); 1.716 +#endif 1.717 +#endif 1.718 + } else csconv = get_current_cs(enc); 1.719 + } 1.720 + if (strncmp(line, "LANG", 4) == 0) { 1.721 + if (parse_string(line, &lang, afflst->getlinenum())) { 1.722 + delete afflst; 1.723 + return 1; 1.724 + } 1.725 + langnum = get_lang_num(lang); 1.726 + } 1.727 + 1.728 + /* parse in the ignored characters (for example, Arabic optional diacritics characters */ 1.729 + if (strncmp(line,"IGNORE",6) == 0) { 1.730 + if (parse_array(line, &ignorechars, &ignorechars_utf16, 1.731 + &ignorechars_utf16_len, utf8, afflst->getlinenum())) { 1.732 + delete afflst; 1.733 + return 1; 1.734 + } 1.735 + } 1.736 + 1.737 + if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { 1.738 + if (parse_aliasf(line, afflst)) { 1.739 + delete afflst; 1.740 + return 1; 1.741 + } 1.742 + } 1.743 + 1.744 + if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { 1.745 + if (parse_aliasm(line, afflst)) { 1.746 + delete afflst; 1.747 + return 1; 1.748 + } 1.749 + } 1.750 + 1.751 + if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; 1.752 + if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break; 1.753 + } 1.754 + if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING); 1.755 + delete afflst; 1.756 + return 0; 1.757 +} 1.758 + 1.759 +/* parse in the ALIAS table */ 1.760 +int HashMgr::parse_aliasf(char * line, FileMgr * af) 1.761 +{ 1.762 + if (numaliasf != 0) { 1.763 + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum()); 1.764 + return 1; 1.765 + } 1.766 + char * tp = line; 1.767 + char * piece; 1.768 + int i = 0; 1.769 + int np = 0; 1.770 + piece = mystrsep(&tp, 0); 1.771 + while (piece) { 1.772 + if (*piece != '\0') { 1.773 + switch(i) { 1.774 + case 0: { np++; break; } 1.775 + case 1: { 1.776 + numaliasf = atoi(piece); 1.777 + if (numaliasf < 1) { 1.778 + numaliasf = 0; 1.779 + aliasf = NULL; 1.780 + aliasflen = NULL; 1.781 + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum()); 1.782 + return 1; 1.783 + } 1.784 + aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *)); 1.785 + aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short)); 1.786 + if (!aliasf || !aliasflen) { 1.787 + numaliasf = 0; 1.788 + if (aliasf) free(aliasf); 1.789 + if (aliasflen) free(aliasflen); 1.790 + aliasf = NULL; 1.791 + aliasflen = NULL; 1.792 + return 1; 1.793 + } 1.794 + np++; 1.795 + break; 1.796 + } 1.797 + default: break; 1.798 + } 1.799 + i++; 1.800 + } 1.801 + piece = mystrsep(&tp, 0); 1.802 + } 1.803 + if (np != 2) { 1.804 + numaliasf = 0; 1.805 + free(aliasf); 1.806 + free(aliasflen); 1.807 + aliasf = NULL; 1.808 + aliasflen = NULL; 1.809 + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum()); 1.810 + return 1; 1.811 + } 1.812 + 1.813 + /* now parse the numaliasf lines to read in the remainder of the table */ 1.814 + char * nl; 1.815 + for (int j=0; j < numaliasf; j++) { 1.816 + if (!(nl = af->getline())) return 1; 1.817 + mychomp(nl); 1.818 + tp = nl; 1.819 + i = 0; 1.820 + aliasf[j] = NULL; 1.821 + aliasflen[j] = 0; 1.822 + piece = mystrsep(&tp, 0); 1.823 + while (piece) { 1.824 + if (*piece != '\0') { 1.825 + switch(i) { 1.826 + case 0: { 1.827 + if (strncmp(piece,"AF",2) != 0) { 1.828 + numaliasf = 0; 1.829 + free(aliasf); 1.830 + free(aliasflen); 1.831 + aliasf = NULL; 1.832 + aliasflen = NULL; 1.833 + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); 1.834 + return 1; 1.835 + } 1.836 + break; 1.837 + } 1.838 + case 1: { 1.839 + aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af); 1.840 + flag_qsort(aliasf[j], 0, aliasflen[j]); 1.841 + break; 1.842 + } 1.843 + default: break; 1.844 + } 1.845 + i++; 1.846 + } 1.847 + piece = mystrsep(&tp, 0); 1.848 + } 1.849 + if (!aliasf[j]) { 1.850 + free(aliasf); 1.851 + free(aliasflen); 1.852 + aliasf = NULL; 1.853 + aliasflen = NULL; 1.854 + numaliasf = 0; 1.855 + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); 1.856 + return 1; 1.857 + } 1.858 + } 1.859 + return 0; 1.860 +} 1.861 + 1.862 +int HashMgr::is_aliasf() { 1.863 + return (aliasf != NULL); 1.864 +} 1.865 + 1.866 +int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) { 1.867 + if ((index > 0) && (index <= numaliasf)) { 1.868 + *fvec = aliasf[index - 1]; 1.869 + return aliasflen[index - 1]; 1.870 + } 1.871 + HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index); 1.872 + *fvec = NULL; 1.873 + return 0; 1.874 +} 1.875 + 1.876 +/* parse morph alias definitions */ 1.877 +int HashMgr::parse_aliasm(char * line, FileMgr * af) 1.878 +{ 1.879 + if (numaliasm != 0) { 1.880 + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum()); 1.881 + return 1; 1.882 + } 1.883 + char * tp = line; 1.884 + char * piece; 1.885 + int i = 0; 1.886 + int np = 0; 1.887 + piece = mystrsep(&tp, 0); 1.888 + while (piece) { 1.889 + if (*piece != '\0') { 1.890 + switch(i) { 1.891 + case 0: { np++; break; } 1.892 + case 1: { 1.893 + numaliasm = atoi(piece); 1.894 + if (numaliasm < 1) { 1.895 + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum()); 1.896 + return 1; 1.897 + } 1.898 + aliasm = (char **) malloc(numaliasm * sizeof(char *)); 1.899 + if (!aliasm) { 1.900 + numaliasm = 0; 1.901 + return 1; 1.902 + } 1.903 + np++; 1.904 + break; 1.905 + } 1.906 + default: break; 1.907 + } 1.908 + i++; 1.909 + } 1.910 + piece = mystrsep(&tp, 0); 1.911 + } 1.912 + if (np != 2) { 1.913 + numaliasm = 0; 1.914 + free(aliasm); 1.915 + aliasm = NULL; 1.916 + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum()); 1.917 + return 1; 1.918 + } 1.919 + 1.920 + /* now parse the numaliasm lines to read in the remainder of the table */ 1.921 + char * nl = line; 1.922 + for (int j=0; j < numaliasm; j++) { 1.923 + if (!(nl = af->getline())) return 1; 1.924 + mychomp(nl); 1.925 + tp = nl; 1.926 + i = 0; 1.927 + aliasm[j] = NULL; 1.928 + piece = mystrsep(&tp, ' '); 1.929 + while (piece) { 1.930 + if (*piece != '\0') { 1.931 + switch(i) { 1.932 + case 0: { 1.933 + if (strncmp(piece,"AM",2) != 0) { 1.934 + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); 1.935 + numaliasm = 0; 1.936 + free(aliasm); 1.937 + aliasm = NULL; 1.938 + return 1; 1.939 + } 1.940 + break; 1.941 + } 1.942 + case 1: { 1.943 + // add the remaining of the line 1.944 + if (*tp) { 1.945 + *(tp - 1) = ' '; 1.946 + tp = tp + strlen(tp); 1.947 + } 1.948 + if (complexprefixes) { 1.949 + if (utf8) reverseword_utf(piece); 1.950 + else reverseword(piece); 1.951 + } 1.952 + aliasm[j] = mystrdup(piece); 1.953 + if (!aliasm[j]) { 1.954 + numaliasm = 0; 1.955 + free(aliasm); 1.956 + aliasm = NULL; 1.957 + return 1; 1.958 + } 1.959 + break; } 1.960 + default: break; 1.961 + } 1.962 + i++; 1.963 + } 1.964 + piece = mystrsep(&tp, ' '); 1.965 + } 1.966 + if (!aliasm[j]) { 1.967 + numaliasm = 0; 1.968 + free(aliasm); 1.969 + aliasm = NULL; 1.970 + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); 1.971 + return 1; 1.972 + } 1.973 + } 1.974 + return 0; 1.975 +} 1.976 + 1.977 +int HashMgr::is_aliasm() { 1.978 + return (aliasm != NULL); 1.979 +} 1.980 + 1.981 +char * HashMgr::get_aliasm(int index) { 1.982 + if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; 1.983 + HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); 1.984 + return NULL; 1.985 +}