michael@0: /******* BEGIN LICENSE BLOCK ******* michael@0: * Version: MPL 1.1/GPL 2.0/LGPL 2.1 michael@0: * michael@0: * The contents of this file are subject to the Mozilla Public License Version michael@0: * 1.1 (the "License"); you may not use this file except in compliance with michael@0: * the License. You may obtain a copy of the License at michael@0: * http://www.mozilla.org/MPL/ michael@0: * michael@0: * Software distributed under the License is distributed on an "AS IS" basis, michael@0: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License michael@0: * for the specific language governing rights and limitations under the michael@0: * License. michael@0: * michael@0: * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) michael@0: * and László Németh (Hunspell). Portions created by the Initial Developers michael@0: * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. michael@0: * michael@0: * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) michael@0: * David Einstein (deinst@world.std.com) michael@0: * László Németh (nemethl@gyorsposta.hu) michael@0: * Caolan McNamara (caolanm@redhat.com) michael@0: * Davide Prina michael@0: * Giuseppe Modugno michael@0: * Gianluca Turconi michael@0: * Simon Brouwer michael@0: * Noll Janos michael@0: * Biro Arpad michael@0: * Goldman Eleonora michael@0: * Sarlos Tamas michael@0: * Bencsath Boldizsar michael@0: * Halacsy Peter michael@0: * Dvornik Laszlo michael@0: * Gefferth Andras michael@0: * Nagy Viktor michael@0: * Varga Daniel michael@0: * Chris Halls michael@0: * Rene Engelhard michael@0: * Bram Moolenaar michael@0: * Dafydd Jones michael@0: * Harri Pitkanen michael@0: * Andras Timar michael@0: * Tor Lillqvist michael@0: * michael@0: * Alternatively, the contents of this file may be used under the terms of michael@0: * either the GNU General Public License Version 2 or later (the "GPL"), or michael@0: * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), michael@0: * in which case the provisions of the GPL or the LGPL are applicable instead michael@0: * of those above. If you wish to allow use of your version of this file only michael@0: * under the terms of either the GPL or the LGPL, and not to allow others to michael@0: * use your version of this file under the terms of the MPL, indicate your michael@0: * decision by deleting the provisions above and replace them with the notice michael@0: * and other provisions required by the GPL or the LGPL. If you do not delete michael@0: * the provisions above, a recipient may use your version of this file under michael@0: * the terms of any one of the MPL, the GPL or the LGPL. michael@0: * michael@0: ******* END LICENSE BLOCK *******/ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "hashmgr.hxx" michael@0: #include "csutil.hxx" michael@0: #include "atypes.hxx" michael@0: michael@0: // build a hash table from a munched word list michael@0: michael@0: HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) michael@0: { michael@0: tablesize = 0; michael@0: tableptr = NULL; michael@0: flag_mode = FLAG_CHAR; michael@0: complexprefixes = 0; michael@0: utf8 = 0; michael@0: langnum = 0; michael@0: lang = NULL; michael@0: enc = NULL; michael@0: csconv = 0; michael@0: ignorechars = NULL; michael@0: ignorechars_utf16 = NULL; michael@0: ignorechars_utf16_len = 0; michael@0: numaliasf = 0; michael@0: aliasf = NULL; michael@0: numaliasm = 0; michael@0: aliasm = NULL; michael@0: forbiddenword = FORBIDDENWORD; // forbidden word signing flag michael@0: load_config(apath, key); michael@0: int ec = load_tables(tpath, key); michael@0: if (ec) { michael@0: /* error condition - what should we do here */ michael@0: HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); michael@0: if (tableptr) { michael@0: free(tableptr); michael@0: tableptr = NULL; michael@0: } michael@0: tablesize = 0; michael@0: } michael@0: } michael@0: michael@0: michael@0: HashMgr::~HashMgr() michael@0: { michael@0: if (tableptr) { michael@0: // now pass through hash table freeing up everything michael@0: // go through column by column of the table michael@0: for (int i=0; i < tablesize; i++) { michael@0: struct hentry * pt = tableptr[i]; michael@0: struct hentry * nt = NULL; michael@0: while(pt) { michael@0: nt = pt->next; michael@0: if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr); michael@0: free(pt); michael@0: pt = nt; michael@0: } michael@0: } michael@0: free(tableptr); michael@0: } michael@0: tablesize = 0; michael@0: michael@0: if (aliasf) { michael@0: for (int j = 0; j < (numaliasf); j++) free(aliasf[j]); michael@0: free(aliasf); michael@0: aliasf = NULL; michael@0: if (aliasflen) { michael@0: free(aliasflen); michael@0: aliasflen = NULL; michael@0: } michael@0: } michael@0: if (aliasm) { michael@0: for (int j = 0; j < (numaliasm); j++) free(aliasm[j]); michael@0: free(aliasm); michael@0: aliasm = NULL; michael@0: } michael@0: michael@0: #ifndef OPENOFFICEORG michael@0: #ifndef MOZILLA_CLIENT michael@0: if (utf8) free_utf_tbl(); michael@0: #endif michael@0: #endif michael@0: michael@0: if (enc) free(enc); michael@0: if (lang) free(lang); michael@0: michael@0: if (ignorechars) free(ignorechars); michael@0: if (ignorechars_utf16) free(ignorechars_utf16); michael@0: michael@0: #ifdef MOZILLA_CLIENT michael@0: delete [] csconv; michael@0: #endif michael@0: } michael@0: michael@0: // lookup a root word in the hashtable michael@0: michael@0: struct hentry * HashMgr::lookup(const char *word) const michael@0: { michael@0: struct hentry * dp; michael@0: if (tableptr) { michael@0: dp = tableptr[hash(word)]; michael@0: if (!dp) return NULL; michael@0: for ( ; dp != NULL; dp = dp->next) { michael@0: if (strcmp(word, dp->word) == 0) return dp; michael@0: } michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: // add a word to the hash table (private) michael@0: int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, michael@0: int al, const char * desc, bool onlyupcase) michael@0: { michael@0: bool upcasehomonym = false; michael@0: int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; michael@0: // variable-length hash record with word and optional fields michael@0: struct hentry* hp = michael@0: (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); michael@0: if (!hp) return 1; michael@0: char * hpw = hp->word; michael@0: strcpy(hpw, word); michael@0: if (ignorechars != NULL) { michael@0: if (utf8) { michael@0: remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len); michael@0: } else { michael@0: remove_ignored_chars(hpw, ignorechars); michael@0: } michael@0: } michael@0: if (complexprefixes) { michael@0: if (utf8) reverseword_utf(hpw); else reverseword(hpw); michael@0: } michael@0: michael@0: int i = hash(hpw); michael@0: michael@0: hp->blen = (unsigned char) wbl; michael@0: hp->clen = (unsigned char) wcl; michael@0: hp->alen = (short) al; michael@0: hp->astr = aff; michael@0: hp->next = NULL; michael@0: hp->next_homonym = NULL; michael@0: michael@0: // store the description string or its pointer michael@0: if (desc) { michael@0: hp->var = H_OPT; michael@0: if (aliasm) { michael@0: hp->var += H_OPT_ALIASM; michael@0: store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); michael@0: } else { michael@0: strcpy(hpw + wbl + 1, desc); michael@0: if (complexprefixes) { michael@0: if (utf8) reverseword_utf(HENTRY_DATA(hp)); michael@0: else reverseword(HENTRY_DATA(hp)); michael@0: } michael@0: } michael@0: if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON; michael@0: } else hp->var = 0; michael@0: michael@0: struct hentry * dp = tableptr[i]; michael@0: if (!dp) { michael@0: tableptr[i] = hp; michael@0: return 0; michael@0: } michael@0: while (dp->next != NULL) { michael@0: if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { michael@0: // remove hidden onlyupcase homonym michael@0: if (!onlyupcase) { michael@0: if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { michael@0: free(dp->astr); michael@0: dp->astr = hp->astr; michael@0: dp->alen = hp->alen; michael@0: free(hp); michael@0: return 0; michael@0: } else { michael@0: dp->next_homonym = hp; michael@0: } michael@0: } else { michael@0: upcasehomonym = true; michael@0: } michael@0: } michael@0: dp=dp->next; michael@0: } michael@0: if (strcmp(hp->word, dp->word) == 0) { michael@0: // remove hidden onlyupcase homonym michael@0: if (!onlyupcase) { michael@0: if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { michael@0: free(dp->astr); michael@0: dp->astr = hp->astr; michael@0: dp->alen = hp->alen; michael@0: free(hp); michael@0: return 0; michael@0: } else { michael@0: dp->next_homonym = hp; michael@0: } michael@0: } else { michael@0: upcasehomonym = true; michael@0: } michael@0: } michael@0: if (!upcasehomonym) { michael@0: dp->next = hp; michael@0: } else { michael@0: // remove hidden onlyupcase homonym michael@0: if (hp->astr) free(hp->astr); michael@0: free(hp); michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, michael@0: unsigned short * flags, int al, char * dp, int captype) michael@0: { michael@0: // add inner capitalized forms to handle the following allcap forms: michael@0: // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG michael@0: // Allcaps with suffixes: CIA's -> CIA'S michael@0: if (((captype == HUHCAP) || (captype == HUHINITCAP) || michael@0: ((captype == ALLCAP) && (flags != NULL))) && michael@0: !((flags != NULL) && TESTAFF(flags, forbiddenword, al))) { michael@0: unsigned short * flags2 = (unsigned short *) malloc (sizeof(unsigned short) * (al+1)); michael@0: if (!flags2) return 1; michael@0: if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); michael@0: flags2[al] = ONLYUPCASEFLAG; michael@0: if (utf8) { michael@0: char st[BUFSIZE]; michael@0: w_char w[BUFSIZE]; michael@0: int wlen = u8_u16(w, BUFSIZE, word); michael@0: mkallsmall_utf(w, wlen, langnum); michael@0: mkallcap_utf(w, 1, langnum); michael@0: u16_u8(st, BUFSIZE, w, wlen); michael@0: return add_word(st,wbl,wcl,flags2,al+1,dp, true); michael@0: } else { michael@0: mkallsmall(word, csconv); michael@0: mkinitcap(word, csconv); michael@0: return add_word(word,wbl,wcl,flags2,al+1,dp, true); michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: // detect captype and modify word length for UTF-8 encoding michael@0: int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) { michael@0: int len; michael@0: if (utf8) { michael@0: w_char dest_utf[BUFSIZE]; michael@0: len = u8_u16(dest_utf, BUFSIZE, word); michael@0: *captype = get_captype_utf8(dest_utf, len, langnum); michael@0: } else { michael@0: len = wbl; michael@0: *captype = get_captype((char *) word, len, csconv); michael@0: } michael@0: return len; michael@0: } michael@0: michael@0: // remove word (personal dictionary function for standalone applications) michael@0: int HashMgr::remove(const char * word) michael@0: { michael@0: struct hentry * dp = lookup(word); michael@0: while (dp) { michael@0: if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { michael@0: unsigned short * flags = michael@0: (unsigned short *) malloc(sizeof(short) * (dp->alen + 1)); michael@0: if (!flags) return 1; michael@0: for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i]; michael@0: flags[dp->alen] = forbiddenword; michael@0: dp->astr = flags; michael@0: dp->alen++; michael@0: flag_qsort(flags, 0, dp->alen); michael@0: } michael@0: dp = dp->next_homonym; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: /* remove forbidden flag to add a personal word to the hash */ michael@0: int HashMgr::remove_forbidden_flag(const char * word) { michael@0: struct hentry * dp = lookup(word); michael@0: if (!dp) return 1; michael@0: while (dp) { michael@0: if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) { michael@0: if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic. michael@0: else { michael@0: unsigned short * flags2 = michael@0: (unsigned short *) malloc(sizeof(short) * (dp->alen - 1)); michael@0: if (!flags2) return 1; michael@0: int i, j = 0; michael@0: for (i = 0; i < dp->alen; i++) { michael@0: if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i]; michael@0: } michael@0: dp->alen--; michael@0: dp->astr = flags2; // XXX allowed forbidden words michael@0: } michael@0: } michael@0: dp = dp->next_homonym; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: // add a custom dic. word to the hash table (public) michael@0: int HashMgr::add(const char * word) michael@0: { michael@0: unsigned short * flags = NULL; michael@0: int al = 0; michael@0: if (remove_forbidden_flag(word)) { michael@0: int captype; michael@0: int wbl = strlen(word); michael@0: int wcl = get_clen_and_captype(word, wbl, &captype); michael@0: add_word(word, wbl, wcl, flags, al, NULL, false); michael@0: return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype); michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int HashMgr::add_with_affix(const char * word, const char * example) michael@0: { michael@0: // detect captype and modify word length for UTF-8 encoding michael@0: struct hentry * dp = lookup(example); michael@0: remove_forbidden_flag(word); michael@0: if (dp && dp->astr) { michael@0: int captype; michael@0: int wbl = strlen(word); michael@0: int wcl = get_clen_and_captype(word, wbl, &captype); michael@0: if (aliasf) { michael@0: add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false); michael@0: } else { michael@0: unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short)); michael@0: if (flags) { michael@0: memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short)); michael@0: add_word(word, wbl, wcl, flags, dp->alen, NULL, false); michael@0: } else return 1; michael@0: } michael@0: return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype); michael@0: } michael@0: return 1; michael@0: } michael@0: michael@0: // walk the hash table entry by entry - null at end michael@0: // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); michael@0: struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const michael@0: { michael@0: if (hp && hp->next != NULL) return hp->next; michael@0: for (col++; col < tablesize; col++) { michael@0: if (tableptr[col]) return tableptr[col]; michael@0: } michael@0: // null at end and reset to start michael@0: col = -1; michael@0: return NULL; michael@0: } michael@0: michael@0: // load a munched word list and build a hash table on the fly michael@0: int HashMgr::load_tables(const char * tpath, const char * key) michael@0: { michael@0: int al; michael@0: char * ap; michael@0: char * dp; michael@0: char * dp2; michael@0: unsigned short * flags; michael@0: char * ts; michael@0: michael@0: // open dictionary file michael@0: FileMgr * dict = new FileMgr(tpath, key); michael@0: if (dict == NULL) return 1; michael@0: michael@0: // first read the first line of file to get hash table size */ michael@0: if (!(ts = dict->getline())) { michael@0: HUNSPELL_WARNING(stderr, "error: empty dic file\n"); michael@0: delete dict; michael@0: return 2; michael@0: } michael@0: mychomp(ts); michael@0: michael@0: /* remove byte order mark */ michael@0: if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) { michael@0: memmove(ts, ts+3, strlen(ts+3)+1); michael@0: // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions michael@0: } michael@0: michael@0: tablesize = atoi(ts); michael@0: if (tablesize == 0) { michael@0: HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n"); michael@0: delete dict; michael@0: return 4; michael@0: } michael@0: tablesize = tablesize + 5 + USERWORD; michael@0: if ((tablesize %2) == 0) tablesize++; michael@0: michael@0: // allocate the hash table michael@0: tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); michael@0: if (! tableptr) { michael@0: delete dict; michael@0: return 3; michael@0: } michael@0: for (int i=0; igetline())) { michael@0: mychomp(ts); michael@0: // split each line into word and morphological description michael@0: dp = ts; michael@0: while ((dp = strchr(dp, ':'))) { michael@0: if ((dp > ts + 3) && (*(dp - 3) == ' ' || *(dp - 3) == '\t')) { michael@0: for (dp -= 4; dp >= ts && (*dp == ' ' || *dp == '\t'); dp--); michael@0: if (dp < ts) { // missing word michael@0: dp = NULL; michael@0: } else { michael@0: *(dp + 1) = '\0'; michael@0: dp = dp + 2; michael@0: } michael@0: break; michael@0: } michael@0: dp++; michael@0: } michael@0: michael@0: // tabulator is the old morphological field separator michael@0: dp2 = strchr(ts, '\t'); michael@0: if (dp2 && (!dp || dp2 < dp)) { michael@0: *dp2 = '\0'; michael@0: dp = dp2 + 1; michael@0: } michael@0: michael@0: // split each line into word and affix char strings michael@0: // "\/" signs slash in words (not affix separator) michael@0: // "/" at beginning of the line is word character (not affix separator) michael@0: ap = strchr(ts,'/'); michael@0: while (ap) { michael@0: if (ap == ts) { michael@0: ap++; michael@0: continue; michael@0: } else if (*(ap - 1) != '\\') break; michael@0: // replace "\/" with "/" michael@0: for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++); michael@0: ap = strchr(ap,'/'); michael@0: } michael@0: michael@0: if (ap) { michael@0: *ap = '\0'; michael@0: if (aliasf) { michael@0: int index = atoi(ap + 1); michael@0: al = get_aliasf(index, &flags, dict); michael@0: if (!al) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum()); michael@0: *ap = '\0'; michael@0: } michael@0: } else { michael@0: al = decode_flags(&flags, ap + 1, dict); michael@0: if (al == -1) { michael@0: HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); michael@0: delete dict; michael@0: return 6; michael@0: } michael@0: flag_qsort(flags, 0, al); michael@0: } michael@0: } else { michael@0: al = 0; michael@0: ap = NULL; michael@0: flags = NULL; michael@0: } michael@0: michael@0: int captype; michael@0: int wbl = strlen(ts); michael@0: int wcl = get_clen_and_captype(ts, wbl, &captype); michael@0: // add the word and its index plus its capitalized form optionally michael@0: if (add_word(ts,wbl,wcl,flags,al,dp, false) || michael@0: add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { michael@0: delete dict; michael@0: return 5; michael@0: } michael@0: } michael@0: michael@0: delete dict; michael@0: return 0; michael@0: } michael@0: michael@0: // the hash function is a simple load and rotate michael@0: // algorithm borrowed michael@0: michael@0: int HashMgr::hash(const char * word) const michael@0: { michael@0: long hv = 0; michael@0: for (int i=0; i < 4 && *word != 0; i++) michael@0: hv = (hv << 8) | (*word++); michael@0: while (*word != 0) { michael@0: ROTATE(hv,ROTATE_LEN); michael@0: hv ^= (*word++); michael@0: } michael@0: return (unsigned long) hv % tablesize; michael@0: } michael@0: michael@0: int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) { michael@0: int len; michael@0: if (*flags == '\0') { michael@0: *result = NULL; michael@0: return 0; michael@0: } michael@0: switch (flag_mode) { michael@0: case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) michael@0: len = strlen(flags); michael@0: if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum()); michael@0: len /= 2; michael@0: *result = (unsigned short *) malloc(len * sizeof(short)); michael@0: if (!*result) return -1; michael@0: for (int i = 0; i < len; i++) { michael@0: (*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1]; michael@0: } michael@0: break; michael@0: } michael@0: case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233) michael@0: int i; michael@0: len = 1; michael@0: char * src = flags; michael@0: unsigned short * dest; michael@0: char * p; michael@0: for (p = flags; *p; p++) { michael@0: if (*p == ',') len++; michael@0: } michael@0: *result = (unsigned short *) malloc(len * sizeof(short)); michael@0: if (!*result) return -1; michael@0: dest = *result; michael@0: for (p = flags; *p; p++) { michael@0: if (*p == ',') { michael@0: i = atoi(src); michael@0: if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n", michael@0: af->getlinenum(), i, DEFAULTFLAGS - 1); michael@0: *dest = (unsigned short) i; michael@0: if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum()); michael@0: src = p + 1; michael@0: dest++; michael@0: } michael@0: } michael@0: i = atoi(src); michael@0: if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: line %d: flag id %d is too large (max: %d)\n", michael@0: af->getlinenum(), i, DEFAULTFLAGS - 1); michael@0: *dest = (unsigned short) i; michael@0: if (*dest == 0) HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", af->getlinenum()); michael@0: break; michael@0: } michael@0: case FLAG_UNI: { // UTF-8 characters michael@0: w_char w[BUFSIZE/2]; michael@0: len = u8_u16(w, BUFSIZE/2, flags); michael@0: *result = (unsigned short *) malloc(len * sizeof(short)); michael@0: if (!*result) return -1; michael@0: memcpy(*result, w, len * sizeof(short)); michael@0: break; michael@0: } michael@0: default: { // Ispell's one-character flags (erfg -> e r f g) michael@0: unsigned short * dest; michael@0: len = strlen(flags); michael@0: *result = (unsigned short *) malloc(len * sizeof(short)); michael@0: if (!*result) return -1; michael@0: dest = *result; michael@0: for (unsigned char * p = (unsigned char *) flags; *p; p++) { michael@0: *dest = (unsigned short) *p; michael@0: dest++; michael@0: } michael@0: } michael@0: } michael@0: return len; michael@0: } michael@0: michael@0: unsigned short HashMgr::decode_flag(const char * f) { michael@0: unsigned short s = 0; michael@0: int i; michael@0: switch (flag_mode) { michael@0: case FLAG_LONG: michael@0: s = ((unsigned short) f[0] << 8) + (unsigned short) f[1]; michael@0: break; michael@0: case FLAG_NUM: michael@0: i = atoi(f); michael@0: if (i >= DEFAULTFLAGS) HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", i, DEFAULTFLAGS - 1); michael@0: s = (unsigned short) i; michael@0: break; michael@0: case FLAG_UNI: michael@0: u8_u16((w_char *) &s, 1, f); michael@0: break; michael@0: default: michael@0: s = (unsigned short) *((unsigned char *)f); michael@0: } michael@0: if (s == 0) HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); michael@0: return s; michael@0: } michael@0: michael@0: char * HashMgr::encode_flag(unsigned short f) { michael@0: unsigned char ch[10]; michael@0: if (f==0) return mystrdup("(NULL)"); michael@0: if (flag_mode == FLAG_LONG) { michael@0: ch[0] = (unsigned char) (f >> 8); michael@0: ch[1] = (unsigned char) (f - ((f >> 8) << 8)); michael@0: ch[2] = '\0'; michael@0: } else if (flag_mode == FLAG_NUM) { michael@0: sprintf((char *) ch, "%d", f); michael@0: } else if (flag_mode == FLAG_UNI) { michael@0: u16_u8((char *) &ch, 10, (w_char *) &f, 1); michael@0: } else { michael@0: ch[0] = (unsigned char) (f); michael@0: ch[1] = '\0'; michael@0: } michael@0: return mystrdup((char *) ch); michael@0: } michael@0: michael@0: // read in aff file and set flag mode michael@0: int HashMgr::load_config(const char * affpath, const char * key) michael@0: { michael@0: char * line; // io buffers michael@0: int firstline = 1; michael@0: michael@0: // open the affix file michael@0: FileMgr * afflst = new FileMgr(affpath, key); michael@0: if (!afflst) { michael@0: HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath); michael@0: return 1; michael@0: } michael@0: michael@0: // read in each line ignoring any that do not michael@0: // start with a known line type indicator michael@0: michael@0: while ((line = afflst->getline())) { michael@0: mychomp(line); michael@0: michael@0: /* remove byte order mark */ michael@0: if (firstline) { michael@0: firstline = 0; michael@0: if (strncmp(line,"\xEF\xBB\xBF",3) == 0) memmove(line, line+3, strlen(line+3)+1); michael@0: } michael@0: michael@0: /* parse in the try string */ michael@0: if ((strncmp(line,"FLAG",4) == 0) && isspace(line[4])) { michael@0: if (flag_mode != FLAG_CHAR) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of the FLAG affix file parameter\n", afflst->getlinenum()); michael@0: } michael@0: if (strstr(line, "long")) flag_mode = FLAG_LONG; michael@0: if (strstr(line, "num")) flag_mode = FLAG_NUM; michael@0: if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI; michael@0: if (flag_mode == FLAG_CHAR) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", afflst->getlinenum()); michael@0: } michael@0: } michael@0: if (strncmp(line,"FORBIDDENWORD",13) == 0) { michael@0: char * st = NULL; michael@0: if (parse_string(line, &st, afflst->getlinenum())) { michael@0: delete afflst; michael@0: return 1; michael@0: } michael@0: forbiddenword = decode_flag(st); michael@0: free(st); michael@0: } michael@0: if (strncmp(line, "SET", 3) == 0) { michael@0: if (parse_string(line, &enc, afflst->getlinenum())) { michael@0: delete afflst; michael@0: return 1; michael@0: } michael@0: if (strcmp(enc, "UTF-8") == 0) { michael@0: utf8 = 1; michael@0: #ifndef OPENOFFICEORG michael@0: #ifndef MOZILLA_CLIENT michael@0: initialize_utf_tbl(); michael@0: #endif michael@0: #endif michael@0: } else csconv = get_current_cs(enc); michael@0: } michael@0: if (strncmp(line, "LANG", 4) == 0) { michael@0: if (parse_string(line, &lang, afflst->getlinenum())) { michael@0: delete afflst; michael@0: return 1; michael@0: } michael@0: langnum = get_lang_num(lang); michael@0: } michael@0: michael@0: /* parse in the ignored characters (for example, Arabic optional diacritics characters */ michael@0: if (strncmp(line,"IGNORE",6) == 0) { michael@0: if (parse_array(line, &ignorechars, &ignorechars_utf16, michael@0: &ignorechars_utf16_len, utf8, afflst->getlinenum())) { michael@0: delete afflst; michael@0: return 1; michael@0: } michael@0: } michael@0: michael@0: if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { michael@0: if (parse_aliasf(line, afflst)) { michael@0: delete afflst; michael@0: return 1; michael@0: } michael@0: } michael@0: michael@0: if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { michael@0: if (parse_aliasm(line, afflst)) { michael@0: delete afflst; michael@0: return 1; michael@0: } michael@0: } michael@0: michael@0: if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1; michael@0: if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break; michael@0: } michael@0: if (csconv == NULL) csconv = get_current_cs(SPELL_ENCODING); michael@0: delete afflst; michael@0: return 0; michael@0: } michael@0: michael@0: /* parse in the ALIAS table */ michael@0: int HashMgr::parse_aliasf(char * line, FileMgr * af) michael@0: { michael@0: if (numaliasf != 0) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: char * tp = line; michael@0: char * piece; michael@0: int i = 0; michael@0: int np = 0; michael@0: piece = mystrsep(&tp, 0); michael@0: while (piece) { michael@0: if (*piece != '\0') { michael@0: switch(i) { michael@0: case 0: { np++; break; } michael@0: case 1: { michael@0: numaliasf = atoi(piece); michael@0: if (numaliasf < 1) { michael@0: numaliasf = 0; michael@0: aliasf = NULL; michael@0: aliasflen = NULL; michael@0: HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *)); michael@0: aliasflen = (unsigned short *) malloc(numaliasf * sizeof(short)); michael@0: if (!aliasf || !aliasflen) { michael@0: numaliasf = 0; michael@0: if (aliasf) free(aliasf); michael@0: if (aliasflen) free(aliasflen); michael@0: aliasf = NULL; michael@0: aliasflen = NULL; michael@0: return 1; michael@0: } michael@0: np++; michael@0: break; michael@0: } michael@0: default: break; michael@0: } michael@0: i++; michael@0: } michael@0: piece = mystrsep(&tp, 0); michael@0: } michael@0: if (np != 2) { michael@0: numaliasf = 0; michael@0: free(aliasf); michael@0: free(aliasflen); michael@0: aliasf = NULL; michael@0: aliasflen = NULL; michael@0: HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: michael@0: /* now parse the numaliasf lines to read in the remainder of the table */ michael@0: char * nl; michael@0: for (int j=0; j < numaliasf; j++) { michael@0: if (!(nl = af->getline())) return 1; michael@0: mychomp(nl); michael@0: tp = nl; michael@0: i = 0; michael@0: aliasf[j] = NULL; michael@0: aliasflen[j] = 0; michael@0: piece = mystrsep(&tp, 0); michael@0: while (piece) { michael@0: if (*piece != '\0') { michael@0: switch(i) { michael@0: case 0: { michael@0: if (strncmp(piece,"AF",2) != 0) { michael@0: numaliasf = 0; michael@0: free(aliasf); michael@0: free(aliasflen); michael@0: aliasf = NULL; michael@0: aliasflen = NULL; michael@0: HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: break; michael@0: } michael@0: case 1: { michael@0: aliasflen[j] = (unsigned short) decode_flags(&(aliasf[j]), piece, af); michael@0: flag_qsort(aliasf[j], 0, aliasflen[j]); michael@0: break; michael@0: } michael@0: default: break; michael@0: } michael@0: i++; michael@0: } michael@0: piece = mystrsep(&tp, 0); michael@0: } michael@0: if (!aliasf[j]) { michael@0: free(aliasf); michael@0: free(aliasflen); michael@0: aliasf = NULL; michael@0: aliasflen = NULL; michael@0: numaliasf = 0; michael@0: HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int HashMgr::is_aliasf() { michael@0: return (aliasf != NULL); michael@0: } michael@0: michael@0: int HashMgr::get_aliasf(int index, unsigned short ** fvec, FileMgr * af) { michael@0: if ((index > 0) && (index <= numaliasf)) { michael@0: *fvec = aliasf[index - 1]; michael@0: return aliasflen[index - 1]; michael@0: } michael@0: HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", af->getlinenum(), index); michael@0: *fvec = NULL; michael@0: return 0; michael@0: } michael@0: michael@0: /* parse morph alias definitions */ michael@0: int HashMgr::parse_aliasm(char * line, FileMgr * af) michael@0: { michael@0: if (numaliasm != 0) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: char * tp = line; michael@0: char * piece; michael@0: int i = 0; michael@0: int np = 0; michael@0: piece = mystrsep(&tp, 0); michael@0: while (piece) { michael@0: if (*piece != '\0') { michael@0: switch(i) { michael@0: case 0: { np++; break; } michael@0: case 1: { michael@0: numaliasm = atoi(piece); michael@0: if (numaliasm < 1) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: aliasm = (char **) malloc(numaliasm * sizeof(char *)); michael@0: if (!aliasm) { michael@0: numaliasm = 0; michael@0: return 1; michael@0: } michael@0: np++; michael@0: break; michael@0: } michael@0: default: break; michael@0: } michael@0: i++; michael@0: } michael@0: piece = mystrsep(&tp, 0); michael@0: } michael@0: if (np != 2) { michael@0: numaliasm = 0; michael@0: free(aliasm); michael@0: aliasm = NULL; michael@0: HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: michael@0: /* now parse the numaliasm lines to read in the remainder of the table */ michael@0: char * nl = line; michael@0: for (int j=0; j < numaliasm; j++) { michael@0: if (!(nl = af->getline())) return 1; michael@0: mychomp(nl); michael@0: tp = nl; michael@0: i = 0; michael@0: aliasm[j] = NULL; michael@0: piece = mystrsep(&tp, ' '); michael@0: while (piece) { michael@0: if (*piece != '\0') { michael@0: switch(i) { michael@0: case 0: { michael@0: if (strncmp(piece,"AM",2) != 0) { michael@0: HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); michael@0: numaliasm = 0; michael@0: free(aliasm); michael@0: aliasm = NULL; michael@0: return 1; michael@0: } michael@0: break; michael@0: } michael@0: case 1: { michael@0: // add the remaining of the line michael@0: if (*tp) { michael@0: *(tp - 1) = ' '; michael@0: tp = tp + strlen(tp); michael@0: } michael@0: if (complexprefixes) { michael@0: if (utf8) reverseword_utf(piece); michael@0: else reverseword(piece); michael@0: } michael@0: aliasm[j] = mystrdup(piece); michael@0: if (!aliasm[j]) { michael@0: numaliasm = 0; michael@0: free(aliasm); michael@0: aliasm = NULL; michael@0: return 1; michael@0: } michael@0: break; } michael@0: default: break; michael@0: } michael@0: i++; michael@0: } michael@0: piece = mystrsep(&tp, ' '); michael@0: } michael@0: if (!aliasm[j]) { michael@0: numaliasm = 0; michael@0: free(aliasm); michael@0: aliasm = NULL; michael@0: HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum()); michael@0: return 1; michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int HashMgr::is_aliasm() { michael@0: return (aliasm != NULL); michael@0: } michael@0: michael@0: char * HashMgr::get_aliasm(int index) { michael@0: if ((index > 0) && (index <= numaliasm)) return aliasm[index - 1]; michael@0: HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index); michael@0: return NULL; michael@0: }