michael@0: /******* BEGIN LICENSE BLOCK ******* michael@0: * Version: MPL 1.1/GPL 2.0/LGPL 2.1 michael@0: * michael@0: * The contents of this file are subject to the Mozilla Public License Version michael@0: * 1.1 (the "License"); you may not use this file except in compliance with michael@0: * the License. You may obtain a copy of the License at michael@0: * http://www.mozilla.org/MPL/ michael@0: * michael@0: * Software distributed under the License is distributed on an "AS IS" basis, michael@0: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License michael@0: * for the specific language governing rights and limitations under the michael@0: * License. michael@0: * michael@0: * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) michael@0: * and László Németh (Hunspell). Portions created by the Initial Developers michael@0: * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. michael@0: * michael@0: * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) michael@0: * David Einstein (deinst@world.std.com) michael@0: * László Németh (nemethl@gyorsposta.hu) michael@0: * Caolan McNamara (caolanm@redhat.com) michael@0: * Davide Prina michael@0: * Giuseppe Modugno michael@0: * Gianluca Turconi michael@0: * Simon Brouwer michael@0: * Noll Janos michael@0: * Biro Arpad michael@0: * Goldman Eleonora michael@0: * Sarlos Tamas michael@0: * Bencsath Boldizsar michael@0: * Halacsy Peter michael@0: * Dvornik Laszlo michael@0: * Gefferth Andras michael@0: * Nagy Viktor michael@0: * Varga Daniel michael@0: * Chris Halls michael@0: * Rene Engelhard michael@0: * Bram Moolenaar michael@0: * Dafydd Jones michael@0: * Harri Pitkanen michael@0: * Andras Timar michael@0: * Tor Lillqvist michael@0: * michael@0: * Alternatively, the contents of this file may be used under the terms of michael@0: * either the GNU General Public License Version 2 or later (the "GPL"), or michael@0: * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), michael@0: * in which case the provisions of the GPL or the LGPL are applicable instead michael@0: * of those above. If you wish to allow use of your version of this file only michael@0: * under the terms of either the GPL or the LGPL, and not to allow others to michael@0: * use your version of this file under the terms of the MPL, indicate your michael@0: * decision by deleting the provisions above and replace them with the notice michael@0: * and other provisions required by the GPL or the LGPL. If you do not delete michael@0: * the provisions above, a recipient may use your version of this file under michael@0: * the terms of any one of the MPL, the GPL or the LGPL. michael@0: * michael@0: ******* END LICENSE BLOCK *******/ michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "hunspell.hxx" michael@0: #include "hunspell.h" michael@0: #ifndef MOZILLA_CLIENT michael@0: # include "config.h" michael@0: #endif michael@0: #include "csutil.hxx" michael@0: michael@0: Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key) michael@0: { michael@0: encoding = NULL; michael@0: csconv = NULL; michael@0: utf8 = 0; michael@0: complexprefixes = 0; michael@0: affixpath = mystrdup(affpath); michael@0: maxdic = 0; michael@0: michael@0: /* first set up the hash manager */ michael@0: pHMgr[0] = new HashMgr(dpath, affpath, key); michael@0: if (pHMgr[0]) maxdic = 1; michael@0: michael@0: /* next set up the affix manager */ michael@0: /* it needs access to the hash manager lookup methods */ michael@0: pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); michael@0: michael@0: /* get the preferred try string and the dictionary */ michael@0: /* encoding from the Affix Manager for that dictionary */ michael@0: char * try_string = pAMgr->get_try_string(); michael@0: encoding = pAMgr->get_encoding(); michael@0: langnum = pAMgr->get_langnum(); michael@0: utf8 = pAMgr->get_utf8(); michael@0: if (!utf8) michael@0: csconv = get_current_cs(encoding); michael@0: complexprefixes = pAMgr->get_complexprefixes(); michael@0: wordbreak = pAMgr->get_breaktable(); michael@0: michael@0: /* and finally set up the suggestion manager */ michael@0: pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); michael@0: if (try_string) free(try_string); michael@0: } michael@0: michael@0: Hunspell::~Hunspell() michael@0: { michael@0: if (pSMgr) delete pSMgr; michael@0: if (pAMgr) delete pAMgr; michael@0: for (int i = 0; i < maxdic; i++) delete pHMgr[i]; michael@0: maxdic = 0; michael@0: pSMgr = NULL; michael@0: pAMgr = NULL; michael@0: #ifdef MOZILLA_CLIENT michael@0: delete [] csconv; michael@0: #endif michael@0: csconv= NULL; michael@0: if (encoding) free(encoding); michael@0: encoding = NULL; michael@0: if (affixpath) free(affixpath); michael@0: affixpath = NULL; michael@0: } michael@0: michael@0: // load extra dictionaries michael@0: int Hunspell::add_dic(const char * dpath, const char * key) { michael@0: if (maxdic == MAXDIC || !affixpath) return 1; michael@0: pHMgr[maxdic] = new HashMgr(dpath, affixpath, key); michael@0: if (pHMgr[maxdic]) maxdic++; else return 1; michael@0: return 0; michael@0: } michael@0: michael@0: // make a copy of src at destination while removing all leading michael@0: // blanks and removing any trailing periods after recording michael@0: // their presence with the abbreviation flag michael@0: // also since already going through character by character, michael@0: // set the capitalization type michael@0: // return the length of the "cleaned" (and UTF-8 encoded) word michael@0: michael@0: int Hunspell::cleanword2(char * dest, const char * src, michael@0: w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev) michael@0: { michael@0: unsigned char * p = (unsigned char *) dest; michael@0: const unsigned char * q = (const unsigned char * ) src; michael@0: michael@0: // first skip over any leading blanks michael@0: while ((*q != '\0') && (*q == ' ')) q++; michael@0: michael@0: // now strip off any trailing periods (recording their presence) michael@0: *pabbrev = 0; michael@0: int nl = strlen((const char *)q); michael@0: while ((nl > 0) && (*(q+nl-1)=='.')) { michael@0: nl--; michael@0: (*pabbrev)++; michael@0: } michael@0: michael@0: // if no characters are left it can't be capitalized michael@0: if (nl <= 0) { michael@0: *pcaptype = NOCAP; michael@0: *p = '\0'; michael@0: return 0; michael@0: } michael@0: michael@0: strncpy(dest, (char *) q, nl); michael@0: *(dest + nl) = '\0'; michael@0: nl = strlen(dest); michael@0: if (utf8) { michael@0: *nc = u8_u16(dest_utf, MAXWORDLEN, dest); michael@0: // don't check too long words michael@0: if (*nc >= MAXWORDLEN) return 0; michael@0: if (*nc == -1) { // big Unicode character (non BMP area) michael@0: *pcaptype = NOCAP; michael@0: return nl; michael@0: } michael@0: *pcaptype = get_captype_utf8(dest_utf, *nc, langnum); michael@0: } else { michael@0: *pcaptype = get_captype(dest, nl, csconv); michael@0: *nc = nl; michael@0: } michael@0: return nl; michael@0: } michael@0: michael@0: int Hunspell::cleanword(char * dest, const char * src, michael@0: int * pcaptype, int * pabbrev) michael@0: { michael@0: unsigned char * p = (unsigned char *) dest; michael@0: const unsigned char * q = (const unsigned char * ) src; michael@0: int firstcap = 0; michael@0: michael@0: // first skip over any leading blanks michael@0: while ((*q != '\0') && (*q == ' ')) q++; michael@0: michael@0: // now strip off any trailing periods (recording their presence) michael@0: *pabbrev = 0; michael@0: int nl = strlen((const char *)q); michael@0: while ((nl > 0) && (*(q+nl-1)=='.')) { michael@0: nl--; michael@0: (*pabbrev)++; michael@0: } michael@0: michael@0: // if no characters are left it can't be capitalized michael@0: if (nl <= 0) { michael@0: *pcaptype = NOCAP; michael@0: *p = '\0'; michael@0: return 0; michael@0: } michael@0: michael@0: // now determine the capitalization type of the first nl letters michael@0: int ncap = 0; michael@0: int nneutral = 0; michael@0: int nc = 0; michael@0: michael@0: if (!utf8) { michael@0: while (nl > 0) { michael@0: nc++; michael@0: if (csconv[(*q)].ccase) ncap++; michael@0: if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++; michael@0: *p++ = *q++; michael@0: nl--; michael@0: } michael@0: // remember to terminate the destination string michael@0: *p = '\0'; michael@0: firstcap = csconv[(unsigned char)(*dest)].ccase; michael@0: } else { michael@0: unsigned short idx; michael@0: w_char t[MAXWORDLEN]; michael@0: nc = u8_u16(t, MAXWORDLEN, src); michael@0: for (int i = 0; i < nc; i++) { michael@0: idx = (t[i].h << 8) + t[i].l; michael@0: unsigned short low = unicodetolower(idx, langnum); michael@0: if (idx != low) ncap++; michael@0: if (unicodetoupper(idx, langnum) == low) nneutral++; michael@0: } michael@0: u16_u8(dest, MAXWORDUTF8LEN, t, nc); michael@0: if (ncap) { michael@0: idx = (t[0].h << 8) + t[0].l; michael@0: firstcap = (idx != unicodetolower(idx, langnum)); michael@0: } michael@0: } michael@0: michael@0: // now finally set the captype michael@0: if (ncap == 0) { michael@0: *pcaptype = NOCAP; michael@0: } else if ((ncap == 1) && firstcap) { michael@0: *pcaptype = INITCAP; michael@0: } else if ((ncap == nc) || ((ncap + nneutral) == nc)){ michael@0: *pcaptype = ALLCAP; michael@0: } else if ((ncap > 1) && firstcap) { michael@0: *pcaptype = HUHINITCAP; michael@0: } else { michael@0: *pcaptype = HUHCAP; michael@0: } michael@0: return strlen(dest); michael@0: } michael@0: michael@0: void Hunspell::mkallcap(char * p) michael@0: { michael@0: if (utf8) { michael@0: w_char u[MAXWORDLEN]; michael@0: int nc = u8_u16(u, MAXWORDLEN, p); michael@0: unsigned short idx; michael@0: for (int i = 0; i < nc; i++) { michael@0: idx = (u[i].h << 8) + u[i].l; michael@0: if (idx != unicodetoupper(idx, langnum)) { michael@0: u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8); michael@0: u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF); michael@0: } michael@0: } michael@0: u16_u8(p, MAXWORDUTF8LEN, u, nc); michael@0: } else { michael@0: while (*p != '\0') { michael@0: *p = csconv[((unsigned char) *p)].cupper; michael@0: p++; michael@0: } michael@0: } michael@0: } michael@0: michael@0: int Hunspell::mkallcap2(char * p, w_char * u, int nc) michael@0: { michael@0: if (utf8) { michael@0: unsigned short idx; michael@0: for (int i = 0; i < nc; i++) { michael@0: idx = (u[i].h << 8) + u[i].l; michael@0: unsigned short up = unicodetoupper(idx, langnum); michael@0: if (idx != up) { michael@0: u[i].h = (unsigned char) (up >> 8); michael@0: u[i].l = (unsigned char) (up & 0x00FF); michael@0: } michael@0: } michael@0: u16_u8(p, MAXWORDUTF8LEN, u, nc); michael@0: return strlen(p); michael@0: } else { michael@0: while (*p != '\0') { michael@0: *p = csconv[((unsigned char) *p)].cupper; michael@0: p++; michael@0: } michael@0: } michael@0: return nc; michael@0: } michael@0: michael@0: michael@0: void Hunspell::mkallsmall(char * p) michael@0: { michael@0: while (*p != '\0') { michael@0: *p = csconv[((unsigned char) *p)].clower; michael@0: p++; michael@0: } michael@0: } michael@0: michael@0: int Hunspell::mkallsmall2(char * p, w_char * u, int nc) michael@0: { michael@0: if (utf8) { michael@0: unsigned short idx; michael@0: for (int i = 0; i < nc; i++) { michael@0: idx = (u[i].h << 8) + u[i].l; michael@0: unsigned short low = unicodetolower(idx, langnum); michael@0: if (idx != low) { michael@0: u[i].h = (unsigned char) (low >> 8); michael@0: u[i].l = (unsigned char) (low & 0x00FF); michael@0: } michael@0: } michael@0: u16_u8(p, MAXWORDUTF8LEN, u, nc); michael@0: return strlen(p); michael@0: } else { michael@0: while (*p != '\0') { michael@0: *p = csconv[((unsigned char) *p)].clower; michael@0: p++; michael@0: } michael@0: } michael@0: return nc; michael@0: } michael@0: michael@0: // convert UTF-8 sharp S codes to latin 1 michael@0: char * Hunspell::sharps_u8_l1(char * dest, char * source) { michael@0: char * p = dest; michael@0: *p = *source; michael@0: for (p++, source++; *(source - 1); p++, source++) { michael@0: *p = *source; michael@0: if (*source == '\x9F') *--p = '\xDF'; michael@0: } michael@0: return dest; michael@0: } michael@0: michael@0: // recursive search for right ss - sharp s permutations michael@0: hentry * Hunspell::spellsharps(char * base, char * pos, int n, michael@0: int repnum, char * tmp, int * info, char **root) { michael@0: pos = strstr(pos, "ss"); michael@0: if (pos && (n < MAXSHARPS)) { michael@0: *pos = '\xC3'; michael@0: *(pos + 1) = '\x9F'; michael@0: hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root); michael@0: if (h) return h; michael@0: *pos = 's'; michael@0: *(pos + 1) = 's'; michael@0: h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root); michael@0: if (h) return h; michael@0: } else if (repnum > 0) { michael@0: if (utf8) return checkword(base, info, root); michael@0: return checkword(sharps_u8_l1(tmp, base), info, root); michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: int Hunspell::is_keepcase(const hentry * rv) { michael@0: return pAMgr && rv->astr && pAMgr->get_keepcase() && michael@0: TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen); michael@0: } michael@0: michael@0: /* insert a word to the beginning of the suggestion array and return ns */ michael@0: int Hunspell::insert_sug(char ***slst, char * word, int ns) { michael@0: char * dup = mystrdup(word); michael@0: if (!dup) return ns; michael@0: if (ns == MAXSUGGESTION) { michael@0: ns--; michael@0: free((*slst)[ns]); michael@0: } michael@0: for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1]; michael@0: (*slst)[0] = dup; michael@0: return ns + 1; michael@0: } michael@0: michael@0: int Hunspell::spell(const char * word, int * info, char ** root) michael@0: { michael@0: struct hentry * rv=NULL; michael@0: // need larger vector. For example, Turkish capital letter I converted a michael@0: // 2-byte UTF-8 character (dotless i) by mkallsmall. michael@0: char cw[MAXWORDUTF8LEN]; michael@0: char wspace[MAXWORDUTF8LEN]; michael@0: w_char unicw[MAXWORDLEN]; michael@0: // Hunspell supports XML input of the simplified API (see manual) michael@0: if (strcmp(word, SPELL_XML) == 0) return 1; michael@0: int nc = strlen(word); michael@0: int wl2 = 0; michael@0: if (utf8) { michael@0: if (nc >= MAXWORDUTF8LEN) return 0; michael@0: } else { michael@0: if (nc >= MAXWORDLEN) return 0; michael@0: } michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: int wl = 0; michael@0: michael@0: // input conversion michael@0: RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; michael@0: if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); michael@0: else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); michael@0: michael@0: int info2 = 0; michael@0: if (wl == 0 || maxdic == 0) return 1; michael@0: if (root) *root = NULL; michael@0: michael@0: // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.) michael@0: enum { NBEGIN, NNUM, NSEP }; michael@0: int nstate = NBEGIN; michael@0: int i; michael@0: michael@0: for (i = 0; (i < wl); i++) { michael@0: if ((cw[i] <= '9') && (cw[i] >= '0')) { michael@0: nstate = NNUM; michael@0: } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) { michael@0: if ((nstate == NSEP) || (i == 0)) break; michael@0: nstate = NSEP; michael@0: } else break; michael@0: } michael@0: if ((i == wl) && (nstate == NNUM)) return 1; michael@0: if (!info) info = &info2; else *info = 0; michael@0: michael@0: switch(captype) { michael@0: case HUHCAP: michael@0: case HUHINITCAP: michael@0: *info += SPELL_ORIGCAP; michael@0: case NOCAP: { michael@0: rv = checkword(cw, info, root); michael@0: if ((abbv) && !(rv)) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: rv = checkword(wspace, info, root); michael@0: } michael@0: break; michael@0: } michael@0: case ALLCAP: { michael@0: *info += SPELL_ORIGCAP; michael@0: rv = checkword(cw, info, root); michael@0: if (rv) break; michael@0: if (abbv) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: rv = checkword(wspace, info, root); michael@0: if (rv) break; michael@0: } michael@0: // Spec. prefix handling for Catalan, French, Italian: michael@0: // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). michael@0: if (pAMgr && strchr(cw, '\'')) { michael@0: wl = mkallsmall2(cw, unicw, nc); michael@0: //There are no really sane circumstances where this could fail, michael@0: //but anyway... michael@0: if (char * apostrophe = strchr(cw, '\'')) { michael@0: if (utf8) { michael@0: w_char tmpword[MAXWORDLEN]; michael@0: *apostrophe = '\0'; michael@0: wl2 = u8_u16(tmpword, MAXWORDLEN, cw); michael@0: *apostrophe = '\''; michael@0: if (wl2 < nc) { michael@0: mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1); michael@0: rv = checkword(cw, info, root); michael@0: if (rv) break; michael@0: } michael@0: } else { michael@0: mkinitcap2(apostrophe + 1, unicw, nc); michael@0: rv = checkword(cw, info, root); michael@0: if (rv) break; michael@0: } michael@0: } michael@0: mkinitcap2(cw, unicw, nc); michael@0: rv = checkword(cw, info, root); michael@0: if (rv) break; michael@0: } michael@0: if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) { michael@0: char tmpword[MAXWORDUTF8LEN]; michael@0: wl = mkallsmall2(cw, unicw, nc); michael@0: memcpy(wspace,cw,(wl+1)); michael@0: rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); michael@0: if (!rv) { michael@0: wl2 = mkinitcap2(cw, unicw, nc); michael@0: rv = spellsharps(cw, cw, 0, 0, tmpword, info, root); michael@0: } michael@0: if ((abbv) && !(rv)) { michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); michael@0: if (!rv) { michael@0: memcpy(wspace, cw, wl2); michael@0: *(wspace+wl2) = '.'; michael@0: *(wspace+wl2+1) = '\0'; michael@0: rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root); michael@0: } michael@0: } michael@0: if (rv) break; michael@0: } michael@0: } michael@0: case INITCAP: { michael@0: *info += SPELL_ORIGCAP; michael@0: wl = mkallsmall2(cw, unicw, nc); michael@0: memcpy(wspace,cw,(wl+1)); michael@0: wl2 = mkinitcap2(cw, unicw, nc); michael@0: if (captype == INITCAP) *info += SPELL_INITCAP; michael@0: rv = checkword(cw, info, root); michael@0: if (captype == INITCAP) *info -= SPELL_INITCAP; michael@0: // forbid bad capitalization michael@0: // (for example, ijs -> Ijs instead of IJs in Dutch) michael@0: // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag) michael@0: if (*info & SPELL_FORBIDDEN) { michael@0: rv = NULL; michael@0: break; michael@0: } michael@0: if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL; michael@0: if (rv) break; michael@0: michael@0: rv = checkword(wspace, info, root); michael@0: if (abbv && !rv) { michael@0: michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: rv = checkword(wspace, info, root); michael@0: if (!rv) { michael@0: memcpy(wspace, cw, wl2); michael@0: *(wspace+wl2) = '.'; michael@0: *(wspace+wl2+1) = '\0'; michael@0: if (captype == INITCAP) *info += SPELL_INITCAP; michael@0: rv = checkword(wspace, info, root); michael@0: if (captype == INITCAP) *info -= SPELL_INITCAP; michael@0: if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL; michael@0: break; michael@0: } michael@0: } michael@0: if (rv && is_keepcase(rv) && michael@0: ((captype == ALLCAP) || michael@0: // if CHECKSHARPS: KEEPCASE words with \xDF are allowed michael@0: // in INITCAP form, too. michael@0: !(pAMgr->get_checksharps() && michael@0: ((utf8 && strstr(wspace, "\xC3\x9F")) || michael@0: (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (rv) { michael@0: if (pAMgr && pAMgr->get_warn() && rv->astr && michael@0: TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) { michael@0: *info += SPELL_WARN; michael@0: if (pAMgr->get_forbidwarn()) return 0; michael@0: return HUNSPELL_OK_WARN; michael@0: } michael@0: return HUNSPELL_OK; michael@0: } michael@0: michael@0: // recursive breaking at break points michael@0: if (wordbreak) { michael@0: char * s; michael@0: char r; michael@0: int nbr = 0; michael@0: wl = strlen(cw); michael@0: int numbreak = pAMgr ? pAMgr->get_numbreak() : 0; michael@0: michael@0: // calculate break points for recursion limit michael@0: for (int j = 0; j < numbreak; j++) { michael@0: s = cw; michael@0: do { michael@0: s = (char *) strstr(s, wordbreak[j]); michael@0: if (s) { michael@0: nbr++; michael@0: s++; michael@0: } michael@0: } while (s); michael@0: } michael@0: if (nbr >= 10) return 0; michael@0: michael@0: // check boundary patterns (^begin and end$) michael@0: for (int j = 0; j < numbreak; j++) { michael@0: int plen = strlen(wordbreak[j]); michael@0: if (plen == 1 || plen > wl) continue; michael@0: if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0 michael@0: && spell(cw + plen - 1)) return 1; michael@0: if (wordbreak[j][plen - 1] == '$' && michael@0: strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) { michael@0: r = cw[wl - plen + 1]; michael@0: cw[wl - plen + 1] = '\0'; michael@0: if (spell(cw)) return 1; michael@0: cw[wl - plen + 1] = r; michael@0: } michael@0: } michael@0: michael@0: // other patterns michael@0: for (int j = 0; j < numbreak; j++) { michael@0: int plen = strlen(wordbreak[j]); michael@0: s=(char *) strstr(cw, wordbreak[j]); michael@0: if (s && (s > cw) && (s < cw + wl - plen)) { michael@0: if (!spell(s + plen)) continue; michael@0: r = *s; michael@0: *s = '\0'; michael@0: // examine 2 sides of the break point michael@0: if (spell(cw)) return 1; michael@0: *s = r; michael@0: michael@0: // LANG_hu: spec. dash rule michael@0: if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) { michael@0: r = s[1]; michael@0: s[1] = '\0'; michael@0: if (spell(cw)) return 1; // check the first part with dash michael@0: s[1] = r; michael@0: } michael@0: // end of LANG speficic region michael@0: michael@0: } michael@0: } michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: struct hentry * Hunspell::checkword(const char * w, int * info, char ** root) michael@0: { michael@0: struct hentry * he = NULL; michael@0: int len, i; michael@0: char w2[MAXWORDUTF8LEN]; michael@0: const char * word; michael@0: michael@0: char * ignoredchars = pAMgr->get_ignore(); michael@0: if (ignoredchars != NULL) { michael@0: strcpy(w2, w); michael@0: if (utf8) { michael@0: int ignoredchars_utf16_len; michael@0: unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len); michael@0: remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len); michael@0: } else { michael@0: remove_ignored_chars(w2,ignoredchars); michael@0: } michael@0: word = w2; michael@0: } else word = w; michael@0: michael@0: len = strlen(word); michael@0: michael@0: if (!len) michael@0: return NULL; michael@0: michael@0: // word reversing wrapper for complex prefixes michael@0: if (complexprefixes) { michael@0: if (word != w2) { michael@0: strcpy(w2, word); michael@0: word = w2; michael@0: } michael@0: if (utf8) reverseword_utf(w2); else reverseword(w2); michael@0: } michael@0: michael@0: // look word in hash table michael@0: for (i = 0; (i < maxdic) && !he; i ++) { michael@0: he = (pHMgr[i])->lookup(word); michael@0: michael@0: // check forbidden and onlyincompound words michael@0: if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { michael@0: if (info) *info += SPELL_FORBIDDEN; michael@0: // LANG_hu section: set dash information for suggestions michael@0: if (langnum == LANG_hu) { michael@0: if (pAMgr->get_compoundflag() && michael@0: TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) { michael@0: if (info) *info += SPELL_COMPOUND; michael@0: } michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: // he = next not needaffix, onlyincompound homonym or onlyupcase word michael@0: while (he && (he->astr) && michael@0: ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) || michael@0: (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || michael@0: (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)) michael@0: )) he = he->next_homonym; michael@0: } michael@0: michael@0: // check with affixes michael@0: if (!he && pAMgr) { michael@0: // try stripping off affixes */ michael@0: he = pAMgr->affix_check(word, len, 0); michael@0: michael@0: // check compound restriction and onlyupcase michael@0: if (he && he->astr && ( michael@0: (pAMgr->get_onlyincompound() && michael@0: TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || michael@0: (info && (*info & SPELL_INITCAP) && michael@0: TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) { michael@0: he = NULL; michael@0: } michael@0: michael@0: if (he) { michael@0: if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { michael@0: if (info) *info += SPELL_FORBIDDEN; michael@0: return NULL; michael@0: } michael@0: if (root) { michael@0: *root = mystrdup(he->word); michael@0: if (*root && complexprefixes) { michael@0: if (utf8) reverseword_utf(*root); else reverseword(*root); michael@0: } michael@0: } michael@0: // try check compound word michael@0: } else if (pAMgr->get_compound()) { michael@0: he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info); michael@0: // LANG_hu section: `moving rule' with last dash michael@0: if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) { michael@0: char * dup = mystrdup(word); michael@0: if (!dup) return NULL; michael@0: dup[len-1] = '\0'; michael@0: he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info); michael@0: free(dup); michael@0: } michael@0: // end of LANG speficic region michael@0: if (he) { michael@0: if (root) { michael@0: *root = mystrdup(he->word); michael@0: if (*root && complexprefixes) { michael@0: if (utf8) reverseword_utf(*root); else reverseword(*root); michael@0: } michael@0: } michael@0: if (info) *info += SPELL_COMPOUND; michael@0: } michael@0: } michael@0: michael@0: } michael@0: michael@0: return he; michael@0: } michael@0: michael@0: int Hunspell::suggest(char*** slst, const char * word) michael@0: { michael@0: int onlycmpdsug = 0; michael@0: char cw[MAXWORDUTF8LEN]; michael@0: char wspace[MAXWORDUTF8LEN]; michael@0: if (!pSMgr || maxdic == 0) return 0; michael@0: w_char unicw[MAXWORDLEN]; michael@0: *slst = NULL; michael@0: // process XML input of the simplified API (see manual) michael@0: if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { michael@0: return spellml(slst, word); michael@0: } michael@0: int nc = strlen(word); michael@0: if (utf8) { michael@0: if (nc >= MAXWORDUTF8LEN) return 0; michael@0: } else { michael@0: if (nc >= MAXWORDLEN) return 0; michael@0: } michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: int wl = 0; michael@0: michael@0: // input conversion michael@0: RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; michael@0: if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); michael@0: else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); michael@0: michael@0: if (wl == 0) return 0; michael@0: int ns = 0; michael@0: int capwords = 0; michael@0: michael@0: // check capitalized form for FORCEUCASE michael@0: if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { michael@0: int info = SPELL_ORIGCAP; michael@0: char ** wlst; michael@0: if (checkword(cw, &info, NULL)) { michael@0: if (*slst) { michael@0: wlst = *slst; michael@0: } else { michael@0: wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *)); michael@0: if (wlst == NULL) return -1; michael@0: *slst = wlst; michael@0: for (int i = 0; i < MAXSUGGESTION; i++) { michael@0: wlst[i] = NULL; michael@0: } michael@0: } michael@0: wlst[0] = mystrdup(cw); michael@0: mkinitcap(wlst[0]); michael@0: return 1; michael@0: } michael@0: } michael@0: michael@0: switch(captype) { michael@0: case NOCAP: { michael@0: ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); michael@0: break; michael@0: } michael@0: michael@0: case INITCAP: { michael@0: capwords = 1; michael@0: ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); michael@0: if (ns == -1) break; michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall2(wspace, unicw, nc); michael@0: ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); michael@0: break; michael@0: } michael@0: case HUHINITCAP: michael@0: capwords = 1; michael@0: case HUHCAP: { michael@0: ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug); michael@0: if (ns != -1) { michael@0: int prevns; michael@0: // something.The -> something. The michael@0: char * dot = strchr(cw, '.'); michael@0: if (dot && (dot > cw)) { michael@0: int captype_; michael@0: if (utf8) { michael@0: w_char w_[MAXWORDLEN]; michael@0: int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1); michael@0: captype_ = get_captype_utf8(w_, wl_, langnum); michael@0: } else captype_ = get_captype(dot+1, strlen(dot+1), csconv); michael@0: if (captype_ == INITCAP) { michael@0: char * st = mystrdup(cw); michael@0: if (st) st = (char *) realloc(st, wl + 2); michael@0: if (st) { michael@0: st[(dot - cw) + 1] = ' '; michael@0: strcpy(st + (dot - cw) + 2, dot + 1); michael@0: ns = insert_sug(slst, st, ns); michael@0: free(st); michael@0: } michael@0: } michael@0: } michael@0: if (captype == HUHINITCAP) { michael@0: // TheOpenOffice.org -> The OpenOffice.org michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkinitsmall2(wspace, unicw, nc); michael@0: ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); michael@0: } michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall2(wspace, unicw, nc); michael@0: if (spell(wspace)) ns = insert_sug(slst, wspace, ns); michael@0: prevns = ns; michael@0: ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); michael@0: if (captype == HUHINITCAP) { michael@0: mkinitcap2(wspace, unicw, nc); michael@0: if (spell(wspace)) ns = insert_sug(slst, wspace, ns); michael@0: ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); michael@0: } michael@0: // aNew -> "a New" (instead of "a new") michael@0: for (int j = prevns; j < ns; j++) { michael@0: char * space = strchr((*slst)[j],' '); michael@0: if (space) { michael@0: int slen = strlen(space + 1); michael@0: // different case after space (need capitalisation) michael@0: if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) { michael@0: w_char w[MAXWORDLEN]; michael@0: int wc = 0; michael@0: char * r = (*slst)[j]; michael@0: if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1); michael@0: mkinitcap2(space + 1, w, wc); michael@0: // set as first suggestion michael@0: for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1]; michael@0: (*slst)[0] = r; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: break; michael@0: } michael@0: michael@0: case ALLCAP: { michael@0: memcpy(wspace, cw, (wl+1)); michael@0: mkallsmall2(wspace, unicw, nc); michael@0: ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); michael@0: if (ns == -1) break; michael@0: if (pAMgr && pAMgr->get_keepcase() && spell(wspace)) michael@0: ns = insert_sug(slst, wspace, ns); michael@0: mkinitcap2(wspace, unicw, nc); michael@0: ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug); michael@0: for (int j=0; j < ns; j++) { michael@0: mkallcap((*slst)[j]); michael@0: if (pAMgr && pAMgr->get_checksharps()) { michael@0: char * pos; michael@0: if (utf8) { michael@0: pos = strstr((*slst)[j], "\xC3\x9F"); michael@0: while (pos) { michael@0: *pos = 'S'; michael@0: *(pos+1) = 'S'; michael@0: pos = strstr(pos+2, "\xC3\x9F"); michael@0: } michael@0: } else { michael@0: pos = strchr((*slst)[j], '\xDF'); michael@0: while (pos) { michael@0: (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2); michael@0: mystrrep((*slst)[j], "\xDF", "SS"); michael@0: pos = strchr((*slst)[j], '\xDF'); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: michael@0: // LANG_hu section: replace '-' with ' ' in Hungarian michael@0: if (langnum == LANG_hu) { michael@0: for (int j=0; j < ns; j++) { michael@0: char * pos = strchr((*slst)[j],'-'); michael@0: if (pos) { michael@0: int info; michael@0: char w[MAXWORDUTF8LEN]; michael@0: *pos = '\0'; michael@0: strcpy(w, (*slst)[j]); michael@0: strcat(w, pos + 1); michael@0: spell(w, &info, NULL); michael@0: if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { michael@0: *pos = ' '; michael@0: } else *pos = '-'; michael@0: } michael@0: } michael@0: } michael@0: // END OF LANG_hu section michael@0: michael@0: // try ngram approach since found nothing or only compound words michael@0: if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) { michael@0: switch(captype) { michael@0: case NOCAP: { michael@0: ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic); michael@0: break; michael@0: } michael@0: case HUHINITCAP: michael@0: capwords = 1; michael@0: case HUHCAP: { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall2(wspace, unicw, nc); michael@0: ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); michael@0: break; michael@0: } michael@0: case INITCAP: { michael@0: capwords = 1; michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall2(wspace, unicw, nc); michael@0: ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); michael@0: break; michael@0: } michael@0: case ALLCAP: { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall2(wspace, unicw, nc); michael@0: int oldns = ns; michael@0: ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); michael@0: for (int j = oldns; j < ns; j++) michael@0: mkallcap((*slst)[j]); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // try dash suggestion (Afo-American -> Afro-American) michael@0: if (char * pos = strchr(cw, '-')) { michael@0: char * ppos = cw; michael@0: int nodashsug = 1; michael@0: char ** nlst = NULL; michael@0: int nn = 0; michael@0: int last = 0; michael@0: if (*slst) { michael@0: for (int j = 0; j < ns && nodashsug == 1; j++) { michael@0: if (strchr((*slst)[j], '-')) nodashsug = 0; michael@0: } michael@0: } michael@0: while (nodashsug && !last) { michael@0: if (*pos == '\0') last = 1; else *pos = '\0'; michael@0: if (!spell(ppos)) { michael@0: nn = suggest(&nlst, ppos); michael@0: for (int j = nn - 1; j >= 0; j--) { michael@0: strncpy(wspace, cw, ppos - cw); michael@0: strcpy(wspace + (ppos - cw), nlst[j]); michael@0: if (!last) { michael@0: strcat(wspace, "-"); michael@0: strcat(wspace, pos + 1); michael@0: } michael@0: ns = insert_sug(slst, wspace, ns); michael@0: free(nlst[j]); michael@0: } michael@0: if (nlst != NULL) free(nlst); michael@0: nodashsug = 0; michael@0: } michael@0: if (!last) { michael@0: *pos = '-'; michael@0: ppos = pos + 1; michael@0: pos = strchr(ppos, '-'); michael@0: } michael@0: if (!pos) pos = cw + strlen(cw); michael@0: } michael@0: } michael@0: michael@0: // word reversing wrapper for complex prefixes michael@0: if (complexprefixes) { michael@0: for (int j = 0; j < ns; j++) { michael@0: if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]); michael@0: } michael@0: } michael@0: michael@0: // capitalize michael@0: if (capwords) for (int j=0; j < ns; j++) { michael@0: mkinitcap((*slst)[j]); michael@0: } michael@0: michael@0: // expand suggestions with dot(s) michael@0: if (abbv && pAMgr && pAMgr->get_sugswithdots()) { michael@0: for (int j = 0; j < ns; j++) { michael@0: (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); michael@0: strcat((*slst)[j], word + strlen(word) - abbv); michael@0: } michael@0: } michael@0: michael@0: // remove bad capitalized and forbidden forms michael@0: if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { michael@0: switch (captype) { michael@0: case INITCAP: michael@0: case ALLCAP: { michael@0: int l = 0; michael@0: for (int j=0; j < ns; j++) { michael@0: if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) { michael@0: char s[MAXSWUTF8L]; michael@0: w_char w[MAXSWL]; michael@0: int len; michael@0: if (utf8) { michael@0: len = u8_u16(w, MAXSWL, (*slst)[j]); michael@0: } else { michael@0: strcpy(s, (*slst)[j]); michael@0: len = strlen(s); michael@0: } michael@0: mkallsmall2(s, w, len); michael@0: free((*slst)[j]); michael@0: if (spell(s)) { michael@0: (*slst)[l] = mystrdup(s); michael@0: if ((*slst)[l]) l++; michael@0: } else { michael@0: mkinitcap2(s, w, len); michael@0: if (spell(s)) { michael@0: (*slst)[l] = mystrdup(s); michael@0: if ((*slst)[l]) l++; michael@0: } michael@0: } michael@0: } else { michael@0: (*slst)[l] = (*slst)[j]; michael@0: l++; michael@0: } michael@0: } michael@0: ns = l; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // remove duplications michael@0: int l = 0; michael@0: for (int j = 0; j < ns; j++) { michael@0: (*slst)[l] = (*slst)[j]; michael@0: for (int k = 0; k < l; k++) { michael@0: if (strcmp((*slst)[k], (*slst)[j]) == 0) { michael@0: free((*slst)[j]); michael@0: l--; michael@0: break; michael@0: } michael@0: } michael@0: l++; michael@0: } michael@0: ns = l; michael@0: michael@0: // output conversion michael@0: rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; michael@0: for (int j = 0; rl && j < ns; j++) { michael@0: if (rl->conv((*slst)[j], wspace)) { michael@0: free((*slst)[j]); michael@0: (*slst)[j] = mystrdup(wspace); michael@0: } michael@0: } michael@0: michael@0: // if suggestions removed by nosuggest, onlyincompound parameters michael@0: if (l == 0 && *slst) { michael@0: free(*slst); michael@0: *slst = NULL; michael@0: } michael@0: return l; michael@0: } michael@0: michael@0: void Hunspell::free_list(char *** slst, int n) { michael@0: freelist(slst, n); michael@0: } michael@0: michael@0: char * Hunspell::get_dic_encoding() michael@0: { michael@0: return encoding; michael@0: } michael@0: michael@0: #ifdef HUNSPELL_EXPERIMENTAL michael@0: // XXX need UTF-8 support michael@0: int Hunspell::suggest_auto(char*** slst, const char * word) michael@0: { michael@0: char cw[MAXWORDUTF8LEN]; michael@0: char wspace[MAXWORDUTF8LEN]; michael@0: if (!pSMgr || maxdic == 0) return 0; michael@0: int wl = strlen(word); michael@0: if (utf8) { michael@0: if (wl >= MAXWORDUTF8LEN) return 0; michael@0: } else { michael@0: if (wl >= MAXWORDLEN) return 0; michael@0: } michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: wl = cleanword(cw, word, &captype, &abbv); michael@0: if (wl == 0) return 0; michael@0: int ns = 0; michael@0: *slst = NULL; // HU, nsug in pSMgr->suggest michael@0: michael@0: switch(captype) { michael@0: case NOCAP: { michael@0: ns = pSMgr->suggest_auto(slst, cw, ns); michael@0: if (ns>0) break; michael@0: break; michael@0: } michael@0: michael@0: case INITCAP: { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: ns = pSMgr->suggest_auto(slst, wspace, ns); michael@0: for (int j=0; j < ns; j++) michael@0: mkinitcap((*slst)[j]); michael@0: ns = pSMgr->suggest_auto(slst, cw, ns); michael@0: break; michael@0: michael@0: } michael@0: michael@0: case HUHINITCAP: michael@0: case HUHCAP: { michael@0: ns = pSMgr->suggest_auto(slst, cw, ns); michael@0: if (ns == 0) { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: ns = pSMgr->suggest_auto(slst, wspace, ns); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: case ALLCAP: { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: ns = pSMgr->suggest_auto(slst, wspace, ns); michael@0: michael@0: mkinitcap(wspace); michael@0: ns = pSMgr->suggest_auto(slst, wspace, ns); michael@0: michael@0: for (int j=0; j < ns; j++) michael@0: mkallcap((*slst)[j]); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: // word reversing wrapper for complex prefixes michael@0: if (complexprefixes) { michael@0: for (int j = 0; j < ns; j++) { michael@0: if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]); michael@0: } michael@0: } michael@0: michael@0: // expand suggestions with dot(s) michael@0: if (abbv && pAMgr && pAMgr->get_sugswithdots()) { michael@0: for (int j = 0; j < ns; j++) { michael@0: (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv); michael@0: strcat((*slst)[j], word + strlen(word) - abbv); michael@0: } michael@0: } michael@0: michael@0: // LANG_hu section: replace '-' with ' ' in Hungarian michael@0: if (langnum == LANG_hu) { michael@0: for (int j=0; j < ns; j++) { michael@0: char * pos = strchr((*slst)[j],'-'); michael@0: if (pos) { michael@0: int info; michael@0: char w[MAXWORDUTF8LEN]; michael@0: *pos = '\0'; michael@0: strcpy(w, (*slst)[j]); michael@0: strcat(w, pos + 1); michael@0: spell(w, &info, NULL); michael@0: if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { michael@0: *pos = ' '; michael@0: } else *pos = '-'; michael@0: } michael@0: } michael@0: } michael@0: // END OF LANG_hu section michael@0: return ns; michael@0: } michael@0: #endif michael@0: michael@0: int Hunspell::stem(char*** slst, char ** desc, int n) michael@0: { michael@0: char result[MAXLNLEN]; michael@0: char result2[MAXLNLEN]; michael@0: *slst = NULL; michael@0: if (n == 0) return 0; michael@0: *result2 = '\0'; michael@0: for (int i = 0; i < n; i++) { michael@0: *result = '\0'; michael@0: // add compound word parts (except the last one) michael@0: char * s = (char *) desc[i]; michael@0: char * part = strstr(s, MORPH_PART); michael@0: if (part) { michael@0: char * nextpart = strstr(part + 1, MORPH_PART); michael@0: while (nextpart) { michael@0: copy_field(result + strlen(result), part, MORPH_PART); michael@0: part = nextpart; michael@0: nextpart = strstr(part + 1, MORPH_PART); michael@0: } michael@0: s = part; michael@0: } michael@0: michael@0: char **pl; michael@0: char tok[MAXLNLEN]; michael@0: strcpy(tok, s); michael@0: char * alt = strstr(tok, " | "); michael@0: while (alt) { michael@0: alt[1] = MSEP_ALT; michael@0: alt = strstr(alt, " | "); michael@0: } michael@0: int pln = line_tok(tok, &pl, MSEP_ALT); michael@0: for (int k = 0; k < pln; k++) { michael@0: // add derivational suffixes michael@0: if (strstr(pl[k], MORPH_DERI_SFX)) { michael@0: // remove inflectional suffixes michael@0: char * is = strstr(pl[k], MORPH_INFL_SFX); michael@0: if (is) *is = '\0'; michael@0: char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]); michael@0: if (sg) { michael@0: char ** gen; michael@0: int genl = line_tok(sg, &gen, MSEP_REC); michael@0: free(sg); michael@0: for (int j = 0; j < genl; j++) { michael@0: sprintf(result2 + strlen(result2), "%c%s%s", michael@0: MSEP_REC, result, gen[j]); michael@0: } michael@0: freelist(&gen, genl); michael@0: } michael@0: } else { michael@0: sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result); michael@0: if (strstr(pl[k], MORPH_SURF_PFX)) { michael@0: copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX); michael@0: } michael@0: copy_field(result2 + strlen(result2), pl[k], MORPH_STEM); michael@0: } michael@0: } michael@0: freelist(&pl, pln); michael@0: } michael@0: int sln = line_tok(result2, slst, MSEP_REC); michael@0: return uniqlist(*slst, sln); michael@0: michael@0: } michael@0: michael@0: int Hunspell::stem(char*** slst, const char * word) michael@0: { michael@0: char ** pl; michael@0: int pln = analyze(&pl, word); michael@0: int pln2 = stem(slst, pl, pln); michael@0: freelist(&pl, pln); michael@0: return pln2; michael@0: } michael@0: michael@0: #ifdef HUNSPELL_EXPERIMENTAL michael@0: int Hunspell::suggest_pos_stems(char*** slst, const char * word) michael@0: { michael@0: char cw[MAXWORDUTF8LEN]; michael@0: char wspace[MAXWORDUTF8LEN]; michael@0: if (! pSMgr || maxdic == 0) return 0; michael@0: int wl = strlen(word); michael@0: if (utf8) { michael@0: if (wl >= MAXWORDUTF8LEN) return 0; michael@0: } else { michael@0: if (wl >= MAXWORDLEN) return 0; michael@0: } michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: wl = cleanword(cw, word, &captype, &abbv); michael@0: if (wl == 0) return 0; michael@0: michael@0: int ns = 0; // ns=0 = normalized input michael@0: michael@0: *slst = NULL; // HU, nsug in pSMgr->suggest michael@0: michael@0: switch(captype) { michael@0: case HUHCAP: michael@0: case NOCAP: { michael@0: ns = pSMgr->suggest_pos_stems(slst, cw, ns); michael@0: michael@0: if ((abbv) && (ns == 0)) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: ns = pSMgr->suggest_pos_stems(slst, wspace, ns); michael@0: } michael@0: michael@0: break; michael@0: } michael@0: michael@0: case INITCAP: { michael@0: michael@0: ns = pSMgr->suggest_pos_stems(slst, cw, ns); michael@0: michael@0: if (ns == 0 || ((*slst)[0][0] == '#')) { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: ns = pSMgr->suggest_pos_stems(slst, wspace, ns); michael@0: } michael@0: michael@0: break; michael@0: michael@0: } michael@0: michael@0: case ALLCAP: { michael@0: ns = pSMgr->suggest_pos_stems(slst, cw, ns); michael@0: if (ns != 0) break; michael@0: michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: ns = pSMgr->suggest_pos_stems(slst, wspace, ns); michael@0: michael@0: if (ns == 0) { michael@0: mkinitcap(wspace); michael@0: ns = pSMgr->suggest_pos_stems(slst, wspace, ns); michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: michael@0: return ns; michael@0: } michael@0: #endif // END OF HUNSPELL_EXPERIMENTAL CODE michael@0: michael@0: const char * Hunspell::get_wordchars() michael@0: { michael@0: return pAMgr->get_wordchars(); michael@0: } michael@0: michael@0: unsigned short * Hunspell::get_wordchars_utf16(int * len) michael@0: { michael@0: return pAMgr->get_wordchars_utf16(len); michael@0: } michael@0: michael@0: void Hunspell::mkinitcap(char * p) michael@0: { michael@0: if (!utf8) { michael@0: if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper; michael@0: } else { michael@0: int len; michael@0: w_char u[MAXWORDLEN]; michael@0: len = u8_u16(u, MAXWORDLEN, p); michael@0: unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); michael@0: u[0].h = (unsigned char) (i >> 8); michael@0: u[0].l = (unsigned char) (i & 0x00FF); michael@0: u16_u8(p, MAXWORDUTF8LEN, u, len); michael@0: } michael@0: } michael@0: michael@0: int Hunspell::mkinitcap2(char * p, w_char * u, int nc) michael@0: { michael@0: if (!utf8) { michael@0: if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper; michael@0: } else if (nc > 0) { michael@0: unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum); michael@0: u[0].h = (unsigned char) (i >> 8); michael@0: u[0].l = (unsigned char) (i & 0x00FF); michael@0: u16_u8(p, MAXWORDUTF8LEN, u, nc); michael@0: return strlen(p); michael@0: } michael@0: return nc; michael@0: } michael@0: michael@0: int Hunspell::mkinitsmall2(char * p, w_char * u, int nc) michael@0: { michael@0: if (!utf8) { michael@0: if (*p != '\0') *p = csconv[((unsigned char)*p)].clower; michael@0: } else if (nc > 0) { michael@0: unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum); michael@0: u[0].h = (unsigned char) (i >> 8); michael@0: u[0].l = (unsigned char) (i & 0x00FF); michael@0: u16_u8(p, MAXWORDUTF8LEN, u, nc); michael@0: return strlen(p); michael@0: } michael@0: return nc; michael@0: } michael@0: michael@0: int Hunspell::add(const char * word) michael@0: { michael@0: if (pHMgr[0]) return (pHMgr[0])->add(word); michael@0: return 0; michael@0: } michael@0: michael@0: int Hunspell::add_with_affix(const char * word, const char * example) michael@0: { michael@0: if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example); michael@0: return 0; michael@0: } michael@0: michael@0: int Hunspell::remove(const char * word) michael@0: { michael@0: if (pHMgr[0]) return (pHMgr[0])->remove(word); michael@0: return 0; michael@0: } michael@0: michael@0: const char * Hunspell::get_version() michael@0: { michael@0: return pAMgr->get_version(); michael@0: } michael@0: michael@0: struct cs_info * Hunspell::get_csconv() michael@0: { michael@0: return csconv; michael@0: } michael@0: michael@0: void Hunspell::cat_result(char * result, char * st) michael@0: { michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: } michael@0: michael@0: int Hunspell::analyze(char*** slst, const char * word) michael@0: { michael@0: char cw[MAXWORDUTF8LEN]; michael@0: char wspace[MAXWORDUTF8LEN]; michael@0: w_char unicw[MAXWORDLEN]; michael@0: int wl2 = 0; michael@0: *slst = NULL; michael@0: if (! pSMgr || maxdic == 0) return 0; michael@0: int nc = strlen(word); michael@0: if (utf8) { michael@0: if (nc >= MAXWORDUTF8LEN) return 0; michael@0: } else { michael@0: if (nc >= MAXWORDLEN) return 0; michael@0: } michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: int wl = 0; michael@0: michael@0: // input conversion michael@0: RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; michael@0: if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv); michael@0: else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); michael@0: michael@0: if (wl == 0) { michael@0: if (abbv) { michael@0: for (wl = 0; wl < abbv; wl++) cw[wl] = '.'; michael@0: cw[wl] = '\0'; michael@0: abbv = 0; michael@0: } else return 0; michael@0: } michael@0: michael@0: char result[MAXLNLEN]; michael@0: char * st = NULL; michael@0: michael@0: *result = '\0'; michael@0: michael@0: int n = 0; michael@0: int n2 = 0; michael@0: int n3 = 0; michael@0: michael@0: // test numbers michael@0: // LANG_hu section: set dash information for suggestions michael@0: if (langnum == LANG_hu) { michael@0: while ((n < wl) && michael@0: (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) { michael@0: n++; michael@0: if ((cw[n] == '.') || (cw[n] == ',')) { michael@0: if (((n2 == 0) && (n > 3)) || michael@0: ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break; michael@0: n2++; michael@0: n3 = n; michael@0: } michael@0: } michael@0: michael@0: if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0; michael@0: if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) { michael@0: mystrcat(result, cw, MAXLNLEN); michael@0: result[n - 1] = '\0'; michael@0: if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1)); michael@0: else { michael@0: char sign = cw[n]; michael@0: cw[n] = '\0'; michael@0: cat_result(result, pSMgr->suggest_morph(cw + n - 1)); michael@0: mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE michael@0: cw[n] = sign; michael@0: cat_result(result, pSMgr->suggest_morph(cw + n)); michael@0: } michael@0: return line_tok(result, slst, MSEP_REC); michael@0: } michael@0: } michael@0: // END OF LANG_hu section michael@0: michael@0: switch(captype) { michael@0: case HUHCAP: michael@0: case HUHINITCAP: michael@0: case NOCAP: { michael@0: cat_result(result, pSMgr->suggest_morph(cw)); michael@0: if (abbv) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: } michael@0: break; michael@0: } michael@0: case INITCAP: { michael@0: wl = mkallsmall2(cw, unicw, nc); michael@0: memcpy(wspace,cw,(wl+1)); michael@0: wl2 = mkinitcap2(cw, unicw, nc); michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: cat_result(result, pSMgr->suggest_morph(cw)); michael@0: if (abbv) { michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: michael@0: memcpy(wspace, cw, wl2); michael@0: *(wspace+wl2) = '.'; michael@0: *(wspace+wl2+1) = '\0'; michael@0: michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: } michael@0: break; michael@0: } michael@0: case ALLCAP: { michael@0: cat_result(result, pSMgr->suggest_morph(cw)); michael@0: if (abbv) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: cat_result(result, pSMgr->suggest_morph(cw)); michael@0: } michael@0: wl = mkallsmall2(cw, unicw, nc); michael@0: memcpy(wspace,cw,(wl+1)); michael@0: wl2 = mkinitcap2(cw, unicw, nc); michael@0: michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: cat_result(result, pSMgr->suggest_morph(cw)); michael@0: if (abbv) { michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: michael@0: memcpy(wspace, cw, wl2); michael@0: *(wspace+wl2) = '.'; michael@0: *(wspace+wl2+1) = '\0'; michael@0: michael@0: cat_result(result, pSMgr->suggest_morph(wspace)); michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (*result) { michael@0: // word reversing wrapper for complex prefixes michael@0: if (complexprefixes) { michael@0: if (utf8) reverseword_utf(result); else reverseword(result); michael@0: } michael@0: return line_tok(result, slst, MSEP_REC); michael@0: } michael@0: michael@0: // compound word with dash (HU) I18n michael@0: char * dash = NULL; michael@0: int nresult = 0; michael@0: // LANG_hu section: set dash information for suggestions michael@0: if (langnum == LANG_hu) dash = (char *) strchr(cw,'-'); michael@0: if ((langnum == LANG_hu) && dash) { michael@0: *dash='\0'; michael@0: // examine 2 sides of the dash michael@0: if (dash[1] == '\0') { // base word ending with dash michael@0: if (spell(cw)) { michael@0: char * p = pSMgr->suggest_morph(cw); michael@0: if (p) { michael@0: int ret = line_tok(p, slst, MSEP_REC); michael@0: free(p); michael@0: return ret; michael@0: } michael@0: michael@0: } michael@0: } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat. michael@0: if (spell(cw) && (spell("-e"))) { michael@0: st = pSMgr->suggest_morph(cw); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE michael@0: st = pSMgr->suggest_morph("-e"); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: return line_tok(result, slst, MSEP_REC); michael@0: } michael@0: } else { michael@0: // first word ending with dash: word- XXX ??? michael@0: char r2 = *(dash + 1); michael@0: dash[0]='-'; michael@0: dash[1]='\0'; michael@0: nresult = spell(cw); michael@0: dash[1] = r2; michael@0: dash[0]='\0'; michael@0: if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) || michael@0: ((dash[1] > '0') && (dash[1] < '9')))) { michael@0: st = pSMgr->suggest_morph(cw); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE michael@0: } michael@0: st = pSMgr->suggest_morph(dash+1); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: return line_tok(result, slst, MSEP_REC); michael@0: } michael@0: } michael@0: // affixed number in correct word michael@0: if (nresult && (dash > cw) && (((*(dash-1)<='9') && michael@0: (*(dash-1)>='0')) || (*(dash-1)=='.'))) { michael@0: *dash='-'; michael@0: n = 1; michael@0: if (*(dash - n) == '.') n++; michael@0: // search first not a number character to left from dash michael@0: while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) { michael@0: n++; michael@0: } michael@0: if ((dash - n) < cw) n--; michael@0: // numbers: valami1000000-hoz michael@0: // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz, michael@0: // 56-hoz, 6-hoz michael@0: for(; n >= 1; n--) { michael@0: if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) { michael@0: mystrcat(result, cw, MAXLNLEN); michael@0: result[dash - cw - n] = '\0'; michael@0: st = pSMgr->suggest_morph(dash - n); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: return line_tok(result, slst, MSEP_REC); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln) michael@0: { michael@0: *slst = NULL; michael@0: if (!pSMgr || !pln) return 0; michael@0: char **pl2; michael@0: int pl2n = analyze(&pl2, word); michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: char cw[MAXWORDUTF8LEN]; michael@0: cleanword(cw, word, &captype, &abbv); michael@0: char result[MAXLNLEN]; michael@0: *result = '\0'; michael@0: michael@0: for (int i = 0; i < pln; i++) { michael@0: cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i])); michael@0: } michael@0: freelist(&pl2, pl2n); michael@0: michael@0: if (*result) { michael@0: // allcap michael@0: if (captype == ALLCAP) mkallcap(result); michael@0: michael@0: // line split michael@0: int linenum = line_tok(result, slst, MSEP_REC); michael@0: michael@0: // capitalize michael@0: if (captype == INITCAP || captype == HUHINITCAP) { michael@0: for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]); michael@0: } michael@0: michael@0: // temporary filtering of prefix related errors (eg. michael@0: // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks") michael@0: michael@0: int r = 0; michael@0: for (int j=0; j < linenum; j++) { michael@0: if (!spell((*slst)[j])) { michael@0: free((*slst)[j]); michael@0: (*slst)[j] = NULL; michael@0: } else { michael@0: if (r < j) (*slst)[r] = (*slst)[j]; michael@0: r++; michael@0: } michael@0: } michael@0: if (r > 0) return r; michael@0: free(*slst); michael@0: *slst = NULL; michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: int Hunspell::generate(char*** slst, const char * word, const char * pattern) michael@0: { michael@0: char **pl; michael@0: int pln = analyze(&pl, pattern); michael@0: int n = generate(slst, word, pl, pln); michael@0: freelist(&pl, pln); michael@0: return uniqlist(*slst, n); michael@0: } michael@0: michael@0: // minimal XML parser functions michael@0: int Hunspell::get_xml_par(char * dest, const char * par, int max) michael@0: { michael@0: char * d = dest; michael@0: if (!par) return 0; michael@0: char end = *par; michael@0: char * dmax = dest + max; michael@0: if (end == '>') end = '<'; michael@0: else if (end != '\'' && end != '"') return 0; // bad XML michael@0: for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par; michael@0: *d = '\0'; michael@0: mystrrep(dest, "<", "<"); michael@0: mystrrep(dest, "&", "&"); michael@0: return (int)(d - dest); michael@0: } michael@0: michael@0: int Hunspell::get_langnum() const michael@0: { michael@0: return langnum; michael@0: } michael@0: michael@0: // return the beginning of the element (attr == NULL) or the attribute michael@0: const char * Hunspell::get_xml_pos(const char * s, const char * attr) michael@0: { michael@0: const char * end = strchr(s, '>'); michael@0: const char * p = s; michael@0: if (attr == NULL) return end; michael@0: do { michael@0: p = strstr(p, attr); michael@0: if (!p || p >= end) return 0; michael@0: } while (*(p-1) != ' ' && *(p-1) != '\n'); michael@0: return p + strlen(attr); michael@0: } michael@0: michael@0: int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) { michael@0: char cw[MAXWORDUTF8LEN]; michael@0: if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) && michael@0: strcmp(cw, value) == 0) return 1; michael@0: return 0; michael@0: } michael@0: michael@0: int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) { michael@0: int n = 0; michael@0: char * p; michael@0: if (!list) return 0; michael@0: for (p = list; (p = strstr(p, tag)); p++) n++; michael@0: if (n == 0) return 0; michael@0: *slst = (char **) malloc(sizeof(char *) * n); michael@0: if (!*slst) return 0; michael@0: for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) { michael@0: int l = strlen(p); michael@0: (*slst)[n] = (char *) malloc(l + 1); michael@0: if (!(*slst)[n]) return n; michael@0: if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) { michael@0: free((*slst)[n]); michael@0: break; michael@0: } michael@0: } michael@0: return n; michael@0: } michael@0: michael@0: int Hunspell::spellml(char*** slst, const char * word) michael@0: { michael@0: char *q, *q2; michael@0: char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN]; michael@0: q = (char *) strstr(word, "'); michael@0: if (!q2) return 0; // bad XML input michael@0: q2 = strstr(q2, "'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw); michael@0: if (n == 0) return 0; michael@0: // convert the result to ana1ana2 format michael@0: for (int i = 0; i < n; i++) s+= strlen((*slst)[i]); michael@0: char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->& michael@0: if (!r) return 0; michael@0: strcpy(r, ""); michael@0: for (int i = 0; i < n; i++) { michael@0: int l = strlen(r); michael@0: strcpy(r + l, ""); michael@0: strcpy(r + l + 3, (*slst)[i]); michael@0: mystrrep(r + l + 3, "\t", " "); michael@0: mystrrep(r + l + 3, "<", "<"); michael@0: mystrrep(r + l + 3, "&", "&"); michael@0: strcat(r, ""); michael@0: free((*slst)[i]); michael@0: } michael@0: strcat(r, ""); michael@0: (*slst)[0] = r; michael@0: return 1; michael@0: } else if (check_xml_par(q, "type=", "stem")) { michael@0: if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw); michael@0: } else if (check_xml_par(q, "type=", "generate")) { michael@0: int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1); michael@0: if (n == 0) return 0; michael@0: char * q3 = strstr(q2 + 1, "'), MAXWORDUTF8LEN - 1)) { michael@0: return generate(slst, cw, cw2); michael@0: } michael@0: } else { michael@0: if ((q2 = strstr(q2 + 1, "'), ""))) { michael@0: int n2 = generate(slst, cw, slst2, n); michael@0: freelist(&slst2, n); michael@0: return uniqlist(*slst, n2); michael@0: } michael@0: freelist(&slst2, n); michael@0: } michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: michael@0: #ifdef HUNSPELL_EXPERIMENTAL michael@0: // XXX need UTF-8 support michael@0: char * Hunspell::morph_with_correction(const char * word) michael@0: { michael@0: char cw[MAXWORDUTF8LEN]; michael@0: char wspace[MAXWORDUTF8LEN]; michael@0: if (! pSMgr || maxdic == 0) return NULL; michael@0: int wl = strlen(word); michael@0: if (utf8) { michael@0: if (wl >= MAXWORDUTF8LEN) return NULL; michael@0: } else { michael@0: if (wl >= MAXWORDLEN) return NULL; michael@0: } michael@0: int captype = 0; michael@0: int abbv = 0; michael@0: wl = cleanword(cw, word, &captype, &abbv); michael@0: if (wl == 0) return NULL; michael@0: michael@0: char result[MAXLNLEN]; michael@0: char * st = NULL; michael@0: michael@0: *result = '\0'; michael@0: michael@0: michael@0: switch(captype) { michael@0: case NOCAP: { michael@0: st = pSMgr->suggest_morph_for_spelling_error(cw); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: if (abbv) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: } michael@0: break; michael@0: } michael@0: case INITCAP: { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: st = pSMgr->suggest_morph_for_spelling_error(cw); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: if (abbv) { michael@0: memcpy(wspace,cw,wl); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: mkallsmall(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: mkinitcap(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: } michael@0: break; michael@0: } michael@0: case HUHCAP: { michael@0: st = pSMgr->suggest_morph_for_spelling_error(cw); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: memcpy(wspace,cw,(wl+1)); michael@0: mkallsmall(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: break; michael@0: } michael@0: case ALLCAP: { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: mkallsmall(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: mkinitcap(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: if (abbv) { michael@0: memcpy(wspace,cw,(wl+1)); michael@0: *(wspace+wl) = '.'; michael@0: *(wspace+wl+1) = '\0'; michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: mkallsmall(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: mkinitcap(wspace); michael@0: st = pSMgr->suggest_morph_for_spelling_error(wspace); michael@0: if (st) { michael@0: if (*result) mystrcat(result, "\n", MAXLNLEN); michael@0: mystrcat(result, st, MAXLNLEN); michael@0: free(st); michael@0: } michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (*result) return mystrdup(result); michael@0: return NULL; michael@0: } michael@0: michael@0: #endif // END OF HUNSPELL_EXPERIMENTAL CODE michael@0: michael@0: Hunhandle *Hunspell_create(const char * affpath, const char * dpath) michael@0: { michael@0: return (Hunhandle*)(new Hunspell(affpath, dpath)); michael@0: } michael@0: michael@0: Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, michael@0: const char * key) michael@0: { michael@0: return (Hunhandle*)(new Hunspell(affpath, dpath, key)); michael@0: } michael@0: michael@0: void Hunspell_destroy(Hunhandle *pHunspell) michael@0: { michael@0: delete (Hunspell*)(pHunspell); michael@0: } michael@0: michael@0: int Hunspell_spell(Hunhandle *pHunspell, const char *word) michael@0: { michael@0: return ((Hunspell*)pHunspell)->spell(word); michael@0: } michael@0: michael@0: char *Hunspell_get_dic_encoding(Hunhandle *pHunspell) michael@0: { michael@0: return ((Hunspell*)pHunspell)->get_dic_encoding(); michael@0: } michael@0: michael@0: int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word) michael@0: { michael@0: return ((Hunspell*)pHunspell)->suggest(slst, word); michael@0: } michael@0: michael@0: int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word) michael@0: { michael@0: return ((Hunspell*)pHunspell)->analyze(slst, word); michael@0: } michael@0: michael@0: int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word) michael@0: { michael@0: return ((Hunspell*)pHunspell)->stem(slst, word); michael@0: } michael@0: michael@0: int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n) michael@0: { michael@0: return ((Hunspell*)pHunspell)->stem(slst, desc, n); michael@0: } michael@0: michael@0: int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word, michael@0: const char * word2) michael@0: { michael@0: return ((Hunspell*)pHunspell)->generate(slst, word, word2); michael@0: } michael@0: michael@0: int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word, michael@0: char** desc, int n) michael@0: { michael@0: return ((Hunspell*)pHunspell)->generate(slst, word, desc, n); michael@0: } michael@0: michael@0: /* functions for run-time modification of the dictionary */ michael@0: michael@0: /* add word to the run-time dictionary */ michael@0: michael@0: int Hunspell_add(Hunhandle *pHunspell, const char * word) { michael@0: return ((Hunspell*)pHunspell)->add(word); michael@0: } michael@0: michael@0: /* add word to the run-time dictionary with affix flags of michael@0: * the example (a dictionary word): Hunspell will recognize michael@0: * affixed forms of the new word, too. michael@0: */ michael@0: michael@0: int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, michael@0: const char * example) { michael@0: return ((Hunspell*)pHunspell)->add_with_affix(word, example); michael@0: } michael@0: michael@0: /* remove word from the run-time dictionary */ michael@0: michael@0: int Hunspell_remove(Hunhandle *pHunspell, const char * word) { michael@0: return ((Hunspell*)pHunspell)->remove(word); michael@0: } michael@0: michael@0: void Hunspell_free_list(Hunhandle *, char *** slst, int n) { michael@0: freelist(slst, n); michael@0: }