Wed, 31 Dec 2014 06:09:35 +0100
Cloned from the upstream origin tor-browser at tag tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 /******* BEGIN LICENSE BLOCK *******
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
15 * and László Németh (Hunspell). Portions created by the Initial Developers
16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
17 *
18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
19 * David Einstein (deinst@world.std.com)
20 * László Németh (nemethl@gyorsposta.hu)
21 * Caolan McNamara (caolanm@redhat.com)
22 * Davide Prina
23 * Giuseppe Modugno
24 * Gianluca Turconi
25 * Simon Brouwer
26 * Noll Janos
27 * Biro Arpad
28 * Goldman Eleonora
29 * Sarlos Tamas
30 * Bencsath Boldizsar
31 * Halacsy Peter
32 * Dvornik Laszlo
33 * Gefferth Andras
34 * Nagy Viktor
35 * Varga Daniel
36 * Chris Halls
37 * Rene Engelhard
38 * Bram Moolenaar
39 * Dafydd Jones
40 * Harri Pitkanen
41 * Andras Timar
42 * Tor Lillqvist
43 *
44 * Alternatively, the contents of this file may be used under the terms of
45 * either the GNU General Public License Version 2 or later (the "GPL"), or
46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
47 * in which case the provisions of the GPL or the LGPL are applicable instead
48 * of those above. If you wish to allow use of your version of this file only
49 * under the terms of either the GPL or the LGPL, and not to allow others to
50 * use your version of this file under the terms of the MPL, indicate your
51 * decision by deleting the provisions above and replace them with the notice
52 * and other provisions required by the GPL or the LGPL. If you do not delete
53 * the provisions above, a recipient may use your version of this file under
54 * the terms of any one of the MPL, the GPL or the LGPL.
55 *
56 ******* END LICENSE BLOCK *******/
58 #include <stdlib.h>
59 #include <string.h>
60 #include <stdio.h>
62 #include "hunspell.hxx"
63 #include "hunspell.h"
64 #ifndef MOZILLA_CLIENT
65 # include "config.h"
66 #endif
67 #include "csutil.hxx"
69 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
70 {
71 encoding = NULL;
72 csconv = NULL;
73 utf8 = 0;
74 complexprefixes = 0;
75 affixpath = mystrdup(affpath);
76 maxdic = 0;
78 /* first set up the hash manager */
79 pHMgr[0] = new HashMgr(dpath, affpath, key);
80 if (pHMgr[0]) maxdic = 1;
82 /* next set up the affix manager */
83 /* it needs access to the hash manager lookup methods */
84 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
86 /* get the preferred try string and the dictionary */
87 /* encoding from the Affix Manager for that dictionary */
88 char * try_string = pAMgr->get_try_string();
89 encoding = pAMgr->get_encoding();
90 langnum = pAMgr->get_langnum();
91 utf8 = pAMgr->get_utf8();
92 if (!utf8)
93 csconv = get_current_cs(encoding);
94 complexprefixes = pAMgr->get_complexprefixes();
95 wordbreak = pAMgr->get_breaktable();
97 /* and finally set up the suggestion manager */
98 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
99 if (try_string) free(try_string);
100 }
102 Hunspell::~Hunspell()
103 {
104 if (pSMgr) delete pSMgr;
105 if (pAMgr) delete pAMgr;
106 for (int i = 0; i < maxdic; i++) delete pHMgr[i];
107 maxdic = 0;
108 pSMgr = NULL;
109 pAMgr = NULL;
110 #ifdef MOZILLA_CLIENT
111 delete [] csconv;
112 #endif
113 csconv= NULL;
114 if (encoding) free(encoding);
115 encoding = NULL;
116 if (affixpath) free(affixpath);
117 affixpath = NULL;
118 }
120 // load extra dictionaries
121 int Hunspell::add_dic(const char * dpath, const char * key) {
122 if (maxdic == MAXDIC || !affixpath) return 1;
123 pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
124 if (pHMgr[maxdic]) maxdic++; else return 1;
125 return 0;
126 }
128 // make a copy of src at destination while removing all leading
129 // blanks and removing any trailing periods after recording
130 // their presence with the abbreviation flag
131 // also since already going through character by character,
132 // set the capitalization type
133 // return the length of the "cleaned" (and UTF-8 encoded) word
135 int Hunspell::cleanword2(char * dest, const char * src,
136 w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
137 {
138 unsigned char * p = (unsigned char *) dest;
139 const unsigned char * q = (const unsigned char * ) src;
141 // first skip over any leading blanks
142 while ((*q != '\0') && (*q == ' ')) q++;
144 // now strip off any trailing periods (recording their presence)
145 *pabbrev = 0;
146 int nl = strlen((const char *)q);
147 while ((nl > 0) && (*(q+nl-1)=='.')) {
148 nl--;
149 (*pabbrev)++;
150 }
152 // if no characters are left it can't be capitalized
153 if (nl <= 0) {
154 *pcaptype = NOCAP;
155 *p = '\0';
156 return 0;
157 }
159 strncpy(dest, (char *) q, nl);
160 *(dest + nl) = '\0';
161 nl = strlen(dest);
162 if (utf8) {
163 *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
164 // don't check too long words
165 if (*nc >= MAXWORDLEN) return 0;
166 if (*nc == -1) { // big Unicode character (non BMP area)
167 *pcaptype = NOCAP;
168 return nl;
169 }
170 *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
171 } else {
172 *pcaptype = get_captype(dest, nl, csconv);
173 *nc = nl;
174 }
175 return nl;
176 }
178 int Hunspell::cleanword(char * dest, const char * src,
179 int * pcaptype, int * pabbrev)
180 {
181 unsigned char * p = (unsigned char *) dest;
182 const unsigned char * q = (const unsigned char * ) src;
183 int firstcap = 0;
185 // first skip over any leading blanks
186 while ((*q != '\0') && (*q == ' ')) q++;
188 // now strip off any trailing periods (recording their presence)
189 *pabbrev = 0;
190 int nl = strlen((const char *)q);
191 while ((nl > 0) && (*(q+nl-1)=='.')) {
192 nl--;
193 (*pabbrev)++;
194 }
196 // if no characters are left it can't be capitalized
197 if (nl <= 0) {
198 *pcaptype = NOCAP;
199 *p = '\0';
200 return 0;
201 }
203 // now determine the capitalization type of the first nl letters
204 int ncap = 0;
205 int nneutral = 0;
206 int nc = 0;
208 if (!utf8) {
209 while (nl > 0) {
210 nc++;
211 if (csconv[(*q)].ccase) ncap++;
212 if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
213 *p++ = *q++;
214 nl--;
215 }
216 // remember to terminate the destination string
217 *p = '\0';
218 firstcap = csconv[(unsigned char)(*dest)].ccase;
219 } else {
220 unsigned short idx;
221 w_char t[MAXWORDLEN];
222 nc = u8_u16(t, MAXWORDLEN, src);
223 for (int i = 0; i < nc; i++) {
224 idx = (t[i].h << 8) + t[i].l;
225 unsigned short low = unicodetolower(idx, langnum);
226 if (idx != low) ncap++;
227 if (unicodetoupper(idx, langnum) == low) nneutral++;
228 }
229 u16_u8(dest, MAXWORDUTF8LEN, t, nc);
230 if (ncap) {
231 idx = (t[0].h << 8) + t[0].l;
232 firstcap = (idx != unicodetolower(idx, langnum));
233 }
234 }
236 // now finally set the captype
237 if (ncap == 0) {
238 *pcaptype = NOCAP;
239 } else if ((ncap == 1) && firstcap) {
240 *pcaptype = INITCAP;
241 } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
242 *pcaptype = ALLCAP;
243 } else if ((ncap > 1) && firstcap) {
244 *pcaptype = HUHINITCAP;
245 } else {
246 *pcaptype = HUHCAP;
247 }
248 return strlen(dest);
249 }
251 void Hunspell::mkallcap(char * p)
252 {
253 if (utf8) {
254 w_char u[MAXWORDLEN];
255 int nc = u8_u16(u, MAXWORDLEN, p);
256 unsigned short idx;
257 for (int i = 0; i < nc; i++) {
258 idx = (u[i].h << 8) + u[i].l;
259 if (idx != unicodetoupper(idx, langnum)) {
260 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
261 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
262 }
263 }
264 u16_u8(p, MAXWORDUTF8LEN, u, nc);
265 } else {
266 while (*p != '\0') {
267 *p = csconv[((unsigned char) *p)].cupper;
268 p++;
269 }
270 }
271 }
273 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
274 {
275 if (utf8) {
276 unsigned short idx;
277 for (int i = 0; i < nc; i++) {
278 idx = (u[i].h << 8) + u[i].l;
279 unsigned short up = unicodetoupper(idx, langnum);
280 if (idx != up) {
281 u[i].h = (unsigned char) (up >> 8);
282 u[i].l = (unsigned char) (up & 0x00FF);
283 }
284 }
285 u16_u8(p, MAXWORDUTF8LEN, u, nc);
286 return strlen(p);
287 } else {
288 while (*p != '\0') {
289 *p = csconv[((unsigned char) *p)].cupper;
290 p++;
291 }
292 }
293 return nc;
294 }
297 void Hunspell::mkallsmall(char * p)
298 {
299 while (*p != '\0') {
300 *p = csconv[((unsigned char) *p)].clower;
301 p++;
302 }
303 }
305 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
306 {
307 if (utf8) {
308 unsigned short idx;
309 for (int i = 0; i < nc; i++) {
310 idx = (u[i].h << 8) + u[i].l;
311 unsigned short low = unicodetolower(idx, langnum);
312 if (idx != low) {
313 u[i].h = (unsigned char) (low >> 8);
314 u[i].l = (unsigned char) (low & 0x00FF);
315 }
316 }
317 u16_u8(p, MAXWORDUTF8LEN, u, nc);
318 return strlen(p);
319 } else {
320 while (*p != '\0') {
321 *p = csconv[((unsigned char) *p)].clower;
322 p++;
323 }
324 }
325 return nc;
326 }
328 // convert UTF-8 sharp S codes to latin 1
329 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
330 char * p = dest;
331 *p = *source;
332 for (p++, source++; *(source - 1); p++, source++) {
333 *p = *source;
334 if (*source == '\x9F') *--p = '\xDF';
335 }
336 return dest;
337 }
339 // recursive search for right ss - sharp s permutations
340 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
341 int repnum, char * tmp, int * info, char **root) {
342 pos = strstr(pos, "ss");
343 if (pos && (n < MAXSHARPS)) {
344 *pos = '\xC3';
345 *(pos + 1) = '\x9F';
346 hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
347 if (h) return h;
348 *pos = 's';
349 *(pos + 1) = 's';
350 h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
351 if (h) return h;
352 } else if (repnum > 0) {
353 if (utf8) return checkword(base, info, root);
354 return checkword(sharps_u8_l1(tmp, base), info, root);
355 }
356 return NULL;
357 }
359 int Hunspell::is_keepcase(const hentry * rv) {
360 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
361 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
362 }
364 /* insert a word to the beginning of the suggestion array and return ns */
365 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
366 char * dup = mystrdup(word);
367 if (!dup) return ns;
368 if (ns == MAXSUGGESTION) {
369 ns--;
370 free((*slst)[ns]);
371 }
372 for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
373 (*slst)[0] = dup;
374 return ns + 1;
375 }
377 int Hunspell::spell(const char * word, int * info, char ** root)
378 {
379 struct hentry * rv=NULL;
380 // need larger vector. For example, Turkish capital letter I converted a
381 // 2-byte UTF-8 character (dotless i) by mkallsmall.
382 char cw[MAXWORDUTF8LEN];
383 char wspace[MAXWORDUTF8LEN];
384 w_char unicw[MAXWORDLEN];
385 // Hunspell supports XML input of the simplified API (see manual)
386 if (strcmp(word, SPELL_XML) == 0) return 1;
387 int nc = strlen(word);
388 int wl2 = 0;
389 if (utf8) {
390 if (nc >= MAXWORDUTF8LEN) return 0;
391 } else {
392 if (nc >= MAXWORDLEN) return 0;
393 }
394 int captype = 0;
395 int abbv = 0;
396 int wl = 0;
398 // input conversion
399 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
400 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
401 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
403 int info2 = 0;
404 if (wl == 0 || maxdic == 0) return 1;
405 if (root) *root = NULL;
407 // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
408 enum { NBEGIN, NNUM, NSEP };
409 int nstate = NBEGIN;
410 int i;
412 for (i = 0; (i < wl); i++) {
413 if ((cw[i] <= '9') && (cw[i] >= '0')) {
414 nstate = NNUM;
415 } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
416 if ((nstate == NSEP) || (i == 0)) break;
417 nstate = NSEP;
418 } else break;
419 }
420 if ((i == wl) && (nstate == NNUM)) return 1;
421 if (!info) info = &info2; else *info = 0;
423 switch(captype) {
424 case HUHCAP:
425 case HUHINITCAP:
426 *info += SPELL_ORIGCAP;
427 case NOCAP: {
428 rv = checkword(cw, info, root);
429 if ((abbv) && !(rv)) {
430 memcpy(wspace,cw,wl);
431 *(wspace+wl) = '.';
432 *(wspace+wl+1) = '\0';
433 rv = checkword(wspace, info, root);
434 }
435 break;
436 }
437 case ALLCAP: {
438 *info += SPELL_ORIGCAP;
439 rv = checkword(cw, info, root);
440 if (rv) break;
441 if (abbv) {
442 memcpy(wspace,cw,wl);
443 *(wspace+wl) = '.';
444 *(wspace+wl+1) = '\0';
445 rv = checkword(wspace, info, root);
446 if (rv) break;
447 }
448 // Spec. prefix handling for Catalan, French, Italian:
449 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
450 if (pAMgr && strchr(cw, '\'')) {
451 wl = mkallsmall2(cw, unicw, nc);
452 //There are no really sane circumstances where this could fail,
453 //but anyway...
454 if (char * apostrophe = strchr(cw, '\'')) {
455 if (utf8) {
456 w_char tmpword[MAXWORDLEN];
457 *apostrophe = '\0';
458 wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
459 *apostrophe = '\'';
460 if (wl2 < nc) {
461 mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
462 rv = checkword(cw, info, root);
463 if (rv) break;
464 }
465 } else {
466 mkinitcap2(apostrophe + 1, unicw, nc);
467 rv = checkword(cw, info, root);
468 if (rv) break;
469 }
470 }
471 mkinitcap2(cw, unicw, nc);
472 rv = checkword(cw, info, root);
473 if (rv) break;
474 }
475 if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
476 char tmpword[MAXWORDUTF8LEN];
477 wl = mkallsmall2(cw, unicw, nc);
478 memcpy(wspace,cw,(wl+1));
479 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
480 if (!rv) {
481 wl2 = mkinitcap2(cw, unicw, nc);
482 rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
483 }
484 if ((abbv) && !(rv)) {
485 *(wspace+wl) = '.';
486 *(wspace+wl+1) = '\0';
487 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
488 if (!rv) {
489 memcpy(wspace, cw, wl2);
490 *(wspace+wl2) = '.';
491 *(wspace+wl2+1) = '\0';
492 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
493 }
494 }
495 if (rv) break;
496 }
497 }
498 case INITCAP: {
499 *info += SPELL_ORIGCAP;
500 wl = mkallsmall2(cw, unicw, nc);
501 memcpy(wspace,cw,(wl+1));
502 wl2 = mkinitcap2(cw, unicw, nc);
503 if (captype == INITCAP) *info += SPELL_INITCAP;
504 rv = checkword(cw, info, root);
505 if (captype == INITCAP) *info -= SPELL_INITCAP;
506 // forbid bad capitalization
507 // (for example, ijs -> Ijs instead of IJs in Dutch)
508 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
509 if (*info & SPELL_FORBIDDEN) {
510 rv = NULL;
511 break;
512 }
513 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
514 if (rv) break;
516 rv = checkword(wspace, info, root);
517 if (abbv && !rv) {
519 *(wspace+wl) = '.';
520 *(wspace+wl+1) = '\0';
521 rv = checkword(wspace, info, root);
522 if (!rv) {
523 memcpy(wspace, cw, wl2);
524 *(wspace+wl2) = '.';
525 *(wspace+wl2+1) = '\0';
526 if (captype == INITCAP) *info += SPELL_INITCAP;
527 rv = checkword(wspace, info, root);
528 if (captype == INITCAP) *info -= SPELL_INITCAP;
529 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
530 break;
531 }
532 }
533 if (rv && is_keepcase(rv) &&
534 ((captype == ALLCAP) ||
535 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
536 // in INITCAP form, too.
537 !(pAMgr->get_checksharps() &&
538 ((utf8 && strstr(wspace, "\xC3\x9F")) ||
539 (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
540 break;
541 }
542 }
544 if (rv) {
545 if (pAMgr && pAMgr->get_warn() && rv->astr &&
546 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
547 *info += SPELL_WARN;
548 if (pAMgr->get_forbidwarn()) return 0;
549 return HUNSPELL_OK_WARN;
550 }
551 return HUNSPELL_OK;
552 }
554 // recursive breaking at break points
555 if (wordbreak) {
556 char * s;
557 char r;
558 int nbr = 0;
559 wl = strlen(cw);
560 int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
562 // calculate break points for recursion limit
563 for (int j = 0; j < numbreak; j++) {
564 s = cw;
565 do {
566 s = (char *) strstr(s, wordbreak[j]);
567 if (s) {
568 nbr++;
569 s++;
570 }
571 } while (s);
572 }
573 if (nbr >= 10) return 0;
575 // check boundary patterns (^begin and end$)
576 for (int j = 0; j < numbreak; j++) {
577 int plen = strlen(wordbreak[j]);
578 if (plen == 1 || plen > wl) continue;
579 if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
580 && spell(cw + plen - 1)) return 1;
581 if (wordbreak[j][plen - 1] == '$' &&
582 strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
583 r = cw[wl - plen + 1];
584 cw[wl - plen + 1] = '\0';
585 if (spell(cw)) return 1;
586 cw[wl - plen + 1] = r;
587 }
588 }
590 // other patterns
591 for (int j = 0; j < numbreak; j++) {
592 int plen = strlen(wordbreak[j]);
593 s=(char *) strstr(cw, wordbreak[j]);
594 if (s && (s > cw) && (s < cw + wl - plen)) {
595 if (!spell(s + plen)) continue;
596 r = *s;
597 *s = '\0';
598 // examine 2 sides of the break point
599 if (spell(cw)) return 1;
600 *s = r;
602 // LANG_hu: spec. dash rule
603 if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
604 r = s[1];
605 s[1] = '\0';
606 if (spell(cw)) return 1; // check the first part with dash
607 s[1] = r;
608 }
609 // end of LANG speficic region
611 }
612 }
613 }
615 return 0;
616 }
618 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
619 {
620 struct hentry * he = NULL;
621 int len, i;
622 char w2[MAXWORDUTF8LEN];
623 const char * word;
625 char * ignoredchars = pAMgr->get_ignore();
626 if (ignoredchars != NULL) {
627 strcpy(w2, w);
628 if (utf8) {
629 int ignoredchars_utf16_len;
630 unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
631 remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
632 } else {
633 remove_ignored_chars(w2,ignoredchars);
634 }
635 word = w2;
636 } else word = w;
638 len = strlen(word);
640 if (!len)
641 return NULL;
643 // word reversing wrapper for complex prefixes
644 if (complexprefixes) {
645 if (word != w2) {
646 strcpy(w2, word);
647 word = w2;
648 }
649 if (utf8) reverseword_utf(w2); else reverseword(w2);
650 }
652 // look word in hash table
653 for (i = 0; (i < maxdic) && !he; i ++) {
654 he = (pHMgr[i])->lookup(word);
656 // check forbidden and onlyincompound words
657 if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
658 if (info) *info += SPELL_FORBIDDEN;
659 // LANG_hu section: set dash information for suggestions
660 if (langnum == LANG_hu) {
661 if (pAMgr->get_compoundflag() &&
662 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
663 if (info) *info += SPELL_COMPOUND;
664 }
665 }
666 return NULL;
667 }
669 // he = next not needaffix, onlyincompound homonym or onlyupcase word
670 while (he && (he->astr) &&
671 ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
672 (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
673 (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
674 )) he = he->next_homonym;
675 }
677 // check with affixes
678 if (!he && pAMgr) {
679 // try stripping off affixes */
680 he = pAMgr->affix_check(word, len, 0);
682 // check compound restriction and onlyupcase
683 if (he && he->astr && (
684 (pAMgr->get_onlyincompound() &&
685 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
686 (info && (*info & SPELL_INITCAP) &&
687 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
688 he = NULL;
689 }
691 if (he) {
692 if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
693 if (info) *info += SPELL_FORBIDDEN;
694 return NULL;
695 }
696 if (root) {
697 *root = mystrdup(he->word);
698 if (*root && complexprefixes) {
699 if (utf8) reverseword_utf(*root); else reverseword(*root);
700 }
701 }
702 // try check compound word
703 } else if (pAMgr->get_compound()) {
704 he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
705 // LANG_hu section: `moving rule' with last dash
706 if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
707 char * dup = mystrdup(word);
708 if (!dup) return NULL;
709 dup[len-1] = '\0';
710 he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
711 free(dup);
712 }
713 // end of LANG speficic region
714 if (he) {
715 if (root) {
716 *root = mystrdup(he->word);
717 if (*root && complexprefixes) {
718 if (utf8) reverseword_utf(*root); else reverseword(*root);
719 }
720 }
721 if (info) *info += SPELL_COMPOUND;
722 }
723 }
725 }
727 return he;
728 }
730 int Hunspell::suggest(char*** slst, const char * word)
731 {
732 int onlycmpdsug = 0;
733 char cw[MAXWORDUTF8LEN];
734 char wspace[MAXWORDUTF8LEN];
735 if (!pSMgr || maxdic == 0) return 0;
736 w_char unicw[MAXWORDLEN];
737 *slst = NULL;
738 // process XML input of the simplified API (see manual)
739 if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
740 return spellml(slst, word);
741 }
742 int nc = strlen(word);
743 if (utf8) {
744 if (nc >= MAXWORDUTF8LEN) return 0;
745 } else {
746 if (nc >= MAXWORDLEN) return 0;
747 }
748 int captype = 0;
749 int abbv = 0;
750 int wl = 0;
752 // input conversion
753 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
754 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
755 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
757 if (wl == 0) return 0;
758 int ns = 0;
759 int capwords = 0;
761 // check capitalized form for FORCEUCASE
762 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
763 int info = SPELL_ORIGCAP;
764 char ** wlst;
765 if (checkword(cw, &info, NULL)) {
766 if (*slst) {
767 wlst = *slst;
768 } else {
769 wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
770 if (wlst == NULL) return -1;
771 *slst = wlst;
772 for (int i = 0; i < MAXSUGGESTION; i++) {
773 wlst[i] = NULL;
774 }
775 }
776 wlst[0] = mystrdup(cw);
777 mkinitcap(wlst[0]);
778 return 1;
779 }
780 }
782 switch(captype) {
783 case NOCAP: {
784 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
785 break;
786 }
788 case INITCAP: {
789 capwords = 1;
790 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
791 if (ns == -1) break;
792 memcpy(wspace,cw,(wl+1));
793 mkallsmall2(wspace, unicw, nc);
794 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
795 break;
796 }
797 case HUHINITCAP:
798 capwords = 1;
799 case HUHCAP: {
800 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
801 if (ns != -1) {
802 int prevns;
803 // something.The -> something. The
804 char * dot = strchr(cw, '.');
805 if (dot && (dot > cw)) {
806 int captype_;
807 if (utf8) {
808 w_char w_[MAXWORDLEN];
809 int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
810 captype_ = get_captype_utf8(w_, wl_, langnum);
811 } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
812 if (captype_ == INITCAP) {
813 char * st = mystrdup(cw);
814 if (st) st = (char *) realloc(st, wl + 2);
815 if (st) {
816 st[(dot - cw) + 1] = ' ';
817 strcpy(st + (dot - cw) + 2, dot + 1);
818 ns = insert_sug(slst, st, ns);
819 free(st);
820 }
821 }
822 }
823 if (captype == HUHINITCAP) {
824 // TheOpenOffice.org -> The OpenOffice.org
825 memcpy(wspace,cw,(wl+1));
826 mkinitsmall2(wspace, unicw, nc);
827 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
828 }
829 memcpy(wspace,cw,(wl+1));
830 mkallsmall2(wspace, unicw, nc);
831 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
832 prevns = ns;
833 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
834 if (captype == HUHINITCAP) {
835 mkinitcap2(wspace, unicw, nc);
836 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
837 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
838 }
839 // aNew -> "a New" (instead of "a new")
840 for (int j = prevns; j < ns; j++) {
841 char * space = strchr((*slst)[j],' ');
842 if (space) {
843 int slen = strlen(space + 1);
844 // different case after space (need capitalisation)
845 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
846 w_char w[MAXWORDLEN];
847 int wc = 0;
848 char * r = (*slst)[j];
849 if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
850 mkinitcap2(space + 1, w, wc);
851 // set as first suggestion
852 for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
853 (*slst)[0] = r;
854 }
855 }
856 }
857 }
858 break;
859 }
861 case ALLCAP: {
862 memcpy(wspace, cw, (wl+1));
863 mkallsmall2(wspace, unicw, nc);
864 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
865 if (ns == -1) break;
866 if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
867 ns = insert_sug(slst, wspace, ns);
868 mkinitcap2(wspace, unicw, nc);
869 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
870 for (int j=0; j < ns; j++) {
871 mkallcap((*slst)[j]);
872 if (pAMgr && pAMgr->get_checksharps()) {
873 char * pos;
874 if (utf8) {
875 pos = strstr((*slst)[j], "\xC3\x9F");
876 while (pos) {
877 *pos = 'S';
878 *(pos+1) = 'S';
879 pos = strstr(pos+2, "\xC3\x9F");
880 }
881 } else {
882 pos = strchr((*slst)[j], '\xDF');
883 while (pos) {
884 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
885 mystrrep((*slst)[j], "\xDF", "SS");
886 pos = strchr((*slst)[j], '\xDF');
887 }
888 }
889 }
890 }
891 break;
892 }
893 }
895 // LANG_hu section: replace '-' with ' ' in Hungarian
896 if (langnum == LANG_hu) {
897 for (int j=0; j < ns; j++) {
898 char * pos = strchr((*slst)[j],'-');
899 if (pos) {
900 int info;
901 char w[MAXWORDUTF8LEN];
902 *pos = '\0';
903 strcpy(w, (*slst)[j]);
904 strcat(w, pos + 1);
905 spell(w, &info, NULL);
906 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
907 *pos = ' ';
908 } else *pos = '-';
909 }
910 }
911 }
912 // END OF LANG_hu section
914 // try ngram approach since found nothing or only compound words
915 if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
916 switch(captype) {
917 case NOCAP: {
918 ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
919 break;
920 }
921 case HUHINITCAP:
922 capwords = 1;
923 case HUHCAP: {
924 memcpy(wspace,cw,(wl+1));
925 mkallsmall2(wspace, unicw, nc);
926 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
927 break;
928 }
929 case INITCAP: {
930 capwords = 1;
931 memcpy(wspace,cw,(wl+1));
932 mkallsmall2(wspace, unicw, nc);
933 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
934 break;
935 }
936 case ALLCAP: {
937 memcpy(wspace,cw,(wl+1));
938 mkallsmall2(wspace, unicw, nc);
939 int oldns = ns;
940 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
941 for (int j = oldns; j < ns; j++)
942 mkallcap((*slst)[j]);
943 break;
944 }
945 }
946 }
948 // try dash suggestion (Afo-American -> Afro-American)
949 if (char * pos = strchr(cw, '-')) {
950 char * ppos = cw;
951 int nodashsug = 1;
952 char ** nlst = NULL;
953 int nn = 0;
954 int last = 0;
955 if (*slst) {
956 for (int j = 0; j < ns && nodashsug == 1; j++) {
957 if (strchr((*slst)[j], '-')) nodashsug = 0;
958 }
959 }
960 while (nodashsug && !last) {
961 if (*pos == '\0') last = 1; else *pos = '\0';
962 if (!spell(ppos)) {
963 nn = suggest(&nlst, ppos);
964 for (int j = nn - 1; j >= 0; j--) {
965 strncpy(wspace, cw, ppos - cw);
966 strcpy(wspace + (ppos - cw), nlst[j]);
967 if (!last) {
968 strcat(wspace, "-");
969 strcat(wspace, pos + 1);
970 }
971 ns = insert_sug(slst, wspace, ns);
972 free(nlst[j]);
973 }
974 if (nlst != NULL) free(nlst);
975 nodashsug = 0;
976 }
977 if (!last) {
978 *pos = '-';
979 ppos = pos + 1;
980 pos = strchr(ppos, '-');
981 }
982 if (!pos) pos = cw + strlen(cw);
983 }
984 }
986 // word reversing wrapper for complex prefixes
987 if (complexprefixes) {
988 for (int j = 0; j < ns; j++) {
989 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
990 }
991 }
993 // capitalize
994 if (capwords) for (int j=0; j < ns; j++) {
995 mkinitcap((*slst)[j]);
996 }
998 // expand suggestions with dot(s)
999 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1000 for (int j = 0; j < ns; j++) {
1001 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
1002 strcat((*slst)[j], word + strlen(word) - abbv);
1003 }
1004 }
1006 // remove bad capitalized and forbidden forms
1007 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
1008 switch (captype) {
1009 case INITCAP:
1010 case ALLCAP: {
1011 int l = 0;
1012 for (int j=0; j < ns; j++) {
1013 if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
1014 char s[MAXSWUTF8L];
1015 w_char w[MAXSWL];
1016 int len;
1017 if (utf8) {
1018 len = u8_u16(w, MAXSWL, (*slst)[j]);
1019 } else {
1020 strcpy(s, (*slst)[j]);
1021 len = strlen(s);
1022 }
1023 mkallsmall2(s, w, len);
1024 free((*slst)[j]);
1025 if (spell(s)) {
1026 (*slst)[l] = mystrdup(s);
1027 if ((*slst)[l]) l++;
1028 } else {
1029 mkinitcap2(s, w, len);
1030 if (spell(s)) {
1031 (*slst)[l] = mystrdup(s);
1032 if ((*slst)[l]) l++;
1033 }
1034 }
1035 } else {
1036 (*slst)[l] = (*slst)[j];
1037 l++;
1038 }
1039 }
1040 ns = l;
1041 }
1042 }
1043 }
1045 // remove duplications
1046 int l = 0;
1047 for (int j = 0; j < ns; j++) {
1048 (*slst)[l] = (*slst)[j];
1049 for (int k = 0; k < l; k++) {
1050 if (strcmp((*slst)[k], (*slst)[j]) == 0) {
1051 free((*slst)[j]);
1052 l--;
1053 break;
1054 }
1055 }
1056 l++;
1057 }
1058 ns = l;
1060 // output conversion
1061 rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1062 for (int j = 0; rl && j < ns; j++) {
1063 if (rl->conv((*slst)[j], wspace)) {
1064 free((*slst)[j]);
1065 (*slst)[j] = mystrdup(wspace);
1066 }
1067 }
1069 // if suggestions removed by nosuggest, onlyincompound parameters
1070 if (l == 0 && *slst) {
1071 free(*slst);
1072 *slst = NULL;
1073 }
1074 return l;
1075 }
1077 void Hunspell::free_list(char *** slst, int n) {
1078 freelist(slst, n);
1079 }
1081 char * Hunspell::get_dic_encoding()
1082 {
1083 return encoding;
1084 }
1086 #ifdef HUNSPELL_EXPERIMENTAL
1087 // XXX need UTF-8 support
1088 int Hunspell::suggest_auto(char*** slst, const char * word)
1089 {
1090 char cw[MAXWORDUTF8LEN];
1091 char wspace[MAXWORDUTF8LEN];
1092 if (!pSMgr || maxdic == 0) return 0;
1093 int wl = strlen(word);
1094 if (utf8) {
1095 if (wl >= MAXWORDUTF8LEN) return 0;
1096 } else {
1097 if (wl >= MAXWORDLEN) return 0;
1098 }
1099 int captype = 0;
1100 int abbv = 0;
1101 wl = cleanword(cw, word, &captype, &abbv);
1102 if (wl == 0) return 0;
1103 int ns = 0;
1104 *slst = NULL; // HU, nsug in pSMgr->suggest
1106 switch(captype) {
1107 case NOCAP: {
1108 ns = pSMgr->suggest_auto(slst, cw, ns);
1109 if (ns>0) break;
1110 break;
1111 }
1113 case INITCAP: {
1114 memcpy(wspace,cw,(wl+1));
1115 mkallsmall(wspace);
1116 ns = pSMgr->suggest_auto(slst, wspace, ns);
1117 for (int j=0; j < ns; j++)
1118 mkinitcap((*slst)[j]);
1119 ns = pSMgr->suggest_auto(slst, cw, ns);
1120 break;
1122 }
1124 case HUHINITCAP:
1125 case HUHCAP: {
1126 ns = pSMgr->suggest_auto(slst, cw, ns);
1127 if (ns == 0) {
1128 memcpy(wspace,cw,(wl+1));
1129 mkallsmall(wspace);
1130 ns = pSMgr->suggest_auto(slst, wspace, ns);
1131 }
1132 break;
1133 }
1135 case ALLCAP: {
1136 memcpy(wspace,cw,(wl+1));
1137 mkallsmall(wspace);
1138 ns = pSMgr->suggest_auto(slst, wspace, ns);
1140 mkinitcap(wspace);
1141 ns = pSMgr->suggest_auto(slst, wspace, ns);
1143 for (int j=0; j < ns; j++)
1144 mkallcap((*slst)[j]);
1145 break;
1146 }
1147 }
1149 // word reversing wrapper for complex prefixes
1150 if (complexprefixes) {
1151 for (int j = 0; j < ns; j++) {
1152 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
1153 }
1154 }
1156 // expand suggestions with dot(s)
1157 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1158 for (int j = 0; j < ns; j++) {
1159 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
1160 strcat((*slst)[j], word + strlen(word) - abbv);
1161 }
1162 }
1164 // LANG_hu section: replace '-' with ' ' in Hungarian
1165 if (langnum == LANG_hu) {
1166 for (int j=0; j < ns; j++) {
1167 char * pos = strchr((*slst)[j],'-');
1168 if (pos) {
1169 int info;
1170 char w[MAXWORDUTF8LEN];
1171 *pos = '\0';
1172 strcpy(w, (*slst)[j]);
1173 strcat(w, pos + 1);
1174 spell(w, &info, NULL);
1175 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1176 *pos = ' ';
1177 } else *pos = '-';
1178 }
1179 }
1180 }
1181 // END OF LANG_hu section
1182 return ns;
1183 }
1184 #endif
1186 int Hunspell::stem(char*** slst, char ** desc, int n)
1187 {
1188 char result[MAXLNLEN];
1189 char result2[MAXLNLEN];
1190 *slst = NULL;
1191 if (n == 0) return 0;
1192 *result2 = '\0';
1193 for (int i = 0; i < n; i++) {
1194 *result = '\0';
1195 // add compound word parts (except the last one)
1196 char * s = (char *) desc[i];
1197 char * part = strstr(s, MORPH_PART);
1198 if (part) {
1199 char * nextpart = strstr(part + 1, MORPH_PART);
1200 while (nextpart) {
1201 copy_field(result + strlen(result), part, MORPH_PART);
1202 part = nextpart;
1203 nextpart = strstr(part + 1, MORPH_PART);
1204 }
1205 s = part;
1206 }
1208 char **pl;
1209 char tok[MAXLNLEN];
1210 strcpy(tok, s);
1211 char * alt = strstr(tok, " | ");
1212 while (alt) {
1213 alt[1] = MSEP_ALT;
1214 alt = strstr(alt, " | ");
1215 }
1216 int pln = line_tok(tok, &pl, MSEP_ALT);
1217 for (int k = 0; k < pln; k++) {
1218 // add derivational suffixes
1219 if (strstr(pl[k], MORPH_DERI_SFX)) {
1220 // remove inflectional suffixes
1221 char * is = strstr(pl[k], MORPH_INFL_SFX);
1222 if (is) *is = '\0';
1223 char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
1224 if (sg) {
1225 char ** gen;
1226 int genl = line_tok(sg, &gen, MSEP_REC);
1227 free(sg);
1228 for (int j = 0; j < genl; j++) {
1229 sprintf(result2 + strlen(result2), "%c%s%s",
1230 MSEP_REC, result, gen[j]);
1231 }
1232 freelist(&gen, genl);
1233 }
1234 } else {
1235 sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
1236 if (strstr(pl[k], MORPH_SURF_PFX)) {
1237 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
1238 }
1239 copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
1240 }
1241 }
1242 freelist(&pl, pln);
1243 }
1244 int sln = line_tok(result2, slst, MSEP_REC);
1245 return uniqlist(*slst, sln);
1247 }
1249 int Hunspell::stem(char*** slst, const char * word)
1250 {
1251 char ** pl;
1252 int pln = analyze(&pl, word);
1253 int pln2 = stem(slst, pl, pln);
1254 freelist(&pl, pln);
1255 return pln2;
1256 }
1258 #ifdef HUNSPELL_EXPERIMENTAL
1259 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1260 {
1261 char cw[MAXWORDUTF8LEN];
1262 char wspace[MAXWORDUTF8LEN];
1263 if (! pSMgr || maxdic == 0) return 0;
1264 int wl = strlen(word);
1265 if (utf8) {
1266 if (wl >= MAXWORDUTF8LEN) return 0;
1267 } else {
1268 if (wl >= MAXWORDLEN) return 0;
1269 }
1270 int captype = 0;
1271 int abbv = 0;
1272 wl = cleanword(cw, word, &captype, &abbv);
1273 if (wl == 0) return 0;
1275 int ns = 0; // ns=0 = normalized input
1277 *slst = NULL; // HU, nsug in pSMgr->suggest
1279 switch(captype) {
1280 case HUHCAP:
1281 case NOCAP: {
1282 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1284 if ((abbv) && (ns == 0)) {
1285 memcpy(wspace,cw,wl);
1286 *(wspace+wl) = '.';
1287 *(wspace+wl+1) = '\0';
1288 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1289 }
1291 break;
1292 }
1294 case INITCAP: {
1296 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1298 if (ns == 0 || ((*slst)[0][0] == '#')) {
1299 memcpy(wspace,cw,(wl+1));
1300 mkallsmall(wspace);
1301 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1302 }
1304 break;
1306 }
1308 case ALLCAP: {
1309 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1310 if (ns != 0) break;
1312 memcpy(wspace,cw,(wl+1));
1313 mkallsmall(wspace);
1314 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1316 if (ns == 0) {
1317 mkinitcap(wspace);
1318 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1319 }
1320 break;
1321 }
1322 }
1324 return ns;
1325 }
1326 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1328 const char * Hunspell::get_wordchars()
1329 {
1330 return pAMgr->get_wordchars();
1331 }
1333 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1334 {
1335 return pAMgr->get_wordchars_utf16(len);
1336 }
1338 void Hunspell::mkinitcap(char * p)
1339 {
1340 if (!utf8) {
1341 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1342 } else {
1343 int len;
1344 w_char u[MAXWORDLEN];
1345 len = u8_u16(u, MAXWORDLEN, p);
1346 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1347 u[0].h = (unsigned char) (i >> 8);
1348 u[0].l = (unsigned char) (i & 0x00FF);
1349 u16_u8(p, MAXWORDUTF8LEN, u, len);
1350 }
1351 }
1353 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1354 {
1355 if (!utf8) {
1356 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1357 } else if (nc > 0) {
1358 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1359 u[0].h = (unsigned char) (i >> 8);
1360 u[0].l = (unsigned char) (i & 0x00FF);
1361 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1362 return strlen(p);
1363 }
1364 return nc;
1365 }
1367 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1368 {
1369 if (!utf8) {
1370 if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1371 } else if (nc > 0) {
1372 unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1373 u[0].h = (unsigned char) (i >> 8);
1374 u[0].l = (unsigned char) (i & 0x00FF);
1375 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1376 return strlen(p);
1377 }
1378 return nc;
1379 }
1381 int Hunspell::add(const char * word)
1382 {
1383 if (pHMgr[0]) return (pHMgr[0])->add(word);
1384 return 0;
1385 }
1387 int Hunspell::add_with_affix(const char * word, const char * example)
1388 {
1389 if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
1390 return 0;
1391 }
1393 int Hunspell::remove(const char * word)
1394 {
1395 if (pHMgr[0]) return (pHMgr[0])->remove(word);
1396 return 0;
1397 }
1399 const char * Hunspell::get_version()
1400 {
1401 return pAMgr->get_version();
1402 }
1404 struct cs_info * Hunspell::get_csconv()
1405 {
1406 return csconv;
1407 }
1409 void Hunspell::cat_result(char * result, char * st)
1410 {
1411 if (st) {
1412 if (*result) mystrcat(result, "\n", MAXLNLEN);
1413 mystrcat(result, st, MAXLNLEN);
1414 free(st);
1415 }
1416 }
1418 int Hunspell::analyze(char*** slst, const char * word)
1419 {
1420 char cw[MAXWORDUTF8LEN];
1421 char wspace[MAXWORDUTF8LEN];
1422 w_char unicw[MAXWORDLEN];
1423 int wl2 = 0;
1424 *slst = NULL;
1425 if (! pSMgr || maxdic == 0) return 0;
1426 int nc = strlen(word);
1427 if (utf8) {
1428 if (nc >= MAXWORDUTF8LEN) return 0;
1429 } else {
1430 if (nc >= MAXWORDLEN) return 0;
1431 }
1432 int captype = 0;
1433 int abbv = 0;
1434 int wl = 0;
1436 // input conversion
1437 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1438 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
1439 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
1441 if (wl == 0) {
1442 if (abbv) {
1443 for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1444 cw[wl] = '\0';
1445 abbv = 0;
1446 } else return 0;
1447 }
1449 char result[MAXLNLEN];
1450 char * st = NULL;
1452 *result = '\0';
1454 int n = 0;
1455 int n2 = 0;
1456 int n3 = 0;
1458 // test numbers
1459 // LANG_hu section: set dash information for suggestions
1460 if (langnum == LANG_hu) {
1461 while ((n < wl) &&
1462 (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1463 n++;
1464 if ((cw[n] == '.') || (cw[n] == ',')) {
1465 if (((n2 == 0) && (n > 3)) ||
1466 ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1467 n2++;
1468 n3 = n;
1469 }
1470 }
1472 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
1473 if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
1474 mystrcat(result, cw, MAXLNLEN);
1475 result[n - 1] = '\0';
1476 if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1477 else {
1478 char sign = cw[n];
1479 cw[n] = '\0';
1480 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1481 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
1482 cw[n] = sign;
1483 cat_result(result, pSMgr->suggest_morph(cw + n));
1484 }
1485 return line_tok(result, slst, MSEP_REC);
1486 }
1487 }
1488 // END OF LANG_hu section
1490 switch(captype) {
1491 case HUHCAP:
1492 case HUHINITCAP:
1493 case NOCAP: {
1494 cat_result(result, pSMgr->suggest_morph(cw));
1495 if (abbv) {
1496 memcpy(wspace,cw,wl);
1497 *(wspace+wl) = '.';
1498 *(wspace+wl+1) = '\0';
1499 cat_result(result, pSMgr->suggest_morph(wspace));
1500 }
1501 break;
1502 }
1503 case INITCAP: {
1504 wl = mkallsmall2(cw, unicw, nc);
1505 memcpy(wspace,cw,(wl+1));
1506 wl2 = mkinitcap2(cw, unicw, nc);
1507 cat_result(result, pSMgr->suggest_morph(wspace));
1508 cat_result(result, pSMgr->suggest_morph(cw));
1509 if (abbv) {
1510 *(wspace+wl) = '.';
1511 *(wspace+wl+1) = '\0';
1512 cat_result(result, pSMgr->suggest_morph(wspace));
1514 memcpy(wspace, cw, wl2);
1515 *(wspace+wl2) = '.';
1516 *(wspace+wl2+1) = '\0';
1518 cat_result(result, pSMgr->suggest_morph(wspace));
1519 }
1520 break;
1521 }
1522 case ALLCAP: {
1523 cat_result(result, pSMgr->suggest_morph(cw));
1524 if (abbv) {
1525 memcpy(wspace,cw,wl);
1526 *(wspace+wl) = '.';
1527 *(wspace+wl+1) = '\0';
1528 cat_result(result, pSMgr->suggest_morph(cw));
1529 }
1530 wl = mkallsmall2(cw, unicw, nc);
1531 memcpy(wspace,cw,(wl+1));
1532 wl2 = mkinitcap2(cw, unicw, nc);
1534 cat_result(result, pSMgr->suggest_morph(wspace));
1535 cat_result(result, pSMgr->suggest_morph(cw));
1536 if (abbv) {
1537 *(wspace+wl) = '.';
1538 *(wspace+wl+1) = '\0';
1539 cat_result(result, pSMgr->suggest_morph(wspace));
1541 memcpy(wspace, cw, wl2);
1542 *(wspace+wl2) = '.';
1543 *(wspace+wl2+1) = '\0';
1545 cat_result(result, pSMgr->suggest_morph(wspace));
1546 }
1547 break;
1548 }
1549 }
1551 if (*result) {
1552 // word reversing wrapper for complex prefixes
1553 if (complexprefixes) {
1554 if (utf8) reverseword_utf(result); else reverseword(result);
1555 }
1556 return line_tok(result, slst, MSEP_REC);
1557 }
1559 // compound word with dash (HU) I18n
1560 char * dash = NULL;
1561 int nresult = 0;
1562 // LANG_hu section: set dash information for suggestions
1563 if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1564 if ((langnum == LANG_hu) && dash) {
1565 *dash='\0';
1566 // examine 2 sides of the dash
1567 if (dash[1] == '\0') { // base word ending with dash
1568 if (spell(cw)) {
1569 char * p = pSMgr->suggest_morph(cw);
1570 if (p) {
1571 int ret = line_tok(p, slst, MSEP_REC);
1572 free(p);
1573 return ret;
1574 }
1576 }
1577 } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1578 if (spell(cw) && (spell("-e"))) {
1579 st = pSMgr->suggest_morph(cw);
1580 if (st) {
1581 mystrcat(result, st, MAXLNLEN);
1582 free(st);
1583 }
1584 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1585 st = pSMgr->suggest_morph("-e");
1586 if (st) {
1587 mystrcat(result, st, MAXLNLEN);
1588 free(st);
1589 }
1590 return line_tok(result, slst, MSEP_REC);
1591 }
1592 } else {
1593 // first word ending with dash: word- XXX ???
1594 char r2 = *(dash + 1);
1595 dash[0]='-';
1596 dash[1]='\0';
1597 nresult = spell(cw);
1598 dash[1] = r2;
1599 dash[0]='\0';
1600 if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1601 ((dash[1] > '0') && (dash[1] < '9')))) {
1602 st = pSMgr->suggest_morph(cw);
1603 if (st) {
1604 mystrcat(result, st, MAXLNLEN);
1605 free(st);
1606 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1607 }
1608 st = pSMgr->suggest_morph(dash+1);
1609 if (st) {
1610 mystrcat(result, st, MAXLNLEN);
1611 free(st);
1612 }
1613 return line_tok(result, slst, MSEP_REC);
1614 }
1615 }
1616 // affixed number in correct word
1617 if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1618 (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1619 *dash='-';
1620 n = 1;
1621 if (*(dash - n) == '.') n++;
1622 // search first not a number character to left from dash
1623 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1624 n++;
1625 }
1626 if ((dash - n) < cw) n--;
1627 // numbers: valami1000000-hoz
1628 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1629 // 56-hoz, 6-hoz
1630 for(; n >= 1; n--) {
1631 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1632 mystrcat(result, cw, MAXLNLEN);
1633 result[dash - cw - n] = '\0';
1634 st = pSMgr->suggest_morph(dash - n);
1635 if (st) {
1636 mystrcat(result, st, MAXLNLEN);
1637 free(st);
1638 }
1639 return line_tok(result, slst, MSEP_REC);
1640 }
1641 }
1642 }
1643 }
1644 return 0;
1645 }
1647 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
1648 {
1649 *slst = NULL;
1650 if (!pSMgr || !pln) return 0;
1651 char **pl2;
1652 int pl2n = analyze(&pl2, word);
1653 int captype = 0;
1654 int abbv = 0;
1655 char cw[MAXWORDUTF8LEN];
1656 cleanword(cw, word, &captype, &abbv);
1657 char result[MAXLNLEN];
1658 *result = '\0';
1660 for (int i = 0; i < pln; i++) {
1661 cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
1662 }
1663 freelist(&pl2, pl2n);
1665 if (*result) {
1666 // allcap
1667 if (captype == ALLCAP) mkallcap(result);
1669 // line split
1670 int linenum = line_tok(result, slst, MSEP_REC);
1672 // capitalize
1673 if (captype == INITCAP || captype == HUHINITCAP) {
1674 for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
1675 }
1677 // temporary filtering of prefix related errors (eg.
1678 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1680 int r = 0;
1681 for (int j=0; j < linenum; j++) {
1682 if (!spell((*slst)[j])) {
1683 free((*slst)[j]);
1684 (*slst)[j] = NULL;
1685 } else {
1686 if (r < j) (*slst)[r] = (*slst)[j];
1687 r++;
1688 }
1689 }
1690 if (r > 0) return r;
1691 free(*slst);
1692 *slst = NULL;
1693 }
1694 return 0;
1695 }
1697 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
1698 {
1699 char **pl;
1700 int pln = analyze(&pl, pattern);
1701 int n = generate(slst, word, pl, pln);
1702 freelist(&pl, pln);
1703 return uniqlist(*slst, n);
1704 }
1706 // minimal XML parser functions
1707 int Hunspell::get_xml_par(char * dest, const char * par, int max)
1708 {
1709 char * d = dest;
1710 if (!par) return 0;
1711 char end = *par;
1712 char * dmax = dest + max;
1713 if (end == '>') end = '<';
1714 else if (end != '\'' && end != '"') return 0; // bad XML
1715 for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
1716 *d = '\0';
1717 mystrrep(dest, "<", "<");
1718 mystrrep(dest, "&", "&");
1719 return (int)(d - dest);
1720 }
1722 int Hunspell::get_langnum() const
1723 {
1724 return langnum;
1725 }
1727 // return the beginning of the element (attr == NULL) or the attribute
1728 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
1729 {
1730 const char * end = strchr(s, '>');
1731 const char * p = s;
1732 if (attr == NULL) return end;
1733 do {
1734 p = strstr(p, attr);
1735 if (!p || p >= end) return 0;
1736 } while (*(p-1) != ' ' && *(p-1) != '\n');
1737 return p + strlen(attr);
1738 }
1740 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
1741 char cw[MAXWORDUTF8LEN];
1742 if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
1743 strcmp(cw, value) == 0) return 1;
1744 return 0;
1745 }
1747 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
1748 int n = 0;
1749 char * p;
1750 if (!list) return 0;
1751 for (p = list; (p = strstr(p, tag)); p++) n++;
1752 if (n == 0) return 0;
1753 *slst = (char **) malloc(sizeof(char *) * n);
1754 if (!*slst) return 0;
1755 for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
1756 int l = strlen(p);
1757 (*slst)[n] = (char *) malloc(l + 1);
1758 if (!(*slst)[n]) return n;
1759 if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
1760 free((*slst)[n]);
1761 break;
1762 }
1763 }
1764 return n;
1765 }
1767 int Hunspell::spellml(char*** slst, const char * word)
1768 {
1769 char *q, *q2;
1770 char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
1771 q = (char *) strstr(word, "<query");
1772 if (!q) return 0; // bad XML input
1773 q2 = strchr(q, '>');
1774 if (!q2) return 0; // bad XML input
1775 q2 = strstr(q2, "<word");
1776 if (!q2) return 0; // bad XML input
1777 if (check_xml_par(q, "type=", "analyze")) {
1778 int n = 0, s = 0;
1779 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
1780 if (n == 0) return 0;
1781 // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1782 for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
1783 char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&
1784 if (!r) return 0;
1785 strcpy(r, "<code>");
1786 for (int i = 0; i < n; i++) {
1787 int l = strlen(r);
1788 strcpy(r + l, "<a>");
1789 strcpy(r + l + 3, (*slst)[i]);
1790 mystrrep(r + l + 3, "\t", " ");
1791 mystrrep(r + l + 3, "<", "<");
1792 mystrrep(r + l + 3, "&", "&");
1793 strcat(r, "</a>");
1794 free((*slst)[i]);
1795 }
1796 strcat(r, "</code>");
1797 (*slst)[0] = r;
1798 return 1;
1799 } else if (check_xml_par(q, "type=", "stem")) {
1800 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
1801 } else if (check_xml_par(q, "type=", "generate")) {
1802 int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
1803 if (n == 0) return 0;
1804 char * q3 = strstr(q2 + 1, "<word");
1805 if (q3) {
1806 if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
1807 return generate(slst, cw, cw2);
1808 }
1809 } else {
1810 if ((q2 = strstr(q2 + 1, "<code"))) {
1811 char ** slst2;
1812 if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
1813 int n2 = generate(slst, cw, slst2, n);
1814 freelist(&slst2, n);
1815 return uniqlist(*slst, n2);
1816 }
1817 freelist(&slst2, n);
1818 }
1819 }
1820 }
1821 return 0;
1822 }
1825 #ifdef HUNSPELL_EXPERIMENTAL
1826 // XXX need UTF-8 support
1827 char * Hunspell::morph_with_correction(const char * word)
1828 {
1829 char cw[MAXWORDUTF8LEN];
1830 char wspace[MAXWORDUTF8LEN];
1831 if (! pSMgr || maxdic == 0) return NULL;
1832 int wl = strlen(word);
1833 if (utf8) {
1834 if (wl >= MAXWORDUTF8LEN) return NULL;
1835 } else {
1836 if (wl >= MAXWORDLEN) return NULL;
1837 }
1838 int captype = 0;
1839 int abbv = 0;
1840 wl = cleanword(cw, word, &captype, &abbv);
1841 if (wl == 0) return NULL;
1843 char result[MAXLNLEN];
1844 char * st = NULL;
1846 *result = '\0';
1849 switch(captype) {
1850 case NOCAP: {
1851 st = pSMgr->suggest_morph_for_spelling_error(cw);
1852 if (st) {
1853 mystrcat(result, st, MAXLNLEN);
1854 free(st);
1855 }
1856 if (abbv) {
1857 memcpy(wspace,cw,wl);
1858 *(wspace+wl) = '.';
1859 *(wspace+wl+1) = '\0';
1860 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1861 if (st) {
1862 if (*result) mystrcat(result, "\n", MAXLNLEN);
1863 mystrcat(result, st, MAXLNLEN);
1864 free(st);
1865 }
1866 }
1867 break;
1868 }
1869 case INITCAP: {
1870 memcpy(wspace,cw,(wl+1));
1871 mkallsmall(wspace);
1872 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1873 if (st) {
1874 mystrcat(result, st, MAXLNLEN);
1875 free(st);
1876 }
1877 st = pSMgr->suggest_morph_for_spelling_error(cw);
1878 if (st) {
1879 if (*result) mystrcat(result, "\n", MAXLNLEN);
1880 mystrcat(result, st, MAXLNLEN);
1881 free(st);
1882 }
1883 if (abbv) {
1884 memcpy(wspace,cw,wl);
1885 *(wspace+wl) = '.';
1886 *(wspace+wl+1) = '\0';
1887 mkallsmall(wspace);
1888 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1889 if (st) {
1890 if (*result) mystrcat(result, "\n", MAXLNLEN);
1891 mystrcat(result, st, MAXLNLEN);
1892 free(st);
1893 }
1894 mkinitcap(wspace);
1895 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1896 if (st) {
1897 if (*result) mystrcat(result, "\n", MAXLNLEN);
1898 mystrcat(result, st, MAXLNLEN);
1899 free(st);
1900 }
1901 }
1902 break;
1903 }
1904 case HUHCAP: {
1905 st = pSMgr->suggest_morph_for_spelling_error(cw);
1906 if (st) {
1907 mystrcat(result, st, MAXLNLEN);
1908 free(st);
1909 }
1910 memcpy(wspace,cw,(wl+1));
1911 mkallsmall(wspace);
1912 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1913 if (st) {
1914 if (*result) mystrcat(result, "\n", MAXLNLEN);
1915 mystrcat(result, st, MAXLNLEN);
1916 free(st);
1917 }
1918 break;
1919 }
1920 case ALLCAP: {
1921 memcpy(wspace,cw,(wl+1));
1922 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1923 if (st) {
1924 mystrcat(result, st, MAXLNLEN);
1925 free(st);
1926 }
1927 mkallsmall(wspace);
1928 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1929 if (st) {
1930 if (*result) mystrcat(result, "\n", MAXLNLEN);
1931 mystrcat(result, st, MAXLNLEN);
1932 free(st);
1933 }
1934 mkinitcap(wspace);
1935 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1936 if (st) {
1937 if (*result) mystrcat(result, "\n", MAXLNLEN);
1938 mystrcat(result, st, MAXLNLEN);
1939 free(st);
1940 }
1941 if (abbv) {
1942 memcpy(wspace,cw,(wl+1));
1943 *(wspace+wl) = '.';
1944 *(wspace+wl+1) = '\0';
1945 if (*result) mystrcat(result, "\n", MAXLNLEN);
1946 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1947 if (st) {
1948 mystrcat(result, st, MAXLNLEN);
1949 free(st);
1950 }
1951 mkallsmall(wspace);
1952 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1953 if (st) {
1954 if (*result) mystrcat(result, "\n", MAXLNLEN);
1955 mystrcat(result, st, MAXLNLEN);
1956 free(st);
1957 }
1958 mkinitcap(wspace);
1959 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1960 if (st) {
1961 if (*result) mystrcat(result, "\n", MAXLNLEN);
1962 mystrcat(result, st, MAXLNLEN);
1963 free(st);
1964 }
1965 }
1966 break;
1967 }
1968 }
1970 if (*result) return mystrdup(result);
1971 return NULL;
1972 }
1974 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1976 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1977 {
1978 return (Hunhandle*)(new Hunspell(affpath, dpath));
1979 }
1981 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1982 const char * key)
1983 {
1984 return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1985 }
1987 void Hunspell_destroy(Hunhandle *pHunspell)
1988 {
1989 delete (Hunspell*)(pHunspell);
1990 }
1992 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1993 {
1994 return ((Hunspell*)pHunspell)->spell(word);
1995 }
1997 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1998 {
1999 return ((Hunspell*)pHunspell)->get_dic_encoding();
2000 }
2002 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
2003 {
2004 return ((Hunspell*)pHunspell)->suggest(slst, word);
2005 }
2007 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
2008 {
2009 return ((Hunspell*)pHunspell)->analyze(slst, word);
2010 }
2012 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
2013 {
2014 return ((Hunspell*)pHunspell)->stem(slst, word);
2015 }
2017 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
2018 {
2019 return ((Hunspell*)pHunspell)->stem(slst, desc, n);
2020 }
2022 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
2023 const char * word2)
2024 {
2025 return ((Hunspell*)pHunspell)->generate(slst, word, word2);
2026 }
2028 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
2029 char** desc, int n)
2030 {
2031 return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
2032 }
2034 /* functions for run-time modification of the dictionary */
2036 /* add word to the run-time dictionary */
2038 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
2039 return ((Hunspell*)pHunspell)->add(word);
2040 }
2042 /* add word to the run-time dictionary with affix flags of
2043 * the example (a dictionary word): Hunspell will recognize
2044 * affixed forms of the new word, too.
2045 */
2047 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
2048 const char * example) {
2049 return ((Hunspell*)pHunspell)->add_with_affix(word, example);
2050 }
2052 /* remove word from the run-time dictionary */
2054 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
2055 return ((Hunspell*)pHunspell)->remove(word);
2056 }
2058 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
2059 freelist(slst, n);
2060 }