extensions/spellcheck/hunspell/src/hunspell.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /******* BEGIN LICENSE BLOCK *******
michael@0 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
michael@0 3 *
michael@0 4 * The contents of this file are subject to the Mozilla Public License Version
michael@0 5 * 1.1 (the "License"); you may not use this file except in compliance with
michael@0 6 * the License. You may obtain a copy of the License at
michael@0 7 * http://www.mozilla.org/MPL/
michael@0 8 *
michael@0 9 * Software distributed under the License is distributed on an "AS IS" basis,
michael@0 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
michael@0 11 * for the specific language governing rights and limitations under the
michael@0 12 * License.
michael@0 13 *
michael@0 14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
michael@0 15 * and László Németh (Hunspell). Portions created by the Initial Developers
michael@0 16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
michael@0 17 *
michael@0 18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
michael@0 19 * David Einstein (deinst@world.std.com)
michael@0 20 * László Németh (nemethl@gyorsposta.hu)
michael@0 21 * Caolan McNamara (caolanm@redhat.com)
michael@0 22 * Davide Prina
michael@0 23 * Giuseppe Modugno
michael@0 24 * Gianluca Turconi
michael@0 25 * Simon Brouwer
michael@0 26 * Noll Janos
michael@0 27 * Biro Arpad
michael@0 28 * Goldman Eleonora
michael@0 29 * Sarlos Tamas
michael@0 30 * Bencsath Boldizsar
michael@0 31 * Halacsy Peter
michael@0 32 * Dvornik Laszlo
michael@0 33 * Gefferth Andras
michael@0 34 * Nagy Viktor
michael@0 35 * Varga Daniel
michael@0 36 * Chris Halls
michael@0 37 * Rene Engelhard
michael@0 38 * Bram Moolenaar
michael@0 39 * Dafydd Jones
michael@0 40 * Harri Pitkanen
michael@0 41 * Andras Timar
michael@0 42 * Tor Lillqvist
michael@0 43 *
michael@0 44 * Alternatively, the contents of this file may be used under the terms of
michael@0 45 * either the GNU General Public License Version 2 or later (the "GPL"), or
michael@0 46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
michael@0 47 * in which case the provisions of the GPL or the LGPL are applicable instead
michael@0 48 * of those above. If you wish to allow use of your version of this file only
michael@0 49 * under the terms of either the GPL or the LGPL, and not to allow others to
michael@0 50 * use your version of this file under the terms of the MPL, indicate your
michael@0 51 * decision by deleting the provisions above and replace them with the notice
michael@0 52 * and other provisions required by the GPL or the LGPL. If you do not delete
michael@0 53 * the provisions above, a recipient may use your version of this file under
michael@0 54 * the terms of any one of the MPL, the GPL or the LGPL.
michael@0 55 *
michael@0 56 ******* END LICENSE BLOCK *******/
michael@0 57
michael@0 58 #include <stdlib.h>
michael@0 59 #include <string.h>
michael@0 60 #include <stdio.h>
michael@0 61
michael@0 62 #include "hunspell.hxx"
michael@0 63 #include "hunspell.h"
michael@0 64 #ifndef MOZILLA_CLIENT
michael@0 65 # include "config.h"
michael@0 66 #endif
michael@0 67 #include "csutil.hxx"
michael@0 68
michael@0 69 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
michael@0 70 {
michael@0 71 encoding = NULL;
michael@0 72 csconv = NULL;
michael@0 73 utf8 = 0;
michael@0 74 complexprefixes = 0;
michael@0 75 affixpath = mystrdup(affpath);
michael@0 76 maxdic = 0;
michael@0 77
michael@0 78 /* first set up the hash manager */
michael@0 79 pHMgr[0] = new HashMgr(dpath, affpath, key);
michael@0 80 if (pHMgr[0]) maxdic = 1;
michael@0 81
michael@0 82 /* next set up the affix manager */
michael@0 83 /* it needs access to the hash manager lookup methods */
michael@0 84 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
michael@0 85
michael@0 86 /* get the preferred try string and the dictionary */
michael@0 87 /* encoding from the Affix Manager for that dictionary */
michael@0 88 char * try_string = pAMgr->get_try_string();
michael@0 89 encoding = pAMgr->get_encoding();
michael@0 90 langnum = pAMgr->get_langnum();
michael@0 91 utf8 = pAMgr->get_utf8();
michael@0 92 if (!utf8)
michael@0 93 csconv = get_current_cs(encoding);
michael@0 94 complexprefixes = pAMgr->get_complexprefixes();
michael@0 95 wordbreak = pAMgr->get_breaktable();
michael@0 96
michael@0 97 /* and finally set up the suggestion manager */
michael@0 98 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
michael@0 99 if (try_string) free(try_string);
michael@0 100 }
michael@0 101
michael@0 102 Hunspell::~Hunspell()
michael@0 103 {
michael@0 104 if (pSMgr) delete pSMgr;
michael@0 105 if (pAMgr) delete pAMgr;
michael@0 106 for (int i = 0; i < maxdic; i++) delete pHMgr[i];
michael@0 107 maxdic = 0;
michael@0 108 pSMgr = NULL;
michael@0 109 pAMgr = NULL;
michael@0 110 #ifdef MOZILLA_CLIENT
michael@0 111 delete [] csconv;
michael@0 112 #endif
michael@0 113 csconv= NULL;
michael@0 114 if (encoding) free(encoding);
michael@0 115 encoding = NULL;
michael@0 116 if (affixpath) free(affixpath);
michael@0 117 affixpath = NULL;
michael@0 118 }
michael@0 119
michael@0 120 // load extra dictionaries
michael@0 121 int Hunspell::add_dic(const char * dpath, const char * key) {
michael@0 122 if (maxdic == MAXDIC || !affixpath) return 1;
michael@0 123 pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
michael@0 124 if (pHMgr[maxdic]) maxdic++; else return 1;
michael@0 125 return 0;
michael@0 126 }
michael@0 127
michael@0 128 // make a copy of src at destination while removing all leading
michael@0 129 // blanks and removing any trailing periods after recording
michael@0 130 // their presence with the abbreviation flag
michael@0 131 // also since already going through character by character,
michael@0 132 // set the capitalization type
michael@0 133 // return the length of the "cleaned" (and UTF-8 encoded) word
michael@0 134
michael@0 135 int Hunspell::cleanword2(char * dest, const char * src,
michael@0 136 w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
michael@0 137 {
michael@0 138 unsigned char * p = (unsigned char *) dest;
michael@0 139 const unsigned char * q = (const unsigned char * ) src;
michael@0 140
michael@0 141 // first skip over any leading blanks
michael@0 142 while ((*q != '\0') && (*q == ' ')) q++;
michael@0 143
michael@0 144 // now strip off any trailing periods (recording their presence)
michael@0 145 *pabbrev = 0;
michael@0 146 int nl = strlen((const char *)q);
michael@0 147 while ((nl > 0) && (*(q+nl-1)=='.')) {
michael@0 148 nl--;
michael@0 149 (*pabbrev)++;
michael@0 150 }
michael@0 151
michael@0 152 // if no characters are left it can't be capitalized
michael@0 153 if (nl <= 0) {
michael@0 154 *pcaptype = NOCAP;
michael@0 155 *p = '\0';
michael@0 156 return 0;
michael@0 157 }
michael@0 158
michael@0 159 strncpy(dest, (char *) q, nl);
michael@0 160 *(dest + nl) = '\0';
michael@0 161 nl = strlen(dest);
michael@0 162 if (utf8) {
michael@0 163 *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
michael@0 164 // don't check too long words
michael@0 165 if (*nc >= MAXWORDLEN) return 0;
michael@0 166 if (*nc == -1) { // big Unicode character (non BMP area)
michael@0 167 *pcaptype = NOCAP;
michael@0 168 return nl;
michael@0 169 }
michael@0 170 *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
michael@0 171 } else {
michael@0 172 *pcaptype = get_captype(dest, nl, csconv);
michael@0 173 *nc = nl;
michael@0 174 }
michael@0 175 return nl;
michael@0 176 }
michael@0 177
michael@0 178 int Hunspell::cleanword(char * dest, const char * src,
michael@0 179 int * pcaptype, int * pabbrev)
michael@0 180 {
michael@0 181 unsigned char * p = (unsigned char *) dest;
michael@0 182 const unsigned char * q = (const unsigned char * ) src;
michael@0 183 int firstcap = 0;
michael@0 184
michael@0 185 // first skip over any leading blanks
michael@0 186 while ((*q != '\0') && (*q == ' ')) q++;
michael@0 187
michael@0 188 // now strip off any trailing periods (recording their presence)
michael@0 189 *pabbrev = 0;
michael@0 190 int nl = strlen((const char *)q);
michael@0 191 while ((nl > 0) && (*(q+nl-1)=='.')) {
michael@0 192 nl--;
michael@0 193 (*pabbrev)++;
michael@0 194 }
michael@0 195
michael@0 196 // if no characters are left it can't be capitalized
michael@0 197 if (nl <= 0) {
michael@0 198 *pcaptype = NOCAP;
michael@0 199 *p = '\0';
michael@0 200 return 0;
michael@0 201 }
michael@0 202
michael@0 203 // now determine the capitalization type of the first nl letters
michael@0 204 int ncap = 0;
michael@0 205 int nneutral = 0;
michael@0 206 int nc = 0;
michael@0 207
michael@0 208 if (!utf8) {
michael@0 209 while (nl > 0) {
michael@0 210 nc++;
michael@0 211 if (csconv[(*q)].ccase) ncap++;
michael@0 212 if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
michael@0 213 *p++ = *q++;
michael@0 214 nl--;
michael@0 215 }
michael@0 216 // remember to terminate the destination string
michael@0 217 *p = '\0';
michael@0 218 firstcap = csconv[(unsigned char)(*dest)].ccase;
michael@0 219 } else {
michael@0 220 unsigned short idx;
michael@0 221 w_char t[MAXWORDLEN];
michael@0 222 nc = u8_u16(t, MAXWORDLEN, src);
michael@0 223 for (int i = 0; i < nc; i++) {
michael@0 224 idx = (t[i].h << 8) + t[i].l;
michael@0 225 unsigned short low = unicodetolower(idx, langnum);
michael@0 226 if (idx != low) ncap++;
michael@0 227 if (unicodetoupper(idx, langnum) == low) nneutral++;
michael@0 228 }
michael@0 229 u16_u8(dest, MAXWORDUTF8LEN, t, nc);
michael@0 230 if (ncap) {
michael@0 231 idx = (t[0].h << 8) + t[0].l;
michael@0 232 firstcap = (idx != unicodetolower(idx, langnum));
michael@0 233 }
michael@0 234 }
michael@0 235
michael@0 236 // now finally set the captype
michael@0 237 if (ncap == 0) {
michael@0 238 *pcaptype = NOCAP;
michael@0 239 } else if ((ncap == 1) && firstcap) {
michael@0 240 *pcaptype = INITCAP;
michael@0 241 } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
michael@0 242 *pcaptype = ALLCAP;
michael@0 243 } else if ((ncap > 1) && firstcap) {
michael@0 244 *pcaptype = HUHINITCAP;
michael@0 245 } else {
michael@0 246 *pcaptype = HUHCAP;
michael@0 247 }
michael@0 248 return strlen(dest);
michael@0 249 }
michael@0 250
michael@0 251 void Hunspell::mkallcap(char * p)
michael@0 252 {
michael@0 253 if (utf8) {
michael@0 254 w_char u[MAXWORDLEN];
michael@0 255 int nc = u8_u16(u, MAXWORDLEN, p);
michael@0 256 unsigned short idx;
michael@0 257 for (int i = 0; i < nc; i++) {
michael@0 258 idx = (u[i].h << 8) + u[i].l;
michael@0 259 if (idx != unicodetoupper(idx, langnum)) {
michael@0 260 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
michael@0 261 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
michael@0 262 }
michael@0 263 }
michael@0 264 u16_u8(p, MAXWORDUTF8LEN, u, nc);
michael@0 265 } else {
michael@0 266 while (*p != '\0') {
michael@0 267 *p = csconv[((unsigned char) *p)].cupper;
michael@0 268 p++;
michael@0 269 }
michael@0 270 }
michael@0 271 }
michael@0 272
michael@0 273 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
michael@0 274 {
michael@0 275 if (utf8) {
michael@0 276 unsigned short idx;
michael@0 277 for (int i = 0; i < nc; i++) {
michael@0 278 idx = (u[i].h << 8) + u[i].l;
michael@0 279 unsigned short up = unicodetoupper(idx, langnum);
michael@0 280 if (idx != up) {
michael@0 281 u[i].h = (unsigned char) (up >> 8);
michael@0 282 u[i].l = (unsigned char) (up & 0x00FF);
michael@0 283 }
michael@0 284 }
michael@0 285 u16_u8(p, MAXWORDUTF8LEN, u, nc);
michael@0 286 return strlen(p);
michael@0 287 } else {
michael@0 288 while (*p != '\0') {
michael@0 289 *p = csconv[((unsigned char) *p)].cupper;
michael@0 290 p++;
michael@0 291 }
michael@0 292 }
michael@0 293 return nc;
michael@0 294 }
michael@0 295
michael@0 296
michael@0 297 void Hunspell::mkallsmall(char * p)
michael@0 298 {
michael@0 299 while (*p != '\0') {
michael@0 300 *p = csconv[((unsigned char) *p)].clower;
michael@0 301 p++;
michael@0 302 }
michael@0 303 }
michael@0 304
michael@0 305 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
michael@0 306 {
michael@0 307 if (utf8) {
michael@0 308 unsigned short idx;
michael@0 309 for (int i = 0; i < nc; i++) {
michael@0 310 idx = (u[i].h << 8) + u[i].l;
michael@0 311 unsigned short low = unicodetolower(idx, langnum);
michael@0 312 if (idx != low) {
michael@0 313 u[i].h = (unsigned char) (low >> 8);
michael@0 314 u[i].l = (unsigned char) (low & 0x00FF);
michael@0 315 }
michael@0 316 }
michael@0 317 u16_u8(p, MAXWORDUTF8LEN, u, nc);
michael@0 318 return strlen(p);
michael@0 319 } else {
michael@0 320 while (*p != '\0') {
michael@0 321 *p = csconv[((unsigned char) *p)].clower;
michael@0 322 p++;
michael@0 323 }
michael@0 324 }
michael@0 325 return nc;
michael@0 326 }
michael@0 327
michael@0 328 // convert UTF-8 sharp S codes to latin 1
michael@0 329 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
michael@0 330 char * p = dest;
michael@0 331 *p = *source;
michael@0 332 for (p++, source++; *(source - 1); p++, source++) {
michael@0 333 *p = *source;
michael@0 334 if (*source == '\x9F') *--p = '\xDF';
michael@0 335 }
michael@0 336 return dest;
michael@0 337 }
michael@0 338
michael@0 339 // recursive search for right ss - sharp s permutations
michael@0 340 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
michael@0 341 int repnum, char * tmp, int * info, char **root) {
michael@0 342 pos = strstr(pos, "ss");
michael@0 343 if (pos && (n < MAXSHARPS)) {
michael@0 344 *pos = '\xC3';
michael@0 345 *(pos + 1) = '\x9F';
michael@0 346 hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
michael@0 347 if (h) return h;
michael@0 348 *pos = 's';
michael@0 349 *(pos + 1) = 's';
michael@0 350 h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
michael@0 351 if (h) return h;
michael@0 352 } else if (repnum > 0) {
michael@0 353 if (utf8) return checkword(base, info, root);
michael@0 354 return checkword(sharps_u8_l1(tmp, base), info, root);
michael@0 355 }
michael@0 356 return NULL;
michael@0 357 }
michael@0 358
michael@0 359 int Hunspell::is_keepcase(const hentry * rv) {
michael@0 360 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
michael@0 361 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
michael@0 362 }
michael@0 363
michael@0 364 /* insert a word to the beginning of the suggestion array and return ns */
michael@0 365 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
michael@0 366 char * dup = mystrdup(word);
michael@0 367 if (!dup) return ns;
michael@0 368 if (ns == MAXSUGGESTION) {
michael@0 369 ns--;
michael@0 370 free((*slst)[ns]);
michael@0 371 }
michael@0 372 for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
michael@0 373 (*slst)[0] = dup;
michael@0 374 return ns + 1;
michael@0 375 }
michael@0 376
// Check the spelling of a single word.
//
// Parameters:
//   word - NUL-terminated input word
//   info - optional output; when non-NULL it is reset to 0 and then
//          accumulates SPELL_* flags.  The reset only happens after the
//          early returns at the top of the function.
//   root - optional output; when non-NULL it is set to NULL and is then
//          passed on to checkword().  The reset only happens after the
//          first early returns.
//
// Returns 0 when the word is rejected, HUNSPELL_OK or HUNSPELL_OK_WARN when
// a dictionary entry was found, and 1 for the cases accepted without a
// lookup (the SPELL_XML marker, an empty cleaned word, no loaded
// dictionary, plain numbers) and for words accepted by the break rules.
michael@0 377 int Hunspell::spell(const char * word, int * info, char ** root)
michael@0 378 {
michael@0 379 struct hentry * rv=NULL;
michael@0 380 // the buffers need the larger UTF-8 size: for example, mkallsmall converts
michael@0 381 // the Turkish capital letter I to a 2-byte UTF-8 character (dotless i).
michael@0 382 char cw[MAXWORDUTF8LEN];
michael@0 383 char wspace[MAXWORDUTF8LEN];
michael@0 384 w_char unicw[MAXWORDLEN];
michael@0 385 // Hunspell supports XML input of the simplified API (see manual)
michael@0 386 if (strcmp(word, SPELL_XML) == 0) return 1;
michael@0 387 int nc = strlen(word);
michael@0 388 int wl2 = 0;
// input that would not fit into the fixed-size buffers is rejected
michael@0 389 if (utf8) {
michael@0 390 if (nc >= MAXWORDUTF8LEN) return 0;
michael@0 391 } else {
michael@0 392 if (nc >= MAXWORDLEN) return 0;
michael@0 393 }
michael@0 394 int captype = 0;
michael@0 395 int abbv = 0;
michael@0 396 int wl = 0;
michael@0 397
michael@0 398 // input conversion
// cleanword2() fills cw (cleaned word), unicw/nc (UTF-16 form and character
// count), captype and abbv (number of stripped trailing periods)
michael@0 399 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
michael@0 400 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
michael@0 401 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
michael@0 402
michael@0 403 int info2 = 0;
// nothing left to check, or no dictionary loaded: accept the word
michael@0 404 if (wl == 0 || maxdic == 0) return 1;
michael@0 405 if (root) *root = NULL;
michael@0 406
michael@0 407 // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
michael@0 408 enum { NBEGIN, NNUM, NSEP };
michael@0 409 int nstate = NBEGIN;
michael@0 410 int i;
michael@0 411
michael@0 412 for (i = 0; (i < wl); i++) {
michael@0 413 if ((cw[i] <= '9') && (cw[i] >= '0')) {
michael@0 414 nstate = NNUM;
michael@0 415 } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
michael@0 416 if ((nstate == NSEP) || (i == 0)) break;
michael@0 417 nstate = NSEP;
michael@0 418 } else break;
michael@0 419 }
// the whole word was scanned and it ends with a digit: accept it as a number
michael@0 420 if ((i == wl) && (nstate == NNUM)) return 1;
// callers that pass no info pointer get a local flag word instead
michael@0 421 if (!info) info = &info2; else *info = 0;
michael@0 422
michael@0 423 switch(captype) {
michael@0 424 case HUHCAP:
michael@0 425 case HUHINITCAP:
michael@0 426 *info += SPELL_ORIGCAP;
// no break: mixed-case words continue with the NOCAP lookup below
michael@0 427 case NOCAP: {
michael@0 428 rv = checkword(cw, info, root);
// retry with one trailing period restored if periods were stripped
michael@0 429 if ((abbv) && !(rv)) {
michael@0 430 memcpy(wspace,cw,wl);
michael@0 431 *(wspace+wl) = '.';
michael@0 432 *(wspace+wl+1) = '\0';
michael@0 433 rv = checkword(wspace, info, root);
michael@0 434 }
michael@0 435 break;
michael@0 436 }
michael@0 437 case ALLCAP: {
michael@0 438 *info += SPELL_ORIGCAP;
michael@0 439 rv = checkword(cw, info, root);
michael@0 440 if (rv) break;
michael@0 441 if (abbv) {
michael@0 442 memcpy(wspace,cw,wl);
michael@0 443 *(wspace+wl) = '.';
michael@0 444 *(wspace+wl+1) = '\0';
michael@0 445 rv = checkword(wspace, info, root);
michael@0 446 if (rv) break;
michael@0 447 }
michael@0 448 // Spec. prefix handling for Catalan, French, Italian:
michael@0 449 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
michael@0 450 if (pAMgr && strchr(cw, '\'')) {
michael@0 451 wl = mkallsmall2(cw, unicw, nc);
michael@0 452 //There are no really sane circumstances where this could fail,
michael@0 453 //but anyway...
michael@0 454 if (char * apostrophe = strchr(cw, '\'')) {
michael@0 455 if (utf8) {
michael@0 456 w_char tmpword[MAXWORDLEN];
// wl2 = number of characters in front of the apostrophe
michael@0 457 *apostrophe = '\0';
michael@0 458 wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
michael@0 459 *apostrophe = '\'';
michael@0 460 if (wl2 < nc) {
michael@0 461 mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
michael@0 462 rv = checkword(cw, info, root);
michael@0 463 if (rv) break;
michael@0 464 }
michael@0 465 } else {
michael@0 466 mkinitcap2(apostrophe + 1, unicw, nc);
michael@0 467 rv = checkword(cw, info, root);
michael@0 468 if (rv) break;
michael@0 469 }
michael@0 470 }
michael@0 471 mkinitcap2(cw, unicw, nc);
michael@0 472 rv = checkword(cw, info, root);
michael@0 473 if (rv) break;
michael@0 474 }
// try the sharp s variants of the lower-cased and of the capitalized word
michael@0 475 if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
michael@0 476 char tmpword[MAXWORDUTF8LEN];
michael@0 477 wl = mkallsmall2(cw, unicw, nc);
michael@0 478 memcpy(wspace,cw,(wl+1));
michael@0 479 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
michael@0 480 if (!rv) {
michael@0 481 wl2 = mkinitcap2(cw, unicw, nc);
michael@0 482 rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
michael@0 483 }
michael@0 484 if ((abbv) && !(rv)) {
michael@0 485 *(wspace+wl) = '.';
michael@0 486 *(wspace+wl+1) = '\0';
michael@0 487 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
michael@0 488 if (!rv) {
michael@0 489 memcpy(wspace, cw, wl2);
michael@0 490 *(wspace+wl2) = '.';
michael@0 491 *(wspace+wl2+1) = '\0';
michael@0 492 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
michael@0 493 }
michael@0 494 }
michael@0 495 if (rv) break;
michael@0 496 }
michael@0 497 }
// no break: an ALLCAP word that was not matched above continues with the
// INITCAP handling (SPELL_ORIGCAP is added to *info a second time there)
michael@0 498 case INITCAP: {
michael@0 499 *info += SPELL_ORIGCAP;
// wspace gets the all-lower-case form, cw the capitalized form
michael@0 500 wl = mkallsmall2(cw, unicw, nc);
michael@0 501 memcpy(wspace,cw,(wl+1));
michael@0 502 wl2 = mkinitcap2(cw, unicw, nc);
// SPELL_INITCAP is only set for the duration of this checkword() call
michael@0 503 if (captype == INITCAP) *info += SPELL_INITCAP;
michael@0 504 rv = checkword(cw, info, root);
michael@0 505 if (captype == INITCAP) *info -= SPELL_INITCAP;
michael@0 506 // forbid bad capitalization
michael@0 507 // (for example, ijs -> Ijs instead of IJs in Dutch)
michael@0 508 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
michael@0 509 if (*info & SPELL_FORBIDDEN) {
michael@0 510 rv = NULL;
michael@0 511 break;
michael@0 512 }
michael@0 513 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
michael@0 514 if (rv) break;
michael@0 515
// the capitalized form failed: try the lower-case form, then both forms
// with a trailing period if periods were stripped
michael@0 516 rv = checkword(wspace, info, root);
michael@0 517 if (abbv && !rv) {
michael@0 518
michael@0 519 *(wspace+wl) = '.';
michael@0 520 *(wspace+wl+1) = '\0';
michael@0 521 rv = checkword(wspace, info, root);
michael@0 522 if (!rv) {
michael@0 523 memcpy(wspace, cw, wl2);
michael@0 524 *(wspace+wl2) = '.';
michael@0 525 *(wspace+wl2+1) = '\0';
michael@0 526 if (captype == INITCAP) *info += SPELL_INITCAP;
michael@0 527 rv = checkword(wspace, info, root);
michael@0 528 if (captype == INITCAP) *info -= SPELL_INITCAP;
michael@0 529 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
michael@0 530 break;
michael@0 531 }
michael@0 532 }
michael@0 533 if (rv && is_keepcase(rv) &&
michael@0 534 ((captype == ALLCAP) ||
michael@0 535 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
michael@0 536 // in INITCAP form, too.
michael@0 537 !(pAMgr->get_checksharps() &&
michael@0 538 ((utf8 && strstr(wspace, "\xC3\x9F")) ||
michael@0 539 (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
michael@0 540 break;
michael@0 541 }
michael@0 542 }
michael@0 543
// an entry was found: report it, taking the flag from get_warn() into account
michael@0 544 if (rv) {
michael@0 545 if (pAMgr && pAMgr->get_warn() && rv->astr &&
michael@0 546 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
michael@0 547 *info += SPELL_WARN;
michael@0 548 if (pAMgr->get_forbidwarn()) return 0;
michael@0 549 return HUNSPELL_OK_WARN;
michael@0 550 }
michael@0 551 return HUNSPELL_OK;
michael@0 552 }
michael@0 553
michael@0 554 // recursive breaking at break points
// (wordbreak is the table fetched with get_breaktable() in the constructor)
michael@0 555 if (wordbreak) {
michael@0 556 char * s;
michael@0 557 char r;
michael@0 558 int nbr = 0;
michael@0 559 wl = strlen(cw);
michael@0 560 int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
michael@0 561
michael@0 562 // calculate break points for recursion limit
michael@0 563 for (int j = 0; j < numbreak; j++) {
michael@0 564 s = cw;
michael@0 565 do {
michael@0 566 s = (char *) strstr(s, wordbreak[j]);
michael@0 567 if (s) {
michael@0 568 nbr++;
michael@0 569 s++;
michael@0 570 }
michael@0 571 } while (s);
michael@0 572 }
// ten or more break points: reject instead of recursing
michael@0 573 if (nbr >= 10) return 0;
michael@0 574
michael@0 575 // check boundary patterns (^begin and end$)
michael@0 576 for (int j = 0; j < numbreak; j++) {
michael@0 577 int plen = strlen(wordbreak[j]);
michael@0 578 if (plen == 1 || plen > wl) continue;
michael@0 579 if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
michael@0 580 && spell(cw + plen - 1)) return 1;
michael@0 581 if (wordbreak[j][plen - 1] == '$' &&
michael@0 582 strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
// cut cw in front of the matched ending for the recursive check,
// then restore the overwritten byte
michael@0 583 r = cw[wl - plen + 1];
michael@0 584 cw[wl - plen + 1] = '\0';
michael@0 585 if (spell(cw)) return 1;
michael@0 586 cw[wl - plen + 1] = r;
michael@0 587 }
michael@0 588 }
michael@0 589
michael@0 590 // other patterns
michael@0 591 for (int j = 0; j < numbreak; j++) {
michael@0 592 int plen = strlen(wordbreak[j]);
michael@0 593 s=(char *) strstr(cw, wordbreak[j]);
// only the first occurrence is used, and only when it lies strictly
// inside the word
michael@0 594 if (s && (s > cw) && (s < cw + wl - plen)) {
michael@0 595 if (!spell(s + plen)) continue;
michael@0 596 r = *s;
michael@0 597 *s = '\0';
michael@0 598 // examine 2 sides of the break point
michael@0 599 if (spell(cw)) return 1;
michael@0 600 *s = r;
michael@0 601
michael@0 602 // LANG_hu: spec. dash rule
michael@0 603 if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
michael@0 604 r = s[1];
michael@0 605 s[1] = '\0';
michael@0 606 if (spell(cw)) return 1; // check the first part with dash
michael@0 607 s[1] = r;
michael@0 608 }
michael@0 609 // end of LANG specific region
michael@0 610
michael@0 611 }
michael@0 612 }
michael@0 613 }
michael@0 614
michael@0 615 return 0;
michael@0 616 }
michael@0 617
// Look up one word form: first in the hash tables of all loaded
// dictionaries, then by affix stripping, then by compound analysis.
//
// Parameters:
//   w    - NUL-terminated word form to check
//   info - optional flag word; SPELL_FORBIDDEN and SPELL_COMPOUND are added
//          to it, and SPELL_INITCAP is read from it
//   root - optional output; receives a mystrdup() copy of the matched
//          entry's word when the match comes from affix stripping or
//          compound analysis (it is not written for a direct hash hit)
//
// Returns the matching entry, or NULL when the word is empty, forbidden or
// not found.
michael@0 618 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
michael@0 619 {
michael@0 620 struct hentry * he = NULL;
michael@0 621 int len, i;
michael@0 622 char w2[MAXWORDUTF8LEN];
michael@0 623 const char * word;
michael@0 624
// work on a private copy (w2) when ignored characters have to be removed
// NOTE(review): pAMgr is dereferenced here without the NULL test that is
// used further down - confirm that it can never be NULL at this point
michael@0 625 char * ignoredchars = pAMgr->get_ignore();
michael@0 626 if (ignoredchars != NULL) {
michael@0 627 strcpy(w2, w);
michael@0 628 if (utf8) {
michael@0 629 int ignoredchars_utf16_len;
michael@0 630 unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
michael@0 631 remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
michael@0 632 } else {
michael@0 633 remove_ignored_chars(w2,ignoredchars);
michael@0 634 }
michael@0 635 word = w2;
michael@0 636 } else word = w;
michael@0 637
michael@0 638 len = strlen(word);
michael@0 639
michael@0 640 if (!len)
michael@0 641 return NULL;
michael@0 642
michael@0 643 // word reversing wrapper for complex prefixes
michael@0 644 if (complexprefixes) {
// make sure the reversal happens in w2 and never in the caller's buffer
michael@0 645 if (word != w2) {
michael@0 646 strcpy(w2, word);
michael@0 647 word = w2;
michael@0 648 }
michael@0 649 if (utf8) reverseword_utf(w2); else reverseword(w2);
michael@0 650 }
michael@0 651
michael@0 652 // look word in hash table
// the dictionaries are tried in order until one of them yields an entry
michael@0 653 for (i = 0; (i < maxdic) && !he; i ++) {
michael@0 654 he = (pHMgr[i])->lookup(word);
michael@0 655
michael@0 656 // check forbidden and onlyincompound words
michael@0 657 if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
michael@0 658 if (info) *info += SPELL_FORBIDDEN;
michael@0 659 // LANG_hu section: set dash information for suggestions
michael@0 660 if (langnum == LANG_hu) {
michael@0 661 if (pAMgr->get_compoundflag() &&
michael@0 662 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
michael@0 663 if (info) *info += SPELL_COMPOUND;
michael@0 664 }
michael@0 665 }
michael@0 666 return NULL;
michael@0 667 }
michael@0 668
michael@0 669 // he = next not needaffix, onlyincompound homonym or onlyupcase word
// (he becomes NULL when every homonym is excluded, so the loop goes on
// with the next dictionary)
michael@0 670 while (he && (he->astr) &&
michael@0 671 ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
michael@0 672 (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
michael@0 673 (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
michael@0 674 )) he = he->next_homonym;
michael@0 675 }
michael@0 676
michael@0 677 // check with affixes
michael@0 678 if (!he && pAMgr) {
michael@0 679 // try stripping off affixes */
michael@0 680 he = pAMgr->affix_check(word, len, 0);
michael@0 681
michael@0 682 // check compound restriction and onlyupcase
michael@0 683 if (he && he->astr && (
michael@0 684 (pAMgr->get_onlyincompound() &&
michael@0 685 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
michael@0 686 (info && (*info & SPELL_INITCAP) &&
michael@0 687 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
michael@0 688 he = NULL;
michael@0 689 }
michael@0 690
michael@0 691 if (he) {
michael@0 692 if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
michael@0 693 if (info) *info += SPELL_FORBIDDEN;
michael@0 694 return NULL;
michael@0 695 }
// hand a copy of the stem back to the caller, un-reversed if needed
michael@0 696 if (root) {
michael@0 697 *root = mystrdup(he->word);
michael@0 698 if (*root && complexprefixes) {
michael@0 699 if (utf8) reverseword_utf(*root); else reverseword(*root);
michael@0 700 }
michael@0 701 }
michael@0 702 // try check compound word
michael@0 703 } else if (pAMgr->get_compound()) {
michael@0 704 he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
michael@0 705 // LANG_hu section: `moving rule' with last dash
michael@0 706 if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
// retry on a temporary copy without the trailing dash
michael@0 707 char * dup = mystrdup(word);
michael@0 708 if (!dup) return NULL;
michael@0 709 dup[len-1] = '\0';
michael@0 710 he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
michael@0 711 free(dup);
michael@0 712 }
michael@0 713 // end of LANG specific region
michael@0 714 if (he) {
michael@0 715 if (root) {
michael@0 716 *root = mystrdup(he->word);
michael@0 717 if (*root && complexprefixes) {
michael@0 718 if (utf8) reverseword_utf(*root); else reverseword(*root);
michael@0 719 }
michael@0 720 }
michael@0 721 if (info) *info += SPELL_COMPOUND;
michael@0 722 }
michael@0 723 }
michael@0 724
michael@0 725 }
michael@0 726
michael@0 727 return he;
michael@0 728 }
michael@0 729
michael@0 730 int Hunspell::suggest(char*** slst, const char * word)
michael@0 731 {
michael@0 732 int onlycmpdsug = 0;
michael@0 733 char cw[MAXWORDUTF8LEN];
michael@0 734 char wspace[MAXWORDUTF8LEN];
michael@0 735 if (!pSMgr || maxdic == 0) return 0;
michael@0 736 w_char unicw[MAXWORDLEN];
michael@0 737 *slst = NULL;
michael@0 738 // process XML input of the simplified API (see manual)
michael@0 739 if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
michael@0 740 return spellml(slst, word);
michael@0 741 }
michael@0 742 int nc = strlen(word);
michael@0 743 if (utf8) {
michael@0 744 if (nc >= MAXWORDUTF8LEN) return 0;
michael@0 745 } else {
michael@0 746 if (nc >= MAXWORDLEN) return 0;
michael@0 747 }
michael@0 748 int captype = 0;
michael@0 749 int abbv = 0;
michael@0 750 int wl = 0;
michael@0 751
michael@0 752 // input conversion
michael@0 753 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
michael@0 754 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
michael@0 755 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
michael@0 756
michael@0 757 if (wl == 0) return 0;
michael@0 758 int ns = 0;
michael@0 759 int capwords = 0;
michael@0 760
michael@0 761 // check capitalized form for FORCEUCASE
michael@0 762 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
michael@0 763 int info = SPELL_ORIGCAP;
michael@0 764 char ** wlst;
michael@0 765 if (checkword(cw, &info, NULL)) {
michael@0 766 if (*slst) {
michael@0 767 wlst = *slst;
michael@0 768 } else {
michael@0 769 wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
michael@0 770 if (wlst == NULL) return -1;
michael@0 771 *slst = wlst;
michael@0 772 for (int i = 0; i < MAXSUGGESTION; i++) {
michael@0 773 wlst[i] = NULL;
michael@0 774 }
michael@0 775 }
michael@0 776 wlst[0] = mystrdup(cw);
michael@0 777 mkinitcap(wlst[0]);
michael@0 778 return 1;
michael@0 779 }
michael@0 780 }
michael@0 781
michael@0 782 switch(captype) {
michael@0 783 case NOCAP: {
michael@0 784 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
michael@0 785 break;
michael@0 786 }
michael@0 787
michael@0 788 case INITCAP: {
michael@0 789 capwords = 1;
michael@0 790 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
michael@0 791 if (ns == -1) break;
michael@0 792 memcpy(wspace,cw,(wl+1));
michael@0 793 mkallsmall2(wspace, unicw, nc);
michael@0 794 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
michael@0 795 break;
michael@0 796 }
michael@0 797 case HUHINITCAP:
michael@0 798 capwords = 1;
michael@0 799 case HUHCAP: {
michael@0 800 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
michael@0 801 if (ns != -1) {
michael@0 802 int prevns;
michael@0 803 // something.The -> something. The
michael@0 804 char * dot = strchr(cw, '.');
michael@0 805 if (dot && (dot > cw)) {
michael@0 806 int captype_;
michael@0 807 if (utf8) {
michael@0 808 w_char w_[MAXWORDLEN];
michael@0 809 int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
michael@0 810 captype_ = get_captype_utf8(w_, wl_, langnum);
michael@0 811 } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
michael@0 812 if (captype_ == INITCAP) {
michael@0 813 char * st = mystrdup(cw);
michael@0 814 if (st) st = (char *) realloc(st, wl + 2);
michael@0 815 if (st) {
michael@0 816 st[(dot - cw) + 1] = ' ';
michael@0 817 strcpy(st + (dot - cw) + 2, dot + 1);
michael@0 818 ns = insert_sug(slst, st, ns);
michael@0 819 free(st);
michael@0 820 }
michael@0 821 }
michael@0 822 }
michael@0 823 if (captype == HUHINITCAP) {
michael@0 824 // TheOpenOffice.org -> The OpenOffice.org
michael@0 825 memcpy(wspace,cw,(wl+1));
michael@0 826 mkinitsmall2(wspace, unicw, nc);
michael@0 827 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
michael@0 828 }
michael@0 829 memcpy(wspace,cw,(wl+1));
michael@0 830 mkallsmall2(wspace, unicw, nc);
michael@0 831 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
michael@0 832 prevns = ns;
michael@0 833 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
michael@0 834 if (captype == HUHINITCAP) {
michael@0 835 mkinitcap2(wspace, unicw, nc);
michael@0 836 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
michael@0 837 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
michael@0 838 }
michael@0 839 // aNew -> "a New" (instead of "a new")
michael@0 840 for (int j = prevns; j < ns; j++) {
michael@0 841 char * space = strchr((*slst)[j],' ');
michael@0 842 if (space) {
michael@0 843 int slen = strlen(space + 1);
michael@0 844 // different case after space (need capitalisation)
michael@0 845 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
michael@0 846 w_char w[MAXWORDLEN];
michael@0 847 int wc = 0;
michael@0 848 char * r = (*slst)[j];
michael@0 849 if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
michael@0 850 mkinitcap2(space + 1, w, wc);
michael@0 851 // set as first suggestion
michael@0 852 for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
michael@0 853 (*slst)[0] = r;
michael@0 854 }
michael@0 855 }
michael@0 856 }
michael@0 857 }
michael@0 858 break;
michael@0 859 }
michael@0 860
michael@0 861 case ALLCAP: {
michael@0 862 memcpy(wspace, cw, (wl+1));
michael@0 863 mkallsmall2(wspace, unicw, nc);
michael@0 864 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
michael@0 865 if (ns == -1) break;
michael@0 866 if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
michael@0 867 ns = insert_sug(slst, wspace, ns);
michael@0 868 mkinitcap2(wspace, unicw, nc);
michael@0 869 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
michael@0 870 for (int j=0; j < ns; j++) {
michael@0 871 mkallcap((*slst)[j]);
michael@0 872 if (pAMgr && pAMgr->get_checksharps()) {
michael@0 873 char * pos;
michael@0 874 if (utf8) {
michael@0 875 pos = strstr((*slst)[j], "\xC3\x9F");
michael@0 876 while (pos) {
michael@0 877 *pos = 'S';
michael@0 878 *(pos+1) = 'S';
michael@0 879 pos = strstr(pos+2, "\xC3\x9F");
michael@0 880 }
michael@0 881 } else {
michael@0 882 pos = strchr((*slst)[j], '\xDF');
michael@0 883 while (pos) {
michael@0 884 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
michael@0 885 mystrrep((*slst)[j], "\xDF", "SS");
michael@0 886 pos = strchr((*slst)[j], '\xDF');
michael@0 887 }
michael@0 888 }
michael@0 889 }
michael@0 890 }
michael@0 891 break;
michael@0 892 }
michael@0 893 }
michael@0 894
michael@0 895 // LANG_hu section: replace '-' with ' ' in Hungarian
michael@0 896 if (langnum == LANG_hu) {
michael@0 897 for (int j=0; j < ns; j++) {
michael@0 898 char * pos = strchr((*slst)[j],'-');
michael@0 899 if (pos) {
michael@0 900 int info;
michael@0 901 char w[MAXWORDUTF8LEN];
michael@0 902 *pos = '\0';
michael@0 903 strcpy(w, (*slst)[j]);
michael@0 904 strcat(w, pos + 1);
michael@0 905 spell(w, &info, NULL);
michael@0 906 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
michael@0 907 *pos = ' ';
michael@0 908 } else *pos = '-';
michael@0 909 }
michael@0 910 }
michael@0 911 }
michael@0 912 // END OF LANG_hu section
michael@0 913
michael@0 914 // try ngram approach since found nothing or only compound words
michael@0 915 if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
michael@0 916 switch(captype) {
michael@0 917 case NOCAP: {
michael@0 918 ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
michael@0 919 break;
michael@0 920 }
michael@0 921 case HUHINITCAP:
michael@0 922 capwords = 1;
michael@0 923 case HUHCAP: {
michael@0 924 memcpy(wspace,cw,(wl+1));
michael@0 925 mkallsmall2(wspace, unicw, nc);
michael@0 926 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
michael@0 927 break;
michael@0 928 }
michael@0 929 case INITCAP: {
michael@0 930 capwords = 1;
michael@0 931 memcpy(wspace,cw,(wl+1));
michael@0 932 mkallsmall2(wspace, unicw, nc);
michael@0 933 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
michael@0 934 break;
michael@0 935 }
michael@0 936 case ALLCAP: {
michael@0 937 memcpy(wspace,cw,(wl+1));
michael@0 938 mkallsmall2(wspace, unicw, nc);
michael@0 939 int oldns = ns;
michael@0 940 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
michael@0 941 for (int j = oldns; j < ns; j++)
michael@0 942 mkallcap((*slst)[j]);
michael@0 943 break;
michael@0 944 }
michael@0 945 }
michael@0 946 }
michael@0 947
michael@0 948 // try dash suggestion (Afo-American -> Afro-American)
michael@0 949 if (char * pos = strchr(cw, '-')) {
michael@0 950 char * ppos = cw;
michael@0 951 int nodashsug = 1;
michael@0 952 char ** nlst = NULL;
michael@0 953 int nn = 0;
michael@0 954 int last = 0;
michael@0 955 if (*slst) {
michael@0 956 for (int j = 0; j < ns && nodashsug == 1; j++) {
michael@0 957 if (strchr((*slst)[j], '-')) nodashsug = 0;
michael@0 958 }
michael@0 959 }
michael@0 960 while (nodashsug && !last) {
michael@0 961 if (*pos == '\0') last = 1; else *pos = '\0';
michael@0 962 if (!spell(ppos)) {
michael@0 963 nn = suggest(&nlst, ppos);
michael@0 964 for (int j = nn - 1; j >= 0; j--) {
michael@0 965 strncpy(wspace, cw, ppos - cw);
michael@0 966 strcpy(wspace + (ppos - cw), nlst[j]);
michael@0 967 if (!last) {
michael@0 968 strcat(wspace, "-");
michael@0 969 strcat(wspace, pos + 1);
michael@0 970 }
michael@0 971 ns = insert_sug(slst, wspace, ns);
michael@0 972 free(nlst[j]);
michael@0 973 }
michael@0 974 if (nlst != NULL) free(nlst);
michael@0 975 nodashsug = 0;
michael@0 976 }
michael@0 977 if (!last) {
michael@0 978 *pos = '-';
michael@0 979 ppos = pos + 1;
michael@0 980 pos = strchr(ppos, '-');
michael@0 981 }
michael@0 982 if (!pos) pos = cw + strlen(cw);
michael@0 983 }
michael@0 984 }
michael@0 985
michael@0 986 // word reversing wrapper for complex prefixes
michael@0 987 if (complexprefixes) {
michael@0 988 for (int j = 0; j < ns; j++) {
michael@0 989 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
michael@0 990 }
michael@0 991 }
michael@0 992
michael@0 993 // capitalize
michael@0 994 if (capwords) for (int j=0; j < ns; j++) {
michael@0 995 mkinitcap((*slst)[j]);
michael@0 996 }
michael@0 997
michael@0 998 // expand suggestions with dot(s)
michael@0 999 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
michael@0 1000 for (int j = 0; j < ns; j++) {
michael@0 1001 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
michael@0 1002 strcat((*slst)[j], word + strlen(word) - abbv);
michael@0 1003 }
michael@0 1004 }
michael@0 1005
michael@0 1006 // remove bad capitalized and forbidden forms
michael@0 1007 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
michael@0 1008 switch (captype) {
michael@0 1009 case INITCAP:
michael@0 1010 case ALLCAP: {
michael@0 1011 int l = 0;
michael@0 1012 for (int j=0; j < ns; j++) {
michael@0 1013 if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
michael@0 1014 char s[MAXSWUTF8L];
michael@0 1015 w_char w[MAXSWL];
michael@0 1016 int len;
michael@0 1017 if (utf8) {
michael@0 1018 len = u8_u16(w, MAXSWL, (*slst)[j]);
michael@0 1019 } else {
michael@0 1020 strcpy(s, (*slst)[j]);
michael@0 1021 len = strlen(s);
michael@0 1022 }
michael@0 1023 mkallsmall2(s, w, len);
michael@0 1024 free((*slst)[j]);
michael@0 1025 if (spell(s)) {
michael@0 1026 (*slst)[l] = mystrdup(s);
michael@0 1027 if ((*slst)[l]) l++;
michael@0 1028 } else {
michael@0 1029 mkinitcap2(s, w, len);
michael@0 1030 if (spell(s)) {
michael@0 1031 (*slst)[l] = mystrdup(s);
michael@0 1032 if ((*slst)[l]) l++;
michael@0 1033 }
michael@0 1034 }
michael@0 1035 } else {
michael@0 1036 (*slst)[l] = (*slst)[j];
michael@0 1037 l++;
michael@0 1038 }
michael@0 1039 }
michael@0 1040 ns = l;
michael@0 1041 }
michael@0 1042 }
michael@0 1043 }
michael@0 1044
michael@0 1045 // remove duplications
michael@0 1046 int l = 0;
michael@0 1047 for (int j = 0; j < ns; j++) {
michael@0 1048 (*slst)[l] = (*slst)[j];
michael@0 1049 for (int k = 0; k < l; k++) {
michael@0 1050 if (strcmp((*slst)[k], (*slst)[j]) == 0) {
michael@0 1051 free((*slst)[j]);
michael@0 1052 l--;
michael@0 1053 break;
michael@0 1054 }
michael@0 1055 }
michael@0 1056 l++;
michael@0 1057 }
michael@0 1058 ns = l;
michael@0 1059
michael@0 1060 // output conversion
michael@0 1061 rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
michael@0 1062 for (int j = 0; rl && j < ns; j++) {
michael@0 1063 if (rl->conv((*slst)[j], wspace)) {
michael@0 1064 free((*slst)[j]);
michael@0 1065 (*slst)[j] = mystrdup(wspace);
michael@0 1066 }
michael@0 1067 }
michael@0 1068
michael@0 1069 // if suggestions removed by nosuggest, onlyincompound parameters
michael@0 1070 if (l == 0 && *slst) {
michael@0 1071 free(*slst);
michael@0 1072 *slst = NULL;
michael@0 1073 }
michael@0 1074 return l;
michael@0 1075 }
michael@0 1076
michael@0 1077 void Hunspell::free_list(char *** slst, int n) {
michael@0 1078 freelist(slst, n);
michael@0 1079 }
michael@0 1080
michael@0 1081 char * Hunspell::get_dic_encoding()
michael@0 1082 {
michael@0 1083 return encoding;
michael@0 1084 }
michael@0 1085
#ifdef HUNSPELL_EXPERIMENTAL
// XXX need UTF-8 support
//
// Collect suggestions for `word` through pSMgr->suggest_auto().
//
//   slst  out: receives the suggestion list (reset to NULL before the
//         suggestion manager is called)
//   word  in:  the word to correct
//
// Returns 0 when there is no suggestion manager, no dictionary, the word is
// too long for the internal buffers, or nothing is left of it after
// cleanword(); otherwise the running count returned by
// pSMgr->suggest_auto().
int Hunspell::suggest_auto(char*** slst, const char * word)
{
  char cw[MAXWORDUTF8LEN];
  char wspace[MAXWORDUTF8LEN];
  if (!pSMgr || maxdic == 0) return 0;
  int wl = strlen(word);
  if (utf8) {
    if (wl >= MAXWORDUTF8LEN) return 0;
  } else {
    if (wl >= MAXWORDLEN) return 0;
  }
  int captype = 0;
  int abbv = 0;
  wl = cleanword(cw, word, &captype, &abbv);
  if (wl == 0) return 0;
  int ns = 0;
  *slst = NULL; // HU, nsug in pSMgr->suggest

  switch(captype) {
    case NOCAP: {
      ns = pSMgr->suggest_auto(slst, cw, ns);
      break;
    }

    case INITCAP: {
      // lower-cased form first; its suggestions get their capital back
      memcpy(wspace,cw,(wl+1));
      mkallsmall(wspace);
      ns = pSMgr->suggest_auto(slst, wspace, ns);
      for (int j=0; j < ns; j++)
        mkinitcap((*slst)[j]);
      // then the word as typed
      ns = pSMgr->suggest_auto(slst, cw, ns);
      break;
    }

    case HUHINITCAP:
    case HUHCAP: {
      ns = pSMgr->suggest_auto(slst, cw, ns);
      if (ns == 0) {
        // nothing for the mixed-case form: retry all lower-case
        memcpy(wspace,cw,(wl+1));
        mkallsmall(wspace);
        ns = pSMgr->suggest_auto(slst, wspace, ns);
      }
      break;
    }

    case ALLCAP: {
      memcpy(wspace,cw,(wl+1));
      mkallsmall(wspace);
      ns = pSMgr->suggest_auto(slst, wspace, ns);

      mkinitcap(wspace);
      ns = pSMgr->suggest_auto(slst, wspace, ns);

      // give every suggestion the capitalisation of the input
      for (int j=0; j < ns; j++)
        mkallcap((*slst)[j]);
      break;
    }
  }

  // word reversing wrapper for complex prefixes
  if (complexprefixes) {
    for (int j = 0; j < ns; j++) {
      if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
    }
  }

  // expand suggestions with dot(s)
  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
    for (int j = 0; j < ns; j++) {
      char * grown = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
      // on allocation failure the old string is still valid: keep it
      // without the trailing dots instead of losing the pointer
      if (!grown) continue;
      (*slst)[j] = grown;
      strcat(grown, word + strlen(word) - abbv);
    }
  }

  // LANG_hu section: replace '-' with ' ' in Hungarian
  if (langnum == LANG_hu) {
    for (int j=0; j < ns; j++) {
      char * pos = strchr((*slst)[j],'-');
      if (pos) {
        // spell() is not guaranteed to write the flags on every path,
        // so start from "no flags"
        int info = 0;
        char w[MAXWORDUTF8LEN];
        *pos = '\0';
        // the dash-less copy must fit into w; otherwise leave the dash alone
        if (strlen((*slst)[j]) + strlen(pos + 1) >= sizeof(w)) {
          *pos = '-';
          continue;
        }
        strcpy(w, (*slst)[j]);
        strcat(w, pos + 1);
        spell(w, &info, NULL);
        if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
          *pos = ' ';
        } else *pos = '-';
      }
    }
  }
  // END OF LANG_hu section
  return ns;
}
#endif
michael@0 1185
michael@0 1186 int Hunspell::stem(char*** slst, char ** desc, int n)
michael@0 1187 {
michael@0 1188 char result[MAXLNLEN];
michael@0 1189 char result2[MAXLNLEN];
michael@0 1190 *slst = NULL;
michael@0 1191 if (n == 0) return 0;
michael@0 1192 *result2 = '\0';
michael@0 1193 for (int i = 0; i < n; i++) {
michael@0 1194 *result = '\0';
michael@0 1195 // add compound word parts (except the last one)
michael@0 1196 char * s = (char *) desc[i];
michael@0 1197 char * part = strstr(s, MORPH_PART);
michael@0 1198 if (part) {
michael@0 1199 char * nextpart = strstr(part + 1, MORPH_PART);
michael@0 1200 while (nextpart) {
michael@0 1201 copy_field(result + strlen(result), part, MORPH_PART);
michael@0 1202 part = nextpart;
michael@0 1203 nextpart = strstr(part + 1, MORPH_PART);
michael@0 1204 }
michael@0 1205 s = part;
michael@0 1206 }
michael@0 1207
michael@0 1208 char **pl;
michael@0 1209 char tok[MAXLNLEN];
michael@0 1210 strcpy(tok, s);
michael@0 1211 char * alt = strstr(tok, " | ");
michael@0 1212 while (alt) {
michael@0 1213 alt[1] = MSEP_ALT;
michael@0 1214 alt = strstr(alt, " | ");
michael@0 1215 }
michael@0 1216 int pln = line_tok(tok, &pl, MSEP_ALT);
michael@0 1217 for (int k = 0; k < pln; k++) {
michael@0 1218 // add derivational suffixes
michael@0 1219 if (strstr(pl[k], MORPH_DERI_SFX)) {
michael@0 1220 // remove inflectional suffixes
michael@0 1221 char * is = strstr(pl[k], MORPH_INFL_SFX);
michael@0 1222 if (is) *is = '\0';
michael@0 1223 char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
michael@0 1224 if (sg) {
michael@0 1225 char ** gen;
michael@0 1226 int genl = line_tok(sg, &gen, MSEP_REC);
michael@0 1227 free(sg);
michael@0 1228 for (int j = 0; j < genl; j++) {
michael@0 1229 sprintf(result2 + strlen(result2), "%c%s%s",
michael@0 1230 MSEP_REC, result, gen[j]);
michael@0 1231 }
michael@0 1232 freelist(&gen, genl);
michael@0 1233 }
michael@0 1234 } else {
michael@0 1235 sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
michael@0 1236 if (strstr(pl[k], MORPH_SURF_PFX)) {
michael@0 1237 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
michael@0 1238 }
michael@0 1239 copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
michael@0 1240 }
michael@0 1241 }
michael@0 1242 freelist(&pl, pln);
michael@0 1243 }
michael@0 1244 int sln = line_tok(result2, slst, MSEP_REC);
michael@0 1245 return uniqlist(*slst, sln);
michael@0 1246
michael@0 1247 }
michael@0 1248
michael@0 1249 int Hunspell::stem(char*** slst, const char * word)
michael@0 1250 {
michael@0 1251 char ** pl;
michael@0 1252 int pln = analyze(&pl, word);
michael@0 1253 int pln2 = stem(slst, pl, pln);
michael@0 1254 freelist(&pl, pln);
michael@0 1255 return pln2;
michael@0 1256 }
michael@0 1257
#ifdef HUNSPELL_EXPERIMENTAL
// Query pSMgr->suggest_pos_stems() for `word`, retrying with other
// capitalisations (and, for abbreviations, with a trailing dot) when the
// first attempt yields nothing.
//
//   slst  out: receives the result list (reset to NULL before the first query)
//   word  in:  the word to look up
//
// Returns 0 when there is no suggestion manager, no dictionary, the word is
// too long or empty after cleanword(); otherwise the count returned by the
// last pSMgr->suggest_pos_stems() call.
int Hunspell::suggest_pos_stems(char*** slst, const char * word)
{
  char cw[MAXWORDUTF8LEN];
  char wspace[MAXWORDUTF8LEN];
  if (! pSMgr || maxdic == 0) return 0;

  int wl = strlen(word);
  const int limit = utf8 ? MAXWORDUTF8LEN : MAXWORDLEN;
  if (wl >= limit) return 0;

  int captype = 0;
  int abbv = 0;
  wl = cleanword(cw, word, &captype, &abbv);
  if (wl == 0) return 0;

  int ns = 0; // ns=0 = normalized input

  *slst = NULL; // HU, nsug in pSMgr->suggest

  switch (captype) {
    case NOCAP:
    case HUHCAP:
      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
      if (abbv && ns == 0) {
        // abbreviation without result: retry with one dot appended
        memcpy(wspace, cw, wl);
        wspace[wl] = '.';
        wspace[wl + 1] = '\0';
        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
      }
      break;

    case INITCAP:
      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
      if (ns == 0 || (*slst)[0][0] == '#') {
        // nothing found, or first entry starts with '#': retry lower-cased
        memcpy(wspace, cw, wl + 1);
        mkallsmall(wspace);
        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
      }
      break;

    case ALLCAP:
      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
      if (ns == 0) {
        // fall back to all-lower, then to initial-capital form
        memcpy(wspace, cw, wl + 1);
        mkallsmall(wspace);
        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
        if (ns == 0) {
          mkinitcap(wspace);
          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
        }
      }
      break;
  }

  return ns;
}
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
michael@0 1327
michael@0 1328 const char * Hunspell::get_wordchars()
michael@0 1329 {
michael@0 1330 return pAMgr->get_wordchars();
michael@0 1331 }
michael@0 1332
michael@0 1333 unsigned short * Hunspell::get_wordchars_utf16(int * len)
michael@0 1334 {
michael@0 1335 return pAMgr->get_wordchars_utf16(len);
michael@0 1336 }
michael@0 1337
michael@0 1338 void Hunspell::mkinitcap(char * p)
michael@0 1339 {
michael@0 1340 if (!utf8) {
michael@0 1341 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
michael@0 1342 } else {
michael@0 1343 int len;
michael@0 1344 w_char u[MAXWORDLEN];
michael@0 1345 len = u8_u16(u, MAXWORDLEN, p);
michael@0 1346 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
michael@0 1347 u[0].h = (unsigned char) (i >> 8);
michael@0 1348 u[0].l = (unsigned char) (i & 0x00FF);
michael@0 1349 u16_u8(p, MAXWORDUTF8LEN, u, len);
michael@0 1350 }
michael@0 1351 }
michael@0 1352
michael@0 1353 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
michael@0 1354 {
michael@0 1355 if (!utf8) {
michael@0 1356 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
michael@0 1357 } else if (nc > 0) {
michael@0 1358 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
michael@0 1359 u[0].h = (unsigned char) (i >> 8);
michael@0 1360 u[0].l = (unsigned char) (i & 0x00FF);
michael@0 1361 u16_u8(p, MAXWORDUTF8LEN, u, nc);
michael@0 1362 return strlen(p);
michael@0 1363 }
michael@0 1364 return nc;
michael@0 1365 }
michael@0 1366
michael@0 1367 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
michael@0 1368 {
michael@0 1369 if (!utf8) {
michael@0 1370 if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
michael@0 1371 } else if (nc > 0) {
michael@0 1372 unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
michael@0 1373 u[0].h = (unsigned char) (i >> 8);
michael@0 1374 u[0].l = (unsigned char) (i & 0x00FF);
michael@0 1375 u16_u8(p, MAXWORDUTF8LEN, u, nc);
michael@0 1376 return strlen(p);
michael@0 1377 }
michael@0 1378 return nc;
michael@0 1379 }
michael@0 1380
michael@0 1381 int Hunspell::add(const char * word)
michael@0 1382 {
michael@0 1383 if (pHMgr[0]) return (pHMgr[0])->add(word);
michael@0 1384 return 0;
michael@0 1385 }
michael@0 1386
michael@0 1387 int Hunspell::add_with_affix(const char * word, const char * example)
michael@0 1388 {
michael@0 1389 if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
michael@0 1390 return 0;
michael@0 1391 }
michael@0 1392
michael@0 1393 int Hunspell::remove(const char * word)
michael@0 1394 {
michael@0 1395 if (pHMgr[0]) return (pHMgr[0])->remove(word);
michael@0 1396 return 0;
michael@0 1397 }
michael@0 1398
michael@0 1399 const char * Hunspell::get_version()
michael@0 1400 {
michael@0 1401 return pAMgr->get_version();
michael@0 1402 }
michael@0 1403
michael@0 1404 struct cs_info * Hunspell::get_csconv()
michael@0 1405 {
michael@0 1406 return csconv;
michael@0 1407 }
michael@0 1408
michael@0 1409 void Hunspell::cat_result(char * result, char * st)
michael@0 1410 {
michael@0 1411 if (st) {
michael@0 1412 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1413 mystrcat(result, st, MAXLNLEN);
michael@0 1414 free(st);
michael@0 1415 }
michael@0 1416 }
michael@0 1417
michael@0 1418 int Hunspell::analyze(char*** slst, const char * word)
michael@0 1419 {
michael@0 1420 char cw[MAXWORDUTF8LEN];
michael@0 1421 char wspace[MAXWORDUTF8LEN];
michael@0 1422 w_char unicw[MAXWORDLEN];
michael@0 1423 int wl2 = 0;
michael@0 1424 *slst = NULL;
michael@0 1425 if (! pSMgr || maxdic == 0) return 0;
michael@0 1426 int nc = strlen(word);
michael@0 1427 if (utf8) {
michael@0 1428 if (nc >= MAXWORDUTF8LEN) return 0;
michael@0 1429 } else {
michael@0 1430 if (nc >= MAXWORDLEN) return 0;
michael@0 1431 }
michael@0 1432 int captype = 0;
michael@0 1433 int abbv = 0;
michael@0 1434 int wl = 0;
michael@0 1435
michael@0 1436 // input conversion
michael@0 1437 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
michael@0 1438 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
michael@0 1439 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
michael@0 1440
michael@0 1441 if (wl == 0) {
michael@0 1442 if (abbv) {
michael@0 1443 for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
michael@0 1444 cw[wl] = '\0';
michael@0 1445 abbv = 0;
michael@0 1446 } else return 0;
michael@0 1447 }
michael@0 1448
michael@0 1449 char result[MAXLNLEN];
michael@0 1450 char * st = NULL;
michael@0 1451
michael@0 1452 *result = '\0';
michael@0 1453
michael@0 1454 int n = 0;
michael@0 1455 int n2 = 0;
michael@0 1456 int n3 = 0;
michael@0 1457
michael@0 1458 // test numbers
michael@0 1459 // LANG_hu section: set dash information for suggestions
michael@0 1460 if (langnum == LANG_hu) {
michael@0 1461 while ((n < wl) &&
michael@0 1462 (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
michael@0 1463 n++;
michael@0 1464 if ((cw[n] == '.') || (cw[n] == ',')) {
michael@0 1465 if (((n2 == 0) && (n > 3)) ||
michael@0 1466 ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
michael@0 1467 n2++;
michael@0 1468 n3 = n;
michael@0 1469 }
michael@0 1470 }
michael@0 1471
michael@0 1472 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
michael@0 1473 if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
michael@0 1474 mystrcat(result, cw, MAXLNLEN);
michael@0 1475 result[n - 1] = '\0';
michael@0 1476 if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
michael@0 1477 else {
michael@0 1478 char sign = cw[n];
michael@0 1479 cw[n] = '\0';
michael@0 1480 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
michael@0 1481 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
michael@0 1482 cw[n] = sign;
michael@0 1483 cat_result(result, pSMgr->suggest_morph(cw + n));
michael@0 1484 }
michael@0 1485 return line_tok(result, slst, MSEP_REC);
michael@0 1486 }
michael@0 1487 }
michael@0 1488 // END OF LANG_hu section
michael@0 1489
michael@0 1490 switch(captype) {
michael@0 1491 case HUHCAP:
michael@0 1492 case HUHINITCAP:
michael@0 1493 case NOCAP: {
michael@0 1494 cat_result(result, pSMgr->suggest_morph(cw));
michael@0 1495 if (abbv) {
michael@0 1496 memcpy(wspace,cw,wl);
michael@0 1497 *(wspace+wl) = '.';
michael@0 1498 *(wspace+wl+1) = '\0';
michael@0 1499 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1500 }
michael@0 1501 break;
michael@0 1502 }
michael@0 1503 case INITCAP: {
michael@0 1504 wl = mkallsmall2(cw, unicw, nc);
michael@0 1505 memcpy(wspace,cw,(wl+1));
michael@0 1506 wl2 = mkinitcap2(cw, unicw, nc);
michael@0 1507 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1508 cat_result(result, pSMgr->suggest_morph(cw));
michael@0 1509 if (abbv) {
michael@0 1510 *(wspace+wl) = '.';
michael@0 1511 *(wspace+wl+1) = '\0';
michael@0 1512 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1513
michael@0 1514 memcpy(wspace, cw, wl2);
michael@0 1515 *(wspace+wl2) = '.';
michael@0 1516 *(wspace+wl2+1) = '\0';
michael@0 1517
michael@0 1518 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1519 }
michael@0 1520 break;
michael@0 1521 }
michael@0 1522 case ALLCAP: {
michael@0 1523 cat_result(result, pSMgr->suggest_morph(cw));
michael@0 1524 if (abbv) {
michael@0 1525 memcpy(wspace,cw,wl);
michael@0 1526 *(wspace+wl) = '.';
michael@0 1527 *(wspace+wl+1) = '\0';
michael@0 1528 cat_result(result, pSMgr->suggest_morph(cw));
michael@0 1529 }
michael@0 1530 wl = mkallsmall2(cw, unicw, nc);
michael@0 1531 memcpy(wspace,cw,(wl+1));
michael@0 1532 wl2 = mkinitcap2(cw, unicw, nc);
michael@0 1533
michael@0 1534 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1535 cat_result(result, pSMgr->suggest_morph(cw));
michael@0 1536 if (abbv) {
michael@0 1537 *(wspace+wl) = '.';
michael@0 1538 *(wspace+wl+1) = '\0';
michael@0 1539 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1540
michael@0 1541 memcpy(wspace, cw, wl2);
michael@0 1542 *(wspace+wl2) = '.';
michael@0 1543 *(wspace+wl2+1) = '\0';
michael@0 1544
michael@0 1545 cat_result(result, pSMgr->suggest_morph(wspace));
michael@0 1546 }
michael@0 1547 break;
michael@0 1548 }
michael@0 1549 }
michael@0 1550
michael@0 1551 if (*result) {
michael@0 1552 // word reversing wrapper for complex prefixes
michael@0 1553 if (complexprefixes) {
michael@0 1554 if (utf8) reverseword_utf(result); else reverseword(result);
michael@0 1555 }
michael@0 1556 return line_tok(result, slst, MSEP_REC);
michael@0 1557 }
michael@0 1558
michael@0 1559 // compound word with dash (HU) I18n
michael@0 1560 char * dash = NULL;
michael@0 1561 int nresult = 0;
michael@0 1562 // LANG_hu section: set dash information for suggestions
michael@0 1563 if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
michael@0 1564 if ((langnum == LANG_hu) && dash) {
michael@0 1565 *dash='\0';
michael@0 1566 // examine 2 sides of the dash
michael@0 1567 if (dash[1] == '\0') { // base word ending with dash
michael@0 1568 if (spell(cw)) {
michael@0 1569 char * p = pSMgr->suggest_morph(cw);
michael@0 1570 if (p) {
michael@0 1571 int ret = line_tok(p, slst, MSEP_REC);
michael@0 1572 free(p);
michael@0 1573 return ret;
michael@0 1574 }
michael@0 1575
michael@0 1576 }
michael@0 1577 } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
michael@0 1578 if (spell(cw) && (spell("-e"))) {
michael@0 1579 st = pSMgr->suggest_morph(cw);
michael@0 1580 if (st) {
michael@0 1581 mystrcat(result, st, MAXLNLEN);
michael@0 1582 free(st);
michael@0 1583 }
michael@0 1584 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
michael@0 1585 st = pSMgr->suggest_morph("-e");
michael@0 1586 if (st) {
michael@0 1587 mystrcat(result, st, MAXLNLEN);
michael@0 1588 free(st);
michael@0 1589 }
michael@0 1590 return line_tok(result, slst, MSEP_REC);
michael@0 1591 }
michael@0 1592 } else {
michael@0 1593 // first word ending with dash: word- XXX ???
michael@0 1594 char r2 = *(dash + 1);
michael@0 1595 dash[0]='-';
michael@0 1596 dash[1]='\0';
michael@0 1597 nresult = spell(cw);
michael@0 1598 dash[1] = r2;
michael@0 1599 dash[0]='\0';
michael@0 1600 if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
michael@0 1601 ((dash[1] > '0') && (dash[1] < '9')))) {
michael@0 1602 st = pSMgr->suggest_morph(cw);
michael@0 1603 if (st) {
michael@0 1604 mystrcat(result, st, MAXLNLEN);
michael@0 1605 free(st);
michael@0 1606 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
michael@0 1607 }
michael@0 1608 st = pSMgr->suggest_morph(dash+1);
michael@0 1609 if (st) {
michael@0 1610 mystrcat(result, st, MAXLNLEN);
michael@0 1611 free(st);
michael@0 1612 }
michael@0 1613 return line_tok(result, slst, MSEP_REC);
michael@0 1614 }
michael@0 1615 }
michael@0 1616 // affixed number in correct word
michael@0 1617 if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
michael@0 1618 (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
michael@0 1619 *dash='-';
michael@0 1620 n = 1;
michael@0 1621 if (*(dash - n) == '.') n++;
michael@0 1622 // search first not a number character to left from dash
michael@0 1623 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
michael@0 1624 n++;
michael@0 1625 }
michael@0 1626 if ((dash - n) < cw) n--;
michael@0 1627 // numbers: valami1000000-hoz
michael@0 1628 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
michael@0 1629 // 56-hoz, 6-hoz
michael@0 1630 for(; n >= 1; n--) {
michael@0 1631 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
michael@0 1632 mystrcat(result, cw, MAXLNLEN);
michael@0 1633 result[dash - cw - n] = '\0';
michael@0 1634 st = pSMgr->suggest_morph(dash - n);
michael@0 1635 if (st) {
michael@0 1636 mystrcat(result, st, MAXLNLEN);
michael@0 1637 free(st);
michael@0 1638 }
michael@0 1639 return line_tok(result, slst, MSEP_REC);
michael@0 1640 }
michael@0 1641 }
michael@0 1642 }
michael@0 1643 }
michael@0 1644 return 0;
michael@0 1645 }
michael@0 1646
michael@0 1647 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
michael@0 1648 {
michael@0 1649 *slst = NULL;
michael@0 1650 if (!pSMgr || !pln) return 0;
michael@0 1651 char **pl2;
michael@0 1652 int pl2n = analyze(&pl2, word);
michael@0 1653 int captype = 0;
michael@0 1654 int abbv = 0;
michael@0 1655 char cw[MAXWORDUTF8LEN];
michael@0 1656 cleanword(cw, word, &captype, &abbv);
michael@0 1657 char result[MAXLNLEN];
michael@0 1658 *result = '\0';
michael@0 1659
michael@0 1660 for (int i = 0; i < pln; i++) {
michael@0 1661 cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
michael@0 1662 }
michael@0 1663 freelist(&pl2, pl2n);
michael@0 1664
michael@0 1665 if (*result) {
michael@0 1666 // allcap
michael@0 1667 if (captype == ALLCAP) mkallcap(result);
michael@0 1668
michael@0 1669 // line split
michael@0 1670 int linenum = line_tok(result, slst, MSEP_REC);
michael@0 1671
michael@0 1672 // capitalize
michael@0 1673 if (captype == INITCAP || captype == HUHINITCAP) {
michael@0 1674 for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
michael@0 1675 }
michael@0 1676
michael@0 1677 // temporary filtering of prefix related errors (eg.
michael@0 1678 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
michael@0 1679
michael@0 1680 int r = 0;
michael@0 1681 for (int j=0; j < linenum; j++) {
michael@0 1682 if (!spell((*slst)[j])) {
michael@0 1683 free((*slst)[j]);
michael@0 1684 (*slst)[j] = NULL;
michael@0 1685 } else {
michael@0 1686 if (r < j) (*slst)[r] = (*slst)[j];
michael@0 1687 r++;
michael@0 1688 }
michael@0 1689 }
michael@0 1690 if (r > 0) return r;
michael@0 1691 free(*slst);
michael@0 1692 *slst = NULL;
michael@0 1693 }
michael@0 1694 return 0;
michael@0 1695 }
michael@0 1696
michael@0 1697 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
michael@0 1698 {
michael@0 1699 char **pl;
michael@0 1700 int pln = analyze(&pl, pattern);
michael@0 1701 int n = generate(slst, word, pl, pln);
michael@0 1702 freelist(&pl, pln);
michael@0 1703 return uniqlist(*slst, n);
michael@0 1704 }
michael@0 1705
michael@0 1706 // minimal XML parser functions
michael@0 1707 int Hunspell::get_xml_par(char * dest, const char * par, int max)
michael@0 1708 {
michael@0 1709 char * d = dest;
michael@0 1710 if (!par) return 0;
michael@0 1711 char end = *par;
michael@0 1712 char * dmax = dest + max;
michael@0 1713 if (end == '>') end = '<';
michael@0 1714 else if (end != '\'' && end != '"') return 0; // bad XML
michael@0 1715 for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
michael@0 1716 *d = '\0';
michael@0 1717 mystrrep(dest, "&lt;", "<");
michael@0 1718 mystrrep(dest, "&amp;", "&");
michael@0 1719 return (int)(d - dest);
michael@0 1720 }
michael@0 1721
michael@0 1722 int Hunspell::get_langnum() const
michael@0 1723 {
michael@0 1724 return langnum;
michael@0 1725 }
michael@0 1726
michael@0 1727 // return the beginning of the element (attr == NULL) or the attribute
michael@0 1728 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
michael@0 1729 {
michael@0 1730 const char * end = strchr(s, '>');
michael@0 1731 const char * p = s;
michael@0 1732 if (attr == NULL) return end;
michael@0 1733 do {
michael@0 1734 p = strstr(p, attr);
michael@0 1735 if (!p || p >= end) return 0;
michael@0 1736 } while (*(p-1) != ' ' && *(p-1) != '\n');
michael@0 1737 return p + strlen(attr);
michael@0 1738 }
michael@0 1739
michael@0 1740 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
michael@0 1741 char cw[MAXWORDUTF8LEN];
michael@0 1742 if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
michael@0 1743 strcmp(cw, value) == 0) return 1;
michael@0 1744 return 0;
michael@0 1745 }
michael@0 1746
michael@0 1747 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
michael@0 1748 int n = 0;
michael@0 1749 char * p;
michael@0 1750 if (!list) return 0;
michael@0 1751 for (p = list; (p = strstr(p, tag)); p++) n++;
michael@0 1752 if (n == 0) return 0;
michael@0 1753 *slst = (char **) malloc(sizeof(char *) * n);
michael@0 1754 if (!*slst) return 0;
michael@0 1755 for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
michael@0 1756 int l = strlen(p);
michael@0 1757 (*slst)[n] = (char *) malloc(l + 1);
michael@0 1758 if (!(*slst)[n]) return n;
michael@0 1759 if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
michael@0 1760 free((*slst)[n]);
michael@0 1761 break;
michael@0 1762 }
michael@0 1763 }
michael@0 1764 return n;
michael@0 1765 }
michael@0 1766
michael@0 1767 int Hunspell::spellml(char*** slst, const char * word)
michael@0 1768 {
michael@0 1769 char *q, *q2;
michael@0 1770 char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
michael@0 1771 q = (char *) strstr(word, "<query");
michael@0 1772 if (!q) return 0; // bad XML input
michael@0 1773 q2 = strchr(q, '>');
michael@0 1774 if (!q2) return 0; // bad XML input
michael@0 1775 q2 = strstr(q2, "<word");
michael@0 1776 if (!q2) return 0; // bad XML input
michael@0 1777 if (check_xml_par(q, "type=", "analyze")) {
michael@0 1778 int n = 0, s = 0;
michael@0 1779 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
michael@0 1780 if (n == 0) return 0;
michael@0 1781 // convert the result to <code><a>ana1</a><a>ana2</a></code> format
michael@0 1782 for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
michael@0 1783 char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
michael@0 1784 if (!r) return 0;
michael@0 1785 strcpy(r, "<code>");
michael@0 1786 for (int i = 0; i < n; i++) {
michael@0 1787 int l = strlen(r);
michael@0 1788 strcpy(r + l, "<a>");
michael@0 1789 strcpy(r + l + 3, (*slst)[i]);
michael@0 1790 mystrrep(r + l + 3, "\t", " ");
michael@0 1791 mystrrep(r + l + 3, "<", "&lt;");
michael@0 1792 mystrrep(r + l + 3, "&", "&amp;");
michael@0 1793 strcat(r, "</a>");
michael@0 1794 free((*slst)[i]);
michael@0 1795 }
michael@0 1796 strcat(r, "</code>");
michael@0 1797 (*slst)[0] = r;
michael@0 1798 return 1;
michael@0 1799 } else if (check_xml_par(q, "type=", "stem")) {
michael@0 1800 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
michael@0 1801 } else if (check_xml_par(q, "type=", "generate")) {
michael@0 1802 int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
michael@0 1803 if (n == 0) return 0;
michael@0 1804 char * q3 = strstr(q2 + 1, "<word");
michael@0 1805 if (q3) {
michael@0 1806 if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
michael@0 1807 return generate(slst, cw, cw2);
michael@0 1808 }
michael@0 1809 } else {
michael@0 1810 if ((q2 = strstr(q2 + 1, "<code"))) {
michael@0 1811 char ** slst2;
michael@0 1812 if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
michael@0 1813 int n2 = generate(slst, cw, slst2, n);
michael@0 1814 freelist(&slst2, n);
michael@0 1815 return uniqlist(*slst, n2);
michael@0 1816 }
michael@0 1817 freelist(&slst2, n);
michael@0 1818 }
michael@0 1819 }
michael@0 1820 }
michael@0 1821 return 0;
michael@0 1822 }
michael@0 1823
michael@0 1824
michael@0 1825 #ifdef HUNSPELL_EXPERIMENTAL
michael@0 1826 // XXX need UTF-8 support
michael@0 1827 char * Hunspell::morph_with_correction(const char * word)
michael@0 1828 {
michael@0 1829 char cw[MAXWORDUTF8LEN];
michael@0 1830 char wspace[MAXWORDUTF8LEN];
michael@0 1831 if (! pSMgr || maxdic == 0) return NULL;
michael@0 1832 int wl = strlen(word);
michael@0 1833 if (utf8) {
michael@0 1834 if (wl >= MAXWORDUTF8LEN) return NULL;
michael@0 1835 } else {
michael@0 1836 if (wl >= MAXWORDLEN) return NULL;
michael@0 1837 }
michael@0 1838 int captype = 0;
michael@0 1839 int abbv = 0;
michael@0 1840 wl = cleanword(cw, word, &captype, &abbv);
michael@0 1841 if (wl == 0) return NULL;
michael@0 1842
michael@0 1843 char result[MAXLNLEN];
michael@0 1844 char * st = NULL;
michael@0 1845
michael@0 1846 *result = '\0';
michael@0 1847
michael@0 1848
michael@0 1849 switch(captype) {
michael@0 1850 case NOCAP: {
michael@0 1851 st = pSMgr->suggest_morph_for_spelling_error(cw);
michael@0 1852 if (st) {
michael@0 1853 mystrcat(result, st, MAXLNLEN);
michael@0 1854 free(st);
michael@0 1855 }
michael@0 1856 if (abbv) {
michael@0 1857 memcpy(wspace,cw,wl);
michael@0 1858 *(wspace+wl) = '.';
michael@0 1859 *(wspace+wl+1) = '\0';
michael@0 1860 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1861 if (st) {
michael@0 1862 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1863 mystrcat(result, st, MAXLNLEN);
michael@0 1864 free(st);
michael@0 1865 }
michael@0 1866 }
michael@0 1867 break;
michael@0 1868 }
michael@0 1869 case INITCAP: {
michael@0 1870 memcpy(wspace,cw,(wl+1));
michael@0 1871 mkallsmall(wspace);
michael@0 1872 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1873 if (st) {
michael@0 1874 mystrcat(result, st, MAXLNLEN);
michael@0 1875 free(st);
michael@0 1876 }
michael@0 1877 st = pSMgr->suggest_morph_for_spelling_error(cw);
michael@0 1878 if (st) {
michael@0 1879 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1880 mystrcat(result, st, MAXLNLEN);
michael@0 1881 free(st);
michael@0 1882 }
michael@0 1883 if (abbv) {
michael@0 1884 memcpy(wspace,cw,wl);
michael@0 1885 *(wspace+wl) = '.';
michael@0 1886 *(wspace+wl+1) = '\0';
michael@0 1887 mkallsmall(wspace);
michael@0 1888 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1889 if (st) {
michael@0 1890 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1891 mystrcat(result, st, MAXLNLEN);
michael@0 1892 free(st);
michael@0 1893 }
michael@0 1894 mkinitcap(wspace);
michael@0 1895 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1896 if (st) {
michael@0 1897 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1898 mystrcat(result, st, MAXLNLEN);
michael@0 1899 free(st);
michael@0 1900 }
michael@0 1901 }
michael@0 1902 break;
michael@0 1903 }
michael@0 1904 case HUHCAP: {
michael@0 1905 st = pSMgr->suggest_morph_for_spelling_error(cw);
michael@0 1906 if (st) {
michael@0 1907 mystrcat(result, st, MAXLNLEN);
michael@0 1908 free(st);
michael@0 1909 }
michael@0 1910 memcpy(wspace,cw,(wl+1));
michael@0 1911 mkallsmall(wspace);
michael@0 1912 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1913 if (st) {
michael@0 1914 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1915 mystrcat(result, st, MAXLNLEN);
michael@0 1916 free(st);
michael@0 1917 }
michael@0 1918 break;
michael@0 1919 }
michael@0 1920 case ALLCAP: {
michael@0 1921 memcpy(wspace,cw,(wl+1));
michael@0 1922 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1923 if (st) {
michael@0 1924 mystrcat(result, st, MAXLNLEN);
michael@0 1925 free(st);
michael@0 1926 }
michael@0 1927 mkallsmall(wspace);
michael@0 1928 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1929 if (st) {
michael@0 1930 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1931 mystrcat(result, st, MAXLNLEN);
michael@0 1932 free(st);
michael@0 1933 }
michael@0 1934 mkinitcap(wspace);
michael@0 1935 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1936 if (st) {
michael@0 1937 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1938 mystrcat(result, st, MAXLNLEN);
michael@0 1939 free(st);
michael@0 1940 }
michael@0 1941 if (abbv) {
michael@0 1942 memcpy(wspace,cw,(wl+1));
michael@0 1943 *(wspace+wl) = '.';
michael@0 1944 *(wspace+wl+1) = '\0';
michael@0 1945 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1946 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1947 if (st) {
michael@0 1948 mystrcat(result, st, MAXLNLEN);
michael@0 1949 free(st);
michael@0 1950 }
michael@0 1951 mkallsmall(wspace);
michael@0 1952 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1953 if (st) {
michael@0 1954 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1955 mystrcat(result, st, MAXLNLEN);
michael@0 1956 free(st);
michael@0 1957 }
michael@0 1958 mkinitcap(wspace);
michael@0 1959 st = pSMgr->suggest_morph_for_spelling_error(wspace);
michael@0 1960 if (st) {
michael@0 1961 if (*result) mystrcat(result, "\n", MAXLNLEN);
michael@0 1962 mystrcat(result, st, MAXLNLEN);
michael@0 1963 free(st);
michael@0 1964 }
michael@0 1965 }
michael@0 1966 break;
michael@0 1967 }
michael@0 1968 }
michael@0 1969
michael@0 1970 if (*result) return mystrdup(result);
michael@0 1971 return NULL;
michael@0 1972 }
michael@0 1973
michael@0 1974 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
michael@0 1975
michael@0 1976 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
michael@0 1977 {
michael@0 1978 return (Hunhandle*)(new Hunspell(affpath, dpath));
michael@0 1979 }
michael@0 1980
michael@0 1981 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
michael@0 1982 const char * key)
michael@0 1983 {
michael@0 1984 return (Hunhandle*)(new Hunspell(affpath, dpath, key));
michael@0 1985 }
michael@0 1986
michael@0 1987 void Hunspell_destroy(Hunhandle *pHunspell)
michael@0 1988 {
michael@0 1989 delete (Hunspell*)(pHunspell);
michael@0 1990 }
michael@0 1991
michael@0 1992 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
michael@0 1993 {
michael@0 1994 return ((Hunspell*)pHunspell)->spell(word);
michael@0 1995 }
michael@0 1996
michael@0 1997 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
michael@0 1998 {
michael@0 1999 return ((Hunspell*)pHunspell)->get_dic_encoding();
michael@0 2000 }
michael@0 2001
michael@0 2002 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
michael@0 2003 {
michael@0 2004 return ((Hunspell*)pHunspell)->suggest(slst, word);
michael@0 2005 }
michael@0 2006
michael@0 2007 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
michael@0 2008 {
michael@0 2009 return ((Hunspell*)pHunspell)->analyze(slst, word);
michael@0 2010 }
michael@0 2011
michael@0 2012 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
michael@0 2013 {
michael@0 2014 return ((Hunspell*)pHunspell)->stem(slst, word);
michael@0 2015 }
michael@0 2016
michael@0 2017 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
michael@0 2018 {
michael@0 2019 return ((Hunspell*)pHunspell)->stem(slst, desc, n);
michael@0 2020 }
michael@0 2021
michael@0 2022 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
michael@0 2023 const char * word2)
michael@0 2024 {
michael@0 2025 return ((Hunspell*)pHunspell)->generate(slst, word, word2);
michael@0 2026 }
michael@0 2027
michael@0 2028 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
michael@0 2029 char** desc, int n)
michael@0 2030 {
michael@0 2031 return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
michael@0 2032 }
michael@0 2033
michael@0 2034 /* functions for run-time modification of the dictionary */
michael@0 2035
michael@0 2036 /* add word to the run-time dictionary */
michael@0 2037
michael@0 2038 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
michael@0 2039 return ((Hunspell*)pHunspell)->add(word);
michael@0 2040 }
michael@0 2041
michael@0 2042 /* add word to the run-time dictionary with affix flags of
michael@0 2043 * the example (a dictionary word): Hunspell will recognize
michael@0 2044 * affixed forms of the new word, too.
michael@0 2045 */
michael@0 2046
michael@0 2047 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
michael@0 2048 const char * example) {
michael@0 2049 return ((Hunspell*)pHunspell)->add_with_affix(word, example);
michael@0 2050 }
michael@0 2051
michael@0 2052 /* remove word from the run-time dictionary */
michael@0 2053
michael@0 2054 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
michael@0 2055 return ((Hunspell*)pHunspell)->remove(word);
michael@0 2056 }
michael@0 2057
michael@0 2058 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
michael@0 2059 freelist(slst, n);
michael@0 2060 }

mercurial