extensions/spellcheck/hunspell/src/hunspell.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/hunspell/src/hunspell.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2060 @@
     1.4 +/******* BEGIN LICENSE BLOCK *******
     1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     1.6 + * 
     1.7 + * The contents of this file are subject to the Mozilla Public License Version
     1.8 + * 1.1 (the "License"); you may not use this file except in compliance with
     1.9 + * the License. You may obtain a copy of the License at
    1.10 + * http://www.mozilla.org/MPL/
    1.11 + * 
    1.12 + * Software distributed under the License is distributed on an "AS IS" basis,
    1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    1.14 + * for the specific language governing rights and limitations under the
    1.15 + * License.
    1.16 + * 
    1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers
    1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    1.20 + * 
    1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    1.22 + *                 David Einstein (deinst@world.std.com)
    1.23 + *                 László Németh (nemethl@gyorsposta.hu)
    1.24 + *                 Caolan McNamara (caolanm@redhat.com)
    1.25 + *                 Davide Prina
    1.26 + *                 Giuseppe Modugno
    1.27 + *                 Gianluca Turconi
    1.28 + *                 Simon Brouwer
    1.29 + *                 Noll Janos
    1.30 + *                 Biro Arpad
    1.31 + *                 Goldman Eleonora
    1.32 + *                 Sarlos Tamas
    1.33 + *                 Bencsath Boldizsar
    1.34 + *                 Halacsy Peter
    1.35 + *                 Dvornik Laszlo
    1.36 + *                 Gefferth Andras
    1.37 + *                 Nagy Viktor
    1.38 + *                 Varga Daniel
    1.39 + *                 Chris Halls
    1.40 + *                 Rene Engelhard
    1.41 + *                 Bram Moolenaar
    1.42 + *                 Dafydd Jones
    1.43 + *                 Harri Pitkanen
    1.44 + *                 Andras Timar
    1.45 + *                 Tor Lillqvist
    1.46 + * 
    1.47 + * Alternatively, the contents of this file may be used under the terms of
    1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or
    1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead
    1.51 + * of those above. If you wish to allow use of your version of this file only
    1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to
    1.53 + * use your version of this file under the terms of the MPL, indicate your
    1.54 + * decision by deleting the provisions above and replace them with the notice
    1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete
    1.56 + * the provisions above, a recipient may use your version of this file under
    1.57 + * the terms of any one of the MPL, the GPL or the LGPL.
    1.58 + *
    1.59 + ******* END LICENSE BLOCK *******/
    1.60 +
    1.61 +#include <stdlib.h>
    1.62 +#include <string.h>
    1.63 +#include <stdio.h>
    1.64 +
    1.65 +#include "hunspell.hxx"
    1.66 +#include "hunspell.h"
    1.67 +#ifndef MOZILLA_CLIENT
    1.68 +#    include "config.h"
    1.69 +#endif
    1.70 +#include "csutil.hxx"
    1.71 +
    1.72 +Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
    1.73 +{
    1.74 +    encoding = NULL;
    1.75 +    csconv = NULL;
    1.76 +    utf8 = 0;
    1.77 +    complexprefixes = 0;
    1.78 +    affixpath = mystrdup(affpath);
    1.79 +    maxdic = 0;
    1.80 +
    1.81 +    /* first set up the hash manager */
    1.82 +    pHMgr[0] = new HashMgr(dpath, affpath, key);
    1.83 +    if (pHMgr[0]) maxdic = 1;
    1.84 +
    1.85 +    /* next set up the affix manager */
    1.86 +    /* it needs access to the hash manager lookup methods */
    1.87 +    pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
    1.88 +
    1.89 +    /* get the preferred try string and the dictionary */
    1.90 +    /* encoding from the Affix Manager for that dictionary */
    1.91 +    char * try_string = pAMgr->get_try_string();
    1.92 +    encoding = pAMgr->get_encoding();
    1.93 +    langnum = pAMgr->get_langnum();
    1.94 +    utf8 = pAMgr->get_utf8();
    1.95 +    if (!utf8)
    1.96 +        csconv = get_current_cs(encoding);
    1.97 +    complexprefixes = pAMgr->get_complexprefixes();
    1.98 +    wordbreak = pAMgr->get_breaktable();
    1.99 +
   1.100 +    /* and finally set up the suggestion manager */
   1.101 +    pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
   1.102 +    if (try_string) free(try_string);
   1.103 +}
   1.104 +
   1.105 +Hunspell::~Hunspell()
   1.106 +{
   1.107 +    if (pSMgr) delete pSMgr;
   1.108 +    if (pAMgr) delete pAMgr;
   1.109 +    for (int i = 0; i < maxdic; i++) delete pHMgr[i];
   1.110 +    maxdic = 0;
   1.111 +    pSMgr = NULL;
   1.112 +    pAMgr = NULL;
   1.113 +#ifdef MOZILLA_CLIENT
   1.114 +    delete [] csconv;
   1.115 +#endif
   1.116 +    csconv= NULL;
   1.117 +    if (encoding) free(encoding);
   1.118 +    encoding = NULL;
   1.119 +    if (affixpath) free(affixpath);
   1.120 +    affixpath = NULL;
   1.121 +}
   1.122 +
   1.123 +// load extra dictionaries
   1.124 +int Hunspell::add_dic(const char * dpath, const char * key) {
   1.125 +    if (maxdic == MAXDIC || !affixpath) return 1;
   1.126 +    pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
   1.127 +    if (pHMgr[maxdic]) maxdic++; else return 1;
   1.128 +    return 0;
   1.129 +}
   1.130 +
   1.131 +// make a copy of src at destination while removing all leading
   1.132 +// blanks and removing any trailing periods after recording
   1.133 +// their presence with the abbreviation flag
   1.134 +// also since already going through character by character,
   1.135 +// set the capitalization type
   1.136 +// return the length of the "cleaned" (and UTF-8 encoded) word
   1.137 +
   1.138 +int Hunspell::cleanword2(char * dest, const char * src,
   1.139 +    w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
   1.140 +{
   1.141 +   unsigned char * p = (unsigned char *) dest;
   1.142 +   const unsigned char * q = (const unsigned char * ) src;
   1.143 +
   1.144 +   // first skip over any leading blanks
   1.145 +   while ((*q != '\0') && (*q == ' ')) q++;
   1.146 +
   1.147 +   // now strip off any trailing periods (recording their presence)
   1.148 +   *pabbrev = 0;
   1.149 +   int nl = strlen((const char *)q);
   1.150 +   while ((nl > 0) && (*(q+nl-1)=='.')) {
   1.151 +       nl--;
   1.152 +       (*pabbrev)++;
   1.153 +   }
   1.154 +
   1.155 +   // if no characters are left it can't be capitalized
   1.156 +   if (nl <= 0) {
   1.157 +       *pcaptype = NOCAP;
   1.158 +       *p = '\0';
   1.159 +       return 0;
   1.160 +   }
   1.161 +
   1.162 +   strncpy(dest, (char *) q, nl);
   1.163 +   *(dest + nl) = '\0';
   1.164 +   nl = strlen(dest);
   1.165 +   if (utf8) {
   1.166 +      *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
   1.167 +      // don't check too long words
   1.168 +      if (*nc >= MAXWORDLEN) return 0;
   1.169 +      if (*nc == -1) { // big Unicode character (non BMP area)
   1.170 +         *pcaptype = NOCAP;
   1.171 +         return nl;
   1.172 +      }
   1.173 +     *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
   1.174 +   } else {
   1.175 +     *pcaptype = get_captype(dest, nl, csconv);
   1.176 +     *nc = nl;
   1.177 +   }
   1.178 +   return nl;
   1.179 +}
   1.180 +
   1.181 +int Hunspell::cleanword(char * dest, const char * src,
   1.182 +    int * pcaptype, int * pabbrev)
   1.183 +{
   1.184 +   unsigned char * p = (unsigned char *) dest;
   1.185 +   const unsigned char * q = (const unsigned char * ) src;
   1.186 +   int firstcap = 0;
   1.187 +
   1.188 +   // first skip over any leading blanks
   1.189 +   while ((*q != '\0') && (*q == ' ')) q++;
   1.190 +
   1.191 +   // now strip off any trailing periods (recording their presence)
   1.192 +   *pabbrev = 0;
   1.193 +   int nl = strlen((const char *)q);
   1.194 +   while ((nl > 0) && (*(q+nl-1)=='.')) {
   1.195 +       nl--;
   1.196 +       (*pabbrev)++;
   1.197 +   }
   1.198 +
   1.199 +   // if no characters are left it can't be capitalized
   1.200 +   if (nl <= 0) {
   1.201 +       *pcaptype = NOCAP;
   1.202 +       *p = '\0';
   1.203 +       return 0;
   1.204 +   }
   1.205 +
   1.206 +   // now determine the capitalization type of the first nl letters
   1.207 +   int ncap = 0;
   1.208 +   int nneutral = 0;
   1.209 +   int nc = 0;
   1.210 +
   1.211 +   if (!utf8) {
   1.212 +      while (nl > 0) {
   1.213 +         nc++;
   1.214 +         if (csconv[(*q)].ccase) ncap++;
   1.215 +         if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
   1.216 +         *p++ = *q++;
   1.217 +         nl--;
   1.218 +      }
   1.219 +      // remember to terminate the destination string
   1.220 +      *p = '\0';
   1.221 +      firstcap = csconv[(unsigned char)(*dest)].ccase;
   1.222 +   } else {
   1.223 +      unsigned short idx;
   1.224 +      w_char t[MAXWORDLEN];
   1.225 +      nc = u8_u16(t, MAXWORDLEN, src);
   1.226 +      for (int i = 0; i < nc; i++) {
   1.227 +         idx = (t[i].h << 8) + t[i].l;
   1.228 +         unsigned short low = unicodetolower(idx, langnum);
   1.229 +         if (idx != low) ncap++;
   1.230 +         if (unicodetoupper(idx, langnum) == low) nneutral++;
   1.231 +      }
   1.232 +      u16_u8(dest, MAXWORDUTF8LEN, t, nc);
   1.233 +      if (ncap) {
   1.234 +         idx = (t[0].h << 8) + t[0].l;
   1.235 +         firstcap = (idx != unicodetolower(idx, langnum));
   1.236 +      }
   1.237 +   }
   1.238 +
   1.239 +   // now finally set the captype
   1.240 +   if (ncap == 0) {
   1.241 +        *pcaptype = NOCAP;
   1.242 +   } else if ((ncap == 1) && firstcap) {
   1.243 +        *pcaptype = INITCAP;
   1.244 +   } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
   1.245 +        *pcaptype = ALLCAP;
   1.246 +   } else if ((ncap > 1) && firstcap) {
   1.247 +        *pcaptype = HUHINITCAP;
   1.248 +   } else {
   1.249 +        *pcaptype = HUHCAP;
   1.250 +   }
   1.251 +   return strlen(dest);
   1.252 +}
   1.253 +
   1.254 +void Hunspell::mkallcap(char * p)
   1.255 +{
   1.256 +  if (utf8) {
   1.257 +      w_char u[MAXWORDLEN];
   1.258 +      int nc = u8_u16(u, MAXWORDLEN, p);
   1.259 +      unsigned short idx;
   1.260 +      for (int i = 0; i < nc; i++) {
   1.261 +         idx = (u[i].h << 8) + u[i].l;
   1.262 +         if (idx != unicodetoupper(idx, langnum)) {
   1.263 +            u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
   1.264 +            u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
   1.265 +         }
   1.266 +      }
   1.267 +      u16_u8(p, MAXWORDUTF8LEN, u, nc);
   1.268 +  } else {
   1.269 +    while (*p != '\0') {
   1.270 +        *p = csconv[((unsigned char) *p)].cupper;
   1.271 +        p++;
   1.272 +    }
   1.273 +  }
   1.274 +}
   1.275 +
   1.276 +int Hunspell::mkallcap2(char * p, w_char * u, int nc)
   1.277 +{
   1.278 +  if (utf8) {
   1.279 +      unsigned short idx;
   1.280 +      for (int i = 0; i < nc; i++) {
   1.281 +         idx = (u[i].h << 8) + u[i].l;
   1.282 +         unsigned short up = unicodetoupper(idx, langnum);
   1.283 +         if (idx != up) {
   1.284 +            u[i].h = (unsigned char) (up >> 8);
   1.285 +            u[i].l = (unsigned char) (up & 0x00FF);
   1.286 +         }
   1.287 +      }
   1.288 +      u16_u8(p, MAXWORDUTF8LEN, u, nc);
   1.289 +      return strlen(p);
   1.290 +  } else {
   1.291 +    while (*p != '\0') {
   1.292 +        *p = csconv[((unsigned char) *p)].cupper;
   1.293 +        p++;
   1.294 +    }
   1.295 +  }
   1.296 +  return nc;
   1.297 +}
   1.298 +
   1.299 +
   1.300 +void Hunspell::mkallsmall(char * p)
   1.301 +{
   1.302 +    while (*p != '\0') {
   1.303 +        *p = csconv[((unsigned char) *p)].clower;
   1.304 +        p++;
   1.305 +    }
   1.306 +}
   1.307 +
   1.308 +int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
   1.309 +{
   1.310 +  if (utf8) {
   1.311 +      unsigned short idx;
   1.312 +      for (int i = 0; i < nc; i++) {
   1.313 +         idx = (u[i].h << 8) + u[i].l;
   1.314 +         unsigned short low = unicodetolower(idx, langnum);
   1.315 +         if (idx != low) {
   1.316 +            u[i].h = (unsigned char) (low >> 8);
   1.317 +            u[i].l = (unsigned char) (low & 0x00FF);
   1.318 +         }
   1.319 +      }
   1.320 +      u16_u8(p, MAXWORDUTF8LEN, u, nc);
   1.321 +      return strlen(p);
   1.322 +  } else {
   1.323 +    while (*p != '\0') {
   1.324 +        *p = csconv[((unsigned char) *p)].clower;
   1.325 +        p++;
   1.326 +    }
   1.327 +  }
   1.328 +  return nc;
   1.329 +}
   1.330 +
   1.331 +// convert UTF-8 sharp S codes to latin 1
   1.332 +char * Hunspell::sharps_u8_l1(char * dest, char * source) {
   1.333 +    char * p = dest;
   1.334 +    *p = *source;
   1.335 +    for (p++, source++; *(source - 1); p++, source++) {
   1.336 +        *p = *source;
   1.337 +        if (*source == '\x9F') *--p = '\xDF';
   1.338 +    }
   1.339 +    return dest;
   1.340 +}
   1.341 +
   1.342 +// recursive search for right ss - sharp s permutations
   1.343 +hentry * Hunspell::spellsharps(char * base, char * pos, int n,
   1.344 +        int repnum, char * tmp, int * info, char **root) {
   1.345 +    pos = strstr(pos, "ss");
   1.346 +    if (pos && (n < MAXSHARPS)) {
   1.347 +        *pos = '\xC3';
   1.348 +        *(pos + 1) = '\x9F';
   1.349 +        hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
   1.350 +        if (h) return h;
   1.351 +        *pos = 's';
   1.352 +        *(pos + 1) = 's';
   1.353 +        h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
   1.354 +        if (h) return h;
   1.355 +    } else if (repnum > 0) {
   1.356 +        if (utf8) return checkword(base, info, root);
   1.357 +        return checkword(sharps_u8_l1(tmp, base), info, root);
   1.358 +    }
   1.359 +    return NULL;
   1.360 +}
   1.361 +
   1.362 +int Hunspell::is_keepcase(const hentry * rv) {
   1.363 +    return pAMgr && rv->astr && pAMgr->get_keepcase() &&
   1.364 +        TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
   1.365 +}
   1.366 +
   1.367 +/* insert a word to the beginning of the suggestion array and return ns */
   1.368 +int Hunspell::insert_sug(char ***slst, char * word, int ns) {
   1.369 +    char * dup = mystrdup(word);
   1.370 +    if (!dup) return ns;
   1.371 +    if (ns == MAXSUGGESTION) {
   1.372 +        ns--;
   1.373 +        free((*slst)[ns]);
   1.374 +    }
   1.375 +    for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
   1.376 +    (*slst)[0] = dup;
   1.377 +    return ns + 1;
   1.378 +}
   1.379 +
   1.380 +int Hunspell::spell(const char * word, int * info, char ** root)
   1.381 +{
   1.382 +  struct hentry * rv=NULL;
   1.383 +  // need larger vector. For example, Turkish capital letter I converted a
   1.384 +  // 2-byte UTF-8 character (dotless i) by mkallsmall.
   1.385 +  char cw[MAXWORDUTF8LEN];
   1.386 +  char wspace[MAXWORDUTF8LEN];
   1.387 +  w_char unicw[MAXWORDLEN];
   1.388 +  // Hunspell supports XML input of the simplified API (see manual)
   1.389 +  if (strcmp(word, SPELL_XML) == 0) return 1;
   1.390 +  int nc = strlen(word);
   1.391 +  int wl2 = 0;
   1.392 +  if (utf8) {
   1.393 +    if (nc >= MAXWORDUTF8LEN) return 0;
   1.394 +  } else {
   1.395 +    if (nc >= MAXWORDLEN) return 0;
   1.396 +  }
   1.397 +  int captype = 0;
   1.398 +  int abbv = 0;
   1.399 +  int wl = 0;
   1.400 +
   1.401 +  // input conversion
   1.402 +  RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   1.403 +  if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
   1.404 +  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
   1.405 +
   1.406 +  int info2 = 0;
   1.407 +  if (wl == 0 || maxdic == 0) return 1;
   1.408 +  if (root) *root = NULL;
   1.409 +
   1.410 +  // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
   1.411 +  enum { NBEGIN, NNUM, NSEP };
   1.412 +  int nstate = NBEGIN;
   1.413 +  int i;
   1.414 +
   1.415 +  for (i = 0; (i < wl); i++) {
   1.416 +    if ((cw[i] <= '9') && (cw[i] >= '0')) {
   1.417 +        nstate = NNUM;
   1.418 +    } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
   1.419 +        if ((nstate == NSEP) || (i == 0)) break;
   1.420 +        nstate = NSEP;
   1.421 +    } else break;
   1.422 +  }
   1.423 +  if ((i == wl) && (nstate == NNUM)) return 1;
   1.424 +  if (!info) info = &info2; else *info = 0;
   1.425 +
   1.426 +  switch(captype) {
   1.427 +     case HUHCAP:
   1.428 +     case HUHINITCAP:
   1.429 +            *info += SPELL_ORIGCAP;
   1.430 +     case NOCAP: {
   1.431 +            rv = checkword(cw, info, root);
   1.432 +            if ((abbv) && !(rv)) {
   1.433 +                memcpy(wspace,cw,wl);
   1.434 +                *(wspace+wl) = '.';
   1.435 +                *(wspace+wl+1) = '\0';
   1.436 +                rv = checkword(wspace, info, root);
   1.437 +            }
   1.438 +            break;
   1.439 +         }
   1.440 +     case ALLCAP: {
   1.441 +            *info += SPELL_ORIGCAP;
   1.442 +            rv = checkword(cw, info, root);
   1.443 +            if (rv) break;
   1.444 +            if (abbv) {
   1.445 +                memcpy(wspace,cw,wl);
   1.446 +                *(wspace+wl) = '.';
   1.447 +                *(wspace+wl+1) = '\0';
   1.448 +                rv = checkword(wspace, info, root);
   1.449 +                if (rv) break;
   1.450 +            }
   1.451 +            // Spec. prefix handling for Catalan, French, Italian:
   1.452 +	    // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
   1.453 +            if (pAMgr && strchr(cw, '\'')) {
   1.454 +                wl = mkallsmall2(cw, unicw, nc);
   1.455 +        	//There are no really sane circumstances where this could fail,
   1.456 +        	//but anyway...
   1.457 +        	if (char * apostrophe = strchr(cw, '\'')) {
   1.458 +                    if (utf8) {
   1.459 +            	        w_char tmpword[MAXWORDLEN];
   1.460 +            	        *apostrophe = '\0';
   1.461 +            	        wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
   1.462 +            	        *apostrophe = '\'';
   1.463 +		        if (wl2 < nc) {
   1.464 +		            mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
   1.465 +			    rv = checkword(cw, info, root);
   1.466 +			    if (rv) break;
   1.467 +		        }
   1.468 +                    } else {
   1.469 +		        mkinitcap2(apostrophe + 1, unicw, nc);
   1.470 +		        rv = checkword(cw, info, root);
   1.471 +		        if (rv) break;
   1.472 +		    }
   1.473 +		}
   1.474 +		mkinitcap2(cw, unicw, nc);
   1.475 +		rv = checkword(cw, info, root);
   1.476 +		if (rv) break;
   1.477 +            }
   1.478 +            if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
   1.479 +                char tmpword[MAXWORDUTF8LEN];
   1.480 +                wl = mkallsmall2(cw, unicw, nc);
   1.481 +                memcpy(wspace,cw,(wl+1));
   1.482 +                rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
   1.483 +                if (!rv) {
   1.484 +                    wl2 = mkinitcap2(cw, unicw, nc);
   1.485 +                    rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
   1.486 +                }
   1.487 +                if ((abbv) && !(rv)) {
   1.488 +                    *(wspace+wl) = '.';
   1.489 +                    *(wspace+wl+1) = '\0';
   1.490 +                    rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
   1.491 +                    if (!rv) {
   1.492 +                        memcpy(wspace, cw, wl2);
   1.493 +                        *(wspace+wl2) = '.';
   1.494 +                        *(wspace+wl2+1) = '\0';
   1.495 +                        rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
   1.496 +                    }
   1.497 +                }
   1.498 +                if (rv) break;
   1.499 +            }
   1.500 +        }
   1.501 +     case INITCAP: {
   1.502 +             *info += SPELL_ORIGCAP;
   1.503 +             wl = mkallsmall2(cw, unicw, nc);
   1.504 +             memcpy(wspace,cw,(wl+1));
   1.505 +             wl2 = mkinitcap2(cw, unicw, nc);
   1.506 +             if (captype == INITCAP) *info += SPELL_INITCAP;
   1.507 +             rv = checkword(cw, info, root);
   1.508 +             if (captype == INITCAP) *info -= SPELL_INITCAP;
   1.509 +             // forbid bad capitalization
   1.510 +             // (for example, ijs -> Ijs instead of IJs in Dutch)
   1.511 +             // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
   1.512 +             if (*info & SPELL_FORBIDDEN) {
   1.513 +                rv = NULL;
   1.514 +                break;
   1.515 +             }
   1.516 +             if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
   1.517 +             if (rv) break;
   1.518 +
   1.519 +             rv = checkword(wspace, info, root);
   1.520 +             if (abbv && !rv) {
   1.521 +
   1.522 +                 *(wspace+wl) = '.';
   1.523 +                 *(wspace+wl+1) = '\0';
   1.524 +                 rv = checkword(wspace, info, root);
   1.525 +                 if (!rv) {
   1.526 +                    memcpy(wspace, cw, wl2);
   1.527 +                    *(wspace+wl2) = '.';
   1.528 +                    *(wspace+wl2+1) = '\0';
   1.529 +    	    	    if (captype == INITCAP) *info += SPELL_INITCAP;
   1.530 +                    rv = checkword(wspace, info, root);
   1.531 +    	    	    if (captype == INITCAP) *info -= SPELL_INITCAP;
   1.532 +                    if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
   1.533 +                    break;
   1.534 +                 }
   1.535 +             }
   1.536 +             if (rv && is_keepcase(rv) &&
   1.537 +                ((captype == ALLCAP) ||
   1.538 +                   // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
   1.539 +                   // in INITCAP form, too.
   1.540 +                   !(pAMgr->get_checksharps() &&
   1.541 +                      ((utf8 && strstr(wspace, "\xC3\x9F")) ||
   1.542 +                      (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
   1.543 +             break;
   1.544 +           }
   1.545 +  }
   1.546 +
   1.547 +  if (rv) {
   1.548 +      if (pAMgr && pAMgr->get_warn() && rv->astr &&
   1.549 +          TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
   1.550 +              *info += SPELL_WARN;
   1.551 +	      if (pAMgr->get_forbidwarn()) return 0;
   1.552 +              return HUNSPELL_OK_WARN;
   1.553 +      }
   1.554 +      return HUNSPELL_OK;
   1.555 +  }
   1.556 +
   1.557 +  // recursive breaking at break points
   1.558 +  if (wordbreak) {
   1.559 +    char * s;
   1.560 +    char r;
   1.561 +    int nbr = 0;
   1.562 +    wl = strlen(cw);
   1.563 +    int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
   1.564 +
   1.565 +    // calculate break points for recursion limit
   1.566 +    for (int j = 0; j < numbreak; j++) {
   1.567 +      s = cw;
   1.568 +      do {
   1.569 +      	s = (char *) strstr(s, wordbreak[j]);
   1.570 +      	if (s) { 
   1.571 +		nbr++;
   1.572 +		s++;
   1.573 +	}
   1.574 +      } while (s);
   1.575 +    } 
   1.576 +    if (nbr >= 10) return 0;
   1.577 +
   1.578 +    // check boundary patterns (^begin and end$)
   1.579 +    for (int j = 0; j < numbreak; j++) {
   1.580 +      int plen = strlen(wordbreak[j]);
   1.581 +      if (plen == 1 || plen > wl) continue;
   1.582 +      if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
   1.583 +        && spell(cw + plen - 1)) return 1;
   1.584 +      if (wordbreak[j][plen - 1] == '$' &&
   1.585 +        strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
   1.586 +	    r = cw[wl - plen + 1];
   1.587 +	    cw[wl - plen + 1] = '\0';
   1.588 +    	    if (spell(cw)) return 1;
   1.589 +	    cw[wl - plen + 1] = r;
   1.590 +	}
   1.591 +    }
   1.592 +
   1.593 +    // other patterns
   1.594 +    for (int j = 0; j < numbreak; j++) {
   1.595 +      int plen = strlen(wordbreak[j]);
   1.596 +      s=(char *) strstr(cw, wordbreak[j]);
   1.597 +      if (s && (s > cw) && (s < cw + wl - plen)) {
   1.598 +	if (!spell(s + plen)) continue;
   1.599 +        r = *s;
   1.600 +        *s = '\0';
   1.601 +        // examine 2 sides of the break point
   1.602 +        if (spell(cw)) return 1;
   1.603 +        *s = r;
   1.604 +
   1.605 +        // LANG_hu: spec. dash rule
   1.606 +	if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
   1.607 +	  r = s[1];
   1.608 +	  s[1] = '\0';
   1.609 +          if (spell(cw)) return 1; // check the first part with dash
   1.610 +          s[1] = r;
   1.611 +	}
   1.612 +        // end of LANG speficic region
   1.613 +
   1.614 +      }
   1.615 +    }
   1.616 +  }
   1.617 +
   1.618 +  return 0;
   1.619 +}
   1.620 +
   1.621 +struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
   1.622 +{
   1.623 +  struct hentry * he = NULL;
   1.624 +  int len, i;
   1.625 +  char w2[MAXWORDUTF8LEN];
   1.626 +  const char * word;
   1.627 +
   1.628 +  char * ignoredchars = pAMgr->get_ignore();
   1.629 +  if (ignoredchars != NULL) {
   1.630 +     strcpy(w2, w);
   1.631 +     if (utf8) {
   1.632 +        int ignoredchars_utf16_len;
   1.633 +        unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
   1.634 +        remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
   1.635 +     } else {
   1.636 +        remove_ignored_chars(w2,ignoredchars);
   1.637 +     }
   1.638 +     word = w2;
   1.639 +  } else word = w;
   1.640 +
   1.641 +  len = strlen(word);
   1.642 +
   1.643 +  if (!len)
   1.644 +      return NULL;
   1.645 +
   1.646 +  // word reversing wrapper for complex prefixes
   1.647 +  if (complexprefixes) {
   1.648 +    if (word != w2) {
   1.649 +      strcpy(w2, word);
   1.650 +      word = w2;
   1.651 +    }
   1.652 +    if (utf8) reverseword_utf(w2); else reverseword(w2);
   1.653 +  }
   1.654 +
   1.655 +  // look word in hash table
   1.656 +  for (i = 0; (i < maxdic) && !he; i ++) {
   1.657 +  he = (pHMgr[i])->lookup(word);
   1.658 +
   1.659 +  // check forbidden and onlyincompound words
   1.660 +  if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
   1.661 +    if (info) *info += SPELL_FORBIDDEN;
   1.662 +    // LANG_hu section: set dash information for suggestions
   1.663 +    if (langnum == LANG_hu) {
   1.664 +        if (pAMgr->get_compoundflag() &&
   1.665 +            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
   1.666 +                if (info) *info += SPELL_COMPOUND;
   1.667 +        }
   1.668 +    }
   1.669 +    return NULL;
   1.670 +  }
   1.671 +
   1.672 +  // he = next not needaffix, onlyincompound homonym or onlyupcase word
   1.673 +  while (he && (he->astr) &&
   1.674 +    ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
   1.675 +       (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
   1.676 +       (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
   1.677 +    )) he = he->next_homonym;
   1.678 +  }
   1.679 +
   1.680 +  // check with affixes
   1.681 +  if (!he && pAMgr) {
   1.682 +     // try stripping off affixes */
   1.683 +     he = pAMgr->affix_check(word, len, 0);
   1.684 +
   1.685 +     // check compound restriction and onlyupcase
   1.686 +     if (he && he->astr && (
   1.687 +        (pAMgr->get_onlyincompound() &&
   1.688 +    	    TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
   1.689 +        (info && (*info & SPELL_INITCAP) &&
   1.690 +    	    TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
   1.691 +    	    he = NULL;
   1.692 +     }
   1.693 +
   1.694 +     if (he) {
   1.695 +        if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
   1.696 +            if (info) *info += SPELL_FORBIDDEN;
   1.697 +            return NULL;
   1.698 +        }
   1.699 +        if (root) {
   1.700 +            *root = mystrdup(he->word);
   1.701 +            if (*root && complexprefixes) {
   1.702 +                if (utf8) reverseword_utf(*root); else reverseword(*root);
   1.703 +            }
   1.704 +        }
   1.705 +     // try check compound word
   1.706 +     } else if (pAMgr->get_compound()) {
   1.707 +          he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
   1.708 +          // LANG_hu section: `moving rule' with last dash
   1.709 +          if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
   1.710 +             char * dup = mystrdup(word);
   1.711 +             if (!dup) return NULL;
   1.712 +             dup[len-1] = '\0';
   1.713 +             he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
   1.714 +             free(dup);
   1.715 +          }
   1.716 +          // end of LANG speficic region
   1.717 +          if (he) {
   1.718 +                if (root) {
   1.719 +                    *root = mystrdup(he->word);
   1.720 +                    if (*root && complexprefixes) {
   1.721 +                        if (utf8) reverseword_utf(*root); else reverseword(*root);
   1.722 +                    }
   1.723 +                }
   1.724 +                if (info) *info += SPELL_COMPOUND;
   1.725 +          }
   1.726 +     }
   1.727 +
   1.728 +  }
   1.729 +
   1.730 +  return he;
   1.731 +}
   1.732 +
   // Produce spelling suggestions for `word`.
   //
   // slst  out: set to NULL, then (when suggestions exist) pointed at a heap
   //       list of heap strings; reset to NULL again if the final list is
   //       empty. Left untouched when pSMgr is NULL or maxdic == 0.
   // word  in: word to correct. Input that starts with the SPELL_XML prefix is
   //       handed to spellml() instead.
   //
   // Returns the number of suggestions stored in *slst; 0 when nothing can be
   // suggested (no suggestion manager, over-long word, cleanword2() reports
   // length 0); -1 when the FORCEUCASE shortcut cannot allocate its list.
   //
   // Pipeline: input conversion -> per-capitalisation suggestion passes ->
   // Hungarian dash fix-up -> n-gram fallback -> per-part dash suggestions ->
   // complex-prefix reversal -> capitalisation -> trailing dots -> removal of
   // badly capitalised/forbidden forms -> de-duplication -> output conversion.
   1.733 +int Hunspell::suggest(char*** slst, const char * word)
   1.734 +{
   1.735 +  int onlycmpdsug = 0;
   1.736 +  char cw[MAXWORDUTF8LEN];
   1.737 +  char wspace[MAXWORDUTF8LEN];
   1.738 +  if (!pSMgr || maxdic == 0) return 0;
   1.739 +  w_char unicw[MAXWORDLEN];
   1.740 +  *slst = NULL;
   1.741 +  // process XML input of the simplified API (see manual)
   1.742 +  if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
   1.743 +     return spellml(slst, word);
   1.744 +  }
            // reject words whose byte length does not fit the fixed work buffers
   1.745 +  int nc = strlen(word);
   1.746 +  if (utf8) {
   1.747 +    if (nc >= MAXWORDUTF8LEN) return 0;
   1.748 +  } else {
   1.749 +    if (nc >= MAXWORDLEN) return 0;
   1.750 +  }
   1.751 +  int captype = 0;
   1.752 +  int abbv = 0;
   1.753 +  int wl = 0;
   1.754 +
   1.755 +  // input conversion
            // cleanword2() fills cw/unicw and reports captype, abbv and nc; it is fed
            // the converted text when the ICONV table changed the word.
   1.756 +  RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   1.757 +  if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
   1.758 +  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
   1.759 +
   1.760 +  if (wl == 0) return 0;
   1.761 +  int ns = 0;
   1.762 +  int capwords = 0;
   1.763 +
   1.764 +  // check capitalized form for FORCEUCASE
            // When the lower-case word passes checkword(), the capitalised copy is
            // returned as the only suggestion.
   1.765 +  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
   1.766 +    int info = SPELL_ORIGCAP;
   1.767 +    char ** wlst;
   1.768 +    if (checkword(cw, &info, NULL)) {
   1.769 +        if (*slst) {
   1.770 +            wlst = *slst;
   1.771 +        } else {
   1.772 +            wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
   1.773 +            if (wlst == NULL) return -1;
   1.774 +            *slst = wlst;
   1.775 +            for (int i = 0; i < MAXSUGGESTION; i++) {
   1.776 +                wlst[i] = NULL;
   1.777 +            }
   1.778 +        }
                  // NOTE(review): the mystrdup() result is passed to mkinitcap() without
                  // a NULL check, and 1 is returned even if the copy failed.
   1.779 +        wlst[0] = mystrdup(cw);
   1.780 +        mkinitcap(wlst[0]);
   1.781 +        return 1;
   1.782 +    }
   1.783 +  }
   1.784 + 
            // main pass: ask the suggestion manager with case variants chosen by captype;
            // ns is threaded through every call as the running suggestion count
   1.785 +  switch(captype) {
   1.786 +     case NOCAP:   {
   1.787 +                     ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
   1.788 +                     break;
   1.789 +                   }
   1.790 +
   1.791 +     case INITCAP: {
                               // try the word as typed, then its all-lower-case form;
                               // results are re-capitalised later via capwords
   1.792 +                     capwords = 1;
   1.793 +                     ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
   1.794 +                     if (ns == -1) break;
   1.795 +                     memcpy(wspace,cw,(wl+1));
   1.796 +                     mkallsmall2(wspace, unicw, nc);
   1.797 +                     ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   1.798 +                     break;
   1.799 +                   }
               // HUHINITCAP deliberately falls through into HUHCAP after setting capwords
   1.800 +     case HUHINITCAP:
   1.801 +                    capwords = 1;
   1.802 +     case HUHCAP: {
   1.803 +                     ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
   1.804 +                     if (ns != -1) {
   1.805 +                        int prevns;
   1.806 +    		        // something.The -> something. The
   1.807 +                        char * dot = strchr(cw, '.');
   1.808 +		        if (dot && (dot > cw)) {
   1.809 +		            int captype_;
   1.810 +		            if (utf8) {
   1.811 +		               w_char w_[MAXWORDLEN];
   1.812 +			       int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
   1.813 +		               captype_ = get_captype_utf8(w_, wl_, langnum);
   1.814 +		            } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
   1.815 +		    	    if (captype_ == INITCAP) {
                                        // copy cw with one extra byte, then splice a space
                                        // in after the dot
                                        // NOTE(review): if realloc() fails, the mystrdup()
                                        // buffer is leaked because st is overwritten with NULL.
   1.816 +                        	char * st = mystrdup(cw);
   1.817 +                        	if (st) st = (char *) realloc(st, wl + 2);
   1.818 +				if (st) {
   1.819 +                        		st[(dot - cw) + 1] = ' ';
   1.820 +                        		strcpy(st + (dot - cw) + 2, dot + 1);
   1.821 +                    			ns = insert_sug(slst, st, ns);
   1.822 +					free(st);
   1.823 +				}
   1.824 +		    	    }
   1.825 +		        }
   1.826 +                        if (captype == HUHINITCAP) {
   1.827 +                            // TheOpenOffice.org -> The OpenOffice.org
   1.828 +                            memcpy(wspace,cw,(wl+1));
   1.829 +                            mkinitsmall2(wspace, unicw, nc);
   1.830 +                            ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   1.831 +                        }
                                  // all-lower-case form: offered itself when it spells
                                  // correctly, then used as a base for further suggestions
   1.832 +                        memcpy(wspace,cw,(wl+1));
   1.833 +                        mkallsmall2(wspace, unicw, nc);
   1.834 +                        if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
                                  // prevns marks where suggestions from the lower-cased word begin
   1.835 +                        prevns = ns;
   1.836 +                        ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   1.837 +                        if (captype == HUHINITCAP) {
   1.838 +                            mkinitcap2(wspace, unicw, nc);
   1.839 +                            if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
   1.840 +                            ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   1.841 +                        }
   1.842 +                        // aNew -> "a New" (instead of "a new")
   1.843 +                        for (int j = prevns; j < ns; j++) {
   1.844 +                           char * space = strchr((*slst)[j],' ');
   1.845 +                           if (space) {
   1.846 +                                int slen = strlen(space + 1);
   1.847 +                                // different case after space (need capitalisation)
   1.848 +                                if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
   1.849 +                                    w_char w[MAXWORDLEN];
   1.850 +                                    int wc = 0;
   1.851 +                                    char * r = (*slst)[j];
   1.852 +                                    if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
   1.853 +                                    mkinitcap2(space + 1, w, wc);
   1.854 +                                    // set as first suggestion
   1.855 +                                    for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
   1.856 +                                    (*slst)[0] = r;
   1.857 +                                }
   1.858 +                           }
   1.859 +                        }
   1.860 +                     }
   1.861 +                     break;
   1.862 +                   }
   1.863 +
   1.864 +     case ALLCAP: {
                               // suggest from the lower-case and the initial-capital form,
                               // then upper-case every suggestion again
   1.865 +                     memcpy(wspace, cw, (wl+1));
   1.866 +                     mkallsmall2(wspace, unicw, nc);
   1.867 +                     ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   1.868 +                     if (ns == -1) break;
   1.869 +                     if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
   1.870 +                        ns = insert_sug(slst, wspace, ns);
   1.871 +                     mkinitcap2(wspace, unicw, nc);
   1.872 +                     ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   1.873 +                     for (int j=0; j < ns; j++) {
   1.874 +                        mkallcap((*slst)[j]);
   1.875 +                        if (pAMgr && pAMgr->get_checksharps()) {
   1.876 +                            char * pos;
   1.877 +                            if (utf8) {
                                          // UTF-8 sharp s is two bytes, so "SS" overwrites it in place
   1.878 +                                pos = strstr((*slst)[j], "\xC3\x9F");
   1.879 +                                while (pos) {
   1.880 +                                    *pos = 'S';
   1.881 +                                    *(pos+1) = 'S';
   1.882 +                                    pos = strstr(pos+2, "\xC3\x9F");
   1.883 +                                }
   1.884 +                            } else {
                                          // single-byte sharp s grows to two bytes: enlarge by one
                                          // byte per replacement
                                          // NOTE(review): the realloc() result is used unchecked; on
                                          // failure the old string is lost and NULL reaches mystrrep().
   1.885 +                                pos = strchr((*slst)[j], '\xDF');
   1.886 +                                while (pos) {
   1.887 +                                    (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
   1.888 +                                    mystrrep((*slst)[j], "\xDF", "SS");
   1.889 +                                    pos = strchr((*slst)[j], '\xDF');
   1.890 +                                }
   1.891 +                            }
   1.892 +                        }
   1.893 +                     }
   1.894 +                     break;
   1.895 +                   }
   1.896 +  }
   1.897 +
   1.898 + // LANG_hu section: replace '-' with ' ' in Hungarian
            // The suggestion is re-checked with its first dash removed; the dash becomes
            // a space only when that joined form is flagged both compound and forbidden.
   1.899 +  if (langnum == LANG_hu) {
   1.900 +      for (int j=0; j < ns; j++) {
   1.901 +          char * pos = strchr((*slst)[j],'-');
   1.902 +          if (pos) {
   1.903 +              int info;
   1.904 +              char w[MAXWORDUTF8LEN];
   1.905 +              *pos = '\0';
   1.906 +              strcpy(w, (*slst)[j]);
   1.907 +              strcat(w, pos + 1);
   1.908 +              spell(w, &info, NULL);
   1.909 +              if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
   1.910 +                  *pos = ' ';
   1.911 +              } else *pos = '-';
   1.912 +          }
   1.913 +      }
   1.914 +  }
   1.915 +  // END OF LANG_hu section
   1.916 +
   1.917 +  // try ngram approach since found nothing or only compound words
            // skipped entirely when no suggestion list exists yet (*slst is NULL)
   1.918 +  if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
   1.919 +      switch(captype) {
   1.920 +          case NOCAP: {
   1.921 +              ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
   1.922 +              break;
   1.923 +          }
   1.924 +	  case HUHINITCAP:
   1.925 +              capwords = 1;
   1.926 +          case HUHCAP: {
   1.927 +              memcpy(wspace,cw,(wl+1));
   1.928 +              mkallsmall2(wspace, unicw, nc);
   1.929 +              ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
   1.930 +	      break;
   1.931 +          }
   1.932 +         case INITCAP: {
   1.933 +              capwords = 1;
   1.934 +              memcpy(wspace,cw,(wl+1));
   1.935 +              mkallsmall2(wspace, unicw, nc);
   1.936 +              ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
   1.937 +              break;
   1.938 +          }
   1.939 +          case ALLCAP: {
   1.940 +              memcpy(wspace,cw,(wl+1));
   1.941 +              mkallsmall2(wspace, unicw, nc);
   1.942 +	      int oldns = ns;
   1.943 +              ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
                        // only the newly added n-gram suggestions still need upper-casing
   1.944 +              for (int j = oldns; j < ns; j++)
   1.945 +                  mkallcap((*slst)[j]);
   1.946 +              break;
   1.947 +         }
   1.948 +      }
   1.949 +  }
   1.950 +
   1.951 +  // try dash suggestion (Afo-American -> Afro-American)
            // Runs only when no existing suggestion contains a dash. cw is split in place
            // at each '-' (temporarily NUL-terminated); the first part that fails spell()
            // is corrected by a recursive suggest() call and each result is spliced back
            // into the full word.
   1.952 +  if (char * pos = strchr(cw, '-')) {
   1.953 +     char * ppos = cw;
   1.954 +     int nodashsug = 1;
   1.955 +     char ** nlst = NULL;
   1.956 +     int nn = 0;
   1.957 +     int last = 0;
   1.958 +     if (*slst) {
   1.959 +        for (int j = 0; j < ns && nodashsug == 1; j++) {
   1.960 +           if (strchr((*slst)[j], '-')) nodashsug = 0;
   1.961 +        }
   1.962 +     }
   1.963 +     while (nodashsug && !last) {
   1.964 +	if (*pos == '\0') last = 1; else *pos = '\0';
   1.965 +        if (!spell(ppos)) {
   1.966 +          nn = suggest(&nlst, ppos);
   1.967 +          for (int j = nn - 1; j >= 0; j--) {
                      // NOTE(review): prefix + suggestion + "-" + remainder is assembled in
                      // wspace without a length check; confirm it cannot exceed MAXWORDUTF8LEN.
   1.968 +            strncpy(wspace, cw, ppos - cw);
   1.969 +            strcpy(wspace + (ppos - cw), nlst[j]);
   1.970 +            if (!last) {
   1.971 +            	strcat(wspace, "-");
   1.972 +		strcat(wspace, pos + 1);
   1.973 +	    }
   1.974 +            ns = insert_sug(slst, wspace, ns);
   1.975 +            free(nlst[j]);
   1.976 +          }
   1.977 +          if (nlst != NULL) free(nlst);
   1.978 +          nodashsug = 0;
   1.979 +        }
   1.980 +	if (!last) {
                  // restore the dash and advance to the next part
   1.981 +          *pos = '-';
   1.982 +          ppos = pos + 1;
   1.983 +          pos = strchr(ppos, '-');
   1.984 +        }
                // no further dash: point pos at the terminator so the next pass is the last
   1.985 +	if (!pos) pos = cw + strlen(cw);
   1.986 +     }
   1.987 +  }
   1.988 +
   1.989 +  // word reversing wrapper for complex prefixes
   1.990 +  if (complexprefixes) {
   1.991 +    for (int j = 0; j < ns; j++) {
   1.992 +      if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
   1.993 +    }
   1.994 +  }
   1.995 +
   1.996 +  // capitalize
   1.997 +  if (capwords) for (int j=0; j < ns; j++) {
   1.998 +      mkinitcap((*slst)[j]);
   1.999 +  }
  1.1000 +
  1.1001 +  // expand suggestions with dot(s)
            // re-append the abbv trailing characters of the original word
            // NOTE(review): the realloc() result is used unchecked before strcat().
  1.1002 +  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
  1.1003 +    for (int j = 0; j < ns; j++) {
  1.1004 +      (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
  1.1005 +      strcat((*slst)[j], word + strlen(word) - abbv);
  1.1006 +    }
  1.1007 +  }
  1.1008 +
  1.1009 +  // remove bad capitalized and forbidden forms
            // For INITCAP/ALLCAP input, a single-word suggestion that fails spell() is
            // replaced by its lower-case or initial-capital form if one of those passes,
            // otherwise dropped; l is the write index of the compacted list.
  1.1010 +  if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
  1.1011 +  switch (captype) {
  1.1012 +    case INITCAP:
  1.1013 +    case ALLCAP: {
  1.1014 +      int l = 0;
  1.1015 +      for (int j=0; j < ns; j++) {
  1.1016 +        if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
  1.1017 +          char s[MAXSWUTF8L];
  1.1018 +          w_char w[MAXSWL];
  1.1019 +          int len;
  1.1020 +          if (utf8) {
  1.1021 +            len = u8_u16(w, MAXSWL, (*slst)[j]);
  1.1022 +          } else {
  1.1023 +            strcpy(s, (*slst)[j]);
  1.1024 +            len = strlen(s);
  1.1025 +          }
  1.1026 +          mkallsmall2(s, w, len);
  1.1027 +          free((*slst)[j]);
  1.1028 +          if (spell(s)) {
  1.1029 +            (*slst)[l] = mystrdup(s);
  1.1030 +            if ((*slst)[l]) l++;
  1.1031 +          } else {
  1.1032 +            mkinitcap2(s, w, len);
  1.1033 +            if (spell(s)) {
  1.1034 +              (*slst)[l] = mystrdup(s);
  1.1035 +              if ((*slst)[l]) l++;
  1.1036 +            }
  1.1037 +          }
  1.1038 +        } else {
  1.1039 +          (*slst)[l] = (*slst)[j];
  1.1040 +          l++;
  1.1041 +        }
  1.1042 +      }
  1.1043 +      ns = l;
  1.1044 +    }
  1.1045 +  }
  1.1046 +  }
  1.1047 +
  1.1048 +  // remove duplications
            // in-place compaction: slot l receives entry j, and is given back (and the
            // string freed) when an equal string already sits among the first l entries
  1.1049 +  int l = 0;
  1.1050 +  for (int j = 0; j < ns; j++) {
  1.1051 +    (*slst)[l] = (*slst)[j];
  1.1052 +    for (int k = 0; k < l; k++) {
  1.1053 +      if (strcmp((*slst)[k], (*slst)[j]) == 0) {
  1.1054 +        free((*slst)[j]);
  1.1055 +        l--;
  1.1056 +        break;
  1.1057 +      }
  1.1058 +    }
  1.1059 +    l++;
  1.1060 +  }
  1.1061 +  ns = l;
  1.1062 +
  1.1063 +  // output conversion
            // NOTE(review): a failed mystrdup() would leave a NULL entry in the list.
  1.1064 +  rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
  1.1065 +  for (int j = 0; rl && j < ns; j++) {
  1.1066 +    if (rl->conv((*slst)[j], wspace)) {
  1.1067 +      free((*slst)[j]);
  1.1068 +      (*slst)[j] = mystrdup(wspace);
  1.1069 +    }
  1.1070 +  }
  1.1071 +
  1.1072 +  // if suggestions removed by nosuggest, onlyincompound parameters
  1.1073 +  if (l == 0 && *slst) {
  1.1074 +    free(*slst);
  1.1075 +    *slst = NULL;
  1.1076 +  }
  1.1077 +  return l;
  1.1078 +}
  1.1079 +
  // Public wrapper that forwards a list of n entries to freelist().
  // Presumably this releases the strings and the array itself and is meant for
  // lists returned by suggest()/analyze()/stem() - confirm against freelist().
  1.1080 +void Hunspell::free_list(char *** slst, int n) {
  1.1081 +        freelist(slst, n);
  1.1082 +}
  1.1083 +
  // Returns the `encoding` member. The pointer is handed out directly, not as
  // a copy, so the caller must not free it.
  1.1084 +char * Hunspell::get_dic_encoding()
  1.1085 +{
  1.1086 +  return encoding;
  1.1087 +}
  1.1088 +
  1.1089 +#ifdef HUNSPELL_EXPERIMENTAL
  1.1090 +// XXX need UTF-8 support
  // Experimental variant of suggest() built on pSMgr->suggest_auto().
  //
  // slst  out: set to NULL once the word has passed the early checks, then
  //       filled by the suggestion manager. Left untouched on the early
  //       `return 0` paths.
  // word  in: word to correct.
  //
  // Returns the suggestion count ns (0 when there is no suggestion manager,
  // the word is too long, or cleanword() reports length 0).
  //
  // Unlike suggest(), this path uses cleanword() without input/output
  // conversion and performs no de-duplication.
  1.1091 +int Hunspell::suggest_auto(char*** slst, const char * word)
  1.1092 +{
  1.1093 +  char cw[MAXWORDUTF8LEN];
  1.1094 +  char wspace[MAXWORDUTF8LEN];
  1.1095 +  if (!pSMgr || maxdic == 0) return 0;
  1.1096 +  int wl = strlen(word);
  1.1097 +  if (utf8) {
  1.1098 +    if (wl >= MAXWORDUTF8LEN) return 0;
  1.1099 +  } else {
  1.1100 +    if (wl >= MAXWORDLEN) return 0;
  1.1101 +  }
  1.1102 +  int captype = 0;
  1.1103 +  int abbv = 0;
  1.1104 +  wl = cleanword(cw, word, &captype, &abbv);
  1.1105 +  if (wl == 0) return 0;
  1.1106 +  int ns = 0;
  1.1107 +  *slst = NULL; // HU, nsug in pSMgr->suggest
  1.1108 +
  1.1109 +  switch(captype) {
  1.1110 +     case NOCAP:   {
  1.1111 +                     ns = pSMgr->suggest_auto(slst, cw, ns);
                               // the conditional break below is redundant: both paths break
  1.1112 +                     if (ns>0) break;
  1.1113 +                     break;
  1.1114 +                   }
  1.1115 +
  1.1116 +     case INITCAP: {
                               // suggest from the lower-case form, capitalise those results,
                               // then add suggestions for the word as typed
  1.1117 +                     memcpy(wspace,cw,(wl+1));
  1.1118 +                     mkallsmall(wspace);
  1.1119 +                     ns = pSMgr->suggest_auto(slst, wspace, ns);
  1.1120 +                     for (int j=0; j < ns; j++)
  1.1121 +                       mkinitcap((*slst)[j]);
  1.1122 +                     ns = pSMgr->suggest_auto(slst, cw, ns);
  1.1123 +                     break;
  1.1124 +
  1.1125 +                   }
  1.1126 +
  1.1127 +     case HUHINITCAP:
  1.1128 +     case HUHCAP: {
                               // fall back to the lower-case form only when the word as typed
                               // produced nothing
  1.1129 +                     ns = pSMgr->suggest_auto(slst, cw, ns);
  1.1130 +                     if (ns == 0) {
  1.1131 +                        memcpy(wspace,cw,(wl+1));
  1.1132 +                        mkallsmall(wspace);
  1.1133 +                        ns = pSMgr->suggest_auto(slst, wspace, ns);
  1.1134 +                     }
  1.1135 +                     break;
  1.1136 +                   }
  1.1137 +
  1.1138 +     case ALLCAP: {
                               // suggest from the lower-case and the initial-capital form, then
                               // upper-case every suggestion
  1.1139 +                     memcpy(wspace,cw,(wl+1));
  1.1140 +                     mkallsmall(wspace);
  1.1141 +                     ns = pSMgr->suggest_auto(slst, wspace, ns);
  1.1142 +
  1.1143 +                     mkinitcap(wspace);
  1.1144 +                     ns = pSMgr->suggest_auto(slst, wspace, ns);
  1.1145 +
  1.1146 +                     for (int j=0; j < ns; j++)
  1.1147 +                       mkallcap((*slst)[j]);
  1.1148 +                     break;
  1.1149 +                   }
  1.1150 +  }
  1.1151 +
  1.1152 +  // word reversing wrapper for complex prefixes
  1.1153 +  if (complexprefixes) {
  1.1154 +    for (int j = 0; j < ns; j++) {
  1.1155 +      if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
  1.1156 +    }
  1.1157 +  }
  1.1158 +
  1.1159 +  // expand suggestions with dot(s)
            // NOTE(review): the realloc() result is used unchecked before strcat().
  1.1160 +  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
  1.1161 +    for (int j = 0; j < ns; j++) {
  1.1162 +      (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
  1.1163 +      strcat((*slst)[j], word + strlen(word) - abbv);
  1.1164 +    }
  1.1165 +  }
  1.1166 +
  1.1167 +  // LANG_hu section: replace '-' with ' ' in Hungarian
            // the dash becomes a space only when the suggestion with its first dash
            // removed is flagged both compound and forbidden by spell()
  1.1168 +  if (langnum == LANG_hu) {
  1.1169 +      for (int j=0; j < ns; j++) {
  1.1170 +          char * pos = strchr((*slst)[j],'-');
  1.1171 +          if (pos) {
  1.1172 +              int info;
  1.1173 +              char w[MAXWORDUTF8LEN];
  1.1174 +              *pos = '\0';
  1.1175 +              strcpy(w, (*slst)[j]);
  1.1176 +              strcat(w, pos + 1);
  1.1177 +              spell(w, &info, NULL);
  1.1178 +              if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
  1.1179 +                  *pos = ' ';
  1.1180 +              } else *pos = '-';
  1.1181 +          }
  1.1182 +      }
  1.1183 +  }
  1.1184 +  // END OF LANG_hu section
  1.1185 +  return ns;
  1.1186 +}
  1.1187 +#endif
  1.1188 +
  // Derive stems from n morphological descriptions.
  //
  // slst  out: set to NULL, then filled by line_tok() with the collected stems.
  // desc  in: n morphological analysis strings.
  // n     in: number of entries in desc.
  //
  // Returns the count reported by uniqlist() for the resulting list, or 0 when
  // n == 0.
  //
  // All intermediate text lives in fixed MAXLNLEN buffers. An entry that is
  // NULL or does not fit is skipped, and a record is appended to result2 only
  // when it fits completely, so over-long input truncates the output instead
  // of overrunning the stack.
  1.1189 +int Hunspell::stem(char*** slst, char ** desc, int n)
  1.1190 +{
  1.1191 +  char result[MAXLNLEN];
  1.1192 +  char result2[MAXLNLEN];
  1.1193 +  *slst = NULL;
  1.1194 +  if (n == 0) return 0;
  1.1195 +  *result2 = '\0';
  1.1196 +  for (int i = 0; i < n; i++) {
  1.1197 +    *result = '\0';
  1.1198 +    // add compound word parts (except the last one)
  1.1199 +    char * s = (char *) desc[i];
              // Everything written to result and tok below is copied out of this
              // string, so bounding its length bounds those buffers as well.
         +    if (!s || strlen(s) >= (size_t) MAXLNLEN) continue;
  1.1200 +    char * part = strstr(s, MORPH_PART);
  1.1201 +    if (part) {
  1.1202 +        char * nextpart = strstr(part + 1, MORPH_PART);
  1.1203 +        while (nextpart) {
  1.1204 +            copy_field(result + strlen(result), part, MORPH_PART);
  1.1205 +            part = nextpart;
  1.1206 +            nextpart = strstr(part + 1, MORPH_PART);
  1.1207 +        }
  1.1208 +        s = part;
  1.1209 +    }
  1.1210 +
  1.1211 +    char **pl;
  1.1212 +    char tok[MAXLNLEN];
  1.1213 +    strcpy(tok, s);
              // turn every " | " alternative marker into MSEP_ALT so line_tok() can split on it
  1.1214 +    char * alt = strstr(tok, " | ");
  1.1215 +    while (alt) {
  1.1216 +        alt[1] = MSEP_ALT;
  1.1217 +        alt = strstr(alt, " | ");
  1.1218 +    }
  1.1219 +    int pln = line_tok(tok, &pl, MSEP_ALT);
  1.1220 +    for (int k = 0; k < pln; k++) {
  1.1221 +        // add derivational suffixes
  1.1222 +        if (strstr(pl[k], MORPH_DERI_SFX)) {
  1.1223 +            // remove inflectional suffixes
  1.1224 +            char * is = strstr(pl[k], MORPH_INFL_SFX);
  1.1225 +            if (is) *is = '\0';
  1.1226 +            char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
  1.1227 +            if (sg) {
  1.1228 +                char ** gen;
  1.1229 +                int genl = line_tok(sg, &gen, MSEP_REC);
  1.1230 +                free(sg);
  1.1231 +                for (int j = 0; j < genl; j++) {
                          // separator + compound parts + generated form must fit with the NUL
         +                    size_t used = strlen(result2);
         +                    if (used + 1 + strlen(result) + strlen(gen[j]) >= (size_t) MAXLNLEN) continue;
  1.1232 +                    sprintf(result2 + used, "%c%s%s",
  1.1233 +                            MSEP_REC, result, gen[j]);
  1.1234 +                }
  1.1235 +                freelist(&gen, genl);
  1.1236 +            }
  1.1237 +        } else {
                  // Collect surface prefix + stem in a scratch buffer first. Both fields
                  // are copied out of pl[k], which is shorter than MAXLNLEN, so twice
                  // that size always suffices.
         +            char stemrec[2 * MAXLNLEN];
         +            *stemrec = '\0';
  1.1239 +            if (strstr(pl[k], MORPH_SURF_PFX)) {
  1.1240 +                copy_field(stemrec, pl[k], MORPH_SURF_PFX);
  1.1241 +            }
  1.1242 +            copy_field(stemrec + strlen(stemrec), pl[k], MORPH_STEM);
         +            size_t used = strlen(result2);
         +            if (used + 1 + strlen(result) + strlen(stemrec) < (size_t) MAXLNLEN) {
         +                sprintf(result2 + used, "%c%s%s", MSEP_REC, result, stemrec);
         +            }
  1.1243 +        }
  1.1244 +    }
  1.1245 +    freelist(&pl, pln);
  1.1246 +  }
  1.1247 +  int sln = line_tok(result2, slst, MSEP_REC);
  1.1248 +  return uniqlist(*slst, sln);
  1.1249 +
  1.1250 +}
  1.1251 +
  // Convenience overload: analyse `word`, reduce the analyses to stems stored
  // in *slst, release the temporary analysis list, and return the stem count.
  1.1252 +int Hunspell::stem(char*** slst, const char * word)
  1.1253 +{
  1.1254 +  char ** analyses;
  1.1255 +  const int nanalyses = analyze(&analyses, word);
  1.1256 +  const int nstems = stem(slst, analyses, nanalyses);
  1.1257 +  freelist(&analyses, nanalyses);
  1.1258 +  return nstems;
  1.1259 +}
  1.1260 +
  1.1261 +#ifdef HUNSPELL_EXPERIMENTAL
  // Experimental: collect results of pSMgr->suggest_pos_stems() for `word`,
  // retrying with other capitalisations depending on the detected captype.
  //
  // slst  out: set to NULL once the word has passed the early checks, then
  //       filled by the suggestion manager. Left untouched on the early
  //       `return 0` paths.
  // word  in: word to look up.
  //
  // Returns the count ns reported by the last suggestion-manager call (0 on
  // the early exits).
  //
  // NOTE(review): HUHINITCAP has no case label here, so such words fall through
  // the switch with ns == 0 - confirm that this is intended.
  1.1262 +int Hunspell::suggest_pos_stems(char*** slst, const char * word)
  1.1263 +{
  1.1264 +  char cw[MAXWORDUTF8LEN];
  1.1265 +  char wspace[MAXWORDUTF8LEN];
  1.1266 +  if (! pSMgr || maxdic == 0) return 0;
  1.1267 +  int wl = strlen(word);
  1.1268 +  if (utf8) {
  1.1269 +    if (wl >= MAXWORDUTF8LEN) return 0;
  1.1270 +  } else {
  1.1271 +    if (wl >= MAXWORDLEN) return 0;
  1.1272 +  }
  1.1273 +  int captype = 0;
  1.1274 +  int abbv = 0;
  1.1275 +  wl = cleanword(cw, word, &captype, &abbv);
  1.1276 +  if (wl == 0) return 0;
  1.1277 +
  1.1278 +  int ns = 0; // ns=0 = normalized input
  1.1279 +
  1.1280 +  *slst = NULL; // HU, nsug in pSMgr->suggest
  1.1281 +
  1.1282 +  switch(captype) {
  1.1283 +     case HUHCAP:
  1.1284 +     case NOCAP:   {
  1.1285 +                     ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1.1286 +
                               // nothing found for an abbreviation: retry with one dot appended
  1.1287 +                     if ((abbv) && (ns == 0)) {
  1.1288 +                         memcpy(wspace,cw,wl);
  1.1289 +                         *(wspace+wl) = '.';
  1.1290 +                         *(wspace+wl+1) = '\0';
  1.1291 +                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1.1292 +                     }
  1.1293 +
  1.1294 +                     break;
  1.1295 +                   }
  1.1296 +
  1.1297 +     case INITCAP: {
  1.1298 +
  1.1299 +                     ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1.1300 +
                               // retry in lower case when nothing was found or the first result
                               // starts with '#' (meaning of '#' is not visible here - TODO confirm)
  1.1301 +                     if (ns == 0 || ((*slst)[0][0] == '#')) {
  1.1302 +                        memcpy(wspace,cw,(wl+1));
  1.1303 +                        mkallsmall(wspace);
  1.1304 +                        ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1.1305 +                     }
  1.1306 +
  1.1307 +                     break;
  1.1308 +
  1.1309 +                   }
  1.1310 +
  1.1311 +     case ALLCAP: {
                               // try as typed, then lower case, then initial capital; stop at the
                               // first form that yields results
  1.1312 +                     ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1.1313 +                     if (ns != 0) break;
  1.1314 +
  1.1315 +                     memcpy(wspace,cw,(wl+1));
  1.1316 +                     mkallsmall(wspace);
  1.1317 +                     ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1.1318 +
  1.1319 +                     if (ns == 0) {
  1.1320 +                         mkinitcap(wspace);
  1.1321 +                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1.1322 +                     }
  1.1323 +                     break;
  1.1324 +                   }
  1.1325 +  }
  1.1326 +
  1.1327 +  return ns;
  1.1328 +}
  1.1329 +#endif // END OF HUNSPELL_EXPERIMENTAL CODE
  1.1330 +
  // Returns the affix manager's word-character string, or NULL when no affix
  // manager exists (suggest() treats pAMgr as optional in the same way).
  1.1331 +const char * Hunspell::get_wordchars()
  1.1332 +{
  1.1333 +  return pAMgr ? pAMgr->get_wordchars() : NULL;
  1.1334 +}
  1.1335 +
  // Returns the affix manager's UTF-16 word-character table; `len` is passed
  // through to the affix manager. Without an affix manager the result is NULL
  // and *len (when len is non-NULL) is set to 0, so callers never read an
  // unset length.
  1.1336 +unsigned short * Hunspell::get_wordchars_utf16(int * len)
  1.1337 +{
  1.1338 +  if (pAMgr) return pAMgr->get_wordchars_utf16(len);
         +  if (len) *len = 0;
         +  return NULL;
  1.1339 +}
  1.1340 +
  // Upper-case the first character of `p` in place.
  //
  // 8-bit encodings: the first byte is mapped through csconv[].cupper.
  // UTF-8: the string is decoded to UTF-16, the first unit is upper-cased with
  // unicodetoupper() for the current language, and the text is re-encoded
  // into `p`.
  //
  // A NULL pointer is ignored (suggest() passes an unchecked mystrdup()
  // result), and so is a string that decodes to no characters. This mirrors
  // the nc > 0 guard in mkinitcap2() and avoids reading an uninitialised u[0].
  //
  // NOTE(review): u16_u8() is given MAXWORDUTF8LEN as the limit regardless of
  // how large the caller's buffer really is - confirm that re-encoding can
  // never be longer than the original text.
  1.1341 +void Hunspell::mkinitcap(char * p)
  1.1342 +{
         +  if (!p) return;
  1.1343 +  if (!utf8) {
  1.1344 +    if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
  1.1345 +  } else {
  1.1346 +      w_char u[MAXWORDLEN];
  1.1347 +      int len = u8_u16(u, MAXWORDLEN, p);
  1.1348 +      if (len <= 0) return;
  1.1349 +      unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
  1.1350 +      u[0].h = (unsigned char) (i >> 8);
  1.1351 +      u[0].l = (unsigned char) (i & 0x00FF);
  1.1352 +      u16_u8(p, MAXWORDUTF8LEN, u, len);
  1.1353 +  }
  1.1354 +}
  1.1355 +
  // Upper-case the first character of a word held both as bytes (`p`) and as
  // UTF-16 units (`u`, `nc` units).
  //
  // UTF-8 mode: with nc > 0 the first unit of `u` is upper-cased, `p` is
  // rebuilt from `u`, and the new byte length of `p` is returned; otherwise
  // nothing changes and nc is returned.
  // 8-bit mode: the first byte of `p` is mapped through csconv[].cupper and
  // nc is returned unchanged.
  1.1356 +int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
  1.1357 +{
  1.1358 +  if (utf8) {
  1.1359 +      if (nc <= 0) return nc;
  1.1360 +      unsigned short upper = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
  1.1361 +      u[0].h = (unsigned char) (upper >> 8);
  1.1362 +      u[0].l = (unsigned char) (upper & 0x00FF);
  1.1363 +      u16_u8(p, MAXWORDUTF8LEN, u, nc);
  1.1364 +      return strlen(p);
  1.1365 +  }
  1.1366 +  if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
  1.1367 +  return nc;
  1.1368 +}
  1.1369 +
  // Lower-case the first character of a word held both as bytes (`p`) and as
  // UTF-16 units (`u`, `nc` units).
  //
  // UTF-8 mode: with nc > 0 the first unit of `u` is lower-cased, `p` is
  // rebuilt from `u`, and the new byte length of `p` is returned; otherwise
  // nothing changes and nc is returned.
  // 8-bit mode: the first byte of `p` is mapped through csconv[].clower and
  // nc is returned unchanged.
  1.1370 +int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
  1.1371 +{
  1.1372 +  if (utf8) {
  1.1373 +      if (nc <= 0) return nc;
  1.1374 +      unsigned short lower = unicodetolower((u[0].h << 8) + u[0].l, langnum);
  1.1375 +      u[0].h = (unsigned char) (lower >> 8);
  1.1376 +      u[0].l = (unsigned char) (lower & 0x00FF);
  1.1377 +      u16_u8(p, MAXWORDUTF8LEN, u, nc);
  1.1378 +      return strlen(p);
  1.1379 +  }
  1.1380 +  if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
  1.1381 +  return nc;
  1.1382 +}
  1.1383 +
  // Adds `word` through the first hash manager (pHMgr[0]) and returns that
  // manager's result; returns 0 when no first hash manager exists.
  1.1384 +int Hunspell::add(const char * word)
  1.1385 +{
  1.1386 +    if (!pHMgr[0]) return 0;
  1.1387 +    return (pHMgr[0])->add(word);
  1.1388 +}
  1.1389 +
  // Passes `word` and `example` to add_with_affix() of the first hash manager
  // (pHMgr[0]) and returns its result; returns 0 when no first hash manager
  // exists.
  1.1390 +int Hunspell::add_with_affix(const char * word, const char * example)
  1.1391 +{
  1.1392 +    if (!pHMgr[0]) return 0;
  1.1393 +    return (pHMgr[0])->add_with_affix(word, example);
  1.1394 +}
  1.1395 +
  // Removes `word` through the first hash manager (pHMgr[0]) and returns that
  // manager's result; returns 0 when no first hash manager exists.
  1.1396 +int Hunspell::remove(const char * word)
  1.1397 +{
  1.1398 +    if (!pHMgr[0]) return 0;
  1.1399 +    return (pHMgr[0])->remove(word);
  1.1400 +}
  1.1401 +
  // Returns the version string reported by the affix manager, or NULL when no
  // affix manager exists (suggest() treats pAMgr as optional in the same way).
  1.1402 +const char * Hunspell::get_version()
  1.1403 +{
  1.1404 +  return pAMgr ? pAMgr->get_version() : NULL;
  1.1405 +}
  1.1406 +
  // Returns the `csconv` member: the per-byte table whose cupper/clower fields
  // mkinitcap2()/mkinitsmall2() use for 8-bit case mapping. The pointer is
  // handed out directly, not as a copy.
  1.1407 +struct cs_info * Hunspell::get_csconv()
  1.1408 +{
  1.1409 +  return csconv;
  1.1410 +}
  1.1411 +
  // Append `st` to `result` (capacity MAXLNLEN, enforced by mystrcat), putting
  // a "\n" in front when `result` already holds text, then free `st`.
  // A NULL `st` is ignored. Ownership of `st` passes to this function.
  1.1412 +void Hunspell::cat_result(char * result, char * st)
  1.1413 +{
  1.1414 +    if (!st) return;
  1.1415 +    const bool has_text = (*result != '\0');
  1.1416 +    if (has_text) mystrcat(result, "\n", MAXLNLEN);
  1.1417 +    mystrcat(result, st, MAXLNLEN);
  1.1418 +    free(st);
  1.1419 +}
  1.1420 +
  1.1421 +int Hunspell::analyze(char*** slst, const char * word)
  1.1422 +{
  1.1423 +  char cw[MAXWORDUTF8LEN];
  1.1424 +  char wspace[MAXWORDUTF8LEN];
  1.1425 +  w_char unicw[MAXWORDLEN];
  1.1426 +  int wl2 = 0;
  1.1427 +  *slst = NULL;
  1.1428 +  if (! pSMgr || maxdic == 0) return 0;
  1.1429 +  int nc = strlen(word);
  1.1430 +  if (utf8) {
  1.1431 +    if (nc >= MAXWORDUTF8LEN) return 0;
  1.1432 +  } else {
  1.1433 +    if (nc >= MAXWORDLEN) return 0;
  1.1434 +  }
  1.1435 +  int captype = 0;
  1.1436 +  int abbv = 0;
  1.1437 +  int wl = 0;
  1.1438 +
  1.1439 +  // input conversion
  1.1440 +  RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
  1.1441 +  if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
  1.1442 +  else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
  1.1443 +
  1.1444 +  if (wl == 0) {
  1.1445 +      if (abbv) {
  1.1446 +          for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
  1.1447 +          cw[wl] = '\0';
  1.1448 +          abbv = 0;
  1.1449 +      } else return 0;
  1.1450 +  }
  1.1451 +
  1.1452 +  char result[MAXLNLEN];
  1.1453 +  char * st = NULL;
  1.1454 +
  1.1455 +  *result = '\0';
  1.1456 +
  1.1457 +  int n = 0;
  1.1458 +  int n2 = 0;
  1.1459 +  int n3 = 0;
  1.1460 +
  1.1461 +  // test numbers
  1.1462 +  // LANG_hu section: set dash information for suggestions
  1.1463 +  if (langnum == LANG_hu) {
  1.1464 +  while ((n < wl) &&
  1.1465 +        (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
  1.1466 +        n++;
  1.1467 +        if ((cw[n] == '.') || (cw[n] == ',')) {
  1.1468 +                if (((n2 == 0) && (n > 3)) ||
  1.1469 +                        ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
  1.1470 +                n2++;
  1.1471 +                n3 = n;
  1.1472 +        }
  1.1473 +  }
  1.1474 +
  1.1475 +  if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
  1.1476 +  if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
  1.1477 +        mystrcat(result, cw, MAXLNLEN);
  1.1478 +        result[n - 1] = '\0';
  1.1479 +        if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
  1.1480 +        else {
  1.1481 +                char sign = cw[n];
  1.1482 +                cw[n] = '\0';
  1.1483 +                cat_result(result, pSMgr->suggest_morph(cw + n - 1));
  1.1484 +                mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
  1.1485 +                cw[n] = sign;
  1.1486 +                cat_result(result, pSMgr->suggest_morph(cw + n));
  1.1487 +        }
  1.1488 +        return line_tok(result, slst, MSEP_REC);
  1.1489 +  }
  1.1490 +  }
  1.1491 +  // END OF LANG_hu section
  1.1492 +
  1.1493 +  switch(captype) {
  1.1494 +     case HUHCAP:
  1.1495 +     case HUHINITCAP:
  1.1496 +     case NOCAP:  {
  1.1497 +                    cat_result(result, pSMgr->suggest_morph(cw));
  1.1498 +                    if (abbv) {
  1.1499 +                        memcpy(wspace,cw,wl);
  1.1500 +                        *(wspace+wl) = '.';
  1.1501 +                        *(wspace+wl+1) = '\0';
  1.1502 +                        cat_result(result, pSMgr->suggest_morph(wspace));
  1.1503 +                    }
  1.1504 +                    break;
  1.1505 +                }
  1.1506 +     case INITCAP: {
  1.1507 +                     wl = mkallsmall2(cw, unicw, nc);
  1.1508 +                     memcpy(wspace,cw,(wl+1));
  1.1509 +                     wl2 = mkinitcap2(cw, unicw, nc);
  1.1510 +                     cat_result(result, pSMgr->suggest_morph(wspace));
  1.1511 +                     cat_result(result, pSMgr->suggest_morph(cw));
  1.1512 +                     if (abbv) {
  1.1513 +                         *(wspace+wl) = '.';
  1.1514 +                         *(wspace+wl+1) = '\0';
  1.1515 +                         cat_result(result, pSMgr->suggest_morph(wspace));
  1.1516 +
  1.1517 +                         memcpy(wspace, cw, wl2);
  1.1518 +                         *(wspace+wl2) = '.';
  1.1519 +                         *(wspace+wl2+1) = '\0';
  1.1520 +
  1.1521 +                         cat_result(result, pSMgr->suggest_morph(wspace));
  1.1522 +                     }
  1.1523 +                     break;
  1.1524 +                   }
  1.1525 +     case ALLCAP: {
  1.1526 +                     cat_result(result, pSMgr->suggest_morph(cw));
  1.1527 +                     if (abbv) {
  1.1528 +                         memcpy(wspace,cw,wl);
  1.1529 +                         *(wspace+wl) = '.';
  1.1530 +                         *(wspace+wl+1) = '\0';
  1.1531 +                         cat_result(result, pSMgr->suggest_morph(cw));
  1.1532 +                     }
  1.1533 +                     wl = mkallsmall2(cw, unicw, nc);
  1.1534 +                     memcpy(wspace,cw,(wl+1));
  1.1535 +                     wl2 = mkinitcap2(cw, unicw, nc);
  1.1536 +
  1.1537 +                     cat_result(result, pSMgr->suggest_morph(wspace));
  1.1538 +                     cat_result(result, pSMgr->suggest_morph(cw));
  1.1539 +                     if (abbv) {
  1.1540 +                         *(wspace+wl) = '.';
  1.1541 +                         *(wspace+wl+1) = '\0';
  1.1542 +                         cat_result(result, pSMgr->suggest_morph(wspace));
  1.1543 +
  1.1544 +                         memcpy(wspace, cw, wl2);
  1.1545 +                         *(wspace+wl2) = '.';
  1.1546 +                         *(wspace+wl2+1) = '\0';
  1.1547 +
  1.1548 +                         cat_result(result, pSMgr->suggest_morph(wspace));
  1.1549 +                     }
  1.1550 +                     break;
  1.1551 +                   }
  1.1552 +  }
  1.1553 +
  1.1554 +  if (*result) {
  1.1555 +    // word reversing wrapper for complex prefixes
  1.1556 +    if (complexprefixes) {
  1.1557 +      if (utf8) reverseword_utf(result); else reverseword(result);
  1.1558 +    }
  1.1559 +    return line_tok(result, slst, MSEP_REC);
  1.1560 +  }
  1.1561 +
  1.1562 +  // compound word with dash (HU) I18n
  1.1563 +  char * dash = NULL;
  1.1564 +  int nresult = 0;
  1.1565 +  // LANG_hu section: set dash information for suggestions
  1.1566 +  if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
  1.1567 +  if ((langnum == LANG_hu) && dash) {
  1.1568 +      *dash='\0';
  1.1569 +      // examine 2 sides of the dash
  1.1570 +      if (dash[1] == '\0') { // base word ending with dash
  1.1571 +        if (spell(cw)) {
  1.1572 +		char * p = pSMgr->suggest_morph(cw);
  1.1573 +		if (p) {
  1.1574 +		    int ret = line_tok(p, slst, MSEP_REC);
  1.1575 +		    free(p);
  1.1576 +		    return ret;
  1.1577 +		}
  1.1578 +		
  1.1579 +	}
  1.1580 +      } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
  1.1581 +        if (spell(cw) && (spell("-e"))) {
  1.1582 +                        st = pSMgr->suggest_morph(cw);
  1.1583 +                        if (st) {
  1.1584 +                                mystrcat(result, st, MAXLNLEN);
  1.1585 +                                free(st);
  1.1586 +                        }
  1.1587 +                        mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
  1.1588 +                        st = pSMgr->suggest_morph("-e");
  1.1589 +                        if (st) {
  1.1590 +                                mystrcat(result, st, MAXLNLEN);
  1.1591 +                                free(st);
  1.1592 +                        }
  1.1593 +                        return line_tok(result, slst, MSEP_REC);
  1.1594 +                }
  1.1595 +      } else {
  1.1596 +      // first word ending with dash: word- XXX ???
  1.1597 +        char r2 = *(dash + 1);
  1.1598 +        dash[0]='-';
  1.1599 +        dash[1]='\0';
  1.1600 +        nresult = spell(cw);
  1.1601 +        dash[1] = r2;
  1.1602 +        dash[0]='\0';
  1.1603 +        if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
  1.1604 +                ((dash[1] > '0') && (dash[1] < '9')))) {
  1.1605 +                            st = pSMgr->suggest_morph(cw);
  1.1606 +                            if (st) {
  1.1607 +                                mystrcat(result, st, MAXLNLEN);
  1.1608 +                                    free(st);
  1.1609 +                                mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
  1.1610 +                            }
  1.1611 +                            st = pSMgr->suggest_morph(dash+1);
  1.1612 +                            if (st) {
  1.1613 +                                    mystrcat(result, st, MAXLNLEN);
  1.1614 +                                    free(st);
  1.1615 +                            }
  1.1616 +                            return line_tok(result, slst, MSEP_REC);
  1.1617 +                        }
  1.1618 +      }
  1.1619 +      // affixed number in correct word
  1.1620 +     if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
  1.1621 +                        (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
  1.1622 +         *dash='-';
  1.1623 +         n = 1;
  1.1624 +         if (*(dash - n) == '.') n++;
  1.1625 +         // search first not a number character to left from dash
  1.1626 +         while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
  1.1627 +            n++;
  1.1628 +         }
  1.1629 +         if ((dash - n) < cw) n--;
  1.1630 +         // numbers: valami1000000-hoz
  1.1631 +         // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
  1.1632 +         // 56-hoz, 6-hoz
  1.1633 +         for(; n >= 1; n--) {
  1.1634 +            if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
  1.1635 +                    mystrcat(result, cw, MAXLNLEN);
  1.1636 +                    result[dash - cw - n] = '\0';
  1.1637 +                        st = pSMgr->suggest_morph(dash - n);
  1.1638 +                        if (st) {
  1.1639 +                        mystrcat(result, st, MAXLNLEN);
  1.1640 +                                free(st);
  1.1641 +                        }
  1.1642 +                        return line_tok(result, slst, MSEP_REC);
  1.1643 +            }
  1.1644 +         }
  1.1645 +     }
  1.1646 +  }
  1.1647 +  return 0;
  1.1648 +}
  1.1649 +
  1.1650 +int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
  1.1651 +{
  1.1652 +  *slst = NULL;
  1.1653 +  if (!pSMgr || !pln) return 0;
  1.1654 +  char **pl2;
  1.1655 +  int pl2n = analyze(&pl2, word);
  1.1656 +  int captype = 0;
  1.1657 +  int abbv = 0;
  1.1658 +  char cw[MAXWORDUTF8LEN];
  1.1659 +  cleanword(cw, word, &captype, &abbv);
  1.1660 +  char result[MAXLNLEN];
  1.1661 +  *result = '\0';
  1.1662 +
  1.1663 +  for (int i = 0; i < pln; i++) {
  1.1664 +    cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
  1.1665 +  }
  1.1666 +  freelist(&pl2, pl2n);
  1.1667 +
  1.1668 +  if (*result) {
  1.1669 +    // allcap
  1.1670 +    if (captype == ALLCAP) mkallcap(result);
  1.1671 +
  1.1672 +    // line split
  1.1673 +    int linenum = line_tok(result, slst, MSEP_REC);
  1.1674 +
  1.1675 +    // capitalize
  1.1676 +    if (captype == INITCAP || captype == HUHINITCAP) {
  1.1677 +        for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
  1.1678 +    }
  1.1679 +
  1.1680 +    // temporary filtering of prefix related errors (eg.
  1.1681 +    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
  1.1682 +
  1.1683 +    int r = 0;
  1.1684 +    for (int j=0; j < linenum; j++) {
  1.1685 +        if (!spell((*slst)[j])) {
  1.1686 +            free((*slst)[j]);
  1.1687 +            (*slst)[j] = NULL;
  1.1688 +        } else {
  1.1689 +            if (r < j) (*slst)[r] = (*slst)[j];
  1.1690 +            r++;
  1.1691 +        }
  1.1692 +    }
  1.1693 +    if (r > 0) return r;
  1.1694 +    free(*slst);
  1.1695 +    *slst = NULL;
  1.1696 +  }
  1.1697 +  return 0;
  1.1698 +}
  1.1699 +
  1.1700 +int Hunspell::generate(char*** slst, const char * word, const char * pattern)
  1.1701 +{
  1.1702 +  char **pl;
  1.1703 +  int pln = analyze(&pl, pattern);
  1.1704 +  int n = generate(slst, word, pl, pln);
  1.1705 +  freelist(&pl, pln);
  1.1706 +  return uniqlist(*slst, n);
  1.1707 +}
  1.1708 +
  1.1709 +// minimal XML parser functions
  1.1710 +int Hunspell::get_xml_par(char * dest, const char * par, int max)
  1.1711 +{
  1.1712 +   char * d = dest;
  1.1713 +   if (!par) return 0;
  1.1714 +   char end = *par;
  1.1715 +   char * dmax = dest + max;
  1.1716 +   if (end == '>') end = '<';
  1.1717 +   else if (end != '\'' && end != '"') return 0; // bad XML
  1.1718 +   for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
  1.1719 +   *d = '\0';
  1.1720 +   mystrrep(dest, "&lt;", "<");
  1.1721 +   mystrrep(dest, "&amp;", "&");
  1.1722 +   return (int)(d - dest);
  1.1723 +}
  1.1724 +
  1.1725 +int Hunspell::get_langnum() const
  1.1726 +{
  1.1727 +   return langnum;
  1.1728 +}
  1.1729 +
  1.1730 +// return the beginning of the element (attr == NULL) or the attribute
  1.1731 +const char * Hunspell::get_xml_pos(const char * s, const char * attr)
  1.1732 +{
  1.1733 +  const char * end = strchr(s, '>');
  1.1734 +  const char * p = s;
  1.1735 +  if (attr == NULL) return end;
  1.1736 +  do {
  1.1737 +    p = strstr(p, attr);
  1.1738 +    if (!p || p >= end) return 0;
  1.1739 +  } while (*(p-1) != ' ' &&  *(p-1) != '\n');
  1.1740 +  return p + strlen(attr);
  1.1741 +}
  1.1742 +
  1.1743 +int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
  1.1744 +  char cw[MAXWORDUTF8LEN];
  1.1745 +  if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
  1.1746 +    strcmp(cw, value) == 0) return 1;
  1.1747 +  return 0;
  1.1748 +}
  1.1749 +
  1.1750 +int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
  1.1751 +    int n = 0;
  1.1752 +    char * p;
  1.1753 +    if (!list) return 0;
  1.1754 +    for (p = list; (p = strstr(p, tag)); p++) n++;
  1.1755 +    if (n == 0) return 0;
  1.1756 +    *slst = (char **) malloc(sizeof(char *) * n);
  1.1757 +    if (!*slst) return 0;
  1.1758 +    for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
  1.1759 +        int l = strlen(p);
  1.1760 +        (*slst)[n] = (char *) malloc(l + 1);
  1.1761 +        if (!(*slst)[n]) return n;
  1.1762 +        if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
  1.1763 +            free((*slst)[n]);
  1.1764 +            break;
  1.1765 +        }
  1.1766 +    }
  1.1767 +    return n;
  1.1768 +}
  1.1769 +
  1.1770 +int Hunspell::spellml(char*** slst, const char * word)
  1.1771 +{
  1.1772 +  char *q, *q2;
  1.1773 +  char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
  1.1774 +  q = (char *) strstr(word, "<query");
  1.1775 +  if (!q) return 0; // bad XML input
  1.1776 +  q2 = strchr(q, '>');
  1.1777 +  if (!q2) return 0; // bad XML input
  1.1778 +  q2 = strstr(q2, "<word");
  1.1779 +  if (!q2) return 0; // bad XML input
  1.1780 +  if (check_xml_par(q, "type=", "analyze")) {
  1.1781 +      int n = 0, s = 0;
  1.1782 +      if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
  1.1783 +      if (n == 0) return 0;
  1.1784 +      // convert the result to <code><a>ana1</a><a>ana2</a></code> format
  1.1785 +      for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
  1.1786 +      char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
  1.1787 +      if (!r) return 0;
  1.1788 +      strcpy(r, "<code>");
  1.1789 +      for (int i = 0; i < n; i++) {
  1.1790 +        int l = strlen(r);
  1.1791 +        strcpy(r + l, "<a>");
  1.1792 +        strcpy(r + l + 3, (*slst)[i]);
  1.1793 +        mystrrep(r + l + 3, "\t", " ");
  1.1794 +        mystrrep(r + l + 3, "<", "&lt;");
  1.1795 +        mystrrep(r + l + 3, "&", "&amp;");
  1.1796 +        strcat(r, "</a>");
  1.1797 +        free((*slst)[i]);
  1.1798 +      }
  1.1799 +      strcat(r, "</code>");
  1.1800 +      (*slst)[0] = r;
  1.1801 +      return 1;
  1.1802 +  } else if (check_xml_par(q, "type=", "stem")) {
  1.1803 +      if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
  1.1804 +  } else if (check_xml_par(q, "type=", "generate")) {
  1.1805 +      int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
  1.1806 +      if (n == 0) return 0;
  1.1807 +      char * q3 = strstr(q2 + 1, "<word");
  1.1808 +      if (q3) {
  1.1809 +        if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
  1.1810 +            return generate(slst, cw, cw2);
  1.1811 +        }
  1.1812 +      } else {
  1.1813 +        if ((q2 = strstr(q2 + 1, "<code"))) {
  1.1814 +          char ** slst2;
  1.1815 +          if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
  1.1816 +            int n2 = generate(slst, cw, slst2, n);
  1.1817 +            freelist(&slst2, n);
  1.1818 +            return uniqlist(*slst, n2);
  1.1819 +          }
  1.1820 +          freelist(&slst2, n);
  1.1821 +        }
  1.1822 +      }
  1.1823 +  }
  1.1824 +  return 0;
  1.1825 +}
  1.1826 +
  1.1827 +
  1.1828 +#ifdef HUNSPELL_EXPERIMENTAL
  1.1829 +// XXX need UTF-8 support
  1.1830 +char * Hunspell::morph_with_correction(const char * word)
  1.1831 +{
  1.1832 +  char cw[MAXWORDUTF8LEN];
  1.1833 +  char wspace[MAXWORDUTF8LEN];
  1.1834 +  if (! pSMgr || maxdic == 0) return NULL;
  1.1835 +  int wl = strlen(word);
  1.1836 +  if (utf8) {
  1.1837 +    if (wl >= MAXWORDUTF8LEN) return NULL;
  1.1838 +  } else {
  1.1839 +    if (wl >= MAXWORDLEN) return NULL;
  1.1840 +  }
  1.1841 +  int captype = 0;
  1.1842 +  int abbv = 0;
  1.1843 +  wl = cleanword(cw, word, &captype, &abbv);
  1.1844 +  if (wl == 0) return NULL;
  1.1845 +
  1.1846 +  char result[MAXLNLEN];
  1.1847 +  char * st = NULL;
  1.1848 +
  1.1849 +  *result = '\0';
  1.1850 +
  1.1851 +
  1.1852 +  switch(captype) {
  1.1853 +     case NOCAP:   {
  1.1854 +                     st = pSMgr->suggest_morph_for_spelling_error(cw);
  1.1855 +                     if (st) {
  1.1856 +                        mystrcat(result, st, MAXLNLEN);
  1.1857 +                        free(st);
  1.1858 +                     }
  1.1859 +                     if (abbv) {
  1.1860 +                         memcpy(wspace,cw,wl);
  1.1861 +                         *(wspace+wl) = '.';
  1.1862 +                         *(wspace+wl+1) = '\0';
  1.1863 +                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1864 +                         if (st) {
  1.1865 +                            if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1866 +                            mystrcat(result, st, MAXLNLEN);
  1.1867 +                            free(st);
  1.1868 +                                                 }
  1.1869 +                     }
  1.1870 +                                         break;
  1.1871 +                   }
  1.1872 +     case INITCAP: {
  1.1873 +                     memcpy(wspace,cw,(wl+1));
  1.1874 +                     mkallsmall(wspace);
  1.1875 +                     st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1876 +                     if (st) {
  1.1877 +                        mystrcat(result, st, MAXLNLEN);
  1.1878 +                        free(st);
  1.1879 +                     }
  1.1880 +                     st = pSMgr->suggest_morph_for_spelling_error(cw);
  1.1881 +                     if (st) {
  1.1882 +                        if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1883 +                        mystrcat(result, st, MAXLNLEN);
  1.1884 +                        free(st);
  1.1885 +                     }
  1.1886 +                     if (abbv) {
  1.1887 +                         memcpy(wspace,cw,wl);
  1.1888 +                         *(wspace+wl) = '.';
  1.1889 +                         *(wspace+wl+1) = '\0';
  1.1890 +                         mkallsmall(wspace);
  1.1891 +                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1892 +                         if (st) {
  1.1893 +                            if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1894 +                            mystrcat(result, st, MAXLNLEN);
  1.1895 +                            free(st);
  1.1896 +                         }
  1.1897 +                         mkinitcap(wspace);
  1.1898 +                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1899 +                         if (st) {
  1.1900 +                            if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1901 +                            mystrcat(result, st, MAXLNLEN);
  1.1902 +                            free(st);
  1.1903 +                         }
  1.1904 +                     }
  1.1905 +                     break;
  1.1906 +                   }
  1.1907 +     case HUHCAP: {
  1.1908 +                     st = pSMgr->suggest_morph_for_spelling_error(cw);
  1.1909 +                     if (st) {
  1.1910 +                        mystrcat(result, st, MAXLNLEN);
  1.1911 +                        free(st);
  1.1912 +                     }
  1.1913 +                     memcpy(wspace,cw,(wl+1));
  1.1914 +                     mkallsmall(wspace);
  1.1915 +                     st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1916 +                     if (st) {
  1.1917 +                        if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1918 +                        mystrcat(result, st, MAXLNLEN);
  1.1919 +                        free(st);
  1.1920 +                     }
  1.1921 +                     break;
  1.1922 +                 }
  1.1923 +     case ALLCAP: {
  1.1924 +                     memcpy(wspace,cw,(wl+1));
  1.1925 +                     st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1926 +                     if (st) {
  1.1927 +                        mystrcat(result, st, MAXLNLEN);
  1.1928 +                        free(st);
  1.1929 +                     }
  1.1930 +                     mkallsmall(wspace);
  1.1931 +                     st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1932 +                     if (st) {
  1.1933 +                        if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1934 +                        mystrcat(result, st, MAXLNLEN);
  1.1935 +                        free(st);
  1.1936 +                     }
  1.1937 +                     mkinitcap(wspace);
  1.1938 +                     st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1939 +                     if (st) {
  1.1940 +                        if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1941 +                        mystrcat(result, st, MAXLNLEN);
  1.1942 +                        free(st);
  1.1943 +                     }
  1.1944 +                     if (abbv) {
  1.1945 +                        memcpy(wspace,cw,(wl+1));
  1.1946 +                        *(wspace+wl) = '.';
  1.1947 +                        *(wspace+wl+1) = '\0';
  1.1948 +                        if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1949 +                        st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1950 +                        if (st) {
  1.1951 +                            mystrcat(result, st, MAXLNLEN);
  1.1952 +                            free(st);
  1.1953 +                        }
  1.1954 +                        mkallsmall(wspace);
  1.1955 +                        st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1956 +                        if (st) {
  1.1957 +                          if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1958 +                          mystrcat(result, st, MAXLNLEN);
  1.1959 +                          free(st);
  1.1960 +                        }
  1.1961 +                        mkinitcap(wspace);
  1.1962 +                        st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1.1963 +                        if (st) {
  1.1964 +                          if (*result) mystrcat(result, "\n", MAXLNLEN);
  1.1965 +                          mystrcat(result, st, MAXLNLEN);
  1.1966 +                          free(st);
  1.1967 +                        }
  1.1968 +                     }
  1.1969 +                     break;
  1.1970 +                   }
  1.1971 +  }
  1.1972 +
  1.1973 +  if (*result) return mystrdup(result);
  1.1974 +  return NULL;
  1.1975 +}
  1.1976 +
  1.1977 +#endif // END OF HUNSPELL_EXPERIMENTAL CODE
  1.1978 +
  1.1979 +Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
  1.1980 +{
  1.1981 +        return (Hunhandle*)(new Hunspell(affpath, dpath));
  1.1982 +}
  1.1983 +
  1.1984 +Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
  1.1985 +    const char * key)
  1.1986 +{
  1.1987 +        return (Hunhandle*)(new Hunspell(affpath, dpath, key));
  1.1988 +}
  1.1989 +
  1.1990 +void Hunspell_destroy(Hunhandle *pHunspell)
  1.1991 +{
  1.1992 +        delete (Hunspell*)(pHunspell);
  1.1993 +}
  1.1994 +
  1.1995 +int Hunspell_spell(Hunhandle *pHunspell, const char *word)
  1.1996 +{
  1.1997 +        return ((Hunspell*)pHunspell)->spell(word);
  1.1998 +}
  1.1999 +
  1.2000 +char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
  1.2001 +{
  1.2002 +        return ((Hunspell*)pHunspell)->get_dic_encoding();
  1.2003 +}
  1.2004 +
  1.2005 +int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
  1.2006 +{
  1.2007 +        return ((Hunspell*)pHunspell)->suggest(slst, word);
  1.2008 +}
  1.2009 +
  1.2010 +int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
  1.2011 +{
  1.2012 +        return ((Hunspell*)pHunspell)->analyze(slst, word);
  1.2013 +}
  1.2014 +
  1.2015 +int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
  1.2016 +{
  1.2017 +        return ((Hunspell*)pHunspell)->stem(slst, word);
  1.2018 +}
  1.2019 +
  1.2020 +int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
  1.2021 +{
  1.2022 +        return ((Hunspell*)pHunspell)->stem(slst, desc, n);
  1.2023 +}
  1.2024 +
  1.2025 +int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
  1.2026 +    const char * word2)
  1.2027 +{
  1.2028 +        return ((Hunspell*)pHunspell)->generate(slst, word, word2);
  1.2029 +}
  1.2030 +
  1.2031 +int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
  1.2032 +    char** desc, int n)
  1.2033 +{
  1.2034 +        return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
  1.2035 +}
  1.2036 +
  1.2037 +  /* functions for run-time modification of the dictionary */
  1.2038 +
  1.2039 +  /* add word to the run-time dictionary */
  1.2040 +
  1.2041 +int Hunspell_add(Hunhandle *pHunspell, const char * word) {
  1.2042 +        return ((Hunspell*)pHunspell)->add(word);
  1.2043 +}
  1.2044 +
  1.2045 +  /* add word to the run-time dictionary with affix flags of
  1.2046 +   * the example (a dictionary word): Hunspell will recognize
  1.2047 +   * affixed forms of the new word, too.
  1.2048 +   */
  1.2049 +
  1.2050 +int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
  1.2051 +        const char * example) {
  1.2052 +        return ((Hunspell*)pHunspell)->add_with_affix(word, example);
  1.2053 +}
  1.2054 +
  1.2055 +  /* remove word from the run-time dictionary */
  1.2056 +
  1.2057 +int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
  1.2058 +        return ((Hunspell*)pHunspell)->remove(word);
  1.2059 +}
  1.2060 +
  1.2061 +void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
  1.2062 +        freelist(slst, n);
  1.2063 +}

mercurial