extensions/spellcheck/hunspell/src/hunspell.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /******* BEGIN LICENSE BLOCK *******
     2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     3  * 
     4  * The contents of this file are subject to the Mozilla Public License Version
     5  * 1.1 (the "License"); you may not use this file except in compliance with
     6  * the License. You may obtain a copy of the License at
     7  * http://www.mozilla.org/MPL/
     8  * 
     9  * Software distributed under the License is distributed on an "AS IS" basis,
    10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    11  * for the specific language governing rights and limitations under the
    12  * License.
    13  * 
    14  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    15  * and László Németh (Hunspell). Portions created by the Initial Developers
    16  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    17  * 
    18  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    19  *                 David Einstein (deinst@world.std.com)
    20  *                 László Németh (nemethl@gyorsposta.hu)
    21  *                 Caolan McNamara (caolanm@redhat.com)
    22  *                 Davide Prina
    23  *                 Giuseppe Modugno
    24  *                 Gianluca Turconi
    25  *                 Simon Brouwer
    26  *                 Noll Janos
    27  *                 Biro Arpad
    28  *                 Goldman Eleonora
    29  *                 Sarlos Tamas
    30  *                 Bencsath Boldizsar
    31  *                 Halacsy Peter
    32  *                 Dvornik Laszlo
    33  *                 Gefferth Andras
    34  *                 Nagy Viktor
    35  *                 Varga Daniel
    36  *                 Chris Halls
    37  *                 Rene Engelhard
    38  *                 Bram Moolenaar
    39  *                 Dafydd Jones
    40  *                 Harri Pitkanen
    41  *                 Andras Timar
    42  *                 Tor Lillqvist
    43  * 
    44  * Alternatively, the contents of this file may be used under the terms of
    45  * either the GNU General Public License Version 2 or later (the "GPL"), or
    46  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    47  * in which case the provisions of the GPL or the LGPL are applicable instead
    48  * of those above. If you wish to allow use of your version of this file only
    49  * under the terms of either the GPL or the LGPL, and not to allow others to
    50  * use your version of this file under the terms of the MPL, indicate your
    51  * decision by deleting the provisions above and replace them with the notice
    52  * and other provisions required by the GPL or the LGPL. If you do not delete
    53  * the provisions above, a recipient may use your version of this file under
    54  * the terms of any one of the MPL, the GPL or the LGPL.
    55  *
    56  ******* END LICENSE BLOCK *******/
    58 #include <stdlib.h>
    59 #include <string.h>
    60 #include <stdio.h>
    62 #include "hunspell.hxx"
    63 #include "hunspell.h"
    64 #ifndef MOZILLA_CLIENT
    65 #    include "config.h"
    66 #endif
    67 #include "csutil.hxx"
    69 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
    70 {
    71     encoding = NULL;
    72     csconv = NULL;
    73     utf8 = 0;
    74     complexprefixes = 0;
    75     affixpath = mystrdup(affpath);
    76     maxdic = 0;
    78     /* first set up the hash manager */
    79     pHMgr[0] = new HashMgr(dpath, affpath, key);
    80     if (pHMgr[0]) maxdic = 1;
    82     /* next set up the affix manager */
    83     /* it needs access to the hash manager lookup methods */
    84     pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
    86     /* get the preferred try string and the dictionary */
    87     /* encoding from the Affix Manager for that dictionary */
    88     char * try_string = pAMgr->get_try_string();
    89     encoding = pAMgr->get_encoding();
    90     langnum = pAMgr->get_langnum();
    91     utf8 = pAMgr->get_utf8();
    92     if (!utf8)
    93         csconv = get_current_cs(encoding);
    94     complexprefixes = pAMgr->get_complexprefixes();
    95     wordbreak = pAMgr->get_breaktable();
    97     /* and finally set up the suggestion manager */
    98     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
    99     if (try_string) free(try_string);
   100 }
   102 Hunspell::~Hunspell()
   103 {
   104     if (pSMgr) delete pSMgr;
   105     if (pAMgr) delete pAMgr;
   106     for (int i = 0; i < maxdic; i++) delete pHMgr[i];
   107     maxdic = 0;
   108     pSMgr = NULL;
   109     pAMgr = NULL;
   110 #ifdef MOZILLA_CLIENT
   111     delete [] csconv;
   112 #endif
   113     csconv= NULL;
   114     if (encoding) free(encoding);
   115     encoding = NULL;
   116     if (affixpath) free(affixpath);
   117     affixpath = NULL;
   118 }
   120 // load extra dictionaries
   121 int Hunspell::add_dic(const char * dpath, const char * key) {
   122     if (maxdic == MAXDIC || !affixpath) return 1;
   123     pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
   124     if (pHMgr[maxdic]) maxdic++; else return 1;
   125     return 0;
   126 }
   128 // make a copy of src at destination while removing all leading
   129 // blanks and removing any trailing periods after recording
   130 // their presence with the abbreviation flag
   131 // also since already going through character by character,
   132 // set the capitalization type
   133 // return the length of the "cleaned" (and UTF-8 encoded) word
   135 int Hunspell::cleanword2(char * dest, const char * src,
   136     w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
   137 {
   138    unsigned char * p = (unsigned char *) dest;
   139    const unsigned char * q = (const unsigned char * ) src;
   141    // first skip over any leading blanks
   142    while ((*q != '\0') && (*q == ' ')) q++;
   144    // now strip off any trailing periods (recording their presence)
   145    *pabbrev = 0;
   146    int nl = strlen((const char *)q);
   147    while ((nl > 0) && (*(q+nl-1)=='.')) {
   148        nl--;
   149        (*pabbrev)++;
   150    }
   152    // if no characters are left it can't be capitalized
   153    if (nl <= 0) {
   154        *pcaptype = NOCAP;
   155        *p = '\0';
   156        return 0;
   157    }
   159    strncpy(dest, (char *) q, nl);
   160    *(dest + nl) = '\0';
   161    nl = strlen(dest);
   162    if (utf8) {
   163       *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
   164       // don't check too long words
   165       if (*nc >= MAXWORDLEN) return 0;
   166       if (*nc == -1) { // big Unicode character (non BMP area)
   167          *pcaptype = NOCAP;
   168          return nl;
   169       }
   170      *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
   171    } else {
   172      *pcaptype = get_captype(dest, nl, csconv);
   173      *nc = nl;
   174    }
   175    return nl;
   176 }
   178 int Hunspell::cleanword(char * dest, const char * src,
   179     int * pcaptype, int * pabbrev)
   180 {
   181    unsigned char * p = (unsigned char *) dest;
   182    const unsigned char * q = (const unsigned char * ) src;
   183    int firstcap = 0;
   185    // first skip over any leading blanks
   186    while ((*q != '\0') && (*q == ' ')) q++;
   188    // now strip off any trailing periods (recording their presence)
   189    *pabbrev = 0;
   190    int nl = strlen((const char *)q);
   191    while ((nl > 0) && (*(q+nl-1)=='.')) {
   192        nl--;
   193        (*pabbrev)++;
   194    }
   196    // if no characters are left it can't be capitalized
   197    if (nl <= 0) {
   198        *pcaptype = NOCAP;
   199        *p = '\0';
   200        return 0;
   201    }
   203    // now determine the capitalization type of the first nl letters
   204    int ncap = 0;
   205    int nneutral = 0;
   206    int nc = 0;
   208    if (!utf8) {
   209       while (nl > 0) {
   210          nc++;
   211          if (csconv[(*q)].ccase) ncap++;
   212          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
   213          *p++ = *q++;
   214          nl--;
   215       }
   216       // remember to terminate the destination string
   217       *p = '\0';
   218       firstcap = csconv[(unsigned char)(*dest)].ccase;
   219    } else {
   220       unsigned short idx;
   221       w_char t[MAXWORDLEN];
   222       nc = u8_u16(t, MAXWORDLEN, src);
   223       for (int i = 0; i < nc; i++) {
   224          idx = (t[i].h << 8) + t[i].l;
   225          unsigned short low = unicodetolower(idx, langnum);
   226          if (idx != low) ncap++;
   227          if (unicodetoupper(idx, langnum) == low) nneutral++;
   228       }
   229       u16_u8(dest, MAXWORDUTF8LEN, t, nc);
   230       if (ncap) {
   231          idx = (t[0].h << 8) + t[0].l;
   232          firstcap = (idx != unicodetolower(idx, langnum));
   233       }
   234    }
   236    // now finally set the captype
   237    if (ncap == 0) {
   238         *pcaptype = NOCAP;
   239    } else if ((ncap == 1) && firstcap) {
   240         *pcaptype = INITCAP;
   241    } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
   242         *pcaptype = ALLCAP;
   243    } else if ((ncap > 1) && firstcap) {
   244         *pcaptype = HUHINITCAP;
   245    } else {
   246         *pcaptype = HUHCAP;
   247    }
   248    return strlen(dest);
   249 }
   251 void Hunspell::mkallcap(char * p)
   252 {
   253   if (utf8) {
   254       w_char u[MAXWORDLEN];
   255       int nc = u8_u16(u, MAXWORDLEN, p);
   256       unsigned short idx;
   257       for (int i = 0; i < nc; i++) {
   258          idx = (u[i].h << 8) + u[i].l;
   259          if (idx != unicodetoupper(idx, langnum)) {
   260             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
   261             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
   262          }
   263       }
   264       u16_u8(p, MAXWORDUTF8LEN, u, nc);
   265   } else {
   266     while (*p != '\0') {
   267         *p = csconv[((unsigned char) *p)].cupper;
   268         p++;
   269     }
   270   }
   271 }
   273 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
   274 {
   275   if (utf8) {
   276       unsigned short idx;
   277       for (int i = 0; i < nc; i++) {
   278          idx = (u[i].h << 8) + u[i].l;
   279          unsigned short up = unicodetoupper(idx, langnum);
   280          if (idx != up) {
   281             u[i].h = (unsigned char) (up >> 8);
   282             u[i].l = (unsigned char) (up & 0x00FF);
   283          }
   284       }
   285       u16_u8(p, MAXWORDUTF8LEN, u, nc);
   286       return strlen(p);
   287   } else {
   288     while (*p != '\0') {
   289         *p = csconv[((unsigned char) *p)].cupper;
   290         p++;
   291     }
   292   }
   293   return nc;
   294 }
   297 void Hunspell::mkallsmall(char * p)
   298 {
   299     while (*p != '\0') {
   300         *p = csconv[((unsigned char) *p)].clower;
   301         p++;
   302     }
   303 }
   305 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
   306 {
   307   if (utf8) {
   308       unsigned short idx;
   309       for (int i = 0; i < nc; i++) {
   310          idx = (u[i].h << 8) + u[i].l;
   311          unsigned short low = unicodetolower(idx, langnum);
   312          if (idx != low) {
   313             u[i].h = (unsigned char) (low >> 8);
   314             u[i].l = (unsigned char) (low & 0x00FF);
   315          }
   316       }
   317       u16_u8(p, MAXWORDUTF8LEN, u, nc);
   318       return strlen(p);
   319   } else {
   320     while (*p != '\0') {
   321         *p = csconv[((unsigned char) *p)].clower;
   322         p++;
   323     }
   324   }
   325   return nc;
   326 }
   328 // convert UTF-8 sharp S codes to latin 1
   329 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
   330     char * p = dest;
   331     *p = *source;
   332     for (p++, source++; *(source - 1); p++, source++) {
   333         *p = *source;
   334         if (*source == '\x9F') *--p = '\xDF';
   335     }
   336     return dest;
   337 }
   339 // recursive search for right ss - sharp s permutations
   340 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
   341         int repnum, char * tmp, int * info, char **root) {
   342     pos = strstr(pos, "ss");
   343     if (pos && (n < MAXSHARPS)) {
   344         *pos = '\xC3';
   345         *(pos + 1) = '\x9F';
   346         hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
   347         if (h) return h;
   348         *pos = 's';
   349         *(pos + 1) = 's';
   350         h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
   351         if (h) return h;
   352     } else if (repnum > 0) {
   353         if (utf8) return checkword(base, info, root);
   354         return checkword(sharps_u8_l1(tmp, base), info, root);
   355     }
   356     return NULL;
   357 }
   359 int Hunspell::is_keepcase(const hentry * rv) {
   360     return pAMgr && rv->astr && pAMgr->get_keepcase() &&
   361         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
   362 }
   364 /* insert a word to the beginning of the suggestion array and return ns */
   365 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
   366     char * dup = mystrdup(word);
   367     if (!dup) return ns;
   368     if (ns == MAXSUGGESTION) {
   369         ns--;
   370         free((*slst)[ns]);
   371     }
   372     for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
   373     (*slst)[0] = dup;
   374     return ns + 1;
   375 }
   377 int Hunspell::spell(const char * word, int * info, char ** root)
   378 {
         // Check the spelling of one word.
         //   word - NUL-terminated input word
         //   info - optional; reset to 0 and then given SPELL_* bits
         //          (a local dummy is used when it is NULL)
         //   root - optional; set to NULL here and possibly filled in by
         //          checkword()
         // Returns 0 for a rejected word and a non-zero value otherwise:
         // 1 for the XML marker, for an empty cleaned word, when no dictionary
         // is loaded, for plain numbers and for words accepted by breaking;
         // HUNSPELL_OK or HUNSPELL_OK_WARN for a dictionary hit.
         // Words of MAXWORDUTF8LEN bytes or more (MAXWORDLEN for 8-bit
         // encodings) are rejected without a lookup.
         // NOTE(review): the recursive calls below pass only the word, so
         // info and root presumably default to NULL in the header - confirm.
   379   struct hentry * rv=NULL;
   380   // a larger buffer is needed. For example, the Turkish capital letter I
   381   // is converted to a 2-byte UTF-8 character (dotless i) by mkallsmall.
   382   char cw[MAXWORDUTF8LEN];
   383   char wspace[MAXWORDUTF8LEN];
   384   w_char unicw[MAXWORDLEN];
   385   // Hunspell supports XML input of the simplified API (see manual)
   386   if (strcmp(word, SPELL_XML) == 0) return 1;
   387   int nc = strlen(word);
   388   int wl2 = 0;
   389   if (utf8) {
   390     if (nc >= MAXWORDUTF8LEN) return 0;
   391   } else {
   392     if (nc >= MAXWORDLEN) return 0;
   393   }
   394   int captype = 0;
   395   int abbv = 0;
   396   int wl = 0;
   398   // input conversion
   399   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   400   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
   401   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
   403   int info2 = 0;
   404   if (wl == 0 || maxdic == 0) return 1;
   405   if (root) *root = NULL;
   407   // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
   408   enum { NBEGIN, NNUM, NSEP };
   409   int nstate = NBEGIN;
   410   int i;
   412   for (i = 0; (i < wl); i++) {
   413     if ((cw[i] <= '9') && (cw[i] >= '0')) {
   414         nstate = NNUM;
   415     } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
   416         if ((nstate == NSEP) || (i == 0)) break;
   417         nstate = NSEP;
   418     } else break;
   419   }
         // accepted only if the whole word was consumed and it ends in a digit
   420   if ((i == wl) && (nstate == NNUM)) return 1;
   421   if (!info) info = &info2; else *info = 0;
   423   switch(captype) {
   424      case HUHCAP:
   425      case HUHINITCAP:
   426             *info += SPELL_ORIGCAP;
                   // no break: mixed-case words continue with the NOCAP lookup
   427      case NOCAP: {
   428             rv = checkword(cw, info, root);
                   // retry with one trailing period put back (abbreviations)
   429             if ((abbv) && !(rv)) {
   430                 memcpy(wspace,cw,wl);
   431                 *(wspace+wl) = '.';
   432                 *(wspace+wl+1) = '\0';
   433                 rv = checkword(wspace, info, root);
   434             }
   435             break;
   436          }
   437      case ALLCAP: {
   438             *info += SPELL_ORIGCAP;
   439             rv = checkword(cw, info, root);
   440             if (rv) break;
   441             if (abbv) {
   442                 memcpy(wspace,cw,wl);
   443                 *(wspace+wl) = '.';
   444                 *(wspace+wl+1) = '\0';
   445                 rv = checkword(wspace, info, root);
   446                 if (rv) break;
   447             }
   448             // Spec. prefix handling for Catalan, French, Italian:
   449 	    // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
   450             if (pAMgr && strchr(cw, '\'')) {
   451                 wl = mkallsmall2(cw, unicw, nc);
   452         	//There are no really sane circumstances where this could fail,
   453         	//but anyway...
   454         	if (char * apostrophe = strchr(cw, '\'')) {
   455                     if (utf8) {
                               // wl2 = number of characters before the
                               // apostrophe; cw is cut there only for the
                               // duration of the conversion
   456             	        w_char tmpword[MAXWORDLEN];
   457             	        *apostrophe = '\0';
   458             	        wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
   459             	        *apostrophe = '\'';
   460 		        if (wl2 < nc) {
   461 		            mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
   462 			    rv = checkword(cw, info, root);
   463 			    if (rv) break;
   464 		        }
   465                     } else {
   466 		        mkinitcap2(apostrophe + 1, unicw, nc);
   467 		        rv = checkword(cw, info, root);
   468 		        if (rv) break;
   469 		    }
   470 		}
   471 		mkinitcap2(cw, unicw, nc);
   472 		rv = checkword(cw, info, root);
   473 		if (rv) break;
   474             }
   475             if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
   476                 char tmpword[MAXWORDUTF8LEN];
   477                 wl = mkallsmall2(cw, unicw, nc);
   478                 memcpy(wspace,cw,(wl+1));
   479                 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
   480                 if (!rv) {
   481                     wl2 = mkinitcap2(cw, unicw, nc);
   482                     rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
   483                 }
   484                 if ((abbv) && !(rv)) {
   485                     *(wspace+wl) = '.';
   486                     *(wspace+wl+1) = '\0';
   487                     rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
   488                     if (!rv) {
   489                         memcpy(wspace, cw, wl2);
   490                         *(wspace+wl2) = '.';
   491                         *(wspace+wl2+1) = '\0';
   492                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
   493                     }
   494                 }
   495                 if (rv) break;
   496             }
   497         }
               // no break: an ALLCAP word that is still unmatched continues
               // with the INITCAP handling (SPELL_ORIGCAP is added again there)
   498      case INITCAP: {
   499              *info += SPELL_ORIGCAP;
                    // wspace receives the all-lowercase form, cw the
                    // initial-capital form
   500              wl = mkallsmall2(cw, unicw, nc);
   501              memcpy(wspace,cw,(wl+1));
   502              wl2 = mkinitcap2(cw, unicw, nc);
                    // SPELL_INITCAP is set only for the duration of the lookup
   503              if (captype == INITCAP) *info += SPELL_INITCAP;
   504              rv = checkword(cw, info, root);
   505              if (captype == INITCAP) *info -= SPELL_INITCAP;
   506              // forbid bad capitalization
   507              // (for example, ijs -> Ijs instead of IJs in Dutch)
   508              // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
   509              if (*info & SPELL_FORBIDDEN) {
   510                 rv = NULL;
   511                 break;
   512              }
   513              if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
   514              if (rv) break;
   516              rv = checkword(wspace, info, root);
   517              if (abbv && !rv) {
   519                  *(wspace+wl) = '.';
   520                  *(wspace+wl+1) = '\0';
   521                  rv = checkword(wspace, info, root);
   522                  if (!rv) {
   523                     memcpy(wspace, cw, wl2);
   524                     *(wspace+wl2) = '.';
   525                     *(wspace+wl2+1) = '\0';
   526     	    	    if (captype == INITCAP) *info += SPELL_INITCAP;
   527                     rv = checkword(wspace, info, root);
   528     	    	    if (captype == INITCAP) *info -= SPELL_INITCAP;
   529                     if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
   530                     break;
   531                  }
   532              }
   533              if (rv && is_keepcase(rv) &&
   534                 ((captype == ALLCAP) ||
   535                    // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
   536                    // in INITCAP form, too.
                          // NOTE(review): pAMgr is used here without the null
                          // check applied elsewhere in this function; is_keepcase()
                          // returning non-zero already implies it is set
   537                    !(pAMgr->get_checksharps() &&
   538                       ((utf8 && strstr(wspace, "\xC3\x9F")) ||
   539                       (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
   540              break;
   541            }
   542   }
   544   if (rv) {
             // a hit carrying the warn flag is reported through SPELL_WARN and
             // rejected outright when forbidwarn is configured
   545       if (pAMgr && pAMgr->get_warn() && rv->astr &&
   546           TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
   547               *info += SPELL_WARN;
   548 	      if (pAMgr->get_forbidwarn()) return 0;
   549               return HUNSPELL_OK_WARN;
   550       }
   551       return HUNSPELL_OK;
   552   }
   554   // recursive breaking at break points
   555   if (wordbreak) {
   556     char * s;
   557     char r;
   558     int nbr = 0;
   559     wl = strlen(cw);
   560     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
   562     // calculate break points for recursion limit
           // NOTE(review): with an empty pattern strstr() returns s itself on
           // every pass, so this loop would run past the end of cw - confirm
           // that the break table never contains empty patterns
   563     for (int j = 0; j < numbreak; j++) {
   564       s = cw;
   565       do {
   566       	s = (char *) strstr(s, wordbreak[j]);
   567       	if (s) { 
   568 		nbr++;
   569 		s++;
   570 	}
   571       } while (s);
   572     } 
   573     if (nbr >= 10) return 0;
   575     // check boundary patterns (^begin and end$)
   576     for (int j = 0; j < numbreak; j++) {
   577       int plen = strlen(wordbreak[j]);
   578       if (plen == 1 || plen > wl) continue;
   579       if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
   580         && spell(cw + plen - 1)) return 1;
   581       if (wordbreak[j][plen - 1] == '$' &&
   582         strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
                 // cut the pattern off the end, check the rest, then restore cw
   583 	    r = cw[wl - plen + 1];
   584 	    cw[wl - plen + 1] = '\0';
   585     	    if (spell(cw)) return 1;
   586 	    cw[wl - plen + 1] = r;
   587 	}
   588     }
   590     // other patterns
   591     for (int j = 0; j < numbreak; j++) {
   592       int plen = strlen(wordbreak[j]);
   593       s=(char *) strstr(cw, wordbreak[j]);
             // only the first occurrence is used, and only when it lies
             // strictly inside the word
   594       if (s && (s > cw) && (s < cw + wl - plen)) {
   595 	if (!spell(s + plen)) continue;
   596         r = *s;
   597         *s = '\0';
   598         // examine 2 sides of the break point
   599         if (spell(cw)) return 1;
   600         *s = r;
   602         // LANG_hu: spec. dash rule
   603 	if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
   604 	  r = s[1];
   605 	  s[1] = '\0';
   606           if (spell(cw)) return 1; // check the first part with dash
   607           s[1] = r;
   608 	}
   609         // end of LANG specific region
   611       }
   612     }
   613   }
   615   return 0;
   616 }
   618 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
   619 {
         // Look one already cleaned word up in the dictionaries.
         //   w    - NUL-terminated word
         //   info - optional; SPELL_FORBIDDEN and SPELL_COMPOUND are added to
         //          it, and its SPELL_INITCAP bit is read to filter
         //          ONLYUPCASEFLAG entries
         //   root - optional; receives a mystrdup() copy of the stem when the
         //          word is accepted through affix or compound analysis
         //          (presumably to be freed by the caller - confirm)
         // Returns the matching entry, or NULL when the word is empty,
         // forbidden or not found.
         // NOTE(review): pAMgr is dereferenced unconditionally below, although
         // some later conditions test it for NULL first.
   620   struct hentry * he = NULL;
   621   int len, i;
   622   char w2[MAXWORDUTF8LEN];
   623   const char * word;
         // when ignored characters are configured, work on a copy of the word
         // with those characters removed
   625   char * ignoredchars = pAMgr->get_ignore();
   626   if (ignoredchars != NULL) {
   627      strcpy(w2, w);
   628      if (utf8) {
   629         int ignoredchars_utf16_len;
   630         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
   631         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
   632      } else {
   633         remove_ignored_chars(w2,ignoredchars);
   634      }
   635      word = w2;
   636   } else word = w;
   638   len = strlen(word);
   640   if (!len)
   641       return NULL;
   643   // word reversing wrapper for complex prefixes
   644   if (complexprefixes) {
   645     if (word != w2) {
   646       strcpy(w2, word);
   647       word = w2;
   648     }
   649     if (utf8) reverseword_utf(w2); else reverseword(w2);
   650   }
   652   // look word in hash table
         // (the body of this loop extends down to the closing brace that
         // follows the homonym filter, despite the indentation)
   653   for (i = 0; (i < maxdic) && !he; i ++) {
   654   he = (pHMgr[i])->lookup(word);
   656   // check forbidden and onlyincompound words
   657   if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
   658     if (info) *info += SPELL_FORBIDDEN;
   659     // LANG_hu section: set dash information for suggestions
   660     if (langnum == LANG_hu) {
   661         if (pAMgr->get_compoundflag() &&
   662             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
   663                 if (info) *info += SPELL_COMPOUND;
   664         }
   665     }
   666     return NULL;
   667   }
   669   // he = next not needaffix, onlyincompound homonym or onlyupcase word
   670   while (he && (he->astr) &&
   671     ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
   672        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
   673        (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
   674     )) he = he->next_homonym;
   675   }
   677   // check with affixes
   678   if (!he && pAMgr) {
   679      // try stripping off affixes */
   680      he = pAMgr->affix_check(word, len, 0);
   682      // check compound restriction and onlyupcase
   683      if (he && he->astr && (
   684         (pAMgr->get_onlyincompound() &&
   685     	    TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
   686         (info && (*info & SPELL_INITCAP) &&
   687     	    TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
   688     	    he = NULL;
   689      }
   691      if (he) {
   692         if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
   693             if (info) *info += SPELL_FORBIDDEN;
   694             return NULL;
   695         }
               // hand a copy of the stem back, reversed again when the lookup
               // ran on a reversed word
   696         if (root) {
   697             *root = mystrdup(he->word);
   698             if (*root && complexprefixes) {
   699                 if (utf8) reverseword_utf(*root); else reverseword(*root);
   700             }
   701         }
   702      // try check compound word
   703      } else if (pAMgr->get_compound()) {
   704           he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
   705           // LANG_hu section: `moving rule' with last dash
   706           if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
                    // retry on a temporary copy without the trailing dash
   707              char * dup = mystrdup(word);
   708              if (!dup) return NULL;
   709              dup[len-1] = '\0';
   710              he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
   711              free(dup);
   712           }
   713           // end of LANG specific region
   714           if (he) {
   715                 if (root) {
   716                     *root = mystrdup(he->word);
   717                     if (*root && complexprefixes) {
   718                         if (utf8) reverseword_utf(*root); else reverseword(*root);
   719                     }
   720                 }
   721                 if (info) *info += SPELL_COMPOUND;
   722           }
   723      }
   725   }
   727   return he;
   728 }
   730 int Hunspell::suggest(char*** slst, const char * word)
   731 {
   732   int onlycmpdsug = 0;
   733   char cw[MAXWORDUTF8LEN];
   734   char wspace[MAXWORDUTF8LEN];
   735   if (!pSMgr || maxdic == 0) return 0;
   736   w_char unicw[MAXWORDLEN];
   737   *slst = NULL;
   738   // process XML input of the simplified API (see manual)
   739   if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
   740      return spellml(slst, word);
   741   }
   742   int nc = strlen(word);
   743   if (utf8) {
   744     if (nc >= MAXWORDUTF8LEN) return 0;
   745   } else {
   746     if (nc >= MAXWORDLEN) return 0;
   747   }
   748   int captype = 0;
   749   int abbv = 0;
   750   int wl = 0;
   752   // input conversion
   753   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   754   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
   755   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
   757   if (wl == 0) return 0;
   758   int ns = 0;
   759   int capwords = 0;
   761   // check capitalized form for FORCEUCASE
   762   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
   763     int info = SPELL_ORIGCAP;
   764     char ** wlst;
   765     if (checkword(cw, &info, NULL)) {
   766         if (*slst) {
   767             wlst = *slst;
   768         } else {
   769             wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
   770             if (wlst == NULL) return -1;
   771             *slst = wlst;
   772             for (int i = 0; i < MAXSUGGESTION; i++) {
   773                 wlst[i] = NULL;
   774             }
   775         }
   776         wlst[0] = mystrdup(cw);
   777         mkinitcap(wlst[0]);
   778         return 1;
   779     }
   780   }
   782   switch(captype) {
   783      case NOCAP:   {
   784                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
   785                      break;
   786                    }
   788      case INITCAP: {
   789                      capwords = 1;
   790                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
   791                      if (ns == -1) break;
   792                      memcpy(wspace,cw,(wl+1));
   793                      mkallsmall2(wspace, unicw, nc);
   794                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   795                      break;
   796                    }
   797      case HUHINITCAP:
   798                     capwords = 1;
   799      case HUHCAP: {
   800                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
   801                      if (ns != -1) {
   802                         int prevns;
   803     		        // something.The -> something. The
   804                         char * dot = strchr(cw, '.');
   805 		        if (dot && (dot > cw)) {
   806 		            int captype_;
   807 		            if (utf8) {
   808 		               w_char w_[MAXWORDLEN];
   809 			       int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
   810 		               captype_ = get_captype_utf8(w_, wl_, langnum);
   811 		            } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
   812 		    	    if (captype_ == INITCAP) {
   813                         	char * st = mystrdup(cw);
   814                         	if (st) st = (char *) realloc(st, wl + 2);
   815 				if (st) {
   816                         		st[(dot - cw) + 1] = ' ';
   817                         		strcpy(st + (dot - cw) + 2, dot + 1);
   818                     			ns = insert_sug(slst, st, ns);
   819 					free(st);
   820 				}
   821 		    	    }
   822 		        }
   823                         if (captype == HUHINITCAP) {
   824                             // TheOpenOffice.org -> The OpenOffice.org
   825                             memcpy(wspace,cw,(wl+1));
   826                             mkinitsmall2(wspace, unicw, nc);
   827                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   828                         }
   829                         memcpy(wspace,cw,(wl+1));
   830                         mkallsmall2(wspace, unicw, nc);
   831                         if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
   832                         prevns = ns;
   833                         ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   834                         if (captype == HUHINITCAP) {
   835                             mkinitcap2(wspace, unicw, nc);
   836                             if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
   837                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   838                         }
   839                         // aNew -> "a New" (instead of "a new")
   840                         for (int j = prevns; j < ns; j++) {
   841                            char * space = strchr((*slst)[j],' ');
   842                            if (space) {
   843                                 int slen = strlen(space + 1);
   844                                 // different case after space (need capitalisation)
   845                                 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
   846                                     w_char w[MAXWORDLEN];
   847                                     int wc = 0;
   848                                     char * r = (*slst)[j];
   849                                     if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
   850                                     mkinitcap2(space + 1, w, wc);
   851                                     // set as first suggestion
   852                                     for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
   853                                     (*slst)[0] = r;
   854                                 }
   855                            }
   856                         }
   857                      }
   858                      break;
   859                    }
   861      case ALLCAP: {
   862                      memcpy(wspace, cw, (wl+1));
   863                      mkallsmall2(wspace, unicw, nc);
   864                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   865                      if (ns == -1) break;
   866                      if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
   867                         ns = insert_sug(slst, wspace, ns);
   868                      mkinitcap2(wspace, unicw, nc);
   869                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
   870                      for (int j=0; j < ns; j++) {
   871                         mkallcap((*slst)[j]);
   872                         if (pAMgr && pAMgr->get_checksharps()) {
   873                             char * pos;
   874                             if (utf8) {
   875                                 pos = strstr((*slst)[j], "\xC3\x9F");
   876                                 while (pos) {
   877                                     *pos = 'S';
   878                                     *(pos+1) = 'S';
   879                                     pos = strstr(pos+2, "\xC3\x9F");
   880                                 }
   881                             } else {
   882                                 pos = strchr((*slst)[j], '\xDF');
   883                                 while (pos) {
   884                                     (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
   885                                     mystrrep((*slst)[j], "\xDF", "SS");
   886                                     pos = strchr((*slst)[j], '\xDF');
   887                                 }
   888                             }
   889                         }
   890                      }
   891                      break;
   892                    }
   893   }
   895  // LANG_hu section: replace '-' with ' ' in Hungarian
   896   if (langnum == LANG_hu) {
   897       for (int j=0; j < ns; j++) {
   898           char * pos = strchr((*slst)[j],'-');
   899           if (pos) {
   900               int info;
   901               char w[MAXWORDUTF8LEN];
   902               *pos = '\0';
   903               strcpy(w, (*slst)[j]);
   904               strcat(w, pos + 1);
   905               spell(w, &info, NULL);
   906               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
   907                   *pos = ' ';
   908               } else *pos = '-';
   909           }
   910       }
   911   }
   912   // END OF LANG_hu section
   914   // try ngram approach since found nothing or only compound words
   915   if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
   916       switch(captype) {
   917           case NOCAP: {
   918               ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
   919               break;
   920           }
   921 	  case HUHINITCAP:
   922               capwords = 1;
   923           case HUHCAP: {
   924               memcpy(wspace,cw,(wl+1));
   925               mkallsmall2(wspace, unicw, nc);
   926               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
   927 	      break;
   928           }
   929          case INITCAP: {
   930               capwords = 1;
   931               memcpy(wspace,cw,(wl+1));
   932               mkallsmall2(wspace, unicw, nc);
   933               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
   934               break;
   935           }
   936           case ALLCAP: {
   937               memcpy(wspace,cw,(wl+1));
   938               mkallsmall2(wspace, unicw, nc);
   939 	      int oldns = ns;
   940               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
   941               for (int j = oldns; j < ns; j++)
   942                   mkallcap((*slst)[j]);
   943               break;
   944          }
   945       }
   946   }
   948   // try dash suggestion (Afo-American -> Afro-American)
   949   if (char * pos = strchr(cw, '-')) {
   950      char * ppos = cw;
   951      int nodashsug = 1;
   952      char ** nlst = NULL;
   953      int nn = 0;
   954      int last = 0;
   955      if (*slst) {
   956         for (int j = 0; j < ns && nodashsug == 1; j++) {
   957            if (strchr((*slst)[j], '-')) nodashsug = 0;
   958         }
   959      }
   960      while (nodashsug && !last) {
   961 	if (*pos == '\0') last = 1; else *pos = '\0';
   962         if (!spell(ppos)) {
   963           nn = suggest(&nlst, ppos);
   964           for (int j = nn - 1; j >= 0; j--) {
   965             strncpy(wspace, cw, ppos - cw);
   966             strcpy(wspace + (ppos - cw), nlst[j]);
   967             if (!last) {
   968             	strcat(wspace, "-");
   969 		strcat(wspace, pos + 1);
   970 	    }
   971             ns = insert_sug(slst, wspace, ns);
   972             free(nlst[j]);
   973           }
   974           if (nlst != NULL) free(nlst);
   975           nodashsug = 0;
   976         }
   977 	if (!last) {
   978           *pos = '-';
   979           ppos = pos + 1;
   980           pos = strchr(ppos, '-');
   981         }
   982 	if (!pos) pos = cw + strlen(cw);
   983      }
   984   }
   986   // word reversing wrapper for complex prefixes
   987   if (complexprefixes) {
   988     for (int j = 0; j < ns; j++) {
   989       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
   990     }
   991   }
   993   // capitalize
   994   if (capwords) for (int j=0; j < ns; j++) {
   995       mkinitcap((*slst)[j]);
   996   }
   998   // expand suggestions with dot(s)
   999   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
  1000     for (int j = 0; j < ns; j++) {
  1001       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
  1002       strcat((*slst)[j], word + strlen(word) - abbv);
  1006   // remove bad capitalized and forbidden forms
  1007   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
  1008   switch (captype) {
  1009     case INITCAP:
  1010     case ALLCAP: {
  1011       int l = 0;
  1012       for (int j=0; j < ns; j++) {
  1013         if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
  1014           char s[MAXSWUTF8L];
  1015           w_char w[MAXSWL];
  1016           int len;
  1017           if (utf8) {
  1018             len = u8_u16(w, MAXSWL, (*slst)[j]);
  1019           } else {
  1020             strcpy(s, (*slst)[j]);
  1021             len = strlen(s);
  1023           mkallsmall2(s, w, len);
  1024           free((*slst)[j]);
  1025           if (spell(s)) {
  1026             (*slst)[l] = mystrdup(s);
  1027             if ((*slst)[l]) l++;
  1028           } else {
  1029             mkinitcap2(s, w, len);
  1030             if (spell(s)) {
  1031               (*slst)[l] = mystrdup(s);
  1032               if ((*slst)[l]) l++;
  1035         } else {
  1036           (*slst)[l] = (*slst)[j];
  1037           l++;
  1040       ns = l;
  1045   // remove duplications
  1046   int l = 0;
  1047   for (int j = 0; j < ns; j++) {
  1048     (*slst)[l] = (*slst)[j];
  1049     for (int k = 0; k < l; k++) {
  1050       if (strcmp((*slst)[k], (*slst)[j]) == 0) {
  1051         free((*slst)[j]);
  1052         l--;
  1053         break;
  1056     l++;
  1058   ns = l;
  1060   // output conversion
  1061   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
  1062   for (int j = 0; rl && j < ns; j++) {
  1063     if (rl->conv((*slst)[j], wspace)) {
  1064       free((*slst)[j]);
  1065       (*slst)[j] = mystrdup(wspace);
  1069   // if suggestions removed by nosuggest, onlyincompound parameters
  1070   if (l == 0 && *slst) {
  1071     free(*slst);
  1072     *slst = NULL;
  1074   return l;
  1077 void Hunspell::free_list(char *** slst, int n) {
  1078         freelist(slst, n);
  1081 char * Hunspell::get_dic_encoding()
  1083   return encoding;
  1086 #ifdef HUNSPELL_EXPERIMENTAL
  1087 // XXX need UTF-8 support
  1088 int Hunspell::suggest_auto(char*** slst, const char * word)
  1090   char cw[MAXWORDUTF8LEN];
  1091   char wspace[MAXWORDUTF8LEN];
  1092   if (!pSMgr || maxdic == 0) return 0;
  1093   int wl = strlen(word);
  1094   if (utf8) {
  1095     if (wl >= MAXWORDUTF8LEN) return 0;
  1096   } else {
  1097     if (wl >= MAXWORDLEN) return 0;
  1099   int captype = 0;
  1100   int abbv = 0;
  1101   wl = cleanword(cw, word, &captype, &abbv);
  1102   if (wl == 0) return 0;
  1103   int ns = 0;
  1104   *slst = NULL; // HU, nsug in pSMgr->suggest
  1106   switch(captype) {
  1107      case NOCAP:   {
  1108                      ns = pSMgr->suggest_auto(slst, cw, ns);
  1109                      if (ns>0) break;
  1110                      break;
  1113      case INITCAP: {
  1114                      memcpy(wspace,cw,(wl+1));
  1115                      mkallsmall(wspace);
  1116                      ns = pSMgr->suggest_auto(slst, wspace, ns);
  1117                      for (int j=0; j < ns; j++)
  1118                        mkinitcap((*slst)[j]);
  1119                      ns = pSMgr->suggest_auto(slst, cw, ns);
  1120                      break;
  1124      case HUHINITCAP:
  1125      case HUHCAP: {
  1126                      ns = pSMgr->suggest_auto(slst, cw, ns);
  1127                      if (ns == 0) {
  1128                         memcpy(wspace,cw,(wl+1));
  1129                         mkallsmall(wspace);
  1130                         ns = pSMgr->suggest_auto(slst, wspace, ns);
  1132                      break;
  1135      case ALLCAP: {
  1136                      memcpy(wspace,cw,(wl+1));
  1137                      mkallsmall(wspace);
  1138                      ns = pSMgr->suggest_auto(slst, wspace, ns);
  1140                      mkinitcap(wspace);
  1141                      ns = pSMgr->suggest_auto(slst, wspace, ns);
  1143                      for (int j=0; j < ns; j++)
  1144                        mkallcap((*slst)[j]);
  1145                      break;
  1149   // word reversing wrapper for complex prefixes
  1150   if (complexprefixes) {
  1151     for (int j = 0; j < ns; j++) {
  1152       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
  1156   // expand suggestions with dot(s)
  1157   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
  1158     for (int j = 0; j < ns; j++) {
  1159       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
  1160       strcat((*slst)[j], word + strlen(word) - abbv);
  1164   // LANG_hu section: replace '-' with ' ' in Hungarian
  1165   if (langnum == LANG_hu) {
  1166       for (int j=0; j < ns; j++) {
  1167           char * pos = strchr((*slst)[j],'-');
  1168           if (pos) {
  1169               int info;
  1170               char w[MAXWORDUTF8LEN];
  1171               *pos = '\0';
  1172               strcpy(w, (*slst)[j]);
  1173               strcat(w, pos + 1);
  1174               spell(w, &info, NULL);
  1175               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
  1176                   *pos = ' ';
  1177               } else *pos = '-';
  1181   // END OF LANG_hu section
  1182   return ns;
  1184 #endif
  1186 int Hunspell::stem(char*** slst, char ** desc, int n)
  1188   char result[MAXLNLEN];
  1189   char result2[MAXLNLEN];
  1190   *slst = NULL;
  1191   if (n == 0) return 0;
  1192   *result2 = '\0';
  1193   for (int i = 0; i < n; i++) {
  1194     *result = '\0';
  1195     // add compound word parts (except the last one)
  1196     char * s = (char *) desc[i];
  1197     char * part = strstr(s, MORPH_PART);
  1198     if (part) {
  1199         char * nextpart = strstr(part + 1, MORPH_PART);
  1200         while (nextpart) {
  1201             copy_field(result + strlen(result), part, MORPH_PART);
  1202             part = nextpart;
  1203             nextpart = strstr(part + 1, MORPH_PART);
  1205         s = part;
  1208     char **pl;
  1209     char tok[MAXLNLEN];
  1210     strcpy(tok, s);
  1211     char * alt = strstr(tok, " | ");
  1212     while (alt) {
  1213         alt[1] = MSEP_ALT;
  1214         alt = strstr(alt, " | ");
  1216     int pln = line_tok(tok, &pl, MSEP_ALT);
  1217     for (int k = 0; k < pln; k++) {
  1218         // add derivational suffixes
  1219         if (strstr(pl[k], MORPH_DERI_SFX)) {
  1220             // remove inflectional suffixes
  1221             char * is = strstr(pl[k], MORPH_INFL_SFX);
  1222             if (is) *is = '\0';
  1223             char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
  1224             if (sg) {
  1225                 char ** gen;
  1226                 int genl = line_tok(sg, &gen, MSEP_REC);
  1227                 free(sg);
  1228                 for (int j = 0; j < genl; j++) {
  1229                     sprintf(result2 + strlen(result2), "%c%s%s",
  1230                             MSEP_REC, result, gen[j]);
  1232                 freelist(&gen, genl);
  1234         } else {
  1235             sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
  1236             if (strstr(pl[k], MORPH_SURF_PFX)) {
  1237                 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
  1239             copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
  1242     freelist(&pl, pln);
  1244   int sln = line_tok(result2, slst, MSEP_REC);
  1245   return uniqlist(*slst, sln);
  1249 int Hunspell::stem(char*** slst, const char * word)
  1251   char ** pl;
  1252   int pln = analyze(&pl, word);
  1253   int pln2 = stem(slst, pl, pln);
  1254   freelist(&pl, pln);
  1255   return pln2;
  1258 #ifdef HUNSPELL_EXPERIMENTAL
  1259 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
  1261   char cw[MAXWORDUTF8LEN];
  1262   char wspace[MAXWORDUTF8LEN];
  1263   if (! pSMgr || maxdic == 0) return 0;
  1264   int wl = strlen(word);
  1265   if (utf8) {
  1266     if (wl >= MAXWORDUTF8LEN) return 0;
  1267   } else {
  1268     if (wl >= MAXWORDLEN) return 0;
  1270   int captype = 0;
  1271   int abbv = 0;
  1272   wl = cleanword(cw, word, &captype, &abbv);
  1273   if (wl == 0) return 0;
  1275   int ns = 0; // ns=0 = normalized input
  1277   *slst = NULL; // HU, nsug in pSMgr->suggest
  1279   switch(captype) {
  1280      case HUHCAP:
  1281      case NOCAP:   {
  1282                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1284                      if ((abbv) && (ns == 0)) {
  1285                          memcpy(wspace,cw,wl);
  1286                          *(wspace+wl) = '.';
  1287                          *(wspace+wl+1) = '\0';
  1288                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1291                      break;
  1294      case INITCAP: {
  1296                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1298                      if (ns == 0 || ((*slst)[0][0] == '#')) {
  1299                         memcpy(wspace,cw,(wl+1));
  1300                         mkallsmall(wspace);
  1301                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1304                      break;
  1308      case ALLCAP: {
  1309                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
  1310                      if (ns != 0) break;
  1312                      memcpy(wspace,cw,(wl+1));
  1313                      mkallsmall(wspace);
  1314                      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1316                      if (ns == 0) {
  1317                          mkinitcap(wspace);
  1318                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
  1320                      break;
  1324   return ns;
  1326 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
  1328 const char * Hunspell::get_wordchars()
  1330   return pAMgr->get_wordchars();
  1333 unsigned short * Hunspell::get_wordchars_utf16(int * len)
  1335   return pAMgr->get_wordchars_utf16(len);
  1338 void Hunspell::mkinitcap(char * p)
  1340   if (!utf8) {
  1341     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
  1342   } else {
  1343       int len;
  1344       w_char u[MAXWORDLEN];
  1345       len = u8_u16(u, MAXWORDLEN, p);
  1346       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
  1347       u[0].h = (unsigned char) (i >> 8);
  1348       u[0].l = (unsigned char) (i & 0x00FF);
  1349       u16_u8(p, MAXWORDUTF8LEN, u, len);
  1353 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
  1355   if (!utf8) {
  1356     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
  1357   } else if (nc > 0) {
  1358       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
  1359       u[0].h = (unsigned char) (i >> 8);
  1360       u[0].l = (unsigned char) (i & 0x00FF);
  1361       u16_u8(p, MAXWORDUTF8LEN, u, nc);
  1362       return strlen(p);
  1364   return nc;
  1367 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
  1369   if (!utf8) {
  1370     if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
  1371   } else if (nc > 0) {
  1372       unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
  1373       u[0].h = (unsigned char) (i >> 8);
  1374       u[0].l = (unsigned char) (i & 0x00FF);
  1375       u16_u8(p, MAXWORDUTF8LEN, u, nc);
  1376       return strlen(p);
  1378   return nc;
  1381 int Hunspell::add(const char * word)
  1383     if (pHMgr[0]) return (pHMgr[0])->add(word);
  1384     return 0;
  1387 int Hunspell::add_with_affix(const char * word, const char * example)
  1389     if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
  1390     return 0;
  1393 int Hunspell::remove(const char * word)
  1395     if (pHMgr[0]) return (pHMgr[0])->remove(word);
  1396     return 0;
  1399 const char * Hunspell::get_version()
  1401   return pAMgr->get_version();
  1404 struct cs_info * Hunspell::get_csconv()
  1406   return csconv;
  1409 void Hunspell::cat_result(char * result, char * st)
  1411     if (st) {
  1412         if (*result) mystrcat(result, "\n", MAXLNLEN);
  1413         mystrcat(result, st, MAXLNLEN);
  1414         free(st);
  1418 int Hunspell::analyze(char*** slst, const char * word)
  1420   char cw[MAXWORDUTF8LEN];
  1421   char wspace[MAXWORDUTF8LEN];
  1422   w_char unicw[MAXWORDLEN];
  1423   int wl2 = 0;
  1424   *slst = NULL;
  1425   if (! pSMgr || maxdic == 0) return 0;
  1426   int nc = strlen(word);
  1427   if (utf8) {
  1428     if (nc >= MAXWORDUTF8LEN) return 0;
  1429   } else {
  1430     if (nc >= MAXWORDLEN) return 0;
  1432   int captype = 0;
  1433   int abbv = 0;
  1434   int wl = 0;
  1436   // input conversion
  1437   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
  1438   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
  1439   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
  1441   if (wl == 0) {
  1442       if (abbv) {
  1443           for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
  1444           cw[wl] = '\0';
  1445           abbv = 0;
  1446       } else return 0;
  1449   char result[MAXLNLEN];
  1450   char * st = NULL;
  1452   *result = '\0';
  1454   int n = 0;
  1455   int n2 = 0;
  1456   int n3 = 0;
  1458   // test numbers
  1459   // LANG_hu section: set dash information for suggestions
  1460   if (langnum == LANG_hu) {
  1461   while ((n < wl) &&
  1462         (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
  1463         n++;
  1464         if ((cw[n] == '.') || (cw[n] == ',')) {
  1465                 if (((n2 == 0) && (n > 3)) ||
  1466                         ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
  1467                 n2++;
  1468                 n3 = n;
  1472   if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
  1473   if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
  1474         mystrcat(result, cw, MAXLNLEN);
  1475         result[n - 1] = '\0';
  1476         if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
  1477         else {
  1478                 char sign = cw[n];
  1479                 cw[n] = '\0';
  1480                 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
  1481                 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
  1482                 cw[n] = sign;
  1483                 cat_result(result, pSMgr->suggest_morph(cw + n));
  1485         return line_tok(result, slst, MSEP_REC);
  1488   // END OF LANG_hu section
  1490   switch(captype) {
  1491      case HUHCAP:
  1492      case HUHINITCAP:
  1493      case NOCAP:  {
  1494                     cat_result(result, pSMgr->suggest_morph(cw));
  1495                     if (abbv) {
  1496                         memcpy(wspace,cw,wl);
  1497                         *(wspace+wl) = '.';
  1498                         *(wspace+wl+1) = '\0';
  1499                         cat_result(result, pSMgr->suggest_morph(wspace));
  1501                     break;
  1503      case INITCAP: {
  1504                      wl = mkallsmall2(cw, unicw, nc);
  1505                      memcpy(wspace,cw,(wl+1));
  1506                      wl2 = mkinitcap2(cw, unicw, nc);
  1507                      cat_result(result, pSMgr->suggest_morph(wspace));
  1508                      cat_result(result, pSMgr->suggest_morph(cw));
  1509                      if (abbv) {
  1510                          *(wspace+wl) = '.';
  1511                          *(wspace+wl+1) = '\0';
  1512                          cat_result(result, pSMgr->suggest_morph(wspace));
  1514                          memcpy(wspace, cw, wl2);
  1515                          *(wspace+wl2) = '.';
  1516                          *(wspace+wl2+1) = '\0';
  1518                          cat_result(result, pSMgr->suggest_morph(wspace));
  1520                      break;
  1522      case ALLCAP: {
  1523                      cat_result(result, pSMgr->suggest_morph(cw));
  1524                      if (abbv) {
  1525                          memcpy(wspace,cw,wl);
  1526                          *(wspace+wl) = '.';
  1527                          *(wspace+wl+1) = '\0';
  1528                          cat_result(result, pSMgr->suggest_morph(cw));
  1530                      wl = mkallsmall2(cw, unicw, nc);
  1531                      memcpy(wspace,cw,(wl+1));
  1532                      wl2 = mkinitcap2(cw, unicw, nc);
  1534                      cat_result(result, pSMgr->suggest_morph(wspace));
  1535                      cat_result(result, pSMgr->suggest_morph(cw));
  1536                      if (abbv) {
  1537                          *(wspace+wl) = '.';
  1538                          *(wspace+wl+1) = '\0';
  1539                          cat_result(result, pSMgr->suggest_morph(wspace));
  1541                          memcpy(wspace, cw, wl2);
  1542                          *(wspace+wl2) = '.';
  1543                          *(wspace+wl2+1) = '\0';
  1545                          cat_result(result, pSMgr->suggest_morph(wspace));
  1547                      break;
  1551   if (*result) {
  1552     // word reversing wrapper for complex prefixes
  1553     if (complexprefixes) {
  1554       if (utf8) reverseword_utf(result); else reverseword(result);
  1556     return line_tok(result, slst, MSEP_REC);
  1559   // compound word with dash (HU) I18n
  1560   char * dash = NULL;
  1561   int nresult = 0;
  1562   // LANG_hu section: set dash information for suggestions
  1563   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
  1564   if ((langnum == LANG_hu) && dash) {
  1565       *dash='\0';
  1566       // examine 2 sides of the dash
  1567       if (dash[1] == '\0') { // base word ending with dash
  1568         if (spell(cw)) {
  1569 		char * p = pSMgr->suggest_morph(cw);
  1570 		if (p) {
  1571 		    int ret = line_tok(p, slst, MSEP_REC);
  1572 		    free(p);
  1573 		    return ret;
  1577       } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
  1578         if (spell(cw) && (spell("-e"))) {
  1579                         st = pSMgr->suggest_morph(cw);
  1580                         if (st) {
  1581                                 mystrcat(result, st, MAXLNLEN);
  1582                                 free(st);
  1584                         mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
  1585                         st = pSMgr->suggest_morph("-e");
  1586                         if (st) {
  1587                                 mystrcat(result, st, MAXLNLEN);
  1588                                 free(st);
  1590                         return line_tok(result, slst, MSEP_REC);
  1592       } else {
  1593       // first word ending with dash: word- XXX ???
  1594         char r2 = *(dash + 1);
  1595         dash[0]='-';
  1596         dash[1]='\0';
  1597         nresult = spell(cw);
  1598         dash[1] = r2;
  1599         dash[0]='\0';
  1600         if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
  1601                 ((dash[1] > '0') && (dash[1] < '9')))) {
  1602                             st = pSMgr->suggest_morph(cw);
  1603                             if (st) {
  1604                                 mystrcat(result, st, MAXLNLEN);
  1605                                     free(st);
  1606                                 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
  1608                             st = pSMgr->suggest_morph(dash+1);
  1609                             if (st) {
  1610                                     mystrcat(result, st, MAXLNLEN);
  1611                                     free(st);
  1613                             return line_tok(result, slst, MSEP_REC);
  1616       // affixed number in correct word
  1617      if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
  1618                         (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
  1619          *dash='-';
  1620          n = 1;
  1621          if (*(dash - n) == '.') n++;
  1622          // search first not a number character to left from dash
  1623          while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
  1624             n++;
  1626          if ((dash - n) < cw) n--;
  1627          // numbers: valami1000000-hoz
  1628          // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
  1629          // 56-hoz, 6-hoz
  1630          for(; n >= 1; n--) {
  1631             if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
  1632                     mystrcat(result, cw, MAXLNLEN);
  1633                     result[dash - cw - n] = '\0';
  1634                         st = pSMgr->suggest_morph(dash - n);
  1635                         if (st) {
  1636                         mystrcat(result, st, MAXLNLEN);
  1637                                 free(st);
  1639                         return line_tok(result, slst, MSEP_REC);
  1644   return 0;
  1647 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
  1649   *slst = NULL;
  1650   if (!pSMgr || !pln) return 0;
  1651   char **pl2;
  1652   int pl2n = analyze(&pl2, word);
  1653   int captype = 0;
  1654   int abbv = 0;
  1655   char cw[MAXWORDUTF8LEN];
  1656   cleanword(cw, word, &captype, &abbv);
  1657   char result[MAXLNLEN];
  1658   *result = '\0';
  1660   for (int i = 0; i < pln; i++) {
  1661     cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
  1663   freelist(&pl2, pl2n);
  1665   if (*result) {
  1666     // allcap
  1667     if (captype == ALLCAP) mkallcap(result);
  1669     // line split
  1670     int linenum = line_tok(result, slst, MSEP_REC);
  1672     // capitalize
  1673     if (captype == INITCAP || captype == HUHINITCAP) {
  1674         for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
  1677     // temporary filtering of prefix related errors (eg.
  1678     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
  1680     int r = 0;
  1681     for (int j=0; j < linenum; j++) {
  1682         if (!spell((*slst)[j])) {
  1683             free((*slst)[j]);
  1684             (*slst)[j] = NULL;
  1685         } else {
  1686             if (r < j) (*slst)[r] = (*slst)[j];
  1687             r++;
  1690     if (r > 0) return r;
  1691     free(*slst);
  1692     *slst = NULL;
  1694   return 0;
  1697 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
  1699   char **pl;
  1700   int pln = analyze(&pl, pattern);
  1701   int n = generate(slst, word, pl, pln);
  1702   freelist(&pl, pln);
  1703   return uniqlist(*slst, n);
  1706 // minimal XML parser functions
  1707 int Hunspell::get_xml_par(char * dest, const char * par, int max)
  1709    char * d = dest;
  1710    if (!par) return 0;
  1711    char end = *par;
  1712    char * dmax = dest + max;
  1713    if (end == '>') end = '<';
  1714    else if (end != '\'' && end != '"') return 0; // bad XML
  1715    for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
  1716    *d = '\0';
  1717    mystrrep(dest, "&lt;", "<");
  1718    mystrrep(dest, "&amp;", "&");
  1719    return (int)(d - dest);
  1722 int Hunspell::get_langnum() const
  1724    return langnum;
  1727 // return the beginning of the element (attr == NULL) or the attribute
  1728 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
  1730   const char * end = strchr(s, '>');
  1731   const char * p = s;
  1732   if (attr == NULL) return end;
  1733   do {
  1734     p = strstr(p, attr);
  1735     if (!p || p >= end) return 0;
  1736   } while (*(p-1) != ' ' &&  *(p-1) != '\n');
  1737   return p + strlen(attr);
  1740 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
  1741   char cw[MAXWORDUTF8LEN];
  1742   if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
  1743     strcmp(cw, value) == 0) return 1;
  1744   return 0;
  1747 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
  1748     int n = 0;
  1749     char * p;
  1750     if (!list) return 0;
  1751     for (p = list; (p = strstr(p, tag)); p++) n++;
  1752     if (n == 0) return 0;
  1753     *slst = (char **) malloc(sizeof(char *) * n);
  1754     if (!*slst) return 0;
  1755     for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
  1756         int l = strlen(p);
  1757         (*slst)[n] = (char *) malloc(l + 1);
  1758         if (!(*slst)[n]) return n;
  1759         if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
  1760             free((*slst)[n]);
  1761             break;
  1764     return n;
  1767 int Hunspell::spellml(char*** slst, const char * word)
  1769   char *q, *q2;
  1770   char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
  1771   q = (char *) strstr(word, "<query");
  1772   if (!q) return 0; // bad XML input
  1773   q2 = strchr(q, '>');
  1774   if (!q2) return 0; // bad XML input
  1775   q2 = strstr(q2, "<word");
  1776   if (!q2) return 0; // bad XML input
  1777   if (check_xml_par(q, "type=", "analyze")) {
  1778       int n = 0, s = 0;
  1779       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
  1780       if (n == 0) return 0;
  1781       // convert the result to <code><a>ana1</a><a>ana2</a></code> format
  1782       for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
  1783       char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
  1784       if (!r) return 0;
  1785       strcpy(r, "<code>");
  1786       for (int i = 0; i < n; i++) {
  1787         int l = strlen(r);
  1788         strcpy(r + l, "<a>");
  1789         strcpy(r + l + 3, (*slst)[i]);
  1790         mystrrep(r + l + 3, "\t", " ");
  1791         mystrrep(r + l + 3, "<", "&lt;");
  1792         mystrrep(r + l + 3, "&", "&amp;");
  1793         strcat(r, "</a>");
  1794         free((*slst)[i]);
  1796       strcat(r, "</code>");
  1797       (*slst)[0] = r;
  1798       return 1;
  1799   } else if (check_xml_par(q, "type=", "stem")) {
  1800       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
  1801   } else if (check_xml_par(q, "type=", "generate")) {
  1802       int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
  1803       if (n == 0) return 0;
  1804       char * q3 = strstr(q2 + 1, "<word");
  1805       if (q3) {
  1806         if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
  1807             return generate(slst, cw, cw2);
  1809       } else {
  1810         if ((q2 = strstr(q2 + 1, "<code"))) {
  1811           char ** slst2;
  1812           if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
  1813             int n2 = generate(slst, cw, slst2, n);
  1814             freelist(&slst2, n);
  1815             return uniqlist(*slst, n2);
  1817           freelist(&slst2, n);
  1821   return 0;
  1825 #ifdef HUNSPELL_EXPERIMENTAL
  1826 // XXX need UTF-8 support
  1827 char * Hunspell::morph_with_correction(const char * word)
  1829   char cw[MAXWORDUTF8LEN];
  1830   char wspace[MAXWORDUTF8LEN];
  1831   if (! pSMgr || maxdic == 0) return NULL;
  1832   int wl = strlen(word);
  1833   if (utf8) {
  1834     if (wl >= MAXWORDUTF8LEN) return NULL;
  1835   } else {
  1836     if (wl >= MAXWORDLEN) return NULL;
  1838   int captype = 0;
  1839   int abbv = 0;
  1840   wl = cleanword(cw, word, &captype, &abbv);
  1841   if (wl == 0) return NULL;
  1843   char result[MAXLNLEN];
  1844   char * st = NULL;
  1846   *result = '\0';
  1849   switch(captype) {
  1850      case NOCAP:   {
  1851                      st = pSMgr->suggest_morph_for_spelling_error(cw);
  1852                      if (st) {
  1853                         mystrcat(result, st, MAXLNLEN);
  1854                         free(st);
  1856                      if (abbv) {
  1857                          memcpy(wspace,cw,wl);
  1858                          *(wspace+wl) = '.';
  1859                          *(wspace+wl+1) = '\0';
  1860                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1861                          if (st) {
  1862                             if (*result) mystrcat(result, "\n", MAXLNLEN);
  1863                             mystrcat(result, st, MAXLNLEN);
  1864                             free(st);
  1867                                          break;
  1869      case INITCAP: {
  1870                      memcpy(wspace,cw,(wl+1));
  1871                      mkallsmall(wspace);
  1872                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1873                      if (st) {
  1874                         mystrcat(result, st, MAXLNLEN);
  1875                         free(st);
  1877                      st = pSMgr->suggest_morph_for_spelling_error(cw);
  1878                      if (st) {
  1879                         if (*result) mystrcat(result, "\n", MAXLNLEN);
  1880                         mystrcat(result, st, MAXLNLEN);
  1881                         free(st);
  1883                      if (abbv) {
  1884                          memcpy(wspace,cw,wl);
  1885                          *(wspace+wl) = '.';
  1886                          *(wspace+wl+1) = '\0';
  1887                          mkallsmall(wspace);
  1888                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1889                          if (st) {
  1890                             if (*result) mystrcat(result, "\n", MAXLNLEN);
  1891                             mystrcat(result, st, MAXLNLEN);
  1892                             free(st);
  1894                          mkinitcap(wspace);
  1895                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1896                          if (st) {
  1897                             if (*result) mystrcat(result, "\n", MAXLNLEN);
  1898                             mystrcat(result, st, MAXLNLEN);
  1899                             free(st);
  1902                      break;
  1904      case HUHCAP: {
  1905                      st = pSMgr->suggest_morph_for_spelling_error(cw);
  1906                      if (st) {
  1907                         mystrcat(result, st, MAXLNLEN);
  1908                         free(st);
  1910                      memcpy(wspace,cw,(wl+1));
  1911                      mkallsmall(wspace);
  1912                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1913                      if (st) {
  1914                         if (*result) mystrcat(result, "\n", MAXLNLEN);
  1915                         mystrcat(result, st, MAXLNLEN);
  1916                         free(st);
  1918                      break;
  1920      case ALLCAP: {
  1921                      memcpy(wspace,cw,(wl+1));
  1922                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1923                      if (st) {
  1924                         mystrcat(result, st, MAXLNLEN);
  1925                         free(st);
  1927                      mkallsmall(wspace);
  1928                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1929                      if (st) {
  1930                         if (*result) mystrcat(result, "\n", MAXLNLEN);
  1931                         mystrcat(result, st, MAXLNLEN);
  1932                         free(st);
  1934                      mkinitcap(wspace);
  1935                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1936                      if (st) {
  1937                         if (*result) mystrcat(result, "\n", MAXLNLEN);
  1938                         mystrcat(result, st, MAXLNLEN);
  1939                         free(st);
  1941                      if (abbv) {
  1942                         memcpy(wspace,cw,(wl+1));
  1943                         *(wspace+wl) = '.';
  1944                         *(wspace+wl+1) = '\0';
  1945                         if (*result) mystrcat(result, "\n", MAXLNLEN);
  1946                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1947                         if (st) {
  1948                             mystrcat(result, st, MAXLNLEN);
  1949                             free(st);
  1951                         mkallsmall(wspace);
  1952                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1953                         if (st) {
  1954                           if (*result) mystrcat(result, "\n", MAXLNLEN);
  1955                           mystrcat(result, st, MAXLNLEN);
  1956                           free(st);
  1958                         mkinitcap(wspace);
  1959                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
  1960                         if (st) {
  1961                           if (*result) mystrcat(result, "\n", MAXLNLEN);
  1962                           mystrcat(result, st, MAXLNLEN);
  1963                           free(st);
  1966                      break;
  1970   if (*result) return mystrdup(result);
  1971   return NULL;
  1974 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
  1976 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
  1978         return (Hunhandle*)(new Hunspell(affpath, dpath));
  1981 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
  1982     const char * key)
  1984         return (Hunhandle*)(new Hunspell(affpath, dpath, key));
  1987 void Hunspell_destroy(Hunhandle *pHunspell)
  1989         delete (Hunspell*)(pHunspell);
  1992 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
  1994         return ((Hunspell*)pHunspell)->spell(word);
  1997 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
  1999         return ((Hunspell*)pHunspell)->get_dic_encoding();
  2002 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
  2004         return ((Hunspell*)pHunspell)->suggest(slst, word);
  2007 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
  2009         return ((Hunspell*)pHunspell)->analyze(slst, word);
  2012 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
  2014         return ((Hunspell*)pHunspell)->stem(slst, word);
  2017 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
  2019         return ((Hunspell*)pHunspell)->stem(slst, desc, n);
  2022 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
  2023     const char * word2)
  2025         return ((Hunspell*)pHunspell)->generate(slst, word, word2);
  2028 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
  2029     char** desc, int n)
  2031         return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
  2034   /* functions for run-time modification of the dictionary */
  2036   /* add word to the run-time dictionary */
  2038 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
  2039         return ((Hunspell*)pHunspell)->add(word);
  2042   /* add word to the run-time dictionary with affix flags of
  2043    * the example (a dictionary word): Hunspell will recognize
  2044    * affixed forms of the new word, too.
  2045    */
  2047 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
  2048         const char * example) {
  2049         return ((Hunspell*)pHunspell)->add_with_affix(word, example);
  2052   /* remove word from the run-time dictionary */
  2054 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
  2055         return ((Hunspell*)pHunspell)->remove(word);
  2058 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
  2059         freelist(slst, n);

mercurial