The Tor Browser: extensions/spellcheck/hunspell/src/hunspell.cpp@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /******* BEGIN LICENSE BLOCK *******

     2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1

     3  *

     4  * The contents of this file are subject to the Mozilla Public License Version

     5  * 1.1 (the "License"); you may not use this file except in compliance with

     6  * the License. You may obtain a copy of the License at

     7  * http://www.mozilla.org/MPL/

     8  *

     9  * Software distributed under the License is distributed on an "AS IS" basis,

    10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License

    11  * for the specific language governing rights and limitations under the

    12  * License.

    13  *

    14  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)

    15  * and László Németh (Hunspell). Portions created by the Initial Developers

    16  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.

    17  *

    18  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)

    19  *                 David Einstein (deinst@world.std.com)

    20  *                 László Németh (nemethl@gyorsposta.hu)

    21  *                 Caolan McNamara (caolanm@redhat.com)

    22  *                 Davide Prina

    23  *                 Giuseppe Modugno

    24  *                 Gianluca Turconi

    25  *                 Simon Brouwer

    26  *                 Noll Janos

    27  *                 Biro Arpad

    28  *                 Goldman Eleonora

    29  *                 Sarlos Tamas

    30  *                 Bencsath Boldizsar

    31  *                 Halacsy Peter

    32  *                 Dvornik Laszlo

    33  *                 Gefferth Andras

    34  *                 Nagy Viktor

    35  *                 Varga Daniel

    36  *                 Chris Halls

    37  *                 Rene Engelhard

    38  *                 Bram Moolenaar

    39  *                 Dafydd Jones

    40  *                 Harri Pitkanen

    41  *                 Andras Timar

    42  *                 Tor Lillqvist

    43  *

    44  * Alternatively, the contents of this file may be used under the terms of

    45  * either the GNU General Public License Version 2 or later (the "GPL"), or

    46  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),

    47  * in which case the provisions of the GPL or the LGPL are applicable instead

    48  * of those above. If you wish to allow use of your version of this file only

    49  * under the terms of either the GPL or the LGPL, and not to allow others to

    50  * use your version of this file under the terms of the MPL, indicate your

    51  * decision by deleting the provisions above and replace them with the notice

    52  * and other provisions required by the GPL or the LGPL. If you do not delete

    53  * the provisions above, a recipient may use your version of this file under

    54  * the terms of any one of the MPL, the GPL or the LGPL.

    55  *

    56  ******* END LICENSE BLOCK *******/

    58 #include <stdlib.h>

    59 #include <string.h>

    60 #include <stdio.h>

    62 #include "hunspell.hxx"

    63 #include "hunspell.h"

    64 #ifndef MOZILLA_CLIENT

    65 #    include "config.h"

    66 #endif

    67 #include "csutil.hxx"

    69 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)

    70 {

    71     encoding = NULL;

    72     csconv = NULL;

    73     utf8 = 0;

    74     complexprefixes = 0;

    75     affixpath = mystrdup(affpath);

    76     maxdic = 0;

    78     /* first set up the hash manager */

    79     pHMgr[0] = new HashMgr(dpath, affpath, key);

    80     if (pHMgr[0]) maxdic = 1;

    82     /* next set up the affix manager */

    83     /* it needs access to the hash manager lookup methods */

    84     pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);

    86     /* get the preferred try string and the dictionary */

    87     /* encoding from the Affix Manager for that dictionary */

    88     char * try_string = pAMgr->get_try_string();

    89     encoding = pAMgr->get_encoding();

    90     langnum = pAMgr->get_langnum();

    91     utf8 = pAMgr->get_utf8();

    92     if (!utf8)

    93         csconv = get_current_cs(encoding);

    94     complexprefixes = pAMgr->get_complexprefixes();

    95     wordbreak = pAMgr->get_breaktable();

    97     /* and finally set up the suggestion manager */

    98     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);

    99     if (try_string) free(try_string);

   100 }

   102 Hunspell::~Hunspell()

   103 {

   104     if (pSMgr) delete pSMgr;

   105     if (pAMgr) delete pAMgr;

   106     for (int i = 0; i < maxdic; i++) delete pHMgr[i];

   107     maxdic = 0;

   108     pSMgr = NULL;

   109     pAMgr = NULL;

   110 #ifdef MOZILLA_CLIENT

   111     delete [] csconv;

   112 #endif

   113     csconv= NULL;

   114     if (encoding) free(encoding);

   115     encoding = NULL;

   116     if (affixpath) free(affixpath);

   117     affixpath = NULL;

   118 }

   120 // load extra dictionaries

   121 int Hunspell::add_dic(const char * dpath, const char * key) {

   122     if (maxdic == MAXDIC || !affixpath) return 1;

   123     pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);

   124     if (pHMgr[maxdic]) maxdic++; else return 1;

   125     return 0;

   126 }

   128 // make a copy of src at destination while removing all leading

   129 // blanks and removing any trailing periods after recording

   130 // their presence with the abbreviation flag

   131 // also since already going through character by character,

   132 // set the capitalization type

   133 // return the length of the "cleaned" (and UTF-8 encoded) word

   135 int Hunspell::cleanword2(char * dest, const char * src,

   136     w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)

   137 {

   138    unsigned char * p = (unsigned char *) dest;

   139    const unsigned char * q = (const unsigned char * ) src;

   141    // first skip over any leading blanks

   142    while ((*q != '\0') && (*q == ' ')) q++;

   144    // now strip off any trailing periods (recording their presence)

   145    *pabbrev = 0;

   146    int nl = strlen((const char *)q);

   147    while ((nl > 0) && (*(q+nl-1)=='.')) {

   148        nl--;

   149        (*pabbrev)++;

   150    }

   152    // if no characters are left it can't be capitalized

   153    if (nl <= 0) {

   154        *pcaptype = NOCAP;

   155        *p = '\0';

   156        return 0;

   157    }

   159    strncpy(dest, (char *) q, nl);

   160    *(dest + nl) = '\0';

   161    nl = strlen(dest);

   162    if (utf8) {

   163       *nc = u8_u16(dest_utf, MAXWORDLEN, dest);

   164       // don't check too long words

   165       if (*nc >= MAXWORDLEN) return 0;

   166       if (*nc == -1) { // big Unicode character (non BMP area)

   167          *pcaptype = NOCAP;

   168          return nl;

   169       }

   170      *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);

   171    } else {

   172      *pcaptype = get_captype(dest, nl, csconv);

   173      *nc = nl;

   174    }

   175    return nl;

   176 }

   178 int Hunspell::cleanword(char * dest, const char * src,

   179     int * pcaptype, int * pabbrev)

   180 {

   181    unsigned char * p = (unsigned char *) dest;

   182    const unsigned char * q = (const unsigned char * ) src;

   183    int firstcap = 0;

   185    // first skip over any leading blanks

   186    while ((*q != '\0') && (*q == ' ')) q++;

   188    // now strip off any trailing periods (recording their presence)

   189    *pabbrev = 0;

   190    int nl = strlen((const char *)q);

   191    while ((nl > 0) && (*(q+nl-1)=='.')) {

   192        nl--;

   193        (*pabbrev)++;

   194    }

   196    // if no characters are left it can't be capitalized

   197    if (nl <= 0) {

   198        *pcaptype = NOCAP;

   199        *p = '\0';

   200        return 0;

   201    }

   203    // now determine the capitalization type of the first nl letters

   204    int ncap = 0;

   205    int nneutral = 0;

   206    int nc = 0;

   208    if (!utf8) {

   209       while (nl > 0) {

   210          nc++;

   211          if (csconv[(*q)].ccase) ncap++;

   212          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;

   213          *p++ = *q++;

   214          nl--;

   215       }

   216       // remember to terminate the destination string

   217       *p = '\0';

   218       firstcap = csconv[(unsigned char)(*dest)].ccase;

   219    } else {

   220       unsigned short idx;

   221       w_char t[MAXWORDLEN];

   222       nc = u8_u16(t, MAXWORDLEN, src);

   223       for (int i = 0; i < nc; i++) {

   224          idx = (t[i].h << 8) + t[i].l;

   225          unsigned short low = unicodetolower(idx, langnum);

   226          if (idx != low) ncap++;

   227          if (unicodetoupper(idx, langnum) == low) nneutral++;

   228       }

   229       u16_u8(dest, MAXWORDUTF8LEN, t, nc);

   230       if (ncap) {

   231          idx = (t[0].h << 8) + t[0].l;

   232          firstcap = (idx != unicodetolower(idx, langnum));

   233       }

   234    }

   236    // now finally set the captype

   237    if (ncap == 0) {

   238         *pcaptype = NOCAP;

   239    } else if ((ncap == 1) && firstcap) {

   240         *pcaptype = INITCAP;

   241    } else if ((ncap == nc) || ((ncap + nneutral) == nc)){

   242         *pcaptype = ALLCAP;

   243    } else if ((ncap > 1) && firstcap) {

   244         *pcaptype = HUHINITCAP;

   245    } else {

   246         *pcaptype = HUHCAP;

   247    }

   248    return strlen(dest);

   249 }

   251 void Hunspell::mkallcap(char * p)

   252 {

   253   if (utf8) {

   254       w_char u[MAXWORDLEN];

   255       int nc = u8_u16(u, MAXWORDLEN, p);

   256       unsigned short idx;

   257       for (int i = 0; i < nc; i++) {

   258          idx = (u[i].h << 8) + u[i].l;

   259          if (idx != unicodetoupper(idx, langnum)) {

   260             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);

   261             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);

   262          }

   263       }

   264       u16_u8(p, MAXWORDUTF8LEN, u, nc);

   265   } else {

   266     while (*p != '\0') {

   267         *p = csconv[((unsigned char) *p)].cupper;

   268         p++;

   269     }

   270   }

   271 }

   273 int Hunspell::mkallcap2(char * p, w_char * u, int nc)

   274 {

   275   if (utf8) {

   276       unsigned short idx;

   277       for (int i = 0; i < nc; i++) {

   278          idx = (u[i].h << 8) + u[i].l;

   279          unsigned short up = unicodetoupper(idx, langnum);

   280          if (idx != up) {

   281             u[i].h = (unsigned char) (up >> 8);

   282             u[i].l = (unsigned char) (up & 0x00FF);

   283          }

   284       }

   285       u16_u8(p, MAXWORDUTF8LEN, u, nc);

   286       return strlen(p);

   287   } else {

   288     while (*p != '\0') {

   289         *p = csconv[((unsigned char) *p)].cupper;

   290         p++;

   291     }

   292   }

   293   return nc;

   294 }

   297 void Hunspell::mkallsmall(char * p)

   298 {

   299     while (*p != '\0') {

   300         *p = csconv[((unsigned char) *p)].clower;

   301         p++;

   302     }

   303 }

   305 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)

   306 {

   307   if (utf8) {

   308       unsigned short idx;

   309       for (int i = 0; i < nc; i++) {

   310          idx = (u[i].h << 8) + u[i].l;

   311          unsigned short low = unicodetolower(idx, langnum);

   312          if (idx != low) {

   313             u[i].h = (unsigned char) (low >> 8);

   314             u[i].l = (unsigned char) (low & 0x00FF);

   315          }

   316       }

   317       u16_u8(p, MAXWORDUTF8LEN, u, nc);

   318       return strlen(p);

   319   } else {

   320     while (*p != '\0') {

   321         *p = csconv[((unsigned char) *p)].clower;

   322         p++;

   323     }

   324   }

   325   return nc;

   326 }

   328 // convert UTF-8 sharp S codes to latin 1

   329 char * Hunspell::sharps_u8_l1(char * dest, char * source) {

   330     char * p = dest;

   331     *p = *source;

   332     for (p++, source++; *(source - 1); p++, source++) {

   333         *p = *source;

   334         if (*source == '\x9F') *--p = '\xDF';

   335     }

   336     return dest;

   337 }

   339 // recursive search for right ss - sharp s permutations

   340 hentry * Hunspell::spellsharps(char * base, char * pos, int n,

   341         int repnum, char * tmp, int * info, char **root) {

   342     pos = strstr(pos, "ss");

   343     if (pos && (n < MAXSHARPS)) {

   344         *pos = '\xC3';

   345         *(pos + 1) = '\x9F';

   346         hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);

   347         if (h) return h;

   348         *pos = 's';

   349         *(pos + 1) = 's';

   350         h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);

   351         if (h) return h;

   352     } else if (repnum > 0) {

   353         if (utf8) return checkword(base, info, root);

   354         return checkword(sharps_u8_l1(tmp, base), info, root);

   355     }

   356     return NULL;

   357 }

   359 int Hunspell::is_keepcase(const hentry * rv) {

   360     return pAMgr && rv->astr && pAMgr->get_keepcase() &&

   361         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);

   362 }

   364 /* insert a word to the beginning of the suggestion array and return ns */

   365 int Hunspell::insert_sug(char ***slst, char * word, int ns) {

   366     char * dup = mystrdup(word);

   367     if (!dup) return ns;

   368     if (ns == MAXSUGGESTION) {

   369         ns--;

   370         free((*slst)[ns]);

   371     }

   372     for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];

   373     (*slst)[0] = dup;

   374     return ns + 1;

   375 }

   // Check the spelling of a single word.
   //
   //   word - NUL-terminated input word
   //   info - optional; when non-NULL it is reset to 0 (after the early
   //          shortcuts below) and then accumulates SPELL_* flags
   //   root - optional; when non-NULL it is set to NULL before the dictionary
   //          lookups and is handed to checkword(), which may store a
   //          mystrdup()'d word in it
   //
   // Returns 0 when the word is rejected, HUNSPELL_OK or HUNSPELL_OK_WARN when
   // a dictionary entry accepts it, and 1 for the shortcut cases (the XML
   // marker, an empty cleaned word, no dictionary loaded, a plain number) and
   // for words accepted through the break-point recursion.
   //
   // NOTE(review): the early "return 1" / "return 0" paths run before *info
   // and *root are initialised, so callers should not read them in that case.
   377 int Hunspell::spell(const char * word, int * info, char ** root)

   378 {

   379   struct hentry * rv=NULL;

   380   // need larger vector. For example, Turkish capital letter I converted a

   381   // 2-byte UTF-8 character (dotless i) by mkallsmall.

   382   char cw[MAXWORDUTF8LEN];

   383   char wspace[MAXWORDUTF8LEN];

   384   w_char unicw[MAXWORDLEN];

   385   // Hunspell supports XML input of the simplified API (see manual)

   386   if (strcmp(word, SPELL_XML) == 0) return 1;

   387   int nc = strlen(word);

   388   int wl2 = 0;

         // over-long input is rejected before anything is copied into cw/wspace
   389   if (utf8) {

   390     if (nc >= MAXWORDUTF8LEN) return 0;

   391   } else {

   392     if (nc >= MAXWORDLEN) return 0;

   393   }

   394   int captype = 0;

   395   int abbv = 0;

   396   int wl = 0;

   398   // input conversion

   399   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

   400   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

   401   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

   403   int info2 = 0;

         // an empty cleaned word, or no dictionary at all, counts as correct
   404   if (wl == 0 || maxdic == 0) return 1;

   405   if (root) *root = NULL;

   407   // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)

   408   enum { NBEGIN, NNUM, NSEP };

   409   int nstate = NBEGIN;

   410   int i;

   412   for (i = 0; (i < wl); i++) {

   413     if ((cw[i] <= '9') && (cw[i] >= '0')) {

   414         nstate = NNUM;

   415     } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {

   416         if ((nstate == NSEP) || (i == 0)) break;

   417         nstate = NSEP;

   418     } else break;

   419   }

         // accepted only if the whole word was consumed and it ends in a digit
   420   if ((i == wl) && (nstate == NNUM)) return 1;

         // from here on info always points at a usable int
   421   if (!info) info = &info2; else *info = 0;

         // The cases below fall through on purpose: HUHCAP and HUHINITCAP
         // continue into NOCAP, and ALLCAP continues into INITCAP when none of
         // its own attempts found the word.
   423   switch(captype) {

   424      case HUHCAP:

   425      case HUHINITCAP:

   426             *info += SPELL_ORIGCAP;

   427      case NOCAP: {

   428             rv = checkword(cw, info, root);

                   // retry with one trailing period put back
   429             if ((abbv) && !(rv)) {

   430                 memcpy(wspace,cw,wl);

   431                 *(wspace+wl) = '.';

   432                 *(wspace+wl+1) = '\0';

   433                 rv = checkword(wspace, info, root);

   434             }

   435             break;

   436          }

   437      case ALLCAP: {

   438             *info += SPELL_ORIGCAP;

   439             rv = checkword(cw, info, root);

   440             if (rv) break;

   441             if (abbv) {

   442                 memcpy(wspace,cw,wl);

   443                 *(wspace+wl) = '.';

   444                 *(wspace+wl+1) = '\0';

   445                 rv = checkword(wspace, info, root);

   446                 if (rv) break;

   447             }

   448             // Spec. prefix handling for Catalan, French, Italian:

   449 	    // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).

   450             if (pAMgr && strchr(cw, '\'')) {

   451                 wl = mkallsmall2(cw, unicw, nc);

   452         	//There are no really sane circumstances where this could fail,

   453         	//but anyway...

   454         	if (char * apostrophe = strchr(cw, '\'')) {

   455                     if (utf8) {

   456             	        w_char tmpword[MAXWORDLEN];

                                // wl2 = number of characters before the apostrophe
   457             	        *apostrophe = '\0';

   458             	        wl2 = u8_u16(tmpword, MAXWORDLEN, cw);

   459             	        *apostrophe = '\'';

   460 		        if (wl2 < nc) {

   461 		            mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);

   462 			    rv = checkword(cw, info, root);

   463 			    if (rv) break;

   464 		        }

   465                     } else {

   466 		        mkinitcap2(apostrophe + 1, unicw, nc);

   467 		        rv = checkword(cw, info, root);

   468 		        if (rv) break;

   469 		    }

   470 		}

   471 		mkinitcap2(cw, unicw, nc);

   472 		rv = checkword(cw, info, root);

   473 		if (rv) break;

   474             }

                   // CHECKSHARPS: try the sharp s permutations of the lower-cased
                   // and the initial-capital forms, each also with a period
   475             if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {

   476                 char tmpword[MAXWORDUTF8LEN];

   477                 wl = mkallsmall2(cw, unicw, nc);

   478                 memcpy(wspace,cw,(wl+1));

   479                 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

   480                 if (!rv) {

   481                     wl2 = mkinitcap2(cw, unicw, nc);

   482                     rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);

   483                 }

   484                 if ((abbv) && !(rv)) {

   485                     *(wspace+wl) = '.';

   486                     *(wspace+wl+1) = '\0';

   487                     rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

   488                     if (!rv) {

   489                         memcpy(wspace, cw, wl2);

   490                         *(wspace+wl2) = '.';

   491                         *(wspace+wl2+1) = '\0';

   492                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);

   493                     }

   494                 }

   495                 if (rv) break;

   496             }

   497         }

   498      case INITCAP: {

   499              *info += SPELL_ORIGCAP;

                    // wspace keeps the all-lower-case form, cw the initial-capital one
   500              wl = mkallsmall2(cw, unicw, nc);

   501              memcpy(wspace,cw,(wl+1));

   502              wl2 = mkinitcap2(cw, unicw, nc);

                    // SPELL_INITCAP is only set for the duration of the lookup
   503              if (captype == INITCAP) *info += SPELL_INITCAP;

   504              rv = checkword(cw, info, root);

   505              if (captype == INITCAP) *info -= SPELL_INITCAP;

   506              // forbid bad capitalization

   507              // (for example, ijs -> Ijs instead of IJs in Dutch)

   508              // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)

   509              if (*info & SPELL_FORBIDDEN) {

   510                 rv = NULL;

   511                 break;

   512              }

   513              if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

   514              if (rv) break;

   516              rv = checkword(wspace, info, root);

   517              if (abbv && !rv) {

   519                  *(wspace+wl) = '.';

   520                  *(wspace+wl+1) = '\0';

   521                  rv = checkword(wspace, info, root);

   522                  if (!rv) {

   523                     memcpy(wspace, cw, wl2);

   524                     *(wspace+wl2) = '.';

   525                     *(wspace+wl2+1) = '\0';

   526     	    	    if (captype == INITCAP) *info += SPELL_INITCAP;

   527                     rv = checkword(wspace, info, root);

   528     	    	    if (captype == INITCAP) *info -= SPELL_INITCAP;

   529                     if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;

   530                     break;

   531                  }

   532              }

   533              if (rv && is_keepcase(rv) &&

   534                 ((captype == ALLCAP) ||

   535                    // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed

   536                    // in INITCAP form, too.

   537                    !(pAMgr->get_checksharps() &&

   538                       ((utf8 && strstr(wspace, "\xC3\x9F")) ||

   539                       (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;

   540              break;

   541            }

   542   }

         // a dictionary entry accepted the word; entries carrying the warn flag
         // are reported separately, or rejected when forbidwarn is set
   544   if (rv) {

   545       if (pAMgr && pAMgr->get_warn() && rv->astr &&

   546           TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {

   547               *info += SPELL_WARN;

   548 	      if (pAMgr->get_forbidwarn()) return 0;

   549               return HUNSPELL_OK_WARN;

   550       }

   551       return HUNSPELL_OK;

   552   }

   554   // recursive breaking at break points

   555   if (wordbreak) {

   556     char * s;

   557     char r;

   558     int nbr = 0;

   559     wl = strlen(cw);

   560     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;

   562     // calculate break points for recursion limit

   563     for (int j = 0; j < numbreak; j++) {

   564       s = cw;

   565       do {

   566       	s = (char *) strstr(s, wordbreak[j]);

   567       	if (s) {

   568 		nbr++;

   569 		s++;

   570 	}

   571       } while (s);

   572     }

           // words with 10 or more break points are rejected outright
   573     if (nbr >= 10) return 0;

   575     // check boundary patterns (^begin and end$)

   576     for (int j = 0; j < numbreak; j++) {

   577       int plen = strlen(wordbreak[j]);

   578       if (plen == 1 || plen > wl) continue;

   579       if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0

   580         && spell(cw + plen - 1)) return 1;

   581       if (wordbreak[j][plen - 1] == '$' &&

   582         strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {

                // cut the pattern off the end of cw, check the rest, then restore it
   583 	    r = cw[wl - plen + 1];

   584 	    cw[wl - plen + 1] = '\0';

   585     	    if (spell(cw)) return 1;

   586 	    cw[wl - plen + 1] = r;

   587 	}

   588     }

   590     // other patterns

   591     for (int j = 0; j < numbreak; j++) {

   592       int plen = strlen(wordbreak[j]);

   593       s=(char *) strstr(cw, wordbreak[j]);

             // only a break strictly inside the word is considered
   594       if (s && (s > cw) && (s < cw + wl - plen)) {

   595 	if (!spell(s + plen)) continue;

   596         r = *s;

   597         *s = '\0';

   598         // examine 2 sides of the break point

   599         if (spell(cw)) return 1;

   600         *s = r;

   602         // LANG_hu: spec. dash rule

   603 	if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {

   604 	  r = s[1];

   605 	  s[1] = '\0';

   606           if (spell(cw)) return 1; // check the first part with dash

   607           s[1] = r;

   608 	}

   609         // end of LANG specific region

   611       }

   612     }

   613   }

   615   return 0;

   616 }

   // Look up one word form: first in the hash tables of the loaded
   // dictionaries, then by affix analysis, and finally as a compound word.
   //
   //   w    - NUL-terminated word form to check
   //   info - optional; SPELL_FORBIDDEN and SPELL_COMPOUND are added to it,
   //          and SPELL_INITCAP is read from it
   //   root - optional; when the word is found through affix or compound
   //          analysis it receives a mystrdup()'d copy of the entry's word
   //          (it is not written for a direct hash table hit)
   //
   // Returns the matching entry, or NULL when the word is unknown or forbidden.
   //
   // NOTE(review): pAMgr is dereferenced unconditionally below although some
   // conditions also test it for NULL - confirm it can never be NULL here.
   618 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)

   619 {

   620   struct hentry * he = NULL;

   621   int len, i;

   622   char w2[MAXWORDUTF8LEN];

   623   const char * word;

         // work on a private copy when characters have to be ignored
   625   char * ignoredchars = pAMgr->get_ignore();

   626   if (ignoredchars != NULL) {

   627      strcpy(w2, w);

   628      if (utf8) {

   629         int ignoredchars_utf16_len;

   630         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);

   631         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);

   632      } else {

   633         remove_ignored_chars(w2,ignoredchars);

   634      }

   635      word = w2;

   636   } else word = w;

   638   len = strlen(word);

   640   if (!len)

   641       return NULL;

   643   // word reversing wrapper for complex prefixes

   644   if (complexprefixes) {

   645     if (word != w2) {

   646       strcpy(w2, word);

   647       word = w2;

   648     }

   649     if (utf8) reverseword_utf(w2); else reverseword(w2);

   650   }

   652   // look word in hash table

         // (the loop stops at the first dictionary that yields a usable entry)
   653   for (i = 0; (i < maxdic) && !he; i ++) {

   654   he = (pHMgr[i])->lookup(word);

   656   // check forbidden and onlyincompound words

   657   if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

   658     if (info) *info += SPELL_FORBIDDEN;

   659     // LANG_hu section: set dash information for suggestions

   660     if (langnum == LANG_hu) {

   661         if (pAMgr->get_compoundflag() &&

   662             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {

   663                 if (info) *info += SPELL_COMPOUND;

   664         }

   665     }

   666     return NULL;

   667   }

   669   // he = next not needaffix, onlyincompound homonym or onlyupcase word

   670   while (he && (he->astr) &&

   671     ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||

   672        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

   673        (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))

   674     )) he = he->next_homonym;

   675   }

   677   // check with affixes

   678   if (!he && pAMgr) {

   679      // try stripping off affixes */

   680      he = pAMgr->affix_check(word, len, 0);

   682      // check compound restriction and onlyupcase

   683      if (he && he->astr && (

   684         (pAMgr->get_onlyincompound() &&

   685     	    TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||

   686         (info && (*info & SPELL_INITCAP) &&

   687     	    TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {

   688     	    he = NULL;

   689      }

   691      if (he) {

   692         if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {

   693             if (info) *info += SPELL_FORBIDDEN;

   694             return NULL;

   695         }

               // hand back a copy of the entry's word, un-reversed if necessary
   696         if (root) {

   697             *root = mystrdup(he->word);

   698             if (*root && complexprefixes) {

   699                 if (utf8) reverseword_utf(*root); else reverseword(*root);

   700             }

   701         }

   702      // try check compound word

   703      } else if (pAMgr->get_compound()) {

   704           he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);

   705           // LANG_hu section: `moving rule' with last dash

   706           if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {

                    // retry on a copy of the word without its final dash
   707              char * dup = mystrdup(word);

   708              if (!dup) return NULL;

   709              dup[len-1] = '\0';

   710              he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);

   711              free(dup);

   712           }

   713           // end of LANG specific region

   714           if (he) {

   715                 if (root) {

   716                     *root = mystrdup(he->word);

   717                     if (*root && complexprefixes) {

   718                         if (utf8) reverseword_utf(*root); else reverseword(*root);

   719                     }

   720                 }

   721                 if (info) *info += SPELL_COMPOUND;

   722           }

   723      }

   725   }

   727   return he;

   728 }

   730 int Hunspell::suggest(char*** slst, const char * word)

   731 {

   732   int onlycmpdsug = 0;

   733   char cw[MAXWORDUTF8LEN];

   734   char wspace[MAXWORDUTF8LEN];

   735   if (!pSMgr || maxdic == 0) return 0;

   736   w_char unicw[MAXWORDLEN];

   737   *slst = NULL;

   738   // process XML input of the simplified API (see manual)

   739   if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {

   740      return spellml(slst, word);

   741   }

   742   int nc = strlen(word);

   743   if (utf8) {

   744     if (nc >= MAXWORDUTF8LEN) return 0;

   745   } else {

   746     if (nc >= MAXWORDLEN) return 0;

   747   }

   748   int captype = 0;

   749   int abbv = 0;

   750   int wl = 0;

   752   // input conversion

   753   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

   754   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

   755   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

   757   if (wl == 0) return 0;

   758   int ns = 0;

   759   int capwords = 0;

   761   // check capitalized form for FORCEUCASE

   762   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {

   763     int info = SPELL_ORIGCAP;

   764     char ** wlst;

   765     if (checkword(cw, &info, NULL)) {

   766         if (*slst) {

   767             wlst = *slst;

   768         } else {

   769             wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));

   770             if (wlst == NULL) return -1;

   771             *slst = wlst;

   772             for (int i = 0; i < MAXSUGGESTION; i++) {

   773                 wlst[i] = NULL;

   774             }

   775         }

   776         wlst[0] = mystrdup(cw);

   777         mkinitcap(wlst[0]);

   778         return 1;

   779     }

   780   }

   782   switch(captype) {

   783      case NOCAP:   {

   784                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

   785                      break;

   786                    }

   788      case INITCAP: {

   789                      capwords = 1;

   790                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

   791                      if (ns == -1) break;

   792                      memcpy(wspace,cw,(wl+1));

   793                      mkallsmall2(wspace, unicw, nc);

   794                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

   795                      break;

   796                    }

   797      case HUHINITCAP:

   798                     capwords = 1;

   799      case HUHCAP: {

   800                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);

   801                      if (ns != -1) {

   802                         int prevns;

   803     		        // something.The -> something. The

   804                         char * dot = strchr(cw, '.');

   805 		        if (dot && (dot > cw)) {

   806 		            int captype_;

   807 		            if (utf8) {

   808 		               w_char w_[MAXWORDLEN];

   809 			       int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);

   810 		               captype_ = get_captype_utf8(w_, wl_, langnum);

   811 		            } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);

   812 		    	    if (captype_ == INITCAP) {

   813                         	char * st = mystrdup(cw);

   814                         	if (st) st = (char *) realloc(st, wl + 2);

   815 				if (st) {

   816                         		st[(dot - cw) + 1] = ' ';

   817                         		strcpy(st + (dot - cw) + 2, dot + 1);

   818                     			ns = insert_sug(slst, st, ns);

   819 					free(st);

   820 				}

   821 		    	    }

   822 		        }

   823                         if (captype == HUHINITCAP) {

   824                             // TheOpenOffice.org -> The OpenOffice.org

   825                             memcpy(wspace,cw,(wl+1));

   826                             mkinitsmall2(wspace, unicw, nc);

   827                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

   828                         }

   829                         memcpy(wspace,cw,(wl+1));

   830                         mkallsmall2(wspace, unicw, nc);

   831                         if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

   832                         prevns = ns;

   833                         ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

   834                         if (captype == HUHINITCAP) {

   835                             mkinitcap2(wspace, unicw, nc);

   836                             if (spell(wspace)) ns = insert_sug(slst, wspace, ns);

   837                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

   838                         }

   839                         // aNew -> "a New" (instead of "a new")

   840                         for (int j = prevns; j < ns; j++) {

   841                            char * space = strchr((*slst)[j],' ');

   842                            if (space) {

   843                                 int slen = strlen(space + 1);

   844                                 // different case after space (need capitalisation)

   845                                 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {

   846                                     w_char w[MAXWORDLEN];

   847                                     int wc = 0;

   848                                     char * r = (*slst)[j];

   849                                     if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);

   850                                     mkinitcap2(space + 1, w, wc);

   851                                     // set as first suggestion

   852                                     for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];

   853                                     (*slst)[0] = r;

   854                                 }

   855                            }

   856                         }

   857                      }

   858                      break;

   859                    }

   861      case ALLCAP: {

   862                      memcpy(wspace, cw, (wl+1));

   863                      mkallsmall2(wspace, unicw, nc);

   864                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

   865                      if (ns == -1) break;

   866                      if (pAMgr && pAMgr->get_keepcase() && spell(wspace))

   867                         ns = insert_sug(slst, wspace, ns);

   868                      mkinitcap2(wspace, unicw, nc);

   869                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);

   870                      for (int j=0; j < ns; j++) {

   871                         mkallcap((*slst)[j]);

   872                         if (pAMgr && pAMgr->get_checksharps()) {

   873                             char * pos;

   874                             if (utf8) {

   875                                 pos = strstr((*slst)[j], "\xC3\x9F");

   876                                 while (pos) {

   877                                     *pos = 'S';

   878                                     *(pos+1) = 'S';

   879                                     pos = strstr(pos+2, "\xC3\x9F");

   880                                 }

   881                             } else {

   882                                 pos = strchr((*slst)[j], '\xDF');

   883                                 while (pos) {

   884                                     (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);

   885                                     mystrrep((*slst)[j], "\xDF", "SS");

   886                                     pos = strchr((*slst)[j], '\xDF');

   887                                 }

   888                             }

   889                         }

   890                      }

   891                      break;

   892                    }

   893   }

   895  // LANG_hu section: replace '-' with ' ' in Hungarian

   896   if (langnum == LANG_hu) {

   897       for (int j=0; j < ns; j++) {

   898           char * pos = strchr((*slst)[j],'-');

   899           if (pos) {

   900               int info;

   901               char w[MAXWORDUTF8LEN];

   902               *pos = '\0';

   903               strcpy(w, (*slst)[j]);

   904               strcat(w, pos + 1);

   905               spell(w, &info, NULL);

   906               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {

   907                   *pos = ' ';

   908               } else *pos = '-';

   909           }

   910       }

   911   }

   912   // END OF LANG_hu section

   914   // try ngram approach since found nothing or only compound words

   915   if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {

   916       switch(captype) {

   917           case NOCAP: {

   918               ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);

   919               break;

   920           }

   921 	  case HUHINITCAP:

   922               capwords = 1;

   923           case HUHCAP: {

   924               memcpy(wspace,cw,(wl+1));

   925               mkallsmall2(wspace, unicw, nc);

   926               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

   927 	      break;

   928           }

   929          case INITCAP: {

   930               capwords = 1;

   931               memcpy(wspace,cw,(wl+1));

   932               mkallsmall2(wspace, unicw, nc);

   933               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

   934               break;

   935           }

   936           case ALLCAP: {

   937               memcpy(wspace,cw,(wl+1));

   938               mkallsmall2(wspace, unicw, nc);

   939 	      int oldns = ns;

   940               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);

   941               for (int j = oldns; j < ns; j++)

   942                   mkallcap((*slst)[j]);

   943               break;

   944          }

   945       }

   946   }

   948   // try dash suggestion (Afo-American -> Afro-American)

   949   if (char * pos = strchr(cw, '-')) {

   950      char * ppos = cw;

   951      int nodashsug = 1;

   952      char ** nlst = NULL;

   953      int nn = 0;

   954      int last = 0;

   955      if (*slst) {

   956         for (int j = 0; j < ns && nodashsug == 1; j++) {

   957            if (strchr((*slst)[j], '-')) nodashsug = 0;

   958         }

   959      }

   960      while (nodashsug && !last) {

   961 	if (*pos == '\0') last = 1; else *pos = '\0';

   962         if (!spell(ppos)) {

   963           nn = suggest(&nlst, ppos);

   964           for (int j = nn - 1; j >= 0; j--) {

   965             strncpy(wspace, cw, ppos - cw);

   966             strcpy(wspace + (ppos - cw), nlst[j]);

   967             if (!last) {

   968             	strcat(wspace, "-");

   969 		strcat(wspace, pos + 1);

   970 	    }

   971             ns = insert_sug(slst, wspace, ns);

   972             free(nlst[j]);

   973           }

   974           if (nlst != NULL) free(nlst);

   975           nodashsug = 0;

   976         }

   977 	if (!last) {

   978           *pos = '-';

   979           ppos = pos + 1;

   980           pos = strchr(ppos, '-');

   981         }

   982 	if (!pos) pos = cw + strlen(cw);

   983      }

   984   }

   986   // word reversing wrapper for complex prefixes

   987   if (complexprefixes) {

   988     for (int j = 0; j < ns; j++) {

   989       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);

   990     }

   991   }

   993   // capitalize

   994   if (capwords) for (int j=0; j < ns; j++) {

   995       mkinitcap((*slst)[j]);

   996   }

   998   // expand suggestions with dot(s)

   999   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {

  1000     for (int j = 0; j < ns; j++) {

  1001       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);

  1002       strcat((*slst)[j], word + strlen(word) - abbv);

  1003     }

  1004   }

  1006   // remove bad capitalized and forbidden forms

  1007   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {

  1008   switch (captype) {

  1009     case INITCAP:

  1010     case ALLCAP: {

  1011       int l = 0;

  1012       for (int j=0; j < ns; j++) {

  1013         if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {

  1014           char s[MAXSWUTF8L];

  1015           w_char w[MAXSWL];

  1016           int len;

  1017           if (utf8) {

  1018             len = u8_u16(w, MAXSWL, (*slst)[j]);

  1019           } else {

  1020             strcpy(s, (*slst)[j]);

  1021             len = strlen(s);

  1022           }

  1023           mkallsmall2(s, w, len);

  1024           free((*slst)[j]);

  1025           if (spell(s)) {

  1026             (*slst)[l] = mystrdup(s);

  1027             if ((*slst)[l]) l++;

  1028           } else {

  1029             mkinitcap2(s, w, len);

  1030             if (spell(s)) {

  1031               (*slst)[l] = mystrdup(s);

  1032               if ((*slst)[l]) l++;

  1033             }

  1034           }

  1035         } else {

  1036           (*slst)[l] = (*slst)[j];

  1037           l++;

  1038         }

  1039       }

  1040       ns = l;

  1041     }

  1042   }

  1043   }

  1045   // remove duplications

  1046   int l = 0;

  1047   for (int j = 0; j < ns; j++) {

  1048     (*slst)[l] = (*slst)[j];

  1049     for (int k = 0; k < l; k++) {

  1050       if (strcmp((*slst)[k], (*slst)[j]) == 0) {

  1051         free((*slst)[j]);

  1052         l--;

  1053         break;

  1054       }

  1055     }

  1056     l++;

  1057   }

  1058   ns = l;

  1060   // output conversion

  1061   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;

  1062   for (int j = 0; rl && j < ns; j++) {

  1063     if (rl->conv((*slst)[j], wspace)) {

  1064       free((*slst)[j]);

  1065       (*slst)[j] = mystrdup(wspace);

  1066     }

  1067   }

  1069   // if suggestions removed by nosuggest, onlyincompound parameters

  1070   if (l == 0 && *slst) {

  1071     free(*slst);

  1072     *slst = NULL;

  1073   }

  1074   return l;

  1075 }

  1077 void Hunspell::free_list(char *** slst, int n) {

  1078         freelist(slst, n);

  1079 }

  1081 char * Hunspell::get_dic_encoding()

  1082 {

  1083   return encoding;

  1084 }

  1086 #ifdef HUNSPELL_EXPERIMENTAL

  1087 // XXX need UTF-8 support

  1088 int Hunspell::suggest_auto(char*** slst, const char * word)

  1089 {

  1090   char cw[MAXWORDUTF8LEN];

  1091   char wspace[MAXWORDUTF8LEN];

  1092   if (!pSMgr || maxdic == 0) return 0;

  1093   int wl = strlen(word);

  1094   if (utf8) {

  1095     if (wl >= MAXWORDUTF8LEN) return 0;

  1096   } else {

  1097     if (wl >= MAXWORDLEN) return 0;

  1098   }

  1099   int captype = 0;

  1100   int abbv = 0;

  1101   wl = cleanword(cw, word, &captype, &abbv);

  1102   if (wl == 0) return 0;

  1103   int ns = 0;

  1104   *slst = NULL; // HU, nsug in pSMgr->suggest

  1106   switch(captype) {

  1107      case NOCAP:   {

  1108                      ns = pSMgr->suggest_auto(slst, cw, ns);

  1109                      if (ns>0) break;

  1110                      break;

  1111                    }

  1113      case INITCAP: {

  1114                      memcpy(wspace,cw,(wl+1));

  1115                      mkallsmall(wspace);

  1116                      ns = pSMgr->suggest_auto(slst, wspace, ns);

  1117                      for (int j=0; j < ns; j++)

  1118                        mkinitcap((*slst)[j]);

  1119                      ns = pSMgr->suggest_auto(slst, cw, ns);

  1120                      break;

  1122                    }

  1124      case HUHINITCAP:

  1125      case HUHCAP: {

  1126                      ns = pSMgr->suggest_auto(slst, cw, ns);

  1127                      if (ns == 0) {

  1128                         memcpy(wspace,cw,(wl+1));

  1129                         mkallsmall(wspace);

  1130                         ns = pSMgr->suggest_auto(slst, wspace, ns);

  1131                      }

  1132                      break;

  1133                    }

  1135      case ALLCAP: {

  1136                      memcpy(wspace,cw,(wl+1));

  1137                      mkallsmall(wspace);

  1138                      ns = pSMgr->suggest_auto(slst, wspace, ns);

  1140                      mkinitcap(wspace);

  1141                      ns = pSMgr->suggest_auto(slst, wspace, ns);

  1143                      for (int j=0; j < ns; j++)

  1144                        mkallcap((*slst)[j]);

  1145                      break;

  1146                    }

  1147   }

  1149   // word reversing wrapper for complex prefixes

  1150   if (complexprefixes) {

  1151     for (int j = 0; j < ns; j++) {

  1152       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);

  1153     }

  1154   }

  1156   // expand suggestions with dot(s)

  1157   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {

  1158     for (int j = 0; j < ns; j++) {

  1159       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);

  1160       strcat((*slst)[j], word + strlen(word) - abbv);

  1161     }

  1162   }

  1164   // LANG_hu section: replace '-' with ' ' in Hungarian

  1165   if (langnum == LANG_hu) {

  1166       for (int j=0; j < ns; j++) {

  1167           char * pos = strchr((*slst)[j],'-');

  1168           if (pos) {

  1169               int info;

  1170               char w[MAXWORDUTF8LEN];

  1171               *pos = '\0';

  1172               strcpy(w, (*slst)[j]);

  1173               strcat(w, pos + 1);

  1174               spell(w, &info, NULL);

  1175               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {

  1176                   *pos = ' ';

  1177               } else *pos = '-';

  1178           }

  1179       }

  1180   }

  1181   // END OF LANG_hu section

  1182   return ns;

  1183 }

  1184 #endif

  1186 int Hunspell::stem(char*** slst, char ** desc, int n)

  1187 {

  1188   char result[MAXLNLEN];

  1189   char result2[MAXLNLEN];

  1190   *slst = NULL;

  1191   if (n == 0) return 0;

  1192   *result2 = '\0';

  1193   for (int i = 0; i < n; i++) {

  1194     *result = '\0';

  1195     // add compound word parts (except the last one)

  1196     char * s = (char *) desc[i];

  1197     char * part = strstr(s, MORPH_PART);

  1198     if (part) {

  1199         char * nextpart = strstr(part + 1, MORPH_PART);

  1200         while (nextpart) {

  1201             copy_field(result + strlen(result), part, MORPH_PART);

  1202             part = nextpart;

  1203             nextpart = strstr(part + 1, MORPH_PART);

  1204         }

  1205         s = part;

  1206     }

  1208     char **pl;

  1209     char tok[MAXLNLEN];

  1210     strcpy(tok, s);

  1211     char * alt = strstr(tok, " | ");

  1212     while (alt) {

  1213         alt[1] = MSEP_ALT;

  1214         alt = strstr(alt, " | ");

  1215     }

  1216     int pln = line_tok(tok, &pl, MSEP_ALT);

  1217     for (int k = 0; k < pln; k++) {

  1218         // add derivational suffixes

  1219         if (strstr(pl[k], MORPH_DERI_SFX)) {

  1220             // remove inflectional suffixes

  1221             char * is = strstr(pl[k], MORPH_INFL_SFX);

  1222             if (is) *is = '\0';

  1223             char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);

  1224             if (sg) {

  1225                 char ** gen;

  1226                 int genl = line_tok(sg, &gen, MSEP_REC);

  1227                 free(sg);

  1228                 for (int j = 0; j < genl; j++) {

  1229                     sprintf(result2 + strlen(result2), "%c%s%s",

  1230                             MSEP_REC, result, gen[j]);

  1231                 }

  1232                 freelist(&gen, genl);

  1233             }

  1234         } else {

  1235             sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);

  1236             if (strstr(pl[k], MORPH_SURF_PFX)) {

  1237                 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);

  1238             }

  1239             copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);

  1240         }

  1241     }

  1242     freelist(&pl, pln);

  1243   }

  1244   int sln = line_tok(result2, slst, MSEP_REC);

  1245   return uniqlist(*slst, sln);

  1247 }

  1249 int Hunspell::stem(char*** slst, const char * word)

  1250 {

  1251   char ** pl;

  1252   int pln = analyze(&pl, word);

  1253   int pln2 = stem(slst, pl, pln);

  1254   freelist(&pl, pln);

  1255   return pln2;

  1256 }

  1258 #ifdef HUNSPELL_EXPERIMENTAL

  1259 int Hunspell::suggest_pos_stems(char*** slst, const char * word)

  1260 {

  1261   char cw[MAXWORDUTF8LEN];

  1262   char wspace[MAXWORDUTF8LEN];

  1263   if (! pSMgr || maxdic == 0) return 0;

  1264   int wl = strlen(word);

  1265   if (utf8) {

  1266     if (wl >= MAXWORDUTF8LEN) return 0;

  1267   } else {

  1268     if (wl >= MAXWORDLEN) return 0;

  1269   }

  1270   int captype = 0;

  1271   int abbv = 0;

  1272   wl = cleanword(cw, word, &captype, &abbv);

  1273   if (wl == 0) return 0;

  1275   int ns = 0; // ns=0 = normalized input

  1277   *slst = NULL; // HU, nsug in pSMgr->suggest

  1279   switch(captype) {

  1280      case HUHCAP:

  1281      case NOCAP:   {

  1282                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);

  1284                      if ((abbv) && (ns == 0)) {

  1285                          memcpy(wspace,cw,wl);

  1286                          *(wspace+wl) = '.';

  1287                          *(wspace+wl+1) = '\0';

  1288                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

  1289                      }

  1291                      break;

  1292                    }

  1294      case INITCAP: {

  1296                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);

  1298                      if (ns == 0 || ((*slst)[0][0] == '#')) {

  1299                         memcpy(wspace,cw,(wl+1));

  1300                         mkallsmall(wspace);

  1301                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

  1302                      }

  1304                      break;

  1306                    }

  1308      case ALLCAP: {

  1309                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);

  1310                      if (ns != 0) break;

  1312                      memcpy(wspace,cw,(wl+1));

  1313                      mkallsmall(wspace);

  1314                      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

  1316                      if (ns == 0) {

  1317                          mkinitcap(wspace);

  1318                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);

  1319                      }

  1320                      break;

  1321                    }

  1322   }

  1324   return ns;

  1325 }

  1326 #endif // END OF HUNSPELL_EXPERIMENTAL CODE

  1328 const char * Hunspell::get_wordchars()

  1329 {

  1330   return pAMgr->get_wordchars();

  1331 }

  1333 unsigned short * Hunspell::get_wordchars_utf16(int * len)

  1334 {

  1335   return pAMgr->get_wordchars_utf16(len);

  1336 }

  1338 void Hunspell::mkinitcap(char * p)

  1339 {

  1340   if (!utf8) {

  1341     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;

  1342   } else {

  1343       int len;

  1344       w_char u[MAXWORDLEN];

  1345       len = u8_u16(u, MAXWORDLEN, p);

  1346       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);

  1347       u[0].h = (unsigned char) (i >> 8);

  1348       u[0].l = (unsigned char) (i & 0x00FF);

  1349       u16_u8(p, MAXWORDUTF8LEN, u, len);

  1350   }

  1351 }

  1353 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)

  1354 {

  1355   if (!utf8) {

  1356     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;

  1357   } else if (nc > 0) {

  1358       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);

  1359       u[0].h = (unsigned char) (i >> 8);

  1360       u[0].l = (unsigned char) (i & 0x00FF);

  1361       u16_u8(p, MAXWORDUTF8LEN, u, nc);

  1362       return strlen(p);

  1363   }

  1364   return nc;

  1365 }

  1367 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)

  1368 {

  1369   if (!utf8) {

  1370     if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;

  1371   } else if (nc > 0) {

  1372       unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);

  1373       u[0].h = (unsigned char) (i >> 8);

  1374       u[0].l = (unsigned char) (i & 0x00FF);

  1375       u16_u8(p, MAXWORDUTF8LEN, u, nc);

  1376       return strlen(p);

  1377   }

  1378   return nc;

  1379 }

  1381 int Hunspell::add(const char * word)

  1382 {

  1383     if (pHMgr[0]) return (pHMgr[0])->add(word);

  1384     return 0;

  1385 }

  1387 int Hunspell::add_with_affix(const char * word, const char * example)

  1388 {

  1389     if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);

  1390     return 0;

  1391 }

  1393 int Hunspell::remove(const char * word)

  1394 {

  1395     if (pHMgr[0]) return (pHMgr[0])->remove(word);

  1396     return 0;

  1397 }

  1399 const char * Hunspell::get_version()

  1400 {

  1401   return pAMgr->get_version();

  1402 }

  1404 struct cs_info * Hunspell::get_csconv()

  1405 {

  1406   return csconv;

  1407 }

  1409 void Hunspell::cat_result(char * result, char * st)

  1410 {

  1411     if (st) {

  1412         if (*result) mystrcat(result, "\n", MAXLNLEN);

  1413         mystrcat(result, st, MAXLNLEN);

  1414         free(st);

  1415     }

  1416 }

  1418 int Hunspell::analyze(char*** slst, const char * word)

  1419 {

  1420   char cw[MAXWORDUTF8LEN];

  1421   char wspace[MAXWORDUTF8LEN];

  1422   w_char unicw[MAXWORDLEN];

  1423   int wl2 = 0;

  1424   *slst = NULL;

  1425   if (! pSMgr || maxdic == 0) return 0;

  1426   int nc = strlen(word);

  1427   if (utf8) {

  1428     if (nc >= MAXWORDUTF8LEN) return 0;

  1429   } else {

  1430     if (nc >= MAXWORDLEN) return 0;

  1431   }

  1432   int captype = 0;

  1433   int abbv = 0;

  1434   int wl = 0;

  1436   // input conversion

  1437   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;

  1438   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);

  1439   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);

  1441   if (wl == 0) {

  1442       if (abbv) {

  1443           for (wl = 0; wl < abbv; wl++) cw[wl] = '.';

  1444           cw[wl] = '\0';

  1445           abbv = 0;

  1446       } else return 0;

  1447   }

  1449   char result[MAXLNLEN];

  1450   char * st = NULL;

  1452   *result = '\0';

  1454   int n = 0;

  1455   int n2 = 0;

  1456   int n3 = 0;

  1458   // test numbers

  1459   // LANG_hu section: set dash information for suggestions

  1460   if (langnum == LANG_hu) {

  1461   while ((n < wl) &&

  1462         (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {

  1463         n++;

  1464         if ((cw[n] == '.') || (cw[n] == ',')) {

  1465                 if (((n2 == 0) && (n > 3)) ||

  1466                         ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;

  1467                 n2++;

  1468                 n3 = n;

  1469         }

  1470   }

  1472   if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;

  1473   if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {

  1474         mystrcat(result, cw, MAXLNLEN);

  1475         result[n - 1] = '\0';

  1476         if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));

  1477         else {

  1478                 char sign = cw[n];

  1479                 cw[n] = '\0';

  1480                 cat_result(result, pSMgr->suggest_morph(cw + n - 1));

  1481                 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE

  1482                 cw[n] = sign;

  1483                 cat_result(result, pSMgr->suggest_morph(cw + n));

  1484         }

  1485         return line_tok(result, slst, MSEP_REC);

  1486   }

  1487   }

  1488   // END OF LANG_hu section

  1490   switch(captype) {

  1491      case HUHCAP:

  1492      case HUHINITCAP:

  1493      case NOCAP:  {

  1494                     cat_result(result, pSMgr->suggest_morph(cw));

  1495                     if (abbv) {

  1496                         memcpy(wspace,cw,wl);

  1497                         *(wspace+wl) = '.';

  1498                         *(wspace+wl+1) = '\0';

  1499                         cat_result(result, pSMgr->suggest_morph(wspace));

  1500                     }

  1501                     break;

  1502                 }

  1503      case INITCAP: {

  1504                      wl = mkallsmall2(cw, unicw, nc);

  1505                      memcpy(wspace,cw,(wl+1));

  1506                      wl2 = mkinitcap2(cw, unicw, nc);

  1507                      cat_result(result, pSMgr->suggest_morph(wspace));

  1508                      cat_result(result, pSMgr->suggest_morph(cw));

  1509                      if (abbv) {

  1510                          *(wspace+wl) = '.';

  1511                          *(wspace+wl+1) = '\0';

  1512                          cat_result(result, pSMgr->suggest_morph(wspace));

  1514                          memcpy(wspace, cw, wl2);

  1515                          *(wspace+wl2) = '.';

  1516                          *(wspace+wl2+1) = '\0';

  1518                          cat_result(result, pSMgr->suggest_morph(wspace));

  1519                      }

  1520                      break;

  1521                    }

  1522      case ALLCAP: {

  1523                      cat_result(result, pSMgr->suggest_morph(cw));

  1524                      if (abbv) {

  1525                          memcpy(wspace,cw,wl);

  1526                          *(wspace+wl) = '.';

  1527                          *(wspace+wl+1) = '\0';

  1528                          cat_result(result, pSMgr->suggest_morph(cw));

  1529                      }

  1530                      wl = mkallsmall2(cw, unicw, nc);

  1531                      memcpy(wspace,cw,(wl+1));

  1532                      wl2 = mkinitcap2(cw, unicw, nc);

  1534                      cat_result(result, pSMgr->suggest_morph(wspace));

  1535                      cat_result(result, pSMgr->suggest_morph(cw));

  1536                      if (abbv) {

  1537                          *(wspace+wl) = '.';

  1538                          *(wspace+wl+1) = '\0';

  1539                          cat_result(result, pSMgr->suggest_morph(wspace));

  1541                          memcpy(wspace, cw, wl2);

  1542                          *(wspace+wl2) = '.';

  1543                          *(wspace+wl2+1) = '\0';

  1545                          cat_result(result, pSMgr->suggest_morph(wspace));

  1546                      }

  1547                      break;

  1548                    }

  1549   }

  1551   if (*result) {

  1552     // word reversing wrapper for complex prefixes

  1553     if (complexprefixes) {

  1554       if (utf8) reverseword_utf(result); else reverseword(result);

  1555     }

  1556     return line_tok(result, slst, MSEP_REC);

  1557   }

  1559   // compound word with dash (HU) I18n

  1560   char * dash = NULL;

  1561   int nresult = 0;

  1562   // LANG_hu section: set dash information for suggestions

  1563   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');

  1564   if ((langnum == LANG_hu) && dash) {

  1565       *dash='\0';

  1566       // examine 2 sides of the dash

  1567       if (dash[1] == '\0') { // base word ending with dash

  1568         if (spell(cw)) {

  1569 		char * p = pSMgr->suggest_morph(cw);

  1570 		if (p) {

  1571 		    int ret = line_tok(p, slst, MSEP_REC);

  1572 		    free(p);

  1573 		    return ret;

  1574 		}

  1576 	}

  1577       } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.

  1578         if (spell(cw) && (spell("-e"))) {

  1579                         st = pSMgr->suggest_morph(cw);

  1580                         if (st) {

  1581                                 mystrcat(result, st, MAXLNLEN);

  1582                                 free(st);

  1583                         }

  1584                         mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE

  1585                         st = pSMgr->suggest_morph("-e");

  1586                         if (st) {

  1587                                 mystrcat(result, st, MAXLNLEN);

  1588                                 free(st);

  1589                         }

  1590                         return line_tok(result, slst, MSEP_REC);

  1591                 }

  1592       } else {

  1593       // first word ending with dash: word- XXX ???

  1594         char r2 = *(dash + 1);

  1595         dash[0]='-';

  1596         dash[1]='\0';

  1597         nresult = spell(cw);

  1598         dash[1] = r2;

  1599         dash[0]='\0';

  1600         if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||

  1601                 ((dash[1] > '0') && (dash[1] < '9')))) {

  1602                             st = pSMgr->suggest_morph(cw);

  1603                             if (st) {

  1604                                 mystrcat(result, st, MAXLNLEN);

  1605                                     free(st);

  1606                                 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE

  1607                             }

  1608                             st = pSMgr->suggest_morph(dash+1);

  1609                             if (st) {

  1610                                     mystrcat(result, st, MAXLNLEN);

  1611                                     free(st);

  1612                             }

  1613                             return line_tok(result, slst, MSEP_REC);

  1614                         }

  1615       }

  1616       // affixed number in correct word

  1617      if (nresult && (dash > cw) && (((*(dash-1)<='9') &&

  1618                         (*(dash-1)>='0')) || (*(dash-1)=='.'))) {

  1619          *dash='-';

  1620          n = 1;

  1621          if (*(dash - n) == '.') n++;

  1622          // search first not a number character to left from dash

  1623          while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {

  1624             n++;

  1625          }

  1626          if ((dash - n) < cw) n--;

  1627          // numbers: valami1000000-hoz

  1628          // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,

  1629          // 56-hoz, 6-hoz

  1630          for(; n >= 1; n--) {

  1631             if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {

  1632                     mystrcat(result, cw, MAXLNLEN);

  1633                     result[dash - cw - n] = '\0';

  1634                         st = pSMgr->suggest_morph(dash - n);

  1635                         if (st) {

  1636                         mystrcat(result, st, MAXLNLEN);

  1637                                 free(st);

  1638                         }

  1639                         return line_tok(result, slst, MSEP_REC);

  1640             }

  1641          }

  1642      }

  1643   }

  1644   return 0;

  1645 }

  1647 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)

  1648 {

  1649   *slst = NULL;

  1650   if (!pSMgr || !pln) return 0;

  1651   char **pl2;

  1652   int pl2n = analyze(&pl2, word);

  1653   int captype = 0;

  1654   int abbv = 0;

  1655   char cw[MAXWORDUTF8LEN];

  1656   cleanword(cw, word, &captype, &abbv);

  1657   char result[MAXLNLEN];

  1658   *result = '\0';

  1660   for (int i = 0; i < pln; i++) {

  1661     cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));

  1662   }

  1663   freelist(&pl2, pl2n);

  1665   if (*result) {

  1666     // allcap

  1667     if (captype == ALLCAP) mkallcap(result);

  1669     // line split

  1670     int linenum = line_tok(result, slst, MSEP_REC);

  1672     // capitalize

  1673     if (captype == INITCAP || captype == HUHINITCAP) {

  1674         for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);

  1675     }

  1677     // temporary filtering of prefix related errors (eg.

  1678     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")

  1680     int r = 0;

  1681     for (int j=0; j < linenum; j++) {

  1682         if (!spell((*slst)[j])) {

  1683             free((*slst)[j]);

  1684             (*slst)[j] = NULL;

  1685         } else {

  1686             if (r < j) (*slst)[r] = (*slst)[j];

  1687             r++;

  1688         }

  1689     }

  1690     if (r > 0) return r;

  1691     free(*slst);

  1692     *slst = NULL;

  1693   }

  1694   return 0;

  1695 }

  1697 int Hunspell::generate(char*** slst, const char * word, const char * pattern)

  1698 {

  1699   char **pl;

  1700   int pln = analyze(&pl, pattern);

  1701   int n = generate(slst, word, pl, pln);

  1702   freelist(&pl, pln);

  1703   return uniqlist(*slst, n);

  1704 }

  1706 // minimal XML parser functions

  1707 int Hunspell::get_xml_par(char * dest, const char * par, int max)

  1708 {

  1709    char * d = dest;

  1710    if (!par) return 0;

  1711    char end = *par;

  1712    char * dmax = dest + max;

  1713    if (end == '>') end = '<';

  1714    else if (end != '\'' && end != '"') return 0; // bad XML

  1715    for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;

  1716    *d = '\0';

  1717    mystrrep(dest, "&lt;", "<");

  1718    mystrrep(dest, "&amp;", "&");

  1719    return (int)(d - dest);

  1720 }

  1722 int Hunspell::get_langnum() const

  1723 {

  1724    return langnum;

  1725 }

  1727 // return the beginning of the element (attr == NULL) or the attribute

  1728 const char * Hunspell::get_xml_pos(const char * s, const char * attr)

  1729 {

  1730   const char * end = strchr(s, '>');

  1731   const char * p = s;

  1732   if (attr == NULL) return end;

  1733   do {

  1734     p = strstr(p, attr);

  1735     if (!p || p >= end) return 0;

  1736   } while (*(p-1) != ' ' &&  *(p-1) != '\n');

  1737   return p + strlen(attr);

  1738 }

  1740 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {

  1741   char cw[MAXWORDUTF8LEN];

  1742   if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&

  1743     strcmp(cw, value) == 0) return 1;

  1744   return 0;

  1745 }

  1747 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {

  1748     int n = 0;

  1749     char * p;

  1750     if (!list) return 0;

  1751     for (p = list; (p = strstr(p, tag)); p++) n++;

  1752     if (n == 0) return 0;

  1753     *slst = (char **) malloc(sizeof(char *) * n);

  1754     if (!*slst) return 0;

  1755     for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {

  1756         int l = strlen(p);

  1757         (*slst)[n] = (char *) malloc(l + 1);

  1758         if (!(*slst)[n]) return n;

  1759         if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {

  1760             free((*slst)[n]);

  1761             break;

  1762         }

  1763     }

  1764     return n;

  1765 }

  1767 int Hunspell::spellml(char*** slst, const char * word)

  1768 {

  1769   char *q, *q2;

  1770   char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];

  1771   q = (char *) strstr(word, "<query");

  1772   if (!q) return 0; // bad XML input

  1773   q2 = strchr(q, '>');

  1774   if (!q2) return 0; // bad XML input

  1775   q2 = strstr(q2, "<word");

  1776   if (!q2) return 0; // bad XML input

  1777   if (check_xml_par(q, "type=", "analyze")) {

  1778       int n = 0, s = 0;

  1779       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);

  1780       if (n == 0) return 0;

  1781       // convert the result to <code><a>ana1</a><a>ana2</a></code> format

  1782       for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);

  1783       char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;

  1784       if (!r) return 0;

  1785       strcpy(r, "<code>");

  1786       for (int i = 0; i < n; i++) {

  1787         int l = strlen(r);

  1788         strcpy(r + l, "<a>");

  1789         strcpy(r + l + 3, (*slst)[i]);

  1790         mystrrep(r + l + 3, "\t", " ");

  1791         mystrrep(r + l + 3, "<", "&lt;");

  1792         mystrrep(r + l + 3, "&", "&amp;");

  1793         strcat(r, "</a>");

  1794         free((*slst)[i]);

  1795       }

  1796       strcat(r, "</code>");

  1797       (*slst)[0] = r;

  1798       return 1;

  1799   } else if (check_xml_par(q, "type=", "stem")) {

  1800       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);

  1801   } else if (check_xml_par(q, "type=", "generate")) {

  1802       int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);

  1803       if (n == 0) return 0;

  1804       char * q3 = strstr(q2 + 1, "<word");

  1805       if (q3) {

  1806         if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {

  1807             return generate(slst, cw, cw2);

  1808         }

  1809       } else {

  1810         if ((q2 = strstr(q2 + 1, "<code"))) {

  1811           char ** slst2;

  1812           if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {

  1813             int n2 = generate(slst, cw, slst2, n);

  1814             freelist(&slst2, n);

  1815             return uniqlist(*slst, n2);

  1816           }

  1817           freelist(&slst2, n);

  1818         }

  1819       }

  1820   }

  1821   return 0;

  1822 }

  #ifdef HUNSPELL_EXPERIMENTAL
  // Append the analysis `st` (may be NULL) to `result` as a new line and
  // release it with free(). A "\n" separator is written only when `result`
  // already has content and `st` is not NULL, so the output never contains
  // empty lines.
  static void append_morph_line(char * result, char * st)
  {
    if (!st) return;
    if (*result) mystrcat(result, "\n", MAXLNLEN);
    mystrcat(result, st, MAXLNLEN);
    free(st);
  }

  // XXX need UTF-8 support
  //
  // Collect the results of pSMgr->suggest_morph_for_spelling_error() for
  // `word` and for the case variants that fit its capitalization type. When
  // cleanword() reports an abbreviation, the variants with a trailing '.' are
  // tried as well. Returns a newly allocated string of "\n"-separated results
  // (made with mystrdup()), or NULL when there is no result, the word is too
  // long, or it is empty after cleaning.
  char * Hunspell::morph_with_correction(const char * word)
  {
    char cw[MAXWORDUTF8LEN];
    char wspace[MAXWORDUTF8LEN];
    if (! pSMgr || maxdic == 0) return NULL;
    int wl = strlen(word);
    if (utf8) {
      if (wl >= MAXWORDUTF8LEN) return NULL;
    } else {
      if (wl >= MAXWORDLEN) return NULL;
    }
    int captype = 0;
    int abbv = 0;
    wl = cleanword(cw, word, &captype, &abbv);
    if (wl == 0) return NULL;

    char result[MAXLNLEN];
    *result = '\0';

    switch(captype) {
       case NOCAP: {
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(cw));
         if (abbv) {
           // retry with the trailing period restored
           memcpy(wspace, cw, wl);
           *(wspace + wl) = '.';
           *(wspace + wl + 1) = '\0';
           append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         }
         break;
       }
       case INITCAP: {
         // lowercase form first, then the word as written
         memcpy(wspace, cw, (wl + 1));
         mkallsmall(wspace);
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(cw));
         if (abbv) {
           memcpy(wspace, cw, wl);
           *(wspace + wl) = '.';
           *(wspace + wl + 1) = '\0';
           mkallsmall(wspace);
           append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
           mkinitcap(wspace);
           append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         }
         break;
       }
       case HUHCAP: {
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(cw));
         memcpy(wspace, cw, (wl + 1));
         mkallsmall(wspace);
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         break;
       }
       case ALLCAP: {
         // as written, then all-lowercase, then initial capital
         memcpy(wspace, cw, (wl + 1));
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         mkallsmall(wspace);
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         mkinitcap(wspace);
         append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         if (abbv) {
           // same three variants with the trailing period restored
           memcpy(wspace, cw, (wl + 1));
           *(wspace + wl) = '.';
           *(wspace + wl + 1) = '\0';
           append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
           mkallsmall(wspace);
           append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
           mkinitcap(wspace);
           append_morph_line(result, pSMgr->suggest_morph_for_spelling_error(wspace));
         }
         break;
       }
    }

    if (*result) return mystrdup(result);
    return NULL;
  }

  #endif // END OF HUNSPELL_EXPERIMENTAL CODE

  1976 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)

  1977 {

  1978         return (Hunhandle*)(new Hunspell(affpath, dpath));

  1979 }

  1981 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,

  1982     const char * key)

  1983 {

  1984         return (Hunhandle*)(new Hunspell(affpath, dpath, key));

  1985 }

  1987 void Hunspell_destroy(Hunhandle *pHunspell)

  1988 {

  1989         delete (Hunspell*)(pHunspell);

  1990 }

  1992 int Hunspell_spell(Hunhandle *pHunspell, const char *word)

  1993 {

  1994         return ((Hunspell*)pHunspell)->spell(word);

  1995 }

  1997 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)

  1998 {

  1999         return ((Hunspell*)pHunspell)->get_dic_encoding();

  2000 }

  2002 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)

  2003 {

  2004         return ((Hunspell*)pHunspell)->suggest(slst, word);

  2005 }

  2007 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)

  2008 {

  2009         return ((Hunspell*)pHunspell)->analyze(slst, word);

  2010 }

  2012 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)

  2013 {

  2014         return ((Hunspell*)pHunspell)->stem(slst, word);

  2015 }

  2017 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)

  2018 {

  2019         return ((Hunspell*)pHunspell)->stem(slst, desc, n);

  2020 }

  2022 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,

  2023     const char * word2)

  2024 {

  2025         return ((Hunspell*)pHunspell)->generate(slst, word, word2);

  2026 }

  2028 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,

  2029     char** desc, int n)

  2030 {

  2031         return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);

  2032 }

  2034   /* functions for run-time modification of the dictionary */

  2036   /* add word to the run-time dictionary */

  2038 int Hunspell_add(Hunhandle *pHunspell, const char * word) {

  2039         return ((Hunspell*)pHunspell)->add(word);

  2040 }

  2042   /* add word to the run-time dictionary with affix flags of

  2043    * the example (a dictionary word): Hunspell will recognize

  2044    * affixed forms of the new word, too.

  2045    */

  2047 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,

  2048         const char * example) {

  2049         return ((Hunspell*)pHunspell)->add_with_affix(word, example);

  2050 }

  2052   /* remove word from the run-time dictionary */

  2054 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {

  2055         return ((Hunspell*)pHunspell)->remove(word);

  2056 }

  2058 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {

  2059         freelist(slst, n);

  2060 }

The Tor Browser / file revision

extensions/spellcheck/hunspell/src/hunspell.cpp@6474c204b198

extensions/spellcheck/hunspell/src/hunspell.cpp