extensions/spellcheck/hunspell/src/affixmgr.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/hunspell/src/affixmgr.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,4575 @@
     1.4 +/******* BEGIN LICENSE BLOCK *******
     1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     1.6 + * 
     1.7 + * The contents of this file are subject to the Mozilla Public License Version
     1.8 + * 1.1 (the "License"); you may not use this file except in compliance with
     1.9 + * the License. You may obtain a copy of the License at
    1.10 + * http://www.mozilla.org/MPL/
    1.11 + * 
    1.12 + * Software distributed under the License is distributed on an "AS IS" basis,
    1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    1.14 + * for the specific language governing rights and limitations under the
    1.15 + * License.
    1.16 + * 
    1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers
    1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    1.20 + * 
    1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    1.22 + *                 David Einstein (deinst@world.std.com)
    1.23 + *                 László Németh (nemethl@gyorsposta.hu)
    1.24 + *                 Caolan McNamara (caolanm@redhat.com)
    1.25 + *                 Davide Prina
    1.26 + *                 Giuseppe Modugno
    1.27 + *                 Gianluca Turconi
    1.28 + *                 Simon Brouwer
    1.29 + *                 Noll Janos
    1.30 + *                 Biro Arpad
    1.31 + *                 Goldman Eleonora
    1.32 + *                 Sarlos Tamas
    1.33 + *                 Bencsath Boldizsar
    1.34 + *                 Halacsy Peter
    1.35 + *                 Dvornik Laszlo
    1.36 + *                 Gefferth Andras
    1.37 + *                 Nagy Viktor
    1.38 + *                 Varga Daniel
    1.39 + *                 Chris Halls
    1.40 + *                 Rene Engelhard
    1.41 + *                 Bram Moolenaar
    1.42 + *                 Dafydd Jones
    1.43 + *                 Harri Pitkanen
    1.44 + *                 Andras Timar
    1.45 + *                 Tor Lillqvist
    1.46 + * 
    1.47 + * Alternatively, the contents of this file may be used under the terms of
    1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or
    1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead
    1.51 + * of those above. If you wish to allow use of your version of this file only
    1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to
    1.53 + * use your version of this file under the terms of the MPL, indicate your
    1.54 + * decision by deleting the provisions above and replace them with the notice
    1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete
    1.56 + * the provisions above, a recipient may use your version of this file under
    1.57 + * the terms of any one of the MPL, the GPL or the LGPL.
    1.58 + *
    1.59 + ******* END LICENSE BLOCK *******/
    1.60 +
    1.61 +#include <stdlib.h>
    1.62 +#include <string.h>
    1.63 +#include <stdio.h>
    1.64 +#include <ctype.h>
    1.65 +
    1.66 +#include <vector>
    1.67 +
    1.68 +#include "affixmgr.hxx"
    1.69 +#include "affentry.hxx"
    1.70 +#include "langnum.hxx"
    1.71 +
    1.72 +#include "csutil.hxx"
    1.73 +
    1.74 +AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key) 
    1.75 +{
    1.76 +  // register the hash managers, reset every option to its default, then load affix data from the aff file (affpath, key)
    1.77 +  pHMgr = ptr[0]; // first entry of the hash manager list
    1.78 +  alldic = ptr; // the whole hash manager list
    1.79 +  maxdic = md; // NOTE(review): presumably the number of entries in alldic -- confirm against the caller
    1.80 +  keystring = NULL; // KEY option
    1.81 +  trystring = NULL; // TRY option
    1.82 +  encoding=NULL; // SET option (character set name)
    1.83 +  csconv=NULL; // filled by parse_file() via get_current_cs() when the encoding is not UTF-8
    1.84 +  utf8 = 0; // set to 1 by parse_file() when SET is "UTF-8"
    1.85 +  complexprefixes = 0; // COMPLEXPREFIXES option
    1.86 +  maptable = NULL; // related character map table (MAP)
    1.87 +  nummap = 0;
    1.88 +  breaktable = NULL; // word breakpoints table (BREAK)
    1.89 +  numbreak = -1; // -1: no BREAK table yet; parse_file() then installs the default BREAK definition
    1.90 +  reptable = NULL; // typical fault correcting table (REP)
    1.91 +  numrep = 0;
    1.92 +  iconvtable = NULL; // input conversion table (ICONV)
    1.93 +  oconvtable = NULL; // conversion table of the OCONV option
    1.94 +  checkcpdtable = NULL; // CHECKCOMPOUNDPATTERN table
    1.95 +  // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
    1.96 +  simplifiedcpd = 0;
    1.97 +  numcheckcpd = 0;
    1.98 +  defcpdtable = NULL; // defcompound table (COMPOUNDRULE)
    1.99 +  numdefcpd = 0;
   1.100 +  phone = NULL; // phonetic translation table (PHONE)
   1.101 +  compoundflag = FLAG_NULL; // permits word in compound forms
   1.102 +  compoundbegin = FLAG_NULL; // may be first word in compound forms
   1.103 +  compoundmiddle = FLAG_NULL; // may be middle word in compound forms
   1.104 +  compoundend = FLAG_NULL; // may be last word in compound forms
   1.105 +  compoundroot = FLAG_NULL; // compound word signing flag
   1.106 +  compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
   1.107 +  compoundforbidflag = FLAG_NULL; // compound forbidding flag for suffixed word
   1.108 +  checkcompounddup = 0; // forbid double words in compounds
   1.109 +  checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
   1.110 +  checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
   1.111 +  checkcompoundtriple = 0; // forbid compounds with triple letters
   1.112 +  simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
   1.113 +  forbiddenword = FORBIDDENWORD; // forbidden word signing flag
   1.114 +  nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
   1.115 +  nongramsuggest = FLAG_NULL; // flag of the NONGRAMSUGGEST option
   1.116 +  lang = NULL; // language
   1.117 +  langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
   1.118 +  needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
   1.119 +  cpdwordmax = -1; // default: unlimited wordcount in compound words
   1.120 +  cpdmin = -1;  // undefined; replaced by MINCPDLEN at the end of this constructor if still -1
   1.121 +  cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
   1.122 +  cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
   1.123 +  cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
   1.124 +  cpdvowels_utf16_len=0; // length belonging to cpdvowels_utf16
   1.125 +  pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
   1.126 +  sfxappnd=NULL; // previous suffix for counting a special syllables BUG
   1.127 +  cpdsyllablenum=NULL; // syllable count incrementing flag
   1.128 +  checknum=0; // checking numbers, and word with numbers
   1.129 +  wordchars=NULL; // letters + spec. word characters
   1.130 +  wordchars_utf16=NULL; // letters + spec. word characters (UTF-16 form, filled by parse_array)
   1.131 +  wordchars_utf16_len=0; // length belonging to wordchars_utf16
   1.132 +  ignorechars=NULL; // ignored characters (IGNORE option)
   1.133 +  ignorechars_utf16=NULL; // ignored characters (UTF-16 form, filled by parse_array)
   1.134 +  ignorechars_utf16_len=0; // length belonging to ignorechars_utf16
   1.135 +  version=NULL; // affix and dictionary file version string
   1.136 +  havecontclass=0; // flags of possible continuing classes (double affix)
   1.137 +  // LEMMA_PRESENT: do not put the root into the morphological output. The lemma is present
   1.138 +  // in the morphological description in the dictionary file. It's often combined with PSEUDOROOT.
   1.139 +  lemma_present = FLAG_NULL; 
   1.140 +  circumfix = FLAG_NULL; // flag used by circumfixes (CIRCUMFIX)
   1.141 +  onlyincompound = FLAG_NULL; // flag of the ONLYINCOMPOUND option
   1.142 +  maxngramsugs = -1; // undefined
   1.143 +  maxdiff = -1; // undefined
   1.144 +  onlymaxdiff = 0; // ONLYMAXDIFF option
   1.145 +  maxcpdsugs = -1; // undefined
   1.146 +  nosplitsugs = 0; // NOSPLITSUGS option
   1.147 +  sugswithdots = 0; // SUGSWITHDOTS option
   1.148 +  keepcase = 0; // flag of the KEEPCASE option
   1.149 +  forceucase = 0; // flag of the FORCEUCASE option
   1.150 +  warn = 0; // flag of the WARN option
   1.151 +  forbidwarn = 0; // FORBIDWARN option
   1.152 +  checksharps = 0; // CHECKSHARPS option
   1.153 +  substandard = FLAG_NULL; // flag used by the affix generator (SUBSTANDARD)
   1.154 +  fullstrip = 0; // FULLSTRIP option
   1.155 +
   1.156 +  sfx = NULL;
   1.157 +  pfx = NULL;
   1.158 +
   1.159 +  for (int i=0; i < SETSIZE; i++) { // empty all prefix/suffix entry tables
   1.160 +     pStart[i] = NULL;
   1.161 +     sStart[i] = NULL;
   1.162 +     pFlag[i] = NULL;
   1.163 +     sFlag[i] = NULL;
   1.164 +  }
   1.165 +
   1.166 +  for (int j=0; j < CONTSIZE; j++) { // no continuation class registered yet
   1.167 +    contclasses[j] = 0;
   1.168 +  }
   1.169 +
   1.170 +  if (parse_file(affpath, key)) { // a load failure is only reported; construction still completes
   1.171 +     HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
   1.172 +  }
   1.173 +  
   1.174 +  if (cpdmin == -1) cpdmin = MINCPDLEN; // COMPOUNDMIN was not given: use the default minimum
   1.175 +
   1.176 +}
   1.177 +
   1.178 +
   1.179 +AffixMgr::~AffixMgr() 
   1.180 +{ // releases the affix entry lists, tables and strings held by this object
   1.181 +  // pass through linked prefix entries and clean up
   1.182 +  for (int i=0; i < SETSIZE ;i++) {
   1.183 +       pFlag[i] = NULL;
   1.184 +       PfxEntry * ptr = pStart[i];
   1.185 +       PfxEntry * nptr = NULL;
   1.186 +       while (ptr) {
   1.187 +            nptr = ptr->getNext(); // fetch the successor before the entry is destroyed
   1.188 +            delete(ptr);
   1.189 +            ptr = nptr;
   1.190 +            nptr = NULL;
   1.191 +       }  
   1.192 +  }
   1.193 +
   1.194 +  // pass through linked suffix entries and clean up
   1.195 +  for (int j=0; j < SETSIZE ; j++) {
   1.196 +       sFlag[j] = NULL;
   1.197 +       SfxEntry * ptr = sStart[j];
   1.198 +       SfxEntry * nptr = NULL;
   1.199 +       while (ptr) {
   1.200 +            nptr = ptr->getNext(); // fetch the successor before the entry is destroyed
   1.201 +            delete(ptr);
   1.202 +            ptr = nptr;
   1.203 +            nptr = NULL;
   1.204 +       }
   1.205 +       sStart[j] = NULL;
   1.206 +  }
   1.207 +
   1.208 +  if (keystring) free(keystring);
   1.209 +  keystring=NULL;
   1.210 +  if (trystring) free(trystring);
   1.211 +  trystring=NULL;
   1.212 +  if (encoding) free(encoding);
   1.213 +  encoding=NULL;
   1.214 +  if (maptable) {  
   1.215 +     for (int j=0; j < nummap; j++) { // every map entry holds `len` strings in `set`
   1.216 +        for (int k=0; k < maptable[j].len; k++) {
   1.217 +           if (maptable[j].set[k]) free(maptable[j].set[k]);
   1.218 +        }
   1.219 +        free(maptable[j].set);
   1.220 +        maptable[j].set = NULL;
   1.221 +        maptable[j].len = 0;
   1.222 +     }
   1.223 +     free(maptable);  
   1.224 +     maptable = NULL;
   1.225 +  }
   1.226 +  nummap = 0;
   1.227 +  if (breaktable) {
   1.228 +     for (int j=0; j < numbreak; j++) {
   1.229 +        if (breaktable[j]) free(breaktable[j]);
   1.230 +        breaktable[j] = NULL;
   1.231 +     }
   1.232 +     free(breaktable);  
   1.233 +     breaktable = NULL;
   1.234 +  }
   1.235 +  numbreak = 0;
   1.236 +  if (reptable) {
   1.237 +     for (int j=0; j < numrep; j++) { // each replacement entry holds two strings
   1.238 +        free(reptable[j].pattern);
   1.239 +        free(reptable[j].pattern2);
   1.240 +     }
   1.241 +     free(reptable);  
   1.242 +     reptable = NULL;
   1.243 +  }
   1.244 +  if (iconvtable) delete iconvtable;
   1.245 +  if (oconvtable) delete oconvtable;
   1.246 +  if (phone) { // release the table even when it has no rules array, so the struct cannot leak
   1.247 +     for (int j=0; phone->rules && j < phone->num + 1; j++) { // rules holds (num + 1) pairs of strings
   1.248 +        free(phone->rules[j * 2]);
   1.249 +        free(phone->rules[j * 2 + 1]);
   1.250 +     }
   1.251 +     free(phone->rules); // free(NULL) is a no-op
   1.252 +     free(phone);  
   1.253 +     phone = NULL;
   1.254 +  }
   1.255 +
   1.256 +  if (defcpdtable) {  
   1.257 +     for (int j=0; j < numdefcpd; j++) {
   1.258 +        free(defcpdtable[j].def);
   1.259 +        defcpdtable[j].def = NULL;
   1.260 +     }
   1.261 +     free(defcpdtable);  
   1.262 +     defcpdtable = NULL;
   1.263 +  }
   1.264 +  numrep = 0; // counter of reptable, which was released above
   1.265 +  if (checkcpdtable) {  
   1.266 +     for (int j=0; j < numcheckcpd; j++) { // each entry holds three pattern strings
   1.267 +        free(checkcpdtable[j].pattern);
   1.268 +        free(checkcpdtable[j].pattern2);
   1.269 +        free(checkcpdtable[j].pattern3);
   1.270 +        checkcpdtable[j].pattern = NULL;
   1.271 +        checkcpdtable[j].pattern2 = NULL;
   1.272 +        checkcpdtable[j].pattern3 = NULL;
   1.273 +     }
   1.274 +     free(checkcpdtable);  
   1.275 +     checkcpdtable = NULL;
   1.276 +  }
   1.277 +  numcheckcpd = 0;
   1.278 +  FREE_FLAG(compoundflag); // NOTE(review): FREE_FLAG is defined outside this file -- presumably resets the flag; confirm
   1.279 +  FREE_FLAG(compoundbegin);
   1.280 +  FREE_FLAG(compoundmiddle);
   1.281 +  FREE_FLAG(compoundend);
   1.282 +  FREE_FLAG(compoundpermitflag);
   1.283 +  FREE_FLAG(compoundforbidflag);
   1.284 +  FREE_FLAG(compoundroot);
   1.285 +  FREE_FLAG(forbiddenword);
   1.286 +  FREE_FLAG(nosuggest);
   1.287 +  FREE_FLAG(nongramsuggest);
   1.288 +  FREE_FLAG(needaffix);
   1.289 +  FREE_FLAG(lemma_present);
   1.290 +  FREE_FLAG(circumfix);
   1.291 +  FREE_FLAG(onlyincompound);
   1.292 +  
   1.293 +  cpdwordmax = 0;
   1.294 +  pHMgr = NULL; // only the pointer is cleared; the hash manager itself is not deleted here
   1.295 +  cpdmin = 0;
   1.296 +  cpdmaxsyllable = 0;
   1.297 +  if (cpdvowels) free(cpdvowels);
   1.298 +  if (cpdvowels_utf16) free(cpdvowels_utf16);
   1.299 +  if (cpdsyllablenum) free(cpdsyllablenum);
   1.300 +  free_utf_tbl();
   1.301 +  if (lang) free(lang);
   1.302 +  if (wordchars) free(wordchars);
   1.303 +  if (wordchars_utf16) free(wordchars_utf16);
   1.304 +  if (ignorechars) free(ignorechars);
   1.305 +  if (ignorechars_utf16) free(ignorechars_utf16);
   1.306 +  if (version) free(version);
   1.307 +  checknum=0;
   1.308 +#ifdef MOZILLA_CLIENT
   1.309 +  delete [] csconv; // csconv is released only in MOZILLA_CLIENT builds
   1.310 +#endif
   1.311 +}
   1.312 +
   1.313 +
   1.314 +// read in aff file and build up prefix and suffix entry objects 
   1.315 +int  AffixMgr::parse_file(const char * affpath, const char * key)
   1.316 +{
   1.317 +  char * line; // io buffers
   1.318 +  char ft;     // affix type
   1.319 +  
   1.320 +  // checking flag duplication
   1.321 +  char dupflags[CONTSIZE];
   1.322 +  char dupflags_ini = 1;
   1.323 +
   1.324 +  // first line indicator for removing byte order mark
   1.325 +  int firstline = 1;
   1.326 +  
   1.327 +  // open the affix file
   1.328 +  FileMgr * afflst = new FileMgr(affpath, key);
   1.329 +  if (!afflst) {
   1.330 +    HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
   1.331 +    return 1;
   1.332 +  }
   1.333 +
   1.334 +  // step one is to parse the affix file building up the internal
   1.335 +  // affix data structures
   1.336 +
   1.337 +    // read in each line ignoring any that do not
   1.338 +    // start with a known line type indicator
   1.339 +    while ((line = afflst->getline())) {
   1.340 +       mychomp(line);
   1.341 +
   1.342 +       /* remove byte order mark */
   1.343 +       if (firstline) {
   1.344 +         firstline = 0;
   1.345 +         // Affix file begins with byte order mark: possible incompatibility with old Hunspell versions
   1.346 +         if (strncmp(line,"\xEF\xBB\xBF",3) == 0) {
   1.347 +            memmove(line, line+3, strlen(line+3)+1);
   1.348 +         }
   1.349 +       }
   1.350 +
   1.351 +       /* parse in the keyboard string */
   1.352 +       if (strncmp(line,"KEY",3) == 0) {
   1.353 +          if (parse_string(line, &keystring, afflst->getlinenum())) {
   1.354 +             delete afflst;
   1.355 +             return 1;
   1.356 +          }
   1.357 +       }
   1.358 +
   1.359 +       /* parse in the try string */
   1.360 +       if (strncmp(line,"TRY",3) == 0) {
   1.361 +          if (parse_string(line, &trystring, afflst->getlinenum())) {
   1.362 +             delete afflst;
   1.363 +             return 1;
   1.364 +          }
   1.365 +       }
   1.366 +
   1.367 +       /* parse in the name of the character set used by the .dict and .aff */
   1.368 +       if (strncmp(line,"SET",3) == 0) {
   1.369 +          if (parse_string(line, &encoding, afflst->getlinenum())) {
   1.370 +             delete afflst;
   1.371 +             return 1;
   1.372 +          }
   1.373 +          if (strcmp(encoding, "UTF-8") == 0) {
   1.374 +             utf8 = 1;
   1.375 +#ifndef OPENOFFICEORG
   1.376 +#ifndef MOZILLA_CLIENT
   1.377 +             if (initialize_utf_tbl()) return 1;
   1.378 +#endif
   1.379 +#endif
   1.380 +          }
   1.381 +       }
   1.382 +
   1.383 +       /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
   1.384 +       if (strncmp(line,"COMPLEXPREFIXES",15) == 0)
   1.385 +                   complexprefixes = 1;
   1.386 +
   1.387 +       /* parse in the flag used by the controlled compound words */
   1.388 +       if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
   1.389 +          if (parse_flag(line, &compoundflag, afflst)) {
   1.390 +             delete afflst;
   1.391 +             return 1;
   1.392 +          }
   1.393 +       }
   1.394 +
   1.395 +       /* parse in the flag used by compound words */
   1.396 +       if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
   1.397 +          if (complexprefixes) {
   1.398 +            if (parse_flag(line, &compoundend, afflst)) {
   1.399 +              delete afflst;
   1.400 +              return 1;
   1.401 +            }
   1.402 +          } else {
   1.403 +            if (parse_flag(line, &compoundbegin, afflst)) {
   1.404 +              delete afflst;
   1.405 +              return 1;
   1.406 +            }
   1.407 +          }
   1.408 +       }
   1.409 +
   1.410 +       /* parse in the flag used by compound words */
   1.411 +       if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
   1.412 +          if (parse_flag(line, &compoundmiddle, afflst)) {
   1.413 +             delete afflst;
   1.414 +             return 1;
   1.415 +          }
   1.416 +       }
   1.417 +       /* parse in the flag used by compound words */
   1.418 +       if (strncmp(line,"COMPOUNDEND",11) == 0) {
   1.419 +          if (complexprefixes) {
   1.420 +            if (parse_flag(line, &compoundbegin, afflst)) {
   1.421 +              delete afflst;
   1.422 +              return 1;
   1.423 +            }
   1.424 +          } else {
   1.425 +            if (parse_flag(line, &compoundend, afflst)) {
   1.426 +              delete afflst;
   1.427 +              return 1;
   1.428 +            }
   1.429 +          }
   1.430 +       }
   1.431 +
   1.432 +       /* parse in the data used by compound_check() method */
   1.433 +       if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
   1.434 +          if (parse_num(line, &cpdwordmax, afflst)) {
   1.435 +             delete afflst;
   1.436 +             return 1;
   1.437 +          }
   1.438 +       }
   1.439 +
   1.440 +       /* parse in the flag sign compounds in dictionary */
   1.441 +       if (strncmp(line,"COMPOUNDROOT",12) == 0) {
   1.442 +          if (parse_flag(line, &compoundroot, afflst)) {
   1.443 +             delete afflst;
   1.444 +             return 1;
   1.445 +          }
   1.446 +       }
   1.447 +
   1.448 +       /* parse in the flag used by compound_check() method */
   1.449 +       if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
   1.450 +          if (parse_flag(line, &compoundpermitflag, afflst)) {
   1.451 +             delete afflst;
   1.452 +             return 1;
   1.453 +          }
   1.454 +       }
   1.455 +
   1.456 +       /* parse in the flag used by compound_check() method */
   1.457 +       if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
   1.458 +          if (parse_flag(line, &compoundforbidflag, afflst)) {
   1.459 +             delete afflst;
   1.460 +             return 1;
   1.461 +          }
   1.462 +       }
   1.463 +
   1.464 +       if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
   1.465 +                   checkcompounddup = 1;
   1.466 +       }
   1.467 +
   1.468 +       if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0) {
   1.469 +                   checkcompoundrep = 1;
   1.470 +       }
   1.471 +
   1.472 +       if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0) {
   1.473 +                   checkcompoundtriple = 1;
   1.474 +       }
   1.475 +
   1.476 +       if (strncmp(line,"SIMPLIFIEDTRIPLE",16) == 0) {
   1.477 +                   simplifiedtriple = 1;
   1.478 +       }
   1.479 +
   1.480 +       if (strncmp(line,"CHECKCOMPOUNDCASE",17) == 0) {
   1.481 +                   checkcompoundcase = 1;
   1.482 +       }
   1.483 +
   1.484 +       if (strncmp(line,"NOSUGGEST",9) == 0) {
   1.485 +          if (parse_flag(line, &nosuggest, afflst)) {
   1.486 +             delete afflst;
   1.487 +             return 1;
   1.488 +          }
   1.489 +       }
   1.490 +
   1.491 +       if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
   1.492 +          if (parse_flag(line, &nongramsuggest, afflst)) {
   1.493 +             delete afflst;
   1.494 +             return 1;
   1.495 +          }
   1.496 +       }
   1.497 +
   1.498 +       /* parse in the flag used by forbidden words */
   1.499 +       if (strncmp(line,"FORBIDDENWORD",13) == 0) {
   1.500 +          if (parse_flag(line, &forbiddenword, afflst)) {
   1.501 +             delete afflst;
   1.502 +             return 1;
   1.503 +          }
   1.504 +       }
   1.505 +
   1.506 +       /* parse in the flag used by forbidden words */
   1.507 +       if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
   1.508 +          if (parse_flag(line, &lemma_present, afflst)) {
   1.509 +             delete afflst;
   1.510 +             return 1;
   1.511 +          }
   1.512 +       }
   1.513 +
   1.514 +       /* parse in the flag used by circumfixes */
   1.515 +       if (strncmp(line,"CIRCUMFIX",9) == 0) {
   1.516 +          if (parse_flag(line, &circumfix, afflst)) {
   1.517 +             delete afflst;
   1.518 +             return 1;
   1.519 +          }
   1.520 +       }
   1.521 +
   1.522 +       /* parse in the flag used by fogemorphemes */
   1.523 +       if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
   1.524 +          if (parse_flag(line, &onlyincompound, afflst)) {
   1.525 +             delete afflst;
   1.526 +             return 1;
   1.527 +          }
   1.528 +       }
   1.529 +
   1.530 +       /* parse in the flag used by `needaffixs' */
   1.531 +       if (strncmp(line,"PSEUDOROOT",10) == 0) {
   1.532 +          if (parse_flag(line, &needaffix, afflst)) {
   1.533 +             delete afflst;
   1.534 +             return 1;
   1.535 +          }
   1.536 +       }
   1.537 +
   1.538 +       /* parse in the flag used by `needaffixs' */
   1.539 +       if (strncmp(line,"NEEDAFFIX",9) == 0) {
   1.540 +          if (parse_flag(line, &needaffix, afflst)) {
   1.541 +             delete afflst;
   1.542 +             return 1;
   1.543 +          }
   1.544 +       }
   1.545 +
   1.546 +       /* parse in the minimal length for words in compounds */
   1.547 +       if (strncmp(line,"COMPOUNDMIN",11) == 0) {
   1.548 +          if (parse_num(line, &cpdmin, afflst)) {
   1.549 +             delete afflst;
   1.550 +             return 1;
   1.551 +          }
   1.552 +          if (cpdmin < 1) cpdmin = 1;
   1.553 +       }
   1.554 +
   1.555 +       /* parse in the max. words and syllables in compounds */
   1.556 +       if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
   1.557 +          if (parse_cpdsyllable(line, afflst)) {
   1.558 +             delete afflst;
   1.559 +             return 1;
   1.560 +          }
   1.561 +       }
   1.562 +
   1.563 +       /* parse in the flag used by compound_check() method */
   1.564 +       if (strncmp(line,"SYLLABLENUM",11) == 0) {
   1.565 +          if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
   1.566 +             delete afflst;
   1.567 +             return 1;
   1.568 +          }
   1.569 +       }
   1.570 +
   1.571 +       /* parse in the flag used by the controlled compound words */
   1.572 +       if (strncmp(line,"CHECKNUM",8) == 0) {
   1.573 +           checknum=1;
   1.574 +       }
   1.575 +
   1.576 +       /* parse in the extra word characters */
   1.577 +       if (strncmp(line,"WORDCHARS",9) == 0) {
   1.578 +          if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
   1.579 +             delete afflst;
   1.580 +             return 1;
   1.581 +          }
   1.582 +       }
   1.583 +
   1.584 +       /* parse in the ignored characters (for example, Arabic optional diacretics charachters */
   1.585 +       if (strncmp(line,"IGNORE",6) == 0) {
   1.586 +          if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
   1.587 +             delete afflst;
   1.588 +             return 1;
   1.589 +          }
   1.590 +       }
   1.591 +
   1.592 +       /* parse in the typical fault correcting table */
   1.593 +       if (strncmp(line,"REP",3) == 0) {
   1.594 +          if (parse_reptable(line, afflst)) {
   1.595 +             delete afflst;
   1.596 +             return 1;
   1.597 +          }
   1.598 +       }
   1.599 +
   1.600 +       /* parse in the input conversion table */
   1.601 +       if (strncmp(line,"ICONV",5) == 0) {
   1.602 +          if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
   1.603 +             delete afflst;
   1.604 +             return 1;
   1.605 +          }
   1.606 +       }
   1.607 +
   1.608 +       /* parse in the input conversion table */
   1.609 +       if (strncmp(line,"OCONV",5) == 0) {
   1.610 +          if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
   1.611 +             delete afflst;
   1.612 +             return 1;
   1.613 +          }
   1.614 +       }
   1.615 +
   1.616 +       /* parse in the phonetic translation table */
   1.617 +       if (strncmp(line,"PHONE",5) == 0) {
   1.618 +          if (parse_phonetable(line, afflst)) {
   1.619 +             delete afflst;
   1.620 +             return 1;
   1.621 +          }
   1.622 +       }
   1.623 +
   1.624 +       /* parse in the checkcompoundpattern table */
   1.625 +       if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
   1.626 +          if (parse_checkcpdtable(line, afflst)) {
   1.627 +             delete afflst;
   1.628 +             return 1;
   1.629 +          }
   1.630 +       }
   1.631 +
   1.632 +       /* parse in the defcompound table */
   1.633 +       if (strncmp(line,"COMPOUNDRULE",12) == 0) {
   1.634 +          if (parse_defcpdtable(line, afflst)) {
   1.635 +             delete afflst;
   1.636 +             return 1;
   1.637 +          }
   1.638 +       }
   1.639 +
   1.640 +       /* parse in the related character map table */
   1.641 +       if (strncmp(line,"MAP",3) == 0) {
   1.642 +          if (parse_maptable(line, afflst)) {
   1.643 +             delete afflst;
   1.644 +             return 1;
   1.645 +          }
   1.646 +       }
   1.647 +
   1.648 +       /* parse in the word breakpoints table */
   1.649 +       if (strncmp(line,"BREAK",5) == 0) {
   1.650 +          if (parse_breaktable(line, afflst)) {
   1.651 +             delete afflst;
   1.652 +             return 1;
   1.653 +          }
   1.654 +       }
   1.655 +
   1.656 +       /* parse in the language for language specific codes */
   1.657 +       if (strncmp(line,"LANG",4) == 0) {
   1.658 +          if (parse_string(line, &lang, afflst->getlinenum())) {
   1.659 +             delete afflst;
   1.660 +             return 1;
   1.661 +          }
   1.662 +          langnum = get_lang_num(lang);
   1.663 +       }
   1.664 +
   1.665 +       if (strncmp(line,"VERSION",7) == 0) {
   1.666 +          for(line = line + 7; *line == ' ' || *line == '\t'; line++);
   1.667 +          version = mystrdup(line);
   1.668 +       }
   1.669 +
   1.670 +       if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
   1.671 +          if (parse_num(line, &maxngramsugs, afflst)) {
   1.672 +             delete afflst;
   1.673 +             return 1;
   1.674 +          }
   1.675 +       }
   1.676 +
   1.677 +       if (strncmp(line,"ONLYMAXDIFF", 11) == 0)
   1.678 +                   onlymaxdiff = 1;
   1.679 +
   1.680 +       if (strncmp(line,"MAXDIFF",7) == 0) {
   1.681 +          if (parse_num(line, &maxdiff, afflst)) {
   1.682 +             delete afflst;
   1.683 +             return 1;
   1.684 +          }
   1.685 +       }
   1.686 +
   1.687 +       if (strncmp(line,"MAXCPDSUGS",10) == 0) {
   1.688 +          if (parse_num(line, &maxcpdsugs, afflst)) {
   1.689 +             delete afflst;
   1.690 +             return 1;
   1.691 +          }
   1.692 +       }
   1.693 +
   1.694 +       if (strncmp(line,"NOSPLITSUGS",11) == 0) {
   1.695 +                   nosplitsugs=1;
   1.696 +       }
   1.697 +
   1.698 +       if (strncmp(line,"FULLSTRIP",9) == 0) {
   1.699 +                   fullstrip=1;
   1.700 +       }
   1.701 +
   1.702 +       if (strncmp(line,"SUGSWITHDOTS",12) == 0) {
   1.703 +                   sugswithdots=1;
   1.704 +       }
   1.705 +
   1.706 +       /* parse in the flag used by forbidden words */
   1.707 +       if (strncmp(line,"KEEPCASE",8) == 0) {
   1.708 +          if (parse_flag(line, &keepcase, afflst)) {
   1.709 +             delete afflst;
   1.710 +             return 1;
   1.711 +          }
   1.712 +       }
   1.713 +
   1.714 +       /* parse in the flag used by `forceucase' */
   1.715 +       if (strncmp(line,"FORCEUCASE",10) == 0) {
   1.716 +          if (parse_flag(line, &forceucase, afflst)) {
   1.717 +             delete afflst;
   1.718 +             return 1;
   1.719 +          }
   1.720 +       }
   1.721 +
   1.722 +       /* parse in the flag used by `warn' */
   1.723 +       if (strncmp(line,"WARN",4) == 0) {
   1.724 +          if (parse_flag(line, &warn, afflst)) {
   1.725 +             delete afflst;
   1.726 +             return 1;
   1.727 +          }
   1.728 +       }
   1.729 +
   1.730 +       if (strncmp(line,"FORBIDWARN",10) == 0) {
   1.731 +                   forbidwarn=1;
   1.732 +       }
   1.733 +
   1.734 +       /* parse in the flag used by the affix generator */
   1.735 +       if (strncmp(line,"SUBSTANDARD",11) == 0) {
   1.736 +          if (parse_flag(line, &substandard, afflst)) {
   1.737 +             delete afflst;
   1.738 +             return 1;
   1.739 +          }
   1.740 +       }
   1.741 +
   1.742 +       if (strncmp(line,"CHECKSHARPS",11) == 0) {
   1.743 +                   checksharps=1;
   1.744 +       }
   1.745 +
   1.746 +       /* parse this affix: P - prefix, S - suffix */
   1.747 +       ft = ' ';
   1.748 +       if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
   1.749 +       if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
   1.750 +       if (ft != ' ') {
   1.751 +          if (dupflags_ini) {
   1.752 +            memset(dupflags, 0, sizeof(dupflags));
   1.753 +            dupflags_ini = 0;
   1.754 +          }
   1.755 +          if (parse_affix(line, ft, afflst, dupflags)) {
   1.756 +             delete afflst;
   1.757 +             process_pfx_tree_to_list();
   1.758 +             process_sfx_tree_to_list();
   1.759 +             return 1;
   1.760 +          }
   1.761 +       }
   1.762 +
   1.763 +    }
   1.764 +    delete afflst;
   1.765 +
   1.766 +    // convert affix trees to sorted list
   1.767 +    process_pfx_tree_to_list();
   1.768 +    process_sfx_tree_to_list();
   1.769 +
   1.770 +    // now we can speed up performance greatly taking advantage of the 
   1.771 +    // relationship between the affixes and the idea of "subsets".
   1.772 +
   1.773 +    // View each prefix as a potential leading subset of another and view
   1.774 +    // each suffix (reversed) as a potential trailing subset of another.
   1.775 +
   1.776 +    // To illustrate this relationship if we know the prefix "ab" is found in the
   1.777 +    // word to examine, only prefixes that "ab" is a leading subset of need be examined.
   1.778 +    // Furthermore is "ab" is not present then none of the prefixes that "ab" is
   1.779 +    // is a subset need be examined.
   1.780 +    // The same argument goes for suffix string that are reversed.
   1.781 +
   1.782 +    // Then, to top this off, why not examine the first char of the word to quickly
   1.783 +    // limit the set of prefixes to examine (i.e. the prefixes to examine must
   1.784 +    // be leading supersets of the first character of the word, if they exist).
   1.785 + 
   1.786 +    // To take advantage of this "subset" relationship, we need to add two links
   1.787 +    // from entry.  One to take next if the current prefix is found (call it nexteq)
   1.788 +    // and one to take next if the current prefix is not found (call it nextne).
   1.789 +
   1.790 +    // Since we have built ordered lists, all that remains is to properly initialize 
   1.791 +    // the nextne and nexteq pointers that relate them
   1.792 +
   1.793 +    process_pfx_order();
   1.794 +    process_sfx_order();
   1.795 +
   1.796 +    /* get encoding for CHECKCOMPOUNDCASE */
   1.797 +    if (!utf8) {
   1.798 +    char * enc = get_encoding();
   1.799 +    csconv = get_current_cs(enc);
   1.800 +    free(enc);
   1.801 +    enc = NULL;
   1.802 +
   1.803 +    char expw[MAXLNLEN];
   1.804 +    if (wordchars) {
   1.805 +        strcpy(expw, wordchars);
   1.806 +        free(wordchars);
   1.807 +    } else *expw = '\0';
   1.808 +
   1.809 +    for (int i = 0; i <= 255; i++) {
   1.810 +        if ( (csconv[i].cupper != csconv[i].clower) &&
   1.811 +            (! strchr(expw, (char) i))) {
   1.812 +                *(expw + strlen(expw) + 1) = '\0';
   1.813 +                *(expw + strlen(expw)) = (char) i;
   1.814 +        }
   1.815 +    }
   1.816 +
   1.817 +    wordchars = mystrdup(expw);
   1.818 +    }
   1.819 +
   1.820 +    // default BREAK definition
   1.821 +    if (numbreak == -1) {
   1.822 +        breaktable = (char **) malloc(sizeof(char *) * 3);
   1.823 +        if (!breaktable) return 1;
   1.824 +        breaktable[0] = mystrdup("-");
   1.825 +        breaktable[1] = mystrdup("^-");
   1.826 +        breaktable[2] = mystrdup("-$");
   1.827 +        if (breaktable[0] && breaktable[1] && breaktable[2]) numbreak = 3;
   1.828 +    }
   1.829 +    return 0;
   1.830 +}
   1.831 +
   1.832 +
   1.833 +// Index a prefix entry two ways: by the low byte of its flag (pFlag) and
   1.834 +// by its affix string (pStart, selected by the first byte of the string).
   1.835 +// Entries with an empty affix string are chained at pStart[0]; always returns 0.
   1.836 +
   1.837 +int AffixMgr::build_pfxtree(PfxEntry* pfxptr)
   1.838 +{
   1.839 +  PfxEntry * const entry = pfxptr;
   1.840 +  const char * const key = entry->getKey();
   1.841 +  const unsigned char flagidx = (unsigned char) (entry->getFlag() & 0x00FF);
   1.842 +
   1.843 +  // push the entry onto the front of the chain for its flag
   1.844 +  entry->setFlgNxt(pFlag[flagidx]);
   1.845 +  pFlag[flagidx] = entry;
   1.846 +
   1.847 +  // empty affix string: push onto the front of list 0 and stop
   1.848 +  if (*key == '\0') {
   1.849 +     entry->setNext(pStart[0]);
   1.850 +     pStart[0] = entry;
   1.851 +     return 0;
   1.852 +  }
   1.853 +
   1.854 +  // normal case: the new entry starts out as a leaf
   1.855 +  entry->setNextEQ(NULL);
   1.856 +  entry->setNextNE(NULL);
   1.857 +
   1.858 +  const unsigned char first = *((const unsigned char *)key);
   1.859 +  PfxEntry * node = pStart[first];
   1.860 +  if (!node) {
   1.861 +     // nothing stored under this first byte yet: the entry becomes the root
   1.862 +     pStart[first] = entry;
   1.863 +     return 0;
   1.864 +  }
   1.865 +
   1.866 +  // binary tree insertion, so that a sorted list can be generated later:
   1.867 +  // keys comparing <= descend through NextEQ, larger keys through NextNE
   1.868 +  for (;;) {
   1.869 +    PfxEntry * const parent = node;
   1.870 +    const bool go_eq = (strcmp(entry->getKey(), parent->getKey()) <= 0);
   1.871 +    node = go_eq ? parent->getNextEQ() : parent->getNextNE();
   1.872 +    if (node) continue;
   1.873 +    if (go_eq) parent->setNextEQ(entry);
   1.874 +    else parent->setNextNE(entry);
   1.875 +    break;
   1.876 +  }
   1.877 +  return 0;
   1.878 +}
   1.897 +
   1.898 +// Index a suffix entry two ways: by the low byte of its flag (sFlag) and,
   1.899 +// sorted by the reverse of the suffix string, by its key (sStart, selected by
   1.900 +// the key's first byte). Empty keys are chained at sStart[0]; always returns 0.
   1.901 +int AffixMgr::build_sfxtree(SfxEntry* sfxptr)
   1.902 +{
   1.903 +  SfxEntry * const entry = sfxptr;
   1.904 +  const char * const key = entry->getKey();
   1.905 +  const unsigned char flagidx = (unsigned char) (entry->getFlag() & 0x00FF);
   1.906 +
   1.907 +  // push the entry onto the front of the chain for its flag
   1.908 +  entry->setFlgNxt(sFlag[flagidx]);
   1.909 +  sFlag[flagidx] = entry;
   1.910 +
   1.911 +  // empty affix string: push onto the front of list 0 and stop
   1.912 +  if (*key == '\0') {
   1.913 +     entry->setNext(sStart[0]);
   1.914 +     sStart[0] = entry;
   1.915 +     return 0;
   1.916 +  }
   1.917 +
   1.918 +  // normal case: the new entry starts out as a leaf
   1.919 +  entry->setNextEQ(NULL);
   1.920 +  entry->setNextNE(NULL);
   1.921 +
   1.922 +  const unsigned char first = *((const unsigned char *)key);
   1.923 +  SfxEntry * node = sStart[first];
   1.924 +  if (!node) {
   1.925 +     // nothing stored under this first byte yet: the entry becomes the root
   1.926 +     sStart[first] = entry;
   1.927 +     return 0;
   1.928 +  }
   1.929 +
   1.930 +  // binary tree insertion, so that a sorted list can be generated later:
   1.931 +  // keys comparing <= descend through NextEQ, larger keys through NextNE
   1.932 +  for (;;) {
   1.933 +    SfxEntry * const parent = node;
   1.934 +    const bool go_eq = (strcmp(entry->getKey(), parent->getKey()) <= 0);
   1.935 +    node = go_eq ? parent->getNextEQ() : parent->getNextNE();
   1.936 +    if (node) continue;
   1.937 +    if (go_eq) parent->setNextEQ(entry);
   1.938 +    else parent->setNextNE(entry);
   1.939 +    break;
   1.940 +  }
   1.941 +  return 0;
   1.942 +}
   1.961 +
   1.962 +// Flatten each prefix tree in pStart[1..SETSIZE-1] into a sorted list; returns 0.
   1.963 +int AffixMgr::process_pfx_tree_to_list()
   1.964 +{
   1.965 +  int slot = 0;   // slot 0 (empty affix strings) is already a plain list
   1.966 +  while (++slot < SETSIZE)
   1.967 +    pStart[slot] = process_pfx_in_order(pStart[slot], NULL);
   1.968 +  return 0;
   1.969 +}
   1.970 +
   1.971 +
   1.972 +PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr)
   1.973 +{
   1.974 +  // Recursive walk: NextNE subtree first, then the node, then the NextEQ
   1.975 +  // subtree. nptr is the list built so far; the return value is its new head.
   1.976 +  if (!ptr) return nptr;
   1.977 +  PfxEntry * const tail = process_pfx_in_order(ptr->getNextNE(), nptr);
   1.978 +  ptr->setNext(tail);
   1.979 +  return process_pfx_in_order(ptr->getNextEQ(), ptr);
   1.980 +}
   1.981 +
   1.982 +
   1.983 +// Flatten each suffix tree in sStart[1..SETSIZE-1] into a sorted list; returns 0.
   1.984 +int AffixMgr:: process_sfx_tree_to_list()
   1.985 +{
   1.986 +  int slot = 0;   // slot 0 (empty affix strings) is already a plain list
   1.987 +  while (++slot < SETSIZE)
   1.988 +    sStart[slot] = process_sfx_in_order(sStart[slot], NULL);
   1.989 +  return 0;
   1.990 +}
   1.991 +
   1.992 +SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr)
   1.993 +{
   1.994 +  // Recursive walk: NextNE subtree first, then the node, then the NextEQ
   1.995 +  // subtree. nptr is the list built so far; the return value is its new head.
   1.996 +  if (!ptr) return nptr;
   1.997 +  SfxEntry * const tail = process_sfx_in_order(ptr->getNextNE(), nptr);
   1.998 +  ptr->setNext(tail);
   1.999 +  return process_sfx_in_order(ptr->getNextEQ(), ptr);
  1.1000 +}
  1.1001 +
  1.1002 +
  1.1003 +// Reinitialize the PfxEntry links NextEQ and NextNE over the sorted lists in
  1.1004 +// pStart[1..SETSIZE-1] to speed searching, using the idea of leading subsets.
          // After this pass NextEQ is the entry to try when the current prefix matched
          // and NextNE the entry to try when it did not (see prefix_check).
          // Always returns 0.
  1.1005 +int AffixMgr::process_pfx_order()
  1.1006 +{
  1.1007 +    PfxEntry* ptr;
  1.1008 +
  1.1009 +    // loop through each prefix list starting point (slot 0 is left untouched)
  1.1010 +    for (int i=1; i < SETSIZE; i++) {
  1.1011 +
  1.1012 +         ptr = pStart[i];
  1.1013 +
  1.1014 +         // look through the remainder of the list
  1.1015 +         // and find the next entry with an affix that
  1.1016 +         // the current one is not a subset of;
  1.1017 +         // mark that as destination for NextNE.
  1.1018 +         // Use the next entry in the list as NextEQ
  1.1019 +         // if the current one is a subset of it.
  1.1020 +
  1.1021 +         for (; ptr != NULL; ptr = ptr->getNext()) {
  1.1022 +
  1.1023 +             PfxEntry * nptr = ptr->getNext();
  1.1024 +             for (; nptr != NULL; nptr = nptr->getNext()) {
  1.1025 +                 if (! isSubset( ptr->getKey() , nptr->getKey() )) break;
  1.1026 +             }
  1.1027 +             ptr->setNextNE(nptr);
  1.1028 +             ptr->setNextEQ(NULL);
  1.1029 +             if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey())) 
  1.1030 +                 ptr->setNextEQ(ptr->getNext());
  1.1031 +         }
  1.1032 +
  1.1033 +         // now clean up by adding smart search termination:
  1.1034 +         // mptr is the last following entry that the current key is a
  1.1035 +         // leading subset of; a mismatch at mptr ends the search,
  1.1036 +         // so its NextNE is cleared
  1.1037 +
  1.1038 +         ptr = pStart[i];
  1.1039 +         for (; ptr != NULL; ptr = ptr->getNext()) {
  1.1040 +             PfxEntry * nptr = ptr->getNext();
  1.1041 +             PfxEntry * mptr = NULL;
  1.1042 +             for (; nptr != NULL; nptr = nptr->getNext()) {
  1.1043 +                 if (! isSubset(ptr->getKey(),nptr->getKey())) break;
  1.1044 +                 mptr = nptr;
  1.1045 +             }
  1.1046 +             if (mptr) mptr->setNextNE(NULL);
  1.1047 +         }
  1.1048 +    }
  1.1049 +    return 0;
  1.1050 +}
  1.1051 +
  1.1052 +// Initialize the SfxEntry links NextEQ and NextNE over the sorted lists in
  1.1053 +// sStart[1..SETSIZE-1] to speed searching, using the idea of leading subsets
          // of the entry keys (presumably the reversed suffix strings -- confirm
          // against SfxEntry::getKey). Always returns 0.
  1.1054 +int AffixMgr::process_sfx_order()
  1.1055 +{
  1.1056 +    SfxEntry* ptr;
  1.1057 +
  1.1058 +    // loop through each suffix list starting point (slot 0 is left untouched)
  1.1059 +    for (int i=1; i < SETSIZE; i++) {
  1.1060 +
  1.1061 +         ptr = sStart[i];
  1.1062 +
  1.1063 +         // look through the remainder of the list
  1.1064 +         // and find the next entry with an affix that
  1.1065 +         // the current one is not a subset of;
  1.1066 +         // mark that as destination for NextNE.
  1.1067 +         // Use the next entry in the list as NextEQ
  1.1068 +         // if the current one is a subset of it.
  1.1069 +
  1.1070 +         for (; ptr != NULL; ptr = ptr->getNext()) {
  1.1071 +             SfxEntry * nptr = ptr->getNext();
  1.1072 +             for (; nptr != NULL; nptr = nptr->getNext()) {
  1.1073 +                 if (! isSubset(ptr->getKey(),nptr->getKey())) break;
  1.1074 +             }
  1.1075 +             ptr->setNextNE(nptr);
  1.1076 +             ptr->setNextEQ(NULL);
  1.1077 +             if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey())) 
  1.1078 +                 ptr->setNextEQ(ptr->getNext());
  1.1079 +         }
  1.1080 +
  1.1081 +
  1.1082 +         // now clean up by adding smart search termination:
  1.1083 +         // mptr is the last following entry that the current key is a
  1.1084 +         // leading subset of; a mismatch at mptr ends the search,
  1.1085 +         // so its NextNE is cleared
  1.1086 +
  1.1087 +         ptr = sStart[i];
  1.1088 +         for (; ptr != NULL; ptr = ptr->getNext()) {
  1.1089 +             SfxEntry * nptr = ptr->getNext();
  1.1090 +             SfxEntry * mptr = NULL;
  1.1091 +             for (; nptr != NULL; nptr = nptr->getNext()) {
  1.1092 +                 if (! isSubset(ptr->getKey(),nptr->getKey())) break;
  1.1093 +                 mptr = nptr;
  1.1094 +             }
  1.1095 +             if (mptr) mptr->setNextNE(NULL);
  1.1096 +         }
  1.1097 +    }
  1.1098 +    return 0;
  1.1099 +}
  1.1100 +
  1.1101 +// Append " ", MORPH_FLAG and the encoded form of flag to result (dictionary debugging)
  1.1102 +void AffixMgr::debugflag(char * result, unsigned short flag) {
  1.1103 +    char * const encoded = encode_flag(flag);
  1.1104 +    mystrcat(result, " ", MAXLNLEN);
  1.1105 +    mystrcat(result, MORPH_FLAG, MAXLNLEN);
  1.1106 +    // without an encoded flag only the separator and the tag are appended
  1.1107 +    if (!encoded) return;
  1.1108 +    mystrcat(result, encoded, MAXLNLEN);
  1.1109 +    free(encoded);
  1.1110 +}
  1.1111 +
  1.1112 +// Length of condition st: a [...] group counts once, as does each byte outside a group
  1.1113 +int AffixMgr::condlen(char * st)
  1.1114 +{
  1.1115 +  int count = 0;
  1.1116 +  bool in_group = false;
  1.1117 +  for (const char * p = st; *p != '\0'; ++p) {
  1.1118 +    const char c = *p;
  1.1119 +    if (c == '[') { in_group = true; ++count; continue; }
  1.1120 +    if (c == ']') { in_group = false; continue; }
  1.1121 +    if (in_group) continue;
  1.1122 +    // in UTF-8 mode bytes with both top bits set (11xxxxxx) are not counted
  1.1123 +    if (!utf8 || !(c & 0x80) || ((c & 0xc0) == 0x80)) ++count;
  1.1124 +  }
  1.1125 +  return count;
  1.1126 +}
  1.1127 +
  1.1128 +int AffixMgr::encodeit(affentry &entry, char * cs)
  1.1129 +{
  1.1130 +  // Store condition cs in entry; returns 1 only if copying a long tail fails.
  1.1131 +  if (strcmp(cs,".") == 0) {   // a lone "." is stored as an empty condition
  1.1132 +    entry.numconds = 0;
  1.1133 +    entry.c.conds[0] = '\0';
  1.1134 +    return 0;
  1.1135 +  }
  1.1136 +  entry.numconds = (char) condlen(cs);
  1.1137 +  strncpy(entry.c.conds, cs, MAXCONDLEN);
  1.1138 +  // strncpy zero-pads short input, so a non-zero last byte means conds is full
  1.1139 +  const bool is_long = entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN];
  1.1140 +  if (!is_long) return 0;
  1.1141 +  entry.opts += aeLONGCOND;   // long condition: the rest goes to conds2
  1.1142 +  entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
  1.1143 +  return entry.c.l.conds2 ? 0 : 1;
  1.1144 +}
  1.1145 +
  1.1146 +// return 1 if s1 is a leading subset of s2 (a '.' in s1 matches any byte of s2)
  1.1147 +inline int AffixMgr::isSubset(const char * s1, const char * s2)
  1.1148 + {
  1.1149 +    for (; *s1 != '\0'; ++s1, ++s2) {
  1.1150 +        const bool same = (*s1 == *s2);
  1.1151 +        if (!same && (*s1 != '.')) return 0;
  1.1152 +    }
  1.1153 +    return 1;
  1.1154 + }
  1.1155 +
  1.1156 +
  1.1157 +// Check word for prefixes.
          // Tries the zero-length prefixes chained at pStart[0], then the list selected
          // by the first byte of word, following NextEQ after a key match and NextNE
          // after a mismatch. Returns the first non-NULL result of
          // PfxEntry::checkword, or NULL if no prefix entry accepts the word.
          // Side effects: pfx, pfxappnd and sfxappnd are reset to NULL on entry and
          // pfx is set to the accepting entry on success.
  1.1158 +struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compound,
  1.1159 +    const FLAG needflag)
  1.1160 +{
  1.1161 +    struct hentry * rv= NULL;
  1.1162 +
  1.1163 +    pfx = NULL;
  1.1164 +    pfxappnd = NULL;
  1.1165 +    sfxappnd = NULL;
  1.1166 +    
  1.1167 +    // first handle the special case of 0 length prefixes
  1.1168 +    PfxEntry * pe = pStart[0];
  1.1169 +    while (pe) {
  1.1170 +        if (
  1.1171 +            // fogemorpheme: outside compounds, skip entries whose continuation
                      // flags contain onlyincompound
  1.1172 +              ((in_compound != IN_CPD_NOT) || !(pe->getCont() &&
  1.1173 +                  (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
  1.1174 +            // permit prefixes in compounds: at IN_CPD_END the entry must
                      // carry compoundpermitflag
  1.1175 +              ((in_compound != IN_CPD_END) || (pe->getCont() &&
  1.1176 +                  (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))
  1.1177 +              ) {
  1.1178 +                    // check prefix
  1.1179 +                    rv = pe->checkword(word, len, in_compound, needflag);
  1.1180 +                    if (rv) {
  1.1181 +                        pfx=pe; // BUG: pfx not stateless
  1.1182 +                        return rv;
  1.1183 +                    }
  1.1184 +             }
  1.1185 +       pe = pe->getNext();
  1.1186 +    }
  1.1187 +  
  1.1188 +    // now handle the general case: the list for the first byte of word
  1.1189 +    unsigned char sp = *((const unsigned char *)word);
  1.1190 +    PfxEntry * pptr = pStart[sp];
  1.1191 +
  1.1192 +    while (pptr) {
  1.1193 +        if (isSubset(pptr->getKey(),word)) {
  1.1194 +             if (
  1.1195 +            // fogemorpheme (same filter as for the zero-length prefixes)
  1.1196 +              ((in_compound != IN_CPD_NOT) || !(pptr->getCont() &&
  1.1197 +                  (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
  1.1198 +            // permit prefixes in compounds
  1.1199 +              ((in_compound != IN_CPD_END) || (pptr->getCont() &&
  1.1200 +                  (TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen()))))
  1.1201 +              ) {
  1.1202 +            // check prefix
  1.1203 +                  rv = pptr->checkword(word, len, in_compound, needflag);
  1.1204 +                  if (rv) {
  1.1205 +                    pfx=pptr; // BUG: pfx not stateless
  1.1206 +                    return rv;
  1.1207 +                  }
  1.1208 +             }
  1.1209 +             pptr = pptr->getNextEQ();
  1.1210 +        } else {
  1.1211 +             pptr = pptr->getNextNE();
  1.1212 +        }
  1.1213 +    }
  1.1214 +    
  1.1215 +    return NULL;
  1.1216 +}
  1.1217 +
  1.1218 +// Check word for prefixes, delegating to PfxEntry::check_twosfx.
          // Same traversal as prefix_check (zero-length prefixes first, then the list
          // for the first byte of word), but without the compound flag filters.
          // Returns the first non-NULL result, or NULL. pfx and sfxappnd are reset to
          // NULL on entry.
  1.1219 +struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
  1.1220 +    char in_compound, const FLAG needflag)
  1.1221 +{
  1.1222 +    struct hentry * rv= NULL;
  1.1223 +
  1.1224 +    pfx = NULL;
  1.1225 +    sfxappnd = NULL;
  1.1226 +    
  1.1227 +    // first handle the special case of 0 length prefixes
  1.1228 +    PfxEntry * pe = pStart[0];
  1.1229 +    
  1.1230 +    while (pe) {
  1.1231 +        rv = pe->check_twosfx(word, len, in_compound, needflag);
                  // NOTE(review): unlike the general case below, pfx is not set here
  1.1232 +        if (rv) return rv;
  1.1233 +        pe = pe->getNext();
  1.1234 +    }
  1.1235 +  
  1.1236 +    // now handle the general case
  1.1237 +    unsigned char sp = *((const unsigned char *)word);
  1.1238 +    PfxEntry * pptr = pStart[sp];
  1.1239 +
  1.1240 +    while (pptr) {
  1.1241 +        if (isSubset(pptr->getKey(),word)) {
  1.1242 +            rv = pptr->check_twosfx(word, len, in_compound, needflag);
  1.1243 +            if (rv) {
  1.1244 +                pfx = pptr;
  1.1245 +                return rv;
  1.1246 +            }
  1.1247 +            pptr = pptr->getNextEQ();
  1.1248 +        } else {
  1.1249 +             pptr = pptr->getNextNE();
  1.1250 +        }
  1.1251 +    }
  1.1252 +    
  1.1253 +    return NULL;
  1.1254 +}
  1.1255 +
  1.1256 +// Check word for prefixes and collect morphological descriptions.
          // Concatenates (bounded by MAXLNLEN) the strings returned by
          // PfxEntry::check_morph for all matching entries instead of stopping at the
          // first hit. Returns a mystrdup() copy of the collected text, or NULL if
          // nothing was collected. pfx and sfxappnd are reset to NULL on entry.
  1.1257 +char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
  1.1258 +    const FLAG needflag)
  1.1259 +{
  1.1260 +    char * st;
  1.1261 +
  1.1262 +    char result[MAXLNLEN];
  1.1263 +    result[0] = '\0';
  1.1264 +
  1.1265 +    pfx = NULL;
  1.1266 +    sfxappnd = NULL;
  1.1267 +    
  1.1268 +    // first handle the special case of 0 length prefixes
  1.1269 +    PfxEntry * pe = pStart[0];
  1.1270 +    while (pe) {
  1.1271 +       st = pe->check_morph(word,len,in_compound, needflag);
  1.1272 +       if (st) {
  1.1273 +            mystrcat(result, st, MAXLNLEN);
  1.1274 +            free(st);
  1.1275 +       }
  1.1276 +       // if (rv) return rv;
  1.1277 +       pe = pe->getNext();
  1.1278 +    }
  1.1279 +  
  1.1280 +    // now handle the general case
  1.1281 +    unsigned char sp = *((const unsigned char *)word);
  1.1282 +    PfxEntry * pptr = pStart[sp];
  1.1283 +
  1.1284 +    while (pptr) {
  1.1285 +        if (isSubset(pptr->getKey(),word)) {
  1.1286 +            st = pptr->check_morph(word,len,in_compound, needflag);
  1.1287 +            if (st) {
  1.1288 +              // fogemorpheme: outside compounds, drop results of entries whose
                        // continuation flags contain onlyincompound
  1.1289 +              if ((in_compound != IN_CPD_NOT) || !((pptr->getCont() && 
  1.1290 +                        (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen()))))) {
  1.1291 +                    mystrcat(result, st, MAXLNLEN);
  1.1292 +                    pfx = pptr;
  1.1293 +                }
  1.1294 +                free(st);
  1.1295 +            }
  1.1296 +            pptr = pptr->getNextEQ();
  1.1297 +        } else {
  1.1298 +            pptr = pptr->getNextNE();
  1.1299 +        }
  1.1300 +    }
  1.1301 +    
  1.1302 +    if (*result) return mystrdup(result);
  1.1303 +    return NULL;
  1.1304 +}
  1.1305 +
  1.1306 +
  1.1307 +// Check word for prefixes and collect morphological descriptions,
          // delegating to PfxEntry::check_twosfx_morph.
          // Concatenates (bounded by MAXLNLEN) the strings returned for all matching
          // entries. Returns a mystrdup() copy of the collected text, or NULL if
          // nothing was collected. pfx and sfxappnd are reset to NULL on entry; pfx is
          // left at the last general-case entry that produced a description.
  1.1308 +char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
  1.1309 +    char in_compound, const FLAG needflag)
  1.1310 +{
  1.1311 +    char * st;
  1.1312 +
  1.1313 +    char result[MAXLNLEN];
  1.1314 +    result[0] = '\0';
  1.1315 +
  1.1316 +    pfx = NULL;
  1.1317 +    sfxappnd = NULL;
  1.1318 +    
  1.1319 +    // first handle the special case of 0 length prefixes
  1.1320 +    PfxEntry * pe = pStart[0];
  1.1321 +    while (pe) {
  1.1322 +        st = pe->check_twosfx_morph(word,len,in_compound, needflag);
  1.1323 +        if (st) {
  1.1324 +            mystrcat(result, st, MAXLNLEN);
  1.1325 +            free(st);
  1.1326 +        }
  1.1327 +        pe = pe->getNext();
  1.1328 +    }
  1.1329 +  
  1.1330 +    // now handle the general case
  1.1331 +    unsigned char sp = *((const unsigned char *)word);
  1.1332 +    PfxEntry * pptr = pStart[sp];
  1.1333 +
  1.1334 +    while (pptr) {
  1.1335 +        if (isSubset(pptr->getKey(),word)) {
  1.1336 +            st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
  1.1337 +            if (st) {
  1.1338 +                mystrcat(result, st, MAXLNLEN);
  1.1339 +                free(st);
  1.1340 +                pfx = pptr;
  1.1341 +            }
  1.1342 +            pptr = pptr->getNextEQ();
  1.1343 +        } else {
  1.1344 +            pptr = pptr->getNextNE();
  1.1345 +        }
  1.1346 +    }
  1.1347 +    
  1.1348 +    if (*result) return mystrdup(result);
  1.1349 +    return NULL;
  1.1350 +}
  1.1351 +
  1.1352 +// Is word a non compound with a REP substitution (see checkcompoundrep)?
          // For every reptable entry and every occurrence of its pattern in word, the
          // occurrence is replaced by pattern2 and the result is passed to
          // candidate_check. Returns 1 on the first accepted candidate, else 0.
          // Words shorter than 2 bytes (wl < 2) and an empty REP table yield 0.
  1.1353 +int AffixMgr::cpdrep_check(const char * word, int wl)
  1.1354 +{
  1.1355 +  char candidate[MAXLNLEN];
  1.1356 +  const char * r;
  1.1357 +  int lenr, lenp;
  1.1358 +
  1.1359 +  if ((wl < 2) || !numrep) return 0;
  1.1360 +
  1.1361 +  for (int i=0; i < numrep; i++ ) {
  1.1362 +      r = word;
  1.1363 +      lenr = strlen(reptable[i].pattern2);
  1.1364 +      lenp = strlen(reptable[i].pattern);
  1.1365 +      // search every occurrence of the pattern in the word
  1.1366 +      while ((r=strstr(r, reptable[i].pattern)) != NULL) {
  1.1367 +          // size check first: nothing may be written to candidate before it passes
  1.1368 +          if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
  1.1369 +          strncpy(candidate, word, r-word); // only the part before the match
  1.1370 +          strcpy(candidate+(r-word),reptable[i].pattern2);
  1.1371 +          strcpy(candidate+(r-word)+lenr, r+lenp);
  1.1372 +          if (candidate_check(candidate,strlen(candidate))) return 1;
  1.1373 +          r++; // search for the next letter
  1.1374 +      }
  1.1375 +   }
  1.1376 +   return 0;
  1.1377 +}
  1.1377 +
  1.1378 +// forbid compoundings when there are special patterns at word bound
          // Returns 1 if an entry of checkcpdtable matches at byte offset pos of word:
          // pattern2 must be a leading subset of word + pos, the optional flags
          // cond / cond2 must be present on r1 / r2 (when those entries are given),
          // and pattern must end at pos. r1 is compared against the text ending at
          // pos; r2 is presumably the entry of the part starting at pos -- confirm
          // with the caller. The parameter affixed is not used.
  1.1379 +int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed)
  1.1380 +{
  1.1381 +  int len;
  1.1382 +  for (int i = 0; i < numcheckcpd; i++) {
  1.1383 +      if (isSubset(checkcpdtable[i].pattern2, word + pos) &&
  1.1384 +        (!r1 || !checkcpdtable[i].cond ||
  1.1385 +          (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) &&
  1.1386 +        (!r2 || !checkcpdtable[i].cond2 ||
  1.1387 +          (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) &&
  1.1388 +        // zero length pattern => only TESTAFF
  1.1389 +        // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
  1.1390 +        // r1 may be NULL (see above) and the pattern may be longer than pos:
  1.1391 +        // both are checked so that nothing before the start of word is read
  1.1392 +        (!*(checkcpdtable[i].pattern) || (
  1.1393 +            (*(checkcpdtable[i].pattern)=='0' && r1 && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
  1.1394 +            (*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) && len <= pos &&
  1.1395 +                strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
  1.1396 +            return 1;
  1.1397 +        }
  1.1398 +  }
  1.1399 +  return 0;
  1.1400 +}
  1.1399 +
  1.1400 +// forbid compounding with neighbouring upper and lower case characters at word bounds
          // Returns 1 if the character before offset pos or the character at pos is
          // upper case (in UTF-8 mode: unchanged by unicodetoupper) and neither of the
          // two is '-'; returns 0 otherwise. pos is expected to be >= 1.
  1.1401 +int AffixMgr::cpdcase_check(const char * word, int pos)
  1.1402 +{
  1.1403 +  if (utf8) {
  1.1404 +      w_char u, w;
  1.1405 +      const char * p;
  1.1406 +      u8_u16(&u, 1, word + pos);
  1.1407 +      // step back over continuation bytes, but never before the start of word
  1.1408 +      for (p = word + pos - 1; p > word && (*p & 0xc0) == 0x80; p--);
  1.1409 +      u8_u16(&w, 1, p);
  1.1410 +      unsigned short a = (u.h << 8) + u.l;
  1.1411 +      unsigned short b = (w.h << 8) + w.l;
  1.1412 +      if (((unicodetoupper(a, langnum) == a) || (unicodetoupper(b, langnum) == b)) &&
  1.1413 +          (a != '-') && (b != '-')) return 1;
  1.1414 +  } else {
  1.1415 +      unsigned char a = *(word + pos - 1);
  1.1416 +      unsigned char b = *(word + pos);
  1.1417 +      if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) return 1;
  1.1418 +  }
  1.1419 +  return 0;
  1.1420 +}
  1.1420 +
  1.1421 +// Check compound patterns (the entries of defcpdtable).
          // rv is stored at (*words)[wnum] and the sequence (*words)[0..wnum] is matched
          // against every pattern in defcpdtable; in a pattern, '*' and '?' after a
          // flag act as quantifiers on that flag. If *words is NULL, def is used as
          // the word buffer. Returns 1 when a pattern matches; with all == 0 a match
          // that has not consumed the whole pattern is also accepted. On failure
          // (*words)[wnum] is reset to NULL (and *words too if def was installed
          // here) and 0 is returned.
  1.1422 +int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** def, char all)
  1.1423 +{
  1.1424 +  signed short btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking
  1.1425 +  signed short btwp[MAXWORDLEN]; // word positions for metacharacters
  1.1426 +  int btnum[MAXWORDLEN]; // number of matched characters in metacharacter positions
  1.1427 +  short bt = 0;  
  1.1428 +  int i, j;
  1.1429 +  int ok;
  1.1430 +  int w = 0; // set to 1 when def was installed as *words by this call
  1.1431 +
  1.1432 +  if (!*words) {
  1.1433 +    w = 1;
  1.1434 +    *words = def;
  1.1435 +  }
  1.1436 +
  1.1437 +  if (!*words) {
  1.1438 +    return 0;
  1.1439 +  }
  1.1440 +
  1.1441 +  (*words)[wnum] = rv;
  1.1442 +
  1.1443 +  // has the last word COMPOUNDRULE flag?
  1.1444 +  if (rv->alen == 0) {
  1.1445 +    (*words)[wnum] = NULL;
  1.1446 +    if (w) *words = NULL;
  1.1447 +    return 0;
  1.1448 +  }
            // quick rejection: rv must carry at least one flag used by some pattern
  1.1449 +  ok = 0;
  1.1450 +  for (i = 0; i < numdefcpd; i++) {
  1.1451 +    for (j = 0; j < defcpdtable[i].len; j++) {
  1.1452 +       if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
  1.1453 +          TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1;
  1.1454 +    }
  1.1455 +  }
  1.1456 +  if (ok == 0) {
  1.1457 +    (*words)[wnum] = NULL;
  1.1458 +    if (w) *words = NULL;
  1.1459 +    return 0;
  1.1460 +  }
  1.1461 +
            // try each pattern in turn
  1.1462 +  for (i = 0; i < numdefcpd; i++) {
  1.1463 +    signed short pp = 0; // pattern position
  1.1464 +    signed short wp = 0; // "words" position
  1.1465 +    int ok2;
  1.1466 +    ok = 1;
  1.1467 +    ok2 = 1;
  1.1468 +    do {
  1.1469 +      while ((pp < defcpdtable[i].len) && (wp <= wnum)) {
  1.1470 +        if (((pp+1) < defcpdtable[i].len) &&
  1.1471 +          ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) {
                      // quantified flag: '?' consumes at most the word at wp,
                      // '*' may consume words up to wnum
  1.1472 +            int wend = (defcpdtable[i].def[pp+1] == '?') ? wp : wnum;
  1.1473 +            ok2 = 1;
  1.1474 +            pp+=2;
                      // remember where this quantifier started, for backtracking
  1.1475 +            btpp[bt] = pp;
  1.1476 +            btwp[bt] = wp;
  1.1477 +            while (wp <= wend) {
  1.1478 +                if (!(*words)[wp]->alen || 
  1.1479 +                  !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp-2], (*words)[wp]->alen)) {
  1.1480 +                    ok2 = 0;
  1.1481 +                    break;
  1.1482 +                }
  1.1483 +                wp++;
  1.1484 +            }
  1.1485 +            if (wp <= wnum) ok2 = 0;
  1.1486 +            btnum[bt] = wp - btwp[bt];
  1.1487 +            if (btnum[bt] > 0) bt++;
  1.1488 +            if (ok2) break;
  1.1489 +        } else {
                      // plain flag: the word at wp must carry it
  1.1490 +            ok2 = 1;
  1.1491 +            if (!(*words)[wp] || !(*words)[wp]->alen || 
  1.1492 +              !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], (*words)[wp]->alen)) {
  1.1493 +                ok = 0;
  1.1494 +                break;
  1.1495 +            }
  1.1496 +            pp++;
  1.1497 +            wp++;
                      // pattern exhausted while words remain => no match
  1.1498 +            if ((defcpdtable[i].len == pp) && !(wp > wnum)) ok = 0;
  1.1499 +        }
  1.1500 +      }
  1.1501 +    if (ok && ok2) { 
                // skip trailing quantified flags; success if nothing else remains
  1.1502 +        int r = pp;
  1.1503 +        while ((defcpdtable[i].len > r) && ((r+1) < defcpdtable[i].len) &&
  1.1504 +            ((defcpdtable[i].def[r+1] == '*') || (defcpdtable[i].def[r+1] == '?'))) r+=2;
  1.1505 +        if (defcpdtable[i].len <= r) return 1;
  1.1506 +    }
  1.1507 +    // backtrack: give back one word from the most recent quantifier,
            // dropping quantifiers that have nothing left to give back
  1.1508 +    if (bt) do {
  1.1509 +        ok = 1;
  1.1510 +        btnum[bt - 1]--;
  1.1511 +        pp = btpp[bt - 1];
  1.1512 +        wp = btwp[bt - 1] + (signed short) btnum[bt - 1];
  1.1513 +    } while ((btnum[bt - 1] < 0) && --bt);
  1.1514 +  } while (bt);
  1.1515 +
  1.1516 +  if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) return 1;
  1.1517 +
  1.1518 +  // check zero ending
  1.1519 +  while (ok && ok2 && (defcpdtable[i].len > pp) && ((pp+1) < defcpdtable[i].len) &&
  1.1520 +    ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) pp+=2;
  1.1521 +  if (ok && ok2 && (defcpdtable[i].len <= pp)) return 1;
  1.1522 +  }
          // no pattern matched: undo the changes made to the word buffer
  1.1523 +  (*words)[wnum] = NULL;
  1.1524 +  if (w) *words = NULL;
  1.1525 +  return 0;
  1.1526 +}
  1.1527 +
  1.1528 +inline int AffixMgr::candidate_check(const char * word, int len)
  1.1529 +{
  1.1530 +  // Returns 1 if word is found by lookup() or accepted by affix_check(),
  1.1531 +  // otherwise 0.
  1.1532 +  if (lookup(word)) return 1;
  1.1533 +
  1.1534 +//  a separate pass is kept disabled here:
  1.1535 +//  rv = prefix_check(word,len,1);
  1.1536 +//  if (rv) return 1;
  1.1537 +
  1.1538 +  return affix_check(word,len) ? 1 : 0;
  1.1539 +}
  1.1542 +
  1.1543 +// count the vowels (cpdvowels / cpdvowels_utf16) of word for compound-checking
  1.1544 +short AffixMgr::get_syllable(const char * word, int wlen)
  1.1545 +{
  1.1546 +    if (cpdmaxsyllable==0) return 0;
  1.1547 +    short vowels = 0;
  1.1548 +    if (utf8) {
  1.1549 +        if (!cpdvowels_utf16) return vowels;
  1.1550 +        w_char wide[MAXWORDUTF8LEN];
  1.1551 +        int remaining = u8_u16(wide, MAXWORDUTF8LEN, word);
  1.1552 +        while (remaining > 0) {
  1.1553 +            if (flag_bsearch((unsigned short *) cpdvowels_utf16,
  1.1554 +                ((unsigned short *) wide)[--remaining], cpdvowels_utf16_len)) vowels++;
  1.1555 +        }
  1.1556 +        return vowels;
  1.1557 +    }
  1.1558 +    int k = 0;
  1.1559 +    while (k < wlen) {
  1.1560 +        if (strchr(cpdvowels, word[k++])) vowels++;
  1.1561 +    }
  1.1562 +    return vowels;
  1.1563 +}
  1.1564 +
  1.1565 +void AffixMgr::setcminmax(int * cmin, int * cmax, const char * word, int len) {
              // Compute the byte offsets *cmin and *cmax from cpdmin for a word of len
              // bytes (compound_check iterates i from cmin up to, but excluding, cmax).
  1.1566 +    if (utf8) {
  1.1567 +        int i;
                  // advance *cmin over cpdmin characters (stopping at the end of word);
                  // the inner loop skips UTF-8 continuation bytes (10xxxxxx)
  1.1568 +        for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) {
  1.1569 +          for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++);
  1.1570 +        }
                  // move *cmax back from len over cpdmin - 1 characters
  1.1571 +        for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax; i++) {
  1.1572 +          for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--);
  1.1573 +        }
  1.1574 +    } else {
                  // single-byte encodings: characters and bytes coincide
  1.1575 +        *cmin = cpdmin;
  1.1576 +        *cmax = len - cpdmin + 1;
  1.1577 +    }
  1.1578 +}
  1.1579 +
  1.1580 +
  1.1581 +// check if compound word is correctly spelled: for each split offset i in [cmin, cmax) validate st[0..i) as first part, then the rest as a root, an affixed form or (recursively) a compound
  1.1582 +// returns the hentry of the first part (rv_first) on success, NULL otherwise; hu_mov_rule = spec. Hungarian rule (XXX)
  1.1583 +struct hentry * AffixMgr::compound_check(const char * word, int len, // word: NUL-terminated candidate (copied into st); len: its length in bytes
  1.1584 +    short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL, // running word/syllable counts; recursion only while wordnum < maxwordnum; wnum indexes words[]; non-NULL words starts with onlycpdrule = 1
  1.1585 +    char hu_mov_rule = 0, char is_sug = 0, int * info = NULL) // is_sug: also reject entries carrying the nosuggest flag; info: optional flags, SPELL_ORIGCAP is consulted for forceucase
  1.1586 +{
  1.1587 +    int i; // current split offset (byte index into st / word)
  1.1588 +    short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; // saved counters, restored when an attempt fails
  1.1589 +    struct hentry * rv = NULL; // entry currently being examined
  1.1590 +    struct hentry * rv_first; // entry accepted for the first part; returned on success
  1.1591 +    struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
  1.1592 +    char st [MAXWORDUTF8LEN + 4]; // working copy of word; temporarily cut with '\0' at the split offset
  1.1593 +    char ch = '\0'; // character overwritten at st[i], kept for restoring
  1.1594 +    int cmin; // first split offset to try
  1.1595 +    int cmax; // split offsets stop before this value
  1.1596 +    int striple = 0; // set when the two characters before the split are equal (simplifiedtriple)
  1.1597 +    int scpd = 0; // 0 = plain pass; k > 0 = trying the replacement of checkcpdtable[k-1]
  1.1598 +    int soldi = 0; // original i saved while a replacement pass shifts i; 0 = nothing saved
  1.1599 +    int oldcmin = 0; // cmin saved during a replacement pass
  1.1600 +    int oldcmax = 0; // cmax saved during a replacement pass
  1.1601 +    int oldlen = 0; // len saved during a replacement pass
  1.1602 +    int checkedstriple = 0; // set once the striple retry (i stepped back by one) has run
  1.1603 +    int onlycpdrule; // 1 = first parts are accepted only through defcpd_check
  1.1604 +    int affixed = 0; // 1 when lookup gave no usable homonym for the first part; passed to cpdpat_check
  1.1605 +    hentry ** oldwords = words; // caller's words pointer; words is reset to it for every split offset
  1.1606 +
  1.1607 +    int checked_prefix; // set when the first part was accepted via prefix_check/suffix_check
  1.1608 +
  1.1609 +    setcminmax(&cmin, &cmax, word, len);
  1.1610 +
  1.1611 +    strcpy(st, word); // NOTE(review): unchecked copy; assumes callers keep word shorter than the st buffer - confirm
  1.1612 +
  1.1613 +    for (i = cmin; i < cmax; i++) {
  1.1614 +        // go to end of the UTF-8 character
  1.1615 +        if (utf8) {
  1.1616 +            for (; (st[i] & 0xc0) == 0x80; i++);
  1.1617 +            if (i >= cmax) return NULL;
  1.1618 +        }
  1.1619 +
  1.1620 +        words = oldwords;
  1.1621 +        onlycpdrule = (words) ? 1 : 0;
  1.1622 +
  1.1623 +        do { // onlycpdrule loop
  1.1624 +
  1.1625 +        oldnumsyllable = numsyllable;
  1.1626 +        oldwordnum = wordnum;
  1.1627 +        checked_prefix = 0;
  1.1628 +
  1.1629 +
  1.1630 +        do { // simplified checkcompoundpattern loop
  1.1631 +
  1.1632 +        if (scpd > 0) { // replacement pass: find the next table entry whose pattern3 matches word at offset i
  1.1633 +          for (; scpd <= numcheckcpd && (!checkcpdtable[scpd-1].pattern3 ||
  1.1634 +            strncmp(word + i, checkcpdtable[scpd-1].pattern3, strlen(checkcpdtable[scpd-1].pattern3)) != 0); scpd++);
  1.1635 +
  1.1636 +          if (scpd > numcheckcpd) break; // break simplified checkcompoundpattern loop
  1.1637 +          strcpy(st + i, checkcpdtable[scpd-1].pattern); // rebuild st: head + pattern, then pattern2 + the tail of word after pattern3
  1.1638 +          soldi = i;
  1.1639 +          i += strlen(checkcpdtable[scpd-1].pattern); // the split now falls between pattern and pattern2
  1.1640 +          strcpy(st + i, checkcpdtable[scpd-1].pattern2);
  1.1641 +          strcpy(st + i + strlen(checkcpdtable[scpd-1].pattern2), word + soldi + strlen(checkcpdtable[scpd-1].pattern3));
  1.1642 +
  1.1643 +          oldlen = len;
  1.1644 +          len += strlen(checkcpdtable[scpd-1].pattern) + strlen(checkcpdtable[scpd-1].pattern2) - strlen(checkcpdtable[scpd-1].pattern3);
  1.1645 +          oldcmin = cmin;
  1.1646 +          oldcmax = cmax;
  1.1647 +          setcminmax(&cmin, &cmax, st, len);
  1.1648 +
  1.1649 +          cmax = len - cpdmin + 1; // overrides the cmax just computed by setcminmax
  1.1650 +        }
  1.1651 +
  1.1652 +        ch = st[i]; // cut st at the split so that st holds only the first part
  1.1653 +        st[i] = '\0';
  1.1654 +
  1.1655 +        sfx = NULL;
  1.1656 +        pfx = NULL;
  1.1657 +
  1.1658 +        // FIRST WORD
  1.1659 +
  1.1660 +        affixed = 1;
  1.1661 +        rv = lookup(st); // perhaps without prefix
  1.1662 +
  1.1663 +        // search homonym with compound flag
  1.1664 +        while ((rv) && !hu_mov_rule &&
  1.1665 +            ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  1.1666 +                !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.1667 +                  (compoundbegin && !wordnum && !onlycpdrule && 
  1.1668 +                        TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  1.1669 +                  (compoundmiddle && wordnum && !words && !onlycpdrule &&
  1.1670 +                    TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
  1.1671 +                  (numdefcpd && onlycpdrule &&
  1.1672 +                    ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
  1.1673 +                    (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))) ||
  1.1674 +                  (scpd != 0 && checkcpdtable[scpd-1].cond != FLAG_NULL &&
  1.1675 +                    !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)))
  1.1676 +                  ) {
  1.1677 +            rv = rv->next_homonym;
  1.1678 +        }
  1.1679 +
  1.1680 +        if (rv) affixed = 0;
  1.1681 +
  1.1682 +        if (!rv) { // no usable homonym: try the first part as an affixed form
  1.1683 +            if (onlycpdrule) break; // leaves the simplified checkcompoundpattern loop
  1.1684 +            if (compoundflag && 
  1.1685 +             !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
  1.1686 +                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
  1.1687 +                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
  1.1688 +                    sfx->getCont() &&
  1.1689 +                        ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
  1.1690 +                            sfx->getContLen())) || (compoundend &&
  1.1691 +                        TESTAFF(sfx->getCont(), compoundend, 
  1.1692 +                            sfx->getContLen())))) {
  1.1693 +                        rv = NULL;
  1.1694 +                }
  1.1695 +            }
  1.1696 +
  1.1697 +            if (rv ||
  1.1698 +              (((wordnum == 0) && compoundbegin &&
  1.1699 +                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  1.1700 +                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
  1.1701 +              ((wordnum > 0) && compoundmiddle &&
  1.1702 +                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  1.1703 +                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
  1.1704 +              ) checked_prefix = 1;
  1.1705 +        // else check forbiddenwords and needaffix
  1.1706 +        } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1.1707 +            TESTAFF(rv->astr, needaffix, rv->alen) ||
  1.1708 +            TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1.1709 +            (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
  1.1710 +             )) {
  1.1711 +                st[i] = ch;
  1.1712 +                //continue;
  1.1713 +                break; // leaves the simplified checkcompoundpattern loop
  1.1714 +        }
  1.1715 +
  1.1716 +            // check non_compound flag in suffix and prefix
  1.1717 +            if ((rv) && !hu_mov_rule &&
  1.1718 +                ((pfx && pfx->getCont() &&
  1.1719 +                    TESTAFF(pfx->getCont(), compoundforbidflag, 
  1.1720 +                        pfx->getContLen())) ||
  1.1721 +                (sfx && sfx->getCont() &&
  1.1722 +                    TESTAFF(sfx->getCont(), compoundforbidflag, 
  1.1723 +                        sfx->getContLen())))) {
  1.1724 +                    rv = NULL;
  1.1725 +            }
  1.1726 +
  1.1727 +            // check compoundend flag in suffix and prefix
  1.1728 +            if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
  1.1729 +                ((pfx && pfx->getCont() &&
  1.1730 +                    TESTAFF(pfx->getCont(), compoundend, 
  1.1731 +                        pfx->getContLen())) ||
  1.1732 +                (sfx && sfx->getCont() &&
  1.1733 +                    TESTAFF(sfx->getCont(), compoundend, 
  1.1734 +                        sfx->getContLen())))) {
  1.1735 +                    rv = NULL;
  1.1736 +            }
  1.1737 +
  1.1738 +            // check compoundmiddle flag in suffix and prefix
  1.1739 +            if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
  1.1740 +                ((pfx && pfx->getCont() &&
  1.1741 +                    TESTAFF(pfx->getCont(), compoundmiddle, 
  1.1742 +                        pfx->getContLen())) ||
  1.1743 +                (sfx && sfx->getCont() &&
  1.1744 +                    TESTAFF(sfx->getCont(), compoundmiddle, 
  1.1745 +                        sfx->getContLen())))) {
  1.1746 +                    rv = NULL;
  1.1747 +            }
  1.1748 +
  1.1749 +        // check forbiddenwords
  1.1750 +        if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1.1751 +            TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1.1752 +            (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
  1.1753 +                return NULL;
  1.1754 +            }
  1.1755 +
  1.1756 +        // increment word number, if the root of the first part has a compoundroot flag
  1.1757 +        if ((rv) && compoundroot && 
  1.1758 +            (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1.1759 +                wordnum++;
  1.1760 +        }
  1.1761 +
  1.1762 +        // first word is acceptable in compound words?
  1.1763 +        if (((rv) && 
  1.1764 +          ( checked_prefix || (words && words[wnum]) ||
  1.1765 +            (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.1766 +            ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  1.1767 +            ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))// ||
  1.1768 +//            (numdefcpd && )
  1.1769 +
  1.1770 +// LANG_hu section: spec. Hungarian rule
  1.1771 +            || ((langnum == LANG_hu) && hu_mov_rule && (
  1.1772 +                    TESTAFF(rv->astr, 'F', rv->alen) || // XXX hardwired Hungarian dictionary codes
  1.1773 +                    TESTAFF(rv->astr, 'G', rv->alen) ||
  1.1774 +                    TESTAFF(rv->astr, 'H', rv->alen)
  1.1775 +                )
  1.1776 +              )
  1.1777 +// END of LANG_hu section
  1.1778 +          ) &&
  1.1779 +          (
  1.1780 +             // test CHECKCOMPOUNDPATTERN conditions
  1.1781 +             scpd == 0 || checkcpdtable[scpd-1].cond == FLAG_NULL || 
  1.1782 +                TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)
  1.1783 +          )
  1.1784 +          && ! (( checkcompoundtriple && scpd == 0 && !words && // test triple letters
  1.1785 +                   (word[i-1]==word[i]) && (
  1.1786 +                      ((i>1) && (word[i-1]==word[i-2])) ||
  1.1787 +                      ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
  1.1788 +                   )
  1.1789 +               ) ||
  1.1790 +               (
  1.1791 +                 checkcompoundcase && scpd == 0 && !words && cpdcase_check(word, i)
  1.1792 +               ))
  1.1793 +         )
  1.1794 +// LANG_hu section: spec. Hungarian rule
  1.1795 +         || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
  1.1796 +              (sfx && sfx->getCont() && ( // XXX hardwired Hungarian dic. codes
  1.1797 +                        TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
  1.1798 +                        TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
  1.1799 +                    )
  1.1800 +               )
  1.1801 +             )
  1.1802 +         ) { // first word is ok condition
  1.1803 +
  1.1804 +// LANG_hu section: spec. Hungarian rule
  1.1805 +            if (langnum == LANG_hu) {
  1.1806 +                // calculate syllable number of the word
  1.1807 +                numsyllable += get_syllable(st, i);
  1.1808 +                // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  1.1809 +                if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  1.1810 +            }
  1.1811 +// END of LANG_hu section
  1.1812 +
  1.1813 +            // NEXT WORD(S)
  1.1814 +            rv_first = rv;
  1.1815 +            st[i] = ch; // restore st so that st + i is the whole remainder
  1.1816 +
  1.1817 +        do { // striple loop
  1.1818 +
  1.1819 +            // check simplifiedtriple
  1.1820 +            if (simplifiedtriple) { 
  1.1821 +              if (striple) { 
  1.1822 +                checkedstriple = 1;
  1.1823 +                i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
  1.1824 +              } else if (i > 2 && *(word+i - 1) == *(word + i - 2)) striple = 1;
  1.1825 +            }
  1.1826 +
  1.1827 +            rv = lookup((st+i)); // perhaps without prefix
  1.1828 +
  1.1829 +        // search homonym with compound flag
  1.1830 +        while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  1.1831 +                        !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.1832 +                          (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
  1.1833 +                           (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))) ||
  1.1834 +                             (scpd != 0 && checkcpdtable[scpd-1].cond2 != FLAG_NULL &&
  1.1835 +                                !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
  1.1836 +                           )) {
  1.1837 +            rv = rv->next_homonym;
  1.1838 +        }
  1.1839 +
  1.1840 +            // check FORCEUCASE
  1.1841 +            if (rv && forceucase && (rv) &&
  1.1842 +                (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
  1.1843 +
  1.1844 +            if (rv && words && words[wnum + 1]) return rv_first;
  1.1845 +
  1.1846 +            oldnumsyllable2 = numsyllable;
  1.1847 +            oldwordnum2 = wordnum;
  1.1848 +
  1.1849 +
  1.1850 +// LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
  1.1851 +            if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
  1.1852 +                numsyllable--;
  1.1853 +            }
  1.1854 +// END of LANG_hu section
  1.1855 +
  1.1856 +            // increment word number, if the second root has a compoundroot flag
  1.1857 +            if ((rv) && (compoundroot) && 
  1.1858 +                (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1.1859 +                    wordnum++;
  1.1860 +            }
  1.1861 +
  1.1862 +            // check forbiddenwords
  1.1863 +            if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1.1864 +                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1.1865 +               (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
  1.1866 +
  1.1867 +            // second word is acceptable, as a root?
  1.1868 +            // hungarian conventions: compounding is acceptable,
  1.1869 +            // when compound forms consist of 2 words, or if more,
  1.1870 +            // then the syllable number of root words must be 6, or lesser.
  1.1871 +
  1.1872 +            if ((rv) && (
  1.1873 +                      (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.1874 +                      (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
  1.1875 +                    )
  1.1876 +                && (
  1.1877 +                      ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
  1.1878 +                      ((cpdmaxsyllable!=0) && 
  1.1879 +                          (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen)<=cpdmaxsyllable))
  1.1880 +                    ) &&
  1.1881 +               (
  1.1882 +                 // test CHECKCOMPOUNDPATTERN
  1.1883 +                 !numcheckcpd || scpd != 0 || !cpdpat_check(word, i, rv_first, rv, 0)
  1.1884 +               ) &&
  1.1885 +                (
  1.1886 +                     (!checkcompounddup || (rv != rv_first))
  1.1887 +                   )
  1.1888 +            // test CHECKCOMPOUNDPATTERN conditions
  1.1889 +                && (scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
  1.1890 +                      TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
  1.1891 +                )
  1.1892 +                 {
  1.1893 +                      // forbid compound word, if it is a non compound word with typical fault
  1.1894 +                      if (checkcompoundrep && cpdrep_check(word,len)) return NULL;
  1.1895 +                      return rv_first;
  1.1896 +            }
  1.1897 +
  1.1898 +            numsyllable = oldnumsyllable2; // root attempt failed: roll the counters back
  1.1899 +            wordnum = oldwordnum2;
  1.1900 +
  1.1901 +            // perhaps second word has prefix or/and suffix
  1.1902 +            sfx = NULL;
  1.1903 +            sfxflag = FLAG_NULL;
  1.1904 +            rv = (compoundflag && !onlycpdrule) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
  1.1905 +            if (!rv && compoundend && !onlycpdrule) {
  1.1906 +                sfx = NULL;
  1.1907 +                pfx = NULL;
  1.1908 +                rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
  1.1909 +            }
  1.1910 +
  1.1911 +            if (!rv && numdefcpd && words) {
  1.1912 +                rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
  1.1913 +                if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv_first;
  1.1914 +                rv = NULL;
  1.1915 +            }
  1.1916 +
  1.1917 +            // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
  1.1918 +            if (rv && !(scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL || 
  1.1919 +                TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))) rv = NULL;
  1.1920 +
  1.1921 +            // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
  1.1922 +            if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) rv = NULL;
  1.1923 +
  1.1924 +            // check non_compound flag in suffix and prefix
  1.1925 +            if ((rv) && 
  1.1926 +                ((pfx && pfx->getCont() &&
  1.1927 +                    TESTAFF(pfx->getCont(), compoundforbidflag, 
  1.1928 +                        pfx->getContLen())) ||
  1.1929 +                (sfx && sfx->getCont() &&
  1.1930 +                    TESTAFF(sfx->getCont(), compoundforbidflag, 
  1.1931 +                        sfx->getContLen())))) {
  1.1932 +                    rv = NULL;
  1.1933 +            }
  1.1934 +
  1.1935 +            // check FORCEUCASE
  1.1936 +            if (rv && forceucase && (rv) &&
  1.1937 +                (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
  1.1938 +
  1.1939 +            // check forbiddenwords
  1.1940 +            if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1.1941 +                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1.1942 +               (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
  1.1943 +
  1.1944 +            // pfxappnd = prefix of word+i, or NULL
  1.1945 +            // calculate syllable number of prefix.
  1.1946 +            // hungarian convention: when syllable number of prefix is more,
  1.1947 +            // than 1, the prefix+word counts as two words.
  1.1948 +
  1.1949 +            if (langnum == LANG_hu) {
  1.1950 +                // calculate syllable number of the word
  1.1951 +                numsyllable += get_syllable(word + i, strlen(word + i));
  1.1952 +
  1.1953 +                // - affix syllable num.
  1.1954 +                // XXX only second suffix (inflections, not derivations)
  1.1955 +                if (sfxappnd) {
  1.1956 +                    char * tmp = myrevstrdup(sfxappnd);
  1.1957 +                    numsyllable -= get_syllable(tmp, strlen(tmp));
  1.1958 +                    free(tmp);
  1.1959 +                }
  1.1960 +
  1.1961 +                // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  1.1962 +                if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  1.1963 +
  1.1964 +                // increment syllable num, if last word has a SYLLABLENUM flag
  1.1965 +                // and the suffix is beginning `s'
  1.1966 +
  1.1967 +                if (cpdsyllablenum) {
  1.1968 +                    switch (sfxflag) {
  1.1969 +                        case 'c': { numsyllable+=2; break; }
  1.1970 +                        case 'J': { numsyllable += 1; break; }
  1.1971 +                        case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
  1.1972 +                    }
  1.1973 +                }
  1.1974 +            }
  1.1975 +
  1.1976 +            // increment word number, if the second word has a compoundroot flag
  1.1977 +            if ((rv) && (compoundroot) && 
  1.1978 +                (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1.1979 +                    wordnum++;
  1.1980 +            }
  1.1981 +
  1.1982 +            // second word is acceptable, as a word with prefix or/and suffix?
  1.1983 +            // hungarian conventions: compounding is acceptable,
  1.1984 +            // when compound forms consist 2 word, otherwise
  1.1985 +            // the syllable number of root words is 6, or lesser.
  1.1986 +            if ((rv) && 
  1.1987 +                    (
  1.1988 +                      ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || 
  1.1989 +                      ((cpdmaxsyllable != 0) && 
  1.1990 +                          (numsyllable <= cpdmaxsyllable))
  1.1991 +                    )
  1.1992 +                && (
  1.1993 +                   (!checkcompounddup || (rv != rv_first))
  1.1994 +                   )) {
  1.1995 +                    // forbid compound word, if it is a non compound word with typical fault
  1.1996 +                    if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
  1.1997 +                    return rv_first;
  1.1998 +            }
  1.1999 +
  1.2000 +            numsyllable = oldnumsyllable2; // affixed attempt failed: roll the counters back
  1.2001 +            wordnum = oldwordnum2;
  1.2002 +
  1.2003 +            // perhaps second word is a compound word (recursive call)
  1.2004 +            if (wordnum < maxwordnum) {
  1.2005 +                rv = compound_check((st+i),strlen(st+i), wordnum+1,
  1.2006 +                     numsyllable, maxwordnum, wnum + 1, words, 0, is_sug, info);
  1.2007 +                
  1.2008 +                if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) ||
  1.2009 +                   (scpd != 0 && !cpdpat_check(word, i, rv_first, rv, affixed)))) rv = NULL;
  1.2010 +            } else {
  1.2011 +                rv=NULL;
  1.2012 +            }
  1.2013 +            if (rv) {
  1.2014 +                // forbid compound word, if it is a non compound word with typical fault
  1.2015 +                if (checkcompoundrep || forbiddenword) {
  1.2016 +                    struct hentry * rv2 = NULL;
  1.2017 +
  1.2018 +                    if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
  1.2019 +                    
  1.2020 +                    // check first part
  1.2021 +                    if (strncmp(rv->word, word + i, rv->blen) == 0) {
  1.2022 +                        char r = *(st + i + rv->blen); // cut st after the first part of the recursive result; restored from r below
  1.2023 +                        *(st + i + rv->blen) = '\0';
  1.2024 +                        
  1.2025 +                        if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) {
  1.2026 +                            *(st + i + rv->blen) = r;
  1.2027 +                            continue; // jumps to the striple loop condition
  1.2028 +                        }
  1.2029 +
  1.2030 +                        if (forbiddenword) {
  1.2031 +                            rv2 = lookup(word);
  1.2032 +                            if (!rv2) rv2 = affix_check(word, len);
  1.2033 +                            if (rv2 && rv2->astr && TESTAFF(rv2->astr, forbiddenword, rv2->alen) && 
  1.2034 +                                (strncmp(rv2->word, st, i + rv->blen) == 0)) {
  1.2035 +                                    return NULL;
  1.2036 +                            }
  1.2037 +                        }
  1.2038 +                        *(st + i + rv->blen) = r;
  1.2039 +                    }
  1.2040 +                }
  1.2041 +                return rv_first;
  1.2042 +            }
  1.2043 +          } while (striple && !checkedstriple); // end of striple loop
  1.2044 +
  1.2045 +          if (checkedstriple) { // undo the i-- done for the striple retry
  1.2046 +            i++;
  1.2047 +            checkedstriple = 0;
  1.2048 +            striple = 0;
  1.2049 +          }
  1.2050 +
  1.2051 +        } // first word is ok condition
  1.2052 +
  1.2053 +        if (soldi != 0) { // undo the replacement-pass changes to i, len, cmin and cmax
  1.2054 +          i = soldi;
  1.2055 +          soldi = 0;
  1.2056 +          len = oldlen;
  1.2057 +          cmin = oldcmin;
  1.2058 +          cmax = oldcmax;
  1.2059 +        }
  1.2060 +        scpd++; // next pass tries the next checkcpdtable entry
  1.2061 +
  1.2062 +
  1.2063 +        } while (!onlycpdrule && simplifiedcpd && scpd <= numcheckcpd); // end of simplifiedcpd loop
  1.2064 +
  1.2065 +        scpd = 0;
  1.2066 +        wordnum = oldwordnum;
  1.2067 +        numsyllable = oldnumsyllable;
  1.2068 +
  1.2069 +        if (soldi != 0) { // the loop was left while a replacement was still applied to st
  1.2070 +          i = soldi;
  1.2071 +          strcpy(st, word); // XXX add more optim.
  1.2072 +          soldi = 0;
  1.2073 +        } else st[i] = ch;
  1.2074 +
  1.2075 +        } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop (the assignment turns onlycpdrule on for one more pass)
  1.2076 +
  1.2077 +    }
  1.2078 +
  1.2079 +    return NULL;
  1.2080 +}
  1.2081 +
  1.2082 +// check if compound word is correctly spelled
  1.2083 +// hu_mov_rule = spec. Hungarian rule (XXX)
  1.2084 +int AffixMgr::compound_check_morph(const char * word, int len, 
  1.2085 +    short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
  1.2086 +    char hu_mov_rule = 0, char ** result = NULL, char * partresult = NULL)
  1.2087 +{
  1.2088 +    int i;
  1.2089 +    short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
  1.2090 +    int ok = 0;
  1.2091 +
  1.2092 +    struct hentry * rv = NULL;
  1.2093 +    struct hentry * rv_first;
  1.2094 +    struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
  1.2095 +    char st [MAXWORDUTF8LEN + 4];
  1.2096 +    char ch;
  1.2097 +
  1.2098 +    int checked_prefix;
  1.2099 +    char presult[MAXLNLEN];
  1.2100 +
  1.2101 +    int cmin;
  1.2102 +    int cmax;
  1.2103 +
  1.2104 +    int onlycpdrule;
  1.2105 +    int affixed = 0;
  1.2106 +    hentry ** oldwords = words;
  1.2107 +
  1.2108 +    setcminmax(&cmin, &cmax, word, len);
  1.2109 +
  1.2110 +    strcpy(st, word);
  1.2111 +
  1.2112 +    for (i = cmin; i < cmax; i++) {
  1.2113 +        oldnumsyllable = numsyllable;
  1.2114 +        oldwordnum = wordnum;
  1.2115 +        checked_prefix = 0;
  1.2116 +
  1.2117 +        // go to end of the UTF-8 character
  1.2118 +        if (utf8) {
  1.2119 +            for (; (st[i] & 0xc0) == 0x80; i++);
  1.2120 +            if (i >= cmax) return 0;
  1.2121 +        }
  1.2122 +
  1.2123 +        words = oldwords;
  1.2124 +        onlycpdrule = (words) ? 1 : 0;
  1.2125 +
  1.2126 +        do { // onlycpdrule loop
  1.2127 +
  1.2128 +        oldnumsyllable = numsyllable;
  1.2129 +        oldwordnum = wordnum;
  1.2130 +        checked_prefix = 0;
  1.2131 +
  1.2132 +        ch = st[i];
  1.2133 +        st[i] = '\0';
  1.2134 +        sfx = NULL;
  1.2135 +
  1.2136 +        // FIRST WORD
  1.2137 +
  1.2138 +        affixed = 1;
  1.2139 +
  1.2140 +        *presult = '\0';
  1.2141 +        if (partresult) mystrcat(presult, partresult, MAXLNLEN);
  1.2142 +
  1.2143 +        rv = lookup(st); // perhaps without prefix
  1.2144 +
  1.2145 +        // search homonym with compound flag
  1.2146 +        while ((rv) && !hu_mov_rule && 
  1.2147 +            ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  1.2148 +                !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.2149 +                (compoundbegin && !wordnum && !onlycpdrule &&
  1.2150 +                        TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  1.2151 +                (compoundmiddle && wordnum && !words && !onlycpdrule &&
  1.2152 +                    TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
  1.2153 +                  (numdefcpd && onlycpdrule &&
  1.2154 +                    ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
  1.2155 +                    (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
  1.2156 +                  ))) {
  1.2157 +            rv = rv->next_homonym;
  1.2158 +        }
  1.2159 +
  1.2160 +        if (rv) affixed = 0;
  1.2161 +
  1.2162 +        if (rv)  {
  1.2163 +            sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st);
  1.2164 +            if (!HENTRY_FIND(rv, MORPH_STEM)) {
  1.2165 +                sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, st);
  1.2166 +            }
  1.2167 +            // store the pointer of the hash entry
  1.2168 +//            sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
  1.2169 +            if (HENTRY_DATA(rv)) {
  1.2170 +                sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA2(rv));
  1.2171 +            }
  1.2172 +        }        
  1.2173 +
  1.2174 +        if (!rv) {
  1.2175 +            if (onlycpdrule) break;
  1.2176 +            if (compoundflag &&
  1.2177 +             !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
  1.2178 +                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
  1.2179 +                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
  1.2180 +                    sfx->getCont() &&
  1.2181 +                        ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
  1.2182 +                            sfx->getContLen())) || (compoundend &&
  1.2183 +                        TESTAFF(sfx->getCont(), compoundend, 
  1.2184 +                            sfx->getContLen())))) {
  1.2185 +                        rv = NULL;
  1.2186 +                }
  1.2187 +            }
  1.2188 +
  1.2189 +            if (rv ||
  1.2190 +              (((wordnum == 0) && compoundbegin &&
  1.2191 +                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  1.2192 +                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
  1.2193 +              ((wordnum > 0) && compoundmiddle &&
  1.2194 +                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  1.2195 +                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
  1.2196 +              ) {
  1.2197 +                // char * p = prefix_check_morph(st, i, 0, compound);
  1.2198 +                char * p = NULL;
  1.2199 +                if (compoundflag) p = affix_check_morph(st, i, compoundflag);
  1.2200 +                if (!p || (*p == '\0')) {
  1.2201 +                   if (p) free(p);
  1.2202 +                   p = NULL;
  1.2203 +                   if ((wordnum == 0) && compoundbegin) {
  1.2204 +                     p = affix_check_morph(st, i, compoundbegin);
  1.2205 +                   } else if ((wordnum > 0) && compoundmiddle) {
  1.2206 +                     p = affix_check_morph(st, i, compoundmiddle);                   
  1.2207 +                   }
  1.2208 +                }
  1.2209 +                if (p && (*p != '\0')) {
  1.2210 +                    sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD,
  1.2211 +                        MORPH_PART, st, line_uniq_app(&p, MSEP_REC));
  1.2212 +                }
  1.2213 +                if (p) free(p);
  1.2214 +                checked_prefix = 1;
  1.2215 +            }
  1.2216 +        // else check forbiddenwords
  1.2217 +        } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1.2218 +            TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1.2219 +            TESTAFF(rv->astr, needaffix, rv->alen))) {
  1.2220 +                st[i] = ch;
  1.2221 +                continue;
  1.2222 +        }
  1.2223 +
  1.2224 +            // check non_compound flag in suffix and prefix
  1.2225 +            if ((rv) && !hu_mov_rule &&
  1.2226 +                ((pfx && pfx->getCont() &&
  1.2227 +                    TESTAFF(pfx->getCont(), compoundforbidflag, 
  1.2228 +                        pfx->getContLen())) ||
  1.2229 +                (sfx && sfx->getCont() &&
  1.2230 +                    TESTAFF(sfx->getCont(), compoundforbidflag, 
  1.2231 +                        sfx->getContLen())))) {
  1.2232 +                    continue;
  1.2233 +            }
  1.2234 +
  1.2235 +            // check compoundend flag in suffix and prefix
  1.2236 +            if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
  1.2237 +                ((pfx && pfx->getCont() &&
  1.2238 +                    TESTAFF(pfx->getCont(), compoundend, 
  1.2239 +                        pfx->getContLen())) ||
  1.2240 +                (sfx && sfx->getCont() &&
  1.2241 +                    TESTAFF(sfx->getCont(), compoundend, 
  1.2242 +                        sfx->getContLen())))) {
  1.2243 +                    continue;
  1.2244 +            }
  1.2245 +
  1.2246 +            // check compoundmiddle flag in suffix and prefix
  1.2247 +            if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
  1.2248 +                ((pfx && pfx->getCont() &&
  1.2249 +                    TESTAFF(pfx->getCont(), compoundmiddle, 
  1.2250 +                        pfx->getContLen())) ||
  1.2251 +                (sfx && sfx->getCont() &&
  1.2252 +                    TESTAFF(sfx->getCont(), compoundmiddle, 
  1.2253 +                        sfx->getContLen())))) {
  1.2254 +                    rv = NULL;
  1.2255 +            }       
  1.2256 +
  1.2257 +        // check forbiddenwords
  1.2258 +        if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen)
  1.2259 +            || TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) continue;
  1.2260 +
  1.2261 +        // increment word number, if the second root has a compoundroot flag
  1.2262 +        if ((rv) && (compoundroot) && 
  1.2263 +            (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1.2264 +                wordnum++;
  1.2265 +        }
  1.2266 +
  1.2267 +        // first word is acceptable in compound words?
  1.2268 +        if (((rv) && 
  1.2269 +          ( checked_prefix || (words && words[wnum]) ||
  1.2270 +            (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.2271 +            ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  1.2272 +            ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen)) 
  1.2273 +// LANG_hu section: spec. Hungarian rule
  1.2274 +            || ((langnum == LANG_hu) && // hu_mov_rule
  1.2275 +                hu_mov_rule && (
  1.2276 +                    TESTAFF(rv->astr, 'F', rv->alen) ||
  1.2277 +                    TESTAFF(rv->astr, 'G', rv->alen) ||
  1.2278 +                    TESTAFF(rv->astr, 'H', rv->alen)
  1.2279 +                )
  1.2280 +              )
  1.2281 +// END of LANG_hu section
  1.2282 +          )
  1.2283 +          && ! (( checkcompoundtriple && !words && // test triple letters
  1.2284 +                   (word[i-1]==word[i]) && (
  1.2285 +                      ((i>1) && (word[i-1]==word[i-2])) || 
  1.2286 +                      ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
  1.2287 +                   )
  1.2288 +               ) ||
  1.2289 +               (
  1.2290 +                   // test CHECKCOMPOUNDPATTERN
  1.2291 +                   numcheckcpd && !words && cpdpat_check(word, i, rv, NULL, affixed)
  1.2292 +               ) ||
  1.2293 +               ( 
  1.2294 +                 checkcompoundcase && !words && cpdcase_check(word, i)
  1.2295 +               ))
  1.2296 +         )
  1.2297 +// LANG_hu section: spec. Hungarian rule
  1.2298 +         || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
  1.2299 +              (sfx && sfx->getCont() && (
  1.2300 +                        TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
  1.2301 +                        TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
  1.2302 +                    )                
  1.2303 +               )
  1.2304 +             )
  1.2305 +// END of LANG_hu section
  1.2306 +         ) {
  1.2307 +
  1.2308 +// LANG_hu section: spec. Hungarian rule
  1.2309 +            if (langnum == LANG_hu) {
  1.2310 +                // calculate syllable number of the word
  1.2311 +                numsyllable += get_syllable(st, i);
  1.2312 +
  1.2313 +                // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  1.2314 +                if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  1.2315 +            }
  1.2316 +// END of LANG_hu section
  1.2317 +
  1.2318 +            // NEXT WORD(S)
  1.2319 +            rv_first = rv;
  1.2320 +            rv = lookup((word+i)); // perhaps without prefix
  1.2321 +
  1.2322 +        // search homonym with compound flag
  1.2323 +        while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  1.2324 +                        !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.2325 +                          (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
  1.2326 +                           (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
  1.2327 +            rv = rv->next_homonym;
  1.2328 +        }
  1.2329 +
  1.2330 +            if (rv && words && words[wnum + 1]) {
  1.2331 +                  mystrcat(*result, presult, MAXLNLEN);
  1.2332 +                  mystrcat(*result, " ", MAXLNLEN);
  1.2333 +                  mystrcat(*result, MORPH_PART, MAXLNLEN);
  1.2334 +                  mystrcat(*result, word+i, MAXLNLEN);
  1.2335 +                  if (complexprefixes && HENTRY_DATA(rv)) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2336 +                  if (!HENTRY_FIND(rv, MORPH_STEM)) {
  1.2337 +                    mystrcat(*result, " ", MAXLNLEN);
  1.2338 +                    mystrcat(*result, MORPH_STEM, MAXLNLEN);
  1.2339 +                    mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
  1.2340 +                  }
  1.2341 +                  // store the pointer of the hash entry
  1.2342 +//                  sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
  1.2343 +                  if (!complexprefixes && HENTRY_DATA(rv)) {
  1.2344 +                    mystrcat(*result, " ", MAXLNLEN);
  1.2345 +                    mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2346 +                  }
  1.2347 +                  mystrcat(*result, "\n", MAXLNLEN);
  1.2348 +                  ok = 1;
  1.2349 +                  return 0;
  1.2350 +            }
  1.2351 +
  1.2352 +            oldnumsyllable2 = numsyllable;
  1.2353 +            oldwordnum2 = wordnum;
  1.2354 +
  1.2355 +// LANG_hu section: spec. Hungarian rule
  1.2356 +            if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
  1.2357 +                numsyllable--;
  1.2358 +            }
  1.2359 +// END of LANG_hu section
  1.2360 +            // increment word number, if the second root has a compoundroot flag
  1.2361 +            if ((rv) && (compoundroot) && 
  1.2362 +                (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1.2363 +                    wordnum++;
  1.2364 +            }
  1.2365 +
  1.2366 +            // check forbiddenwords
  1.2367 +            if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1.2368 +                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
  1.2369 +                st[i] = ch;
  1.2370 +                continue;
  1.2371 +            }
  1.2372 +
  1.2373 +            // second word is acceptable, as a root?
  1.2374 +            // hungarian conventions: compounding is acceptable,
  1.2375 +            // when compound forms consist of 2 words, or if more,
  1.2376 +            // then the syllable number of root words must be 6, or lesser.
  1.2377 +            if ((rv) && (
  1.2378 +                      (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1.2379 +                      (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
  1.2380 +                    )
  1.2381 +                && (
  1.2382 +                      ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
  1.2383 +                      ((cpdmaxsyllable!=0) &&
  1.2384 +                          (numsyllable+get_syllable(HENTRY_WORD(rv),rv->blen)<=cpdmaxsyllable))
  1.2385 +                    )
  1.2386 +                && (
  1.2387 +                     (!checkcompounddup || (rv != rv_first))
  1.2388 +                   )
  1.2389 +                )
  1.2390 +                 {
  1.2391 +                      // bad compound word
  1.2392 +                      mystrcat(*result, presult, MAXLNLEN);
  1.2393 +                      mystrcat(*result, " ", MAXLNLEN);
  1.2394 +                      mystrcat(*result, MORPH_PART, MAXLNLEN);
  1.2395 +                      mystrcat(*result, word+i, MAXLNLEN);
  1.2396 +
  1.2397 +                      if (HENTRY_DATA(rv)) {
  1.2398 +                        if (complexprefixes) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2399 +                        if (! HENTRY_FIND(rv, MORPH_STEM)) {
  1.2400 +                           mystrcat(*result, " ", MAXLNLEN);
  1.2401 +                           mystrcat(*result, MORPH_STEM, MAXLNLEN);
  1.2402 +                           mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
  1.2403 +                        }
  1.2404 +                        // store the pointer of the hash entry
  1.2405 +//                        sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
  1.2406 +                        if (!complexprefixes) {
  1.2407 +                            mystrcat(*result, " ", MAXLNLEN);
  1.2408 +                            mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2409 +                        }
  1.2410 +                      }
  1.2411 +                      mystrcat(*result, "\n", MAXLNLEN);
  1.2412 +                              ok = 1;
  1.2413 +            }
  1.2414 +
  1.2415 +            numsyllable = oldnumsyllable2 ;
  1.2416 +            wordnum = oldwordnum2;
  1.2417 +
  1.2418 +            // perhaps second word has prefix or/and suffix
  1.2419 +            sfx = NULL;
  1.2420 +            sfxflag = FLAG_NULL;
  1.2421 +
  1.2422 +            if (compoundflag && !onlycpdrule) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
  1.2423 +
  1.2424 +            if (!rv && compoundend && !onlycpdrule) {
  1.2425 +                sfx = NULL;
  1.2426 +                pfx = NULL;
  1.2427 +                rv = affix_check((word+i),strlen(word+i), compoundend);
  1.2428 +            }
  1.2429 +
  1.2430 +            if (!rv && numdefcpd && words) {
  1.2431 +                rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
  1.2432 +                if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
  1.2433 +                      char * m = NULL;
  1.2434 +                      if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
  1.2435 +                      if ((!m || *m == '\0') && compoundend) {
  1.2436 +                            if (m) free(m);
  1.2437 +                            m = affix_check_morph((word+i),strlen(word+i), compoundend);
  1.2438 +                      }
  1.2439 +                      mystrcat(*result, presult, MAXLNLEN);
  1.2440 +                      if (m || (*m != '\0')) {
  1.2441 +                        sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
  1.2442 +                            MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
  1.2443 +                      }
  1.2444 +                      if (m) free(m);
  1.2445 +                      mystrcat(*result, "\n", MAXLNLEN);
  1.2446 +                      ok = 1;
  1.2447 +                }
  1.2448 +            }
  1.2449 +
  1.2450 +            // check non_compound flag in suffix and prefix
  1.2451 +            if ((rv) && 
  1.2452 +                ((pfx && pfx->getCont() &&
  1.2453 +                    TESTAFF(pfx->getCont(), compoundforbidflag, 
  1.2454 +                        pfx->getContLen())) ||
  1.2455 +                (sfx && sfx->getCont() &&
  1.2456 +                    TESTAFF(sfx->getCont(), compoundforbidflag, 
  1.2457 +                        sfx->getContLen())))) {
  1.2458 +                    rv = NULL;
  1.2459 +            }
  1.2460 +
  1.2461 +            // check forbiddenwords
  1.2462 +            if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen) ||
  1.2463 +                    TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))
  1.2464 +                    && (! TESTAFF(rv->astr, needaffix, rv->alen))) {
  1.2465 +                        st[i] = ch;
  1.2466 +                        continue;
  1.2467 +                    }
  1.2468 +
  1.2469 +            if (langnum == LANG_hu) {
  1.2470 +                // calculate syllable number of the word
  1.2471 +                numsyllable += get_syllable(word + i, strlen(word + i));
  1.2472 +
  1.2473 +                // - affix syllable num.
  1.2474 +                // XXX only second suffix (inflections, not derivations)
  1.2475 +                if (sfxappnd) {
  1.2476 +                    char * tmp = myrevstrdup(sfxappnd);
  1.2477 +                    numsyllable -= get_syllable(tmp, strlen(tmp));
  1.2478 +                    free(tmp);
  1.2479 +                }
  1.2480 +
  1.2481 +                // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  1.2482 +                if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  1.2483 +
  1.2484 +                // increment syllable num, if last word has a SYLLABLENUM flag
  1.2485 +                // and the suffix is beginning `s'
  1.2486 +
  1.2487 +                if (cpdsyllablenum) {
  1.2488 +                    switch (sfxflag) {
  1.2489 +                        case 'c': { numsyllable+=2; break; }
  1.2490 +                        case 'J': { numsyllable += 1; break; }
  1.2491 +                        case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
  1.2492 +                    }
  1.2493 +                }
  1.2494 +            }
  1.2495 +
  1.2496 +            // increment word number, if the second word has a compoundroot flag
  1.2497 +            if ((rv) && (compoundroot) && 
  1.2498 +                (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1.2499 +                    wordnum++;
  1.2500 +            }
  1.2501 +            // second word is acceptable, as a word with prefix or/and suffix?
  1.2502 +            // hungarian conventions: compounding is acceptable,
  1.2503 +            // when compound forms consist 2 word, otherwise
  1.2504 +            // the syllable number of root words is 6, or lesser.
  1.2505 +            if ((rv) && 
  1.2506 +                    (
  1.2507 +                      ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
  1.2508 +                      ((cpdmaxsyllable!=0) &&
  1.2509 +                          (numsyllable <= cpdmaxsyllable))
  1.2510 +                    )
  1.2511 +                && (
  1.2512 +                   (!checkcompounddup || (rv != rv_first))
  1.2513 +                   )) {
  1.2514 +                      char * m = NULL;
  1.2515 +                      if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
  1.2516 +                      if ((!m || *m == '\0') && compoundend) {
  1.2517 +                            if (m) free(m);
  1.2518 +                            m = affix_check_morph((word+i),strlen(word+i), compoundend);
  1.2519 +                      }
  1.2520 +                      mystrcat(*result, presult, MAXLNLEN);
  1.2521 +                      if (m && (*m != '\0')) {
  1.2522 +                        sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
  1.2523 +                            MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
  1.2524 +                      }
  1.2525 +                      if (m) free(m);
  1.2526 +                      sprintf(*result + strlen(*result), "%c", MSEP_REC);
  1.2527 +                      ok = 1;
  1.2528 +            }
  1.2529 +
  1.2530 +            numsyllable = oldnumsyllable2;
  1.2531 +            wordnum = oldwordnum2;
  1.2532 +
  1.2533 +            // perhaps second word is a compound word (recursive call)
  1.2534 +            if ((wordnum < maxwordnum) && (ok == 0)) {
  1.2535 +                        compound_check_morph((word+i),strlen(word+i), wordnum+1, 
  1.2536 +                             numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
  1.2537 +            } else {
  1.2538 +                rv=NULL;
  1.2539 +            }
  1.2540 +        }
  1.2541 +        st[i] = ch;
  1.2542 +        wordnum = oldwordnum;
  1.2543 +        numsyllable = oldnumsyllable;
  1.2544 +
  1.2545 +        } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
  1.2546 +
  1.2547 +    }
  1.2548 +    return 0;
  1.2549 +}    
  1.2550 +
  1.2551 + // return 1 if all of s1, read forward, matches s2 read backward from end_of_s2 (at most len chars compared), else 0
  1.2552 +/* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
  1.2553 + {
  1.2554 +    while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
  1.2555 +        s1++;
  1.2556 +        end_of_s2--;
  1.2557 +        len--;
  1.2558 +    }
  1.2559 +    return (*s1 == '\0');
  1.2560 + }
  1.2561 + */
  1.2562 +
  1.2563 +inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len) // active version; unlike the commented-out one above it accepts '.' as a wildcard
  1.2564 + {
  1.2565 +    while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) { // '.' in s1 matches any character of s2; len is tested first, so s2 is never read once len hits 0
  1.2566 +        s1++;         // s1 is consumed front to back ...
  1.2567 +        end_of_s2--;  // ... while s2 is walked backward from end_of_s2
  1.2568 +        len--;        // len bounds how many characters of s2 may be compared
  1.2569 +    }
  1.2570 +    return (*s1 == '\0'); // 1 only when s1 was consumed completely (an empty s1 yields 1)
  1.2571 + }
  1.2572 +
  1.2573 +// check word for suffixes: returns the hentry from the first suffix entry whose checkword() succeeds (zero-length suffixes are tried first), or NULL
  1.2574 +
  1.2575 +struct hentry * AffixMgr::suffix_check (const char * word, int len, 
  1.2576 +       int sfxopts, PfxEntry * ppfx, char ** wlst, int maxSug, int * ns, 
  1.2577 +       const FLAG cclass, const FLAG needflag, char in_compound)
  1.2578 +{
  1.2579 +    struct hentry * rv = NULL;
  1.2580 +    PfxEntry* ep = ppfx; // same pointer as ppfx; used for the prefix-side flag tests below
  1.2581 +
  1.2582 +    // first handle the special case of 0 length suffixes (the sStart[0] list)
  1.2583 +    SfxEntry * se = sStart[0];
  1.2584 +
  1.2585 +    while (se) {
  1.2586 +        if (!cclass || se->getCont()) { // when a cclass is given, only entries that carry continuation flags are tried
  1.2587 +            // suffixes are not allowed in beginning of compounds
  1.2588 +            if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  1.2589 +             // except when the suffix is flagged with compoundpermitflag
  1.2590 +             (se->getCont() && compoundpermitflag &&
  1.2591 +                TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
  1.2592 +              // no circumfix flag in prefix and suffix
  1.2593 +              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  1.2594 +                   circumfix, ep->getContLen())) &&
  1.2595 +               (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
  1.2596 +              // circumfix flag in prefix AND suffix
  1.2597 +              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  1.2598 +                   circumfix, ep->getContLen())) &&
  1.2599 +               (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
  1.2600 +            // fogemorpheme: outside compounds, skip suffixes flagged onlyincompound
  1.2601 +              (in_compound || 
  1.2602 +                 !(se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
  1.2603 +            // needaffix on prefix or first suffix: without cclass, a needaffix suffix passes only together with a prefix that lacks needaffix
  1.2604 +              (cclass || 
  1.2605 +                   !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
  1.2606 +                   (ppfx && !((ep->getCont()) &&
  1.2607 +                     TESTAFF(ep->getCont(), needaffix,
  1.2608 +                       ep->getContLen())))
  1.2609 +              )) {
  1.2610 +                rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass, 
  1.2611 +                    needflag, (in_compound ? 0 : onlyincompound)); // onlyincompound is handed to checkword only outside compounds
  1.2612 +                if (rv) {
  1.2613 +                    sfx=se; // BUG: sfx not stateless (note: sfxflag and sfxappnd are left untouched on this zero-length path)
  1.2614 +                    return rv;
  1.2615 +                }
  1.2616 +            }
  1.2617 +        }
  1.2618 +       se = se->getNext();
  1.2619 +    }
  1.2620 +
  1.2621 +    // now handle the general case: suffix entries listed under the last byte of word
  1.2622 +    if (len == 0) return NULL; // FULLSTRIP: an empty word has no last byte to index sStart with
  1.2623 +    unsigned char sp= *((const unsigned char *)(word + len - 1));
  1.2624 +    SfxEntry * sptr = sStart[sp];
  1.2625 +
  1.2626 +    while (sptr) {
  1.2627 +        if (isRevSubset(sptr->getKey(), word + len - 1, len)
  1.2628 +        ) {
  1.2629 +            // suffixes are not allowed in beginning of compounds
  1.2630 +            if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  1.2631 +             // except when the suffix is flagged with compoundpermitflag
  1.2632 +             (sptr->getCont() && compoundpermitflag &&
  1.2633 +                TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
  1.2634 +              // no circumfix flag in prefix and suffix
  1.2635 +              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  1.2636 +                   circumfix, ep->getContLen())) &&
  1.2637 +               (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
  1.2638 +              // circumfix flag in prefix AND suffix
  1.2639 +              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  1.2640 +                   circumfix, ep->getContLen())) &&
  1.2641 +               (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
  1.2642 +            // fogemorpheme: outside compounds, skip suffixes flagged onlyincompound
  1.2643 +              (in_compound || 
  1.2644 +                 !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
  1.2645 +            // needaffix on prefix or first suffix: without cclass, a needaffix suffix passes only together with a prefix that lacks needaffix
  1.2646 +              (cclass || 
  1.2647 +                  !(sptr->getCont() && TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
  1.2648 +                  (ppfx && !((ep->getCont()) &&
  1.2649 +                     TESTAFF(ep->getCont(), needaffix,
  1.2650 +                       ep->getContLen())))
  1.2651 +              )
  1.2652 +            ) if (in_compound != IN_CPD_END || ppfx || !(sptr->getCont() && TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) { // second, nested if: at compound end and without a prefix, onlyincompound suffixes are skipped
  1.2653 +                rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
  1.2654 +                    maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound)); // onlyincompound is handed to checkword only outside compounds
  1.2655 +                if (rv) {
  1.2656 +                    sfx=sptr; // BUG: sfx not stateless
  1.2657 +                    sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
  1.2658 +                    if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless (recorded only for suffixes without continuation flags)
  1.2659 +                    return rv;
  1.2660 +                }
  1.2661 +             }
  1.2662 +             sptr = sptr->getNextEQ(); // taken whenever the key matched, whether or not checkword was tried
  1.2663 +        } else {
  1.2664 +             sptr = sptr->getNextNE(); // taken when the key did not match
  1.2665 +        }
  1.2666 +    }
  1.2667 +
  1.2668 +    return NULL;
  1.2669 +}
  1.2670 +
  1.2671 +// check word for two-level suffixes: returns the first non-NULL result of check_twosfx() over the candidate suffix entries, or NULL
  1.2672 +
  1.2673 +struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len, 
  1.2674 +       int sfxopts, PfxEntry * ppfx, const FLAG needflag)
  1.2675 +{
  1.2676 +    struct hentry * rv = NULL;
  1.2677 +
  1.2678 +    // first handle the special case of 0 length suffixes (the sStart[0] list)
  1.2679 +    SfxEntry * se = sStart[0];
  1.2680 +    while (se) {
  1.2681 +        if (contclasses[se->getFlag()]) // only entries whose flag has a non-zero contclasses slot are tried
  1.2682 +        {
  1.2683 +            rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
  1.2684 +            if (rv) return rv; // note: sfxflag and sfxappnd are not updated on this zero-length path
  1.2685 +        }
  1.2686 +        se = se->getNext();
  1.2687 +    }
  1.2688 +
  1.2689 +    // now handle the general case: suffix entries listed under the last byte of word
  1.2690 +    if (len == 0) return NULL; // FULLSTRIP: an empty word has no last byte to index sStart with
  1.2691 +    unsigned char sp = *((const unsigned char *)(word + len - 1));
  1.2692 +    SfxEntry * sptr = sStart[sp];
  1.2693 +
  1.2694 +    while (sptr) {
  1.2695 +        if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
  1.2696 +            if (contclasses[sptr->getFlag()]) // only entries whose flag has a non-zero contclasses slot are tried
  1.2697 +            {
  1.2698 +                rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
  1.2699 +                if (rv) {
  1.2700 +                    sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
  1.2701 +                    if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless (recorded only for suffixes without continuation flags)
  1.2702 +                    return rv;
  1.2703 +                }
  1.2704 +            }
  1.2705 +            sptr = sptr->getNextEQ(); // taken whenever the key matched
  1.2706 +        } else {
  1.2707 +             sptr = sptr->getNextNE(); // taken when the key did not match
  1.2708 +        }
  1.2709 +    }
  1.2710 +
  1.2711 +    return NULL;
  1.2712 +}
  1.2713 +
  1.2714 +char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len, // collects one text line per two-level suffix match; returns mystrdup(result) or NULL when nothing was collected
  1.2715 +       int sfxopts, PfxEntry * ppfx, const FLAG needflag)
  1.2716 +{
  1.2717 +    char result[MAXLNLEN];  // accumulated output, one '\n'-terminated line per match
  1.2718 +    char result2[MAXLNLEN]; // scratch: text for the current general-case match
  1.2719 +    char result3[MAXLNLEN]; // scratch: morph text (or debug flag) of the current suffix entry
  1.2720 +    
  1.2721 +    char * st;
  1.2722 +
  1.2723 +    result[0] = '\0';
  1.2724 +    result2[0] = '\0';
  1.2725 +    result3[0] = '\0';
  1.2726 +
  1.2727 +    // first handle the special case of 0 length suffixes (the sStart[0] list)
  1.2728 +    SfxEntry * se = sStart[0];
  1.2729 +    while (se) {
  1.2730 +        if (contclasses[se->getFlag()]) // only entries whose flag has a non-zero contclasses slot are tried
  1.2731 +        {
  1.2732 +            st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
  1.2733 +            if (st) {
  1.2734 +                if (ppfx) {
  1.2735 +                    if (ppfx->getMorph()) {
  1.2736 +                        mystrcat(result, ppfx->getMorph(), MAXLNLEN);
  1.2737 +                        mystrcat(result, " ", MAXLNLEN);
  1.2738 +                    } else debugflag(result, ppfx->getFlag()); // prefix without morph text: debugflag() gets its flag instead
  1.2739 +                }
  1.2740 +                mystrcat(result, st, MAXLNLEN);
  1.2741 +                free(st); // st is released here once its text has been appended
  1.2742 +                if (se->getMorph()) {
  1.2743 +                    mystrcat(result, " ", MAXLNLEN);
  1.2744 +                    mystrcat(result, se->getMorph(), MAXLNLEN);
  1.2745 +                } else debugflag(result, se->getFlag()); // suffix without morph text: debugflag() gets its flag instead
  1.2746 +                mystrcat(result, "\n", MAXLNLEN);
  1.2747 +            }
  1.2748 +        }
  1.2749 +        se = se->getNext();
  1.2750 +    }
  1.2751 +
  1.2752 +    // now handle the general case: suffix entries listed under the last byte of word
  1.2753 +    if (len == 0) return NULL; // FULLSTRIP -- NOTE(review): this also drops anything the zero-length loop already put in result; confirm that is intended
  1.2754 +    unsigned char sp = *((const unsigned char *)(word + len - 1));
  1.2755 +    SfxEntry * sptr = sStart[sp];
  1.2756 +
  1.2757 +    while (sptr) {
  1.2758 +        if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
  1.2759 +            if (contclasses[sptr->getFlag()]) 
  1.2760 +            {
  1.2761 +                st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
  1.2762 +                if (st) {
  1.2763 +                    sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless (the loop keeps going, so the last match wins)
  1.2764 +                    if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
  1.2765 +                    strcpy(result2, st); // unbounded copy -- NOTE(review): assumes st fits in MAXLNLEN bytes; confirm against check_twosfx_morph
  1.2766 +                    free(st);
  1.2767 +
  1.2768 +                result3[0] = '\0'; // despite the indentation, this and the following lines are still inside the if (st) block
  1.2769 +
  1.2770 +                if (sptr->getMorph()) {
  1.2771 +                    mystrcat(result3, " ", MAXLNLEN);
  1.2772 +                    mystrcat(result3, sptr->getMorph(), MAXLNLEN);
  1.2773 +                } else debugflag(result3, sptr->getFlag()); // suffix without morph text: debugflag() gets its flag instead
  1.2774 +                strlinecat(result2, result3); // NOTE(review): no size argument is passed here; presumably appends result3 to each line of result2 -- confirm
  1.2775 +                mystrcat(result2, "\n", MAXLNLEN);
  1.2776 +                mystrcat(result,  result2, MAXLNLEN);
  1.2777 +                }
  1.2778 +            }
  1.2779 +            sptr = sptr->getNextEQ(); // taken whenever the key matched
  1.2780 +        } else {
  1.2781 +             sptr = sptr->getNextNE(); // taken when the key did not match
  1.2782 +        }
  1.2783 +    }
  1.2784 +    if (*result) return mystrdup(result); // non-empty: hand back a copy made by mystrdup
  1.2785 +    return NULL;
  1.2786 +}
  1.2787 +
  1.2788 +char * AffixMgr::suffix_check_morph(const char * word, int len, 
  1.2789 +       int sfxopts, PfxEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
  1.2790 +{
  1.2791 +    char result[MAXLNLEN];
  1.2792 +    
  1.2793 +    struct hentry * rv = NULL;
  1.2794 +
  1.2795 +    result[0] = '\0';
  1.2796 +
  1.2797 +    PfxEntry* ep = ppfx;
  1.2798 +
  1.2799 +    // first handle the special case of 0 length suffixes
  1.2800 +    SfxEntry * se = sStart[0];
  1.2801 +    while (se) {
  1.2802 +        if (!cclass || se->getCont()) {
  1.2803 +            // suffixes are not allowed in beginning of compounds
  1.2804 +            if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  1.2805 +             // except when signed with compoundpermitflag flag
  1.2806 +             (se->getCont() && compoundpermitflag &&
  1.2807 +                TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
  1.2808 +              // no circumfix flag in prefix and suffix
  1.2809 +              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  1.2810 +                   circumfix, ep->getContLen())) &&
  1.2811 +               (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
  1.2812 +              // circumfix flag in prefix AND suffix
  1.2813 +              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  1.2814 +                   circumfix, ep->getContLen())) &&
  1.2815 +               (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
  1.2816 +            // fogemorpheme
  1.2817 +              (in_compound || 
  1.2818 +                 !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
  1.2819 +            // needaffix on prefix or first suffix
  1.2820 +              (cclass || 
  1.2821 +                   !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
  1.2822 +                   (ppfx && !((ep->getCont()) &&
  1.2823 +                     TESTAFF(ep->getCont(), needaffix,
  1.2824 +                       ep->getContLen())))
  1.2825 +              )
  1.2826 +            ))
  1.2827 +            rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
  1.2828 +         while (rv) {
  1.2829 +           if (ppfx) {
  1.2830 +                if (ppfx->getMorph()) {
  1.2831 +                    mystrcat(result, ppfx->getMorph(), MAXLNLEN);
  1.2832 +                    mystrcat(result, " ", MAXLNLEN);
  1.2833 +                } else debugflag(result, ppfx->getFlag());
  1.2834 +            }
  1.2835 +            if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2836 +            if (! HENTRY_FIND(rv, MORPH_STEM)) {
  1.2837 +                mystrcat(result, " ", MAXLNLEN);                                
  1.2838 +                mystrcat(result, MORPH_STEM, MAXLNLEN);
  1.2839 +                mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
  1.2840 +            }
  1.2841 +            // store the pointer of the hash entry
  1.2842 +//            sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
  1.2843 +            
  1.2844 +            if (!complexprefixes && HENTRY_DATA(rv)) {
  1.2845 +                    mystrcat(result, " ", MAXLNLEN);                                
  1.2846 +                    mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2847 +            }
  1.2848 +            if (se->getMorph()) {
  1.2849 +                mystrcat(result, " ", MAXLNLEN);                                
  1.2850 +                mystrcat(result, se->getMorph(), MAXLNLEN);
  1.2851 +            } else debugflag(result, se->getFlag());
  1.2852 +            mystrcat(result, "\n", MAXLNLEN);
  1.2853 +            rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
  1.2854 +         }
  1.2855 +       }
  1.2856 +       se = se->getNext();
  1.2857 +    }
  1.2858 +
  1.2859 +    // now handle the general case
  1.2860 +    if (len == 0) return NULL; // FULLSTRIP
  1.2861 +    unsigned char sp = *((const unsigned char *)(word + len - 1));
  1.2862 +    SfxEntry * sptr = sStart[sp];
  1.2863 +
  1.2864 +    while (sptr) {
  1.2865 +        if (isRevSubset(sptr->getKey(), word + len - 1, len)
  1.2866 +        ) {
  1.2867 +            // suffixes are not allowed in beginning of compounds
  1.2868 +            if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  1.2869 +             // except when signed with compoundpermitflag flag
  1.2870 +             (sptr->getCont() && compoundpermitflag &&
  1.2871 +                TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
  1.2872 +              // no circumfix flag in prefix and suffix
  1.2873 +              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  1.2874 +                   circumfix, ep->getContLen())) &&
  1.2875 +               (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
  1.2876 +              // circumfix flag in prefix AND suffix
  1.2877 +              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  1.2878 +                   circumfix, ep->getContLen())) &&
  1.2879 +               (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
  1.2880 +            // fogemorpheme
  1.2881 +              (in_compound || 
  1.2882 +                 !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
  1.2883 +            // needaffix on first suffix
  1.2884 +              (cclass || !(sptr->getCont() && 
  1.2885 +                   TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))
  1.2886 +            )) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
  1.2887 +            while (rv) {
  1.2888 +                    if (ppfx) {
  1.2889 +                        if (ppfx->getMorph()) {
  1.2890 +                            mystrcat(result, ppfx->getMorph(), MAXLNLEN);
  1.2891 +                            mystrcat(result, " ", MAXLNLEN);
  1.2892 +                        } else debugflag(result, ppfx->getFlag());
  1.2893 +                    }    
  1.2894 +                    if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2895 +                    if (! HENTRY_FIND(rv, MORPH_STEM)) {
  1.2896 +                            mystrcat(result, " ", MAXLNLEN);                                
  1.2897 +                            mystrcat(result, MORPH_STEM, MAXLNLEN);
  1.2898 +                            mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
  1.2899 +                    }
  1.2900 +                    // store the pointer of the hash entry
  1.2901 +//                    sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
  1.2902 +
  1.2903 +                    if (!complexprefixes && HENTRY_DATA(rv)) {
  1.2904 +                        mystrcat(result, " ", MAXLNLEN);                                
  1.2905 +                        mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  1.2906 +                    }
  1.2907 +
  1.2908 +                if (sptr->getMorph()) {
  1.2909 +                    mystrcat(result, " ", MAXLNLEN);
  1.2910 +                    mystrcat(result, sptr->getMorph(), MAXLNLEN);
  1.2911 +                } else debugflag(result, sptr->getFlag());
  1.2912 +                mystrcat(result, "\n", MAXLNLEN);
  1.2913 +                rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
  1.2914 +            }
  1.2915 +             sptr = sptr->getNextEQ();
  1.2916 +        } else {
  1.2917 +             sptr = sptr->getNextNE();
  1.2918 +        }
  1.2919 +    }
  1.2920 +
  1.2921 +    if (*result) return mystrdup(result);
  1.2922 +    return NULL;
  1.2923 +}
  1.2924 +
  1.2925 +// check if word with affixes is correctly spelled
  1.2926 +struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
  1.2927 +{
  1.2928 +    struct hentry * rv= NULL;
  1.2929 +
  1.2930 +    // check all prefixes (also crossed with suffixes if allowed) 
  1.2931 +    rv = prefix_check(word, len, in_compound, needflag);
  1.2932 +    if (rv) return rv;
  1.2933 +
  1.2934 +    // if still not found check all suffixes
  1.2935 +    rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
  1.2936 +
  1.2937 +    if (havecontclass) {
  1.2938 +        sfx = NULL;
  1.2939 +        pfx = NULL;
  1.2940 +
  1.2941 +        if (rv) return rv;
  1.2942 +        // if still not found check all two-level suffixes
  1.2943 +        rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
  1.2944 +
  1.2945 +        if (rv) return rv;
  1.2946 +        // if still not found check all two-level suffixes
  1.2947 +        rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
  1.2948 +    }
  1.2949 +
  1.2950 +    return rv;
  1.2951 +}
  1.2952 +
  1.2953 +// check if word with affixes is correctly spelled
  1.2954 +char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
  1.2955 +{
  1.2956 +    char result[MAXLNLEN];
  1.2957 +    char * st = NULL;
  1.2958 +
  1.2959 +    *result = '\0';
  1.2960 +    
  1.2961 +    // check all prefixes (also crossed with suffixes if allowed) 
  1.2962 +    st = prefix_check_morph(word, len, in_compound);
  1.2963 +    if (st) {
  1.2964 +        mystrcat(result, st, MAXLNLEN);
  1.2965 +        free(st);
  1.2966 +    }
  1.2967 +
  1.2968 +    // if still not found check all suffixes    
  1.2969 +    st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
  1.2970 +    if (st) {
  1.2971 +        mystrcat(result, st, MAXLNLEN);
  1.2972 +        free(st);
  1.2973 +    }
  1.2974 +
  1.2975 +    if (havecontclass) {
  1.2976 +        sfx = NULL;
  1.2977 +        pfx = NULL;
  1.2978 +        // if still not found check all two-level suffixes
  1.2979 +        st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
  1.2980 +        if (st) {
  1.2981 +            mystrcat(result, st, MAXLNLEN);
  1.2982 +            free(st);
  1.2983 +        }
  1.2984 +
  1.2985 +        // if still not found check all two-level suffixes
  1.2986 +        st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
  1.2987 +        if (st) {
  1.2988 +            mystrcat(result, st, MAXLNLEN);
  1.2989 +            free(st);
  1.2990 +        }
  1.2991 +    }
  1.2992 +
  1.2993 +    return mystrdup(result);
  1.2994 +}
  1.2995 +
  1.2996 +char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
  1.2997 +    unsigned short al, char * morph, char * targetmorph, int level)
  1.2998 +{
  1.2999 +    // handle suffixes
  1.3000 +    char * stemmorph;
  1.3001 +    char * stemmorphcatpos;
  1.3002 +    char mymorph[MAXLNLEN];
  1.3003 +
  1.3004 +    if (!morph) return NULL;
  1.3005 +
  1.3006 +    // check substandard flag
  1.3007 +    if (TESTAFF(ap, substandard, al)) return NULL;
  1.3008 +
  1.3009 +    if (morphcmp(morph, targetmorph) == 0) return mystrdup(ts);
  1.3010 +
  1.3011 +//    int targetcount = get_sfxcount(targetmorph);
  1.3012 +
  1.3013 +    // use input suffix fields, if exist
  1.3014 +    if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
  1.3015 +        stemmorph = mymorph;
  1.3016 +        strcpy(stemmorph, morph);
  1.3017 +        mystrcat(stemmorph, " ", MAXLNLEN);
  1.3018 +        stemmorphcatpos = stemmorph + strlen(stemmorph);
  1.3019 +    } else {
  1.3020 +        stemmorph = morph;
  1.3021 +        stemmorphcatpos = NULL;
  1.3022 +    }
  1.3023 +
  1.3024 +    for (int i = 0; i < al; i++) {
  1.3025 +        const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
  1.3026 +        SfxEntry * sptr = sFlag[c];
  1.3027 +        while (sptr) {
  1.3028 +            if (sptr->getFlag() == ap[i] && sptr->getMorph() && ((sptr->getContLen() == 0) || 
  1.3029 +                // don't generate forms with substandard affixes
  1.3030 +                !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
  1.3031 +
  1.3032 +                if (stemmorphcatpos) strcpy(stemmorphcatpos, sptr->getMorph());
  1.3033 +                else stemmorph = (char *) sptr->getMorph();
  1.3034 +
  1.3035 +                int cmp = morphcmp(stemmorph, targetmorph);
  1.3036 +
  1.3037 +                if (cmp == 0) {
  1.3038 +                    char * newword = sptr->add(ts, wl);
  1.3039 +                    if (newword) {
  1.3040 +                        hentry * check = pHMgr->lookup(newword); // XXX extra dic
  1.3041 +                        if (!check || !check->astr || 
  1.3042 +                            !(TESTAFF(check->astr, forbiddenword, check->alen) || 
  1.3043 +                              TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
  1.3044 +                                return newword;
  1.3045 +                        }
  1.3046 +                        free(newword);
  1.3047 +                    }
  1.3048 +                }
  1.3049 +                
  1.3050 +                // recursive call for secondary suffixes
  1.3051 +                if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
  1.3052 +//                    (get_sfxcount(stemmorph) < targetcount) &&
  1.3053 +                    !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
  1.3054 +                    char * newword = sptr->add(ts, wl);
  1.3055 +                    if (newword) {
  1.3056 +                        char * newword2 = morphgen(newword, strlen(newword), sptr->getCont(),
  1.3057 +                            sptr->getContLen(), stemmorph, targetmorph, 1);
  1.3058 +
  1.3059 +                        if (newword2) {
  1.3060 +                            free(newword);
  1.3061 +                            return newword2;
  1.3062 +                        }
  1.3063 +                        free(newword);
  1.3064 +                        newword = NULL;
  1.3065 +                    }
  1.3066 +                }
  1.3067 +            }
  1.3068 +            sptr = sptr->getFlgNxt();
  1.3069 +        }
  1.3070 +    }
  1.3071 +   return NULL;
  1.3072 +}
  1.3073 +
  1.3074 +
  1.3075 +int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
  1.3076 +    int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
  1.3077 +    char * phon)
  1.3078 +{
  1.3079 +    int nh=0;
  1.3080 +    // first add root word to list
  1.3081 +    if ((nh < maxn) && !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
  1.3082 +         (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
  1.3083 +       wlst[nh].word = mystrdup(ts);
  1.3084 +       if (!wlst[nh].word) return 0;
  1.3085 +       wlst[nh].allow = (1 == 0);
  1.3086 +       wlst[nh].orig = NULL;
  1.3087 +       nh++;
  1.3088 +       // add special phonetic version
  1.3089 +       if (phon && (nh < maxn)) {
  1.3090 +    	    wlst[nh].word = mystrdup(phon);
  1.3091 +            if (!wlst[nh].word) return nh - 1;
  1.3092 +    	    wlst[nh].allow = (1 == 0);
  1.3093 +    	    wlst[nh].orig = mystrdup(ts);
  1.3094 +            if (!wlst[nh].orig) return nh - 1;
  1.3095 +    	    nh++;
  1.3096 +       }
  1.3097 +    }
  1.3098 +
  1.3099 +    // handle suffixes
  1.3100 +    for (int i = 0; i < al; i++) {
  1.3101 +       const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
  1.3102 +       SfxEntry * sptr = sFlag[c];
  1.3103 +       while (sptr) {
  1.3104 +         if ((sptr->getFlag() == ap[i]) && (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
  1.3105 +                (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
  1.3106 +                // check needaffix flag
  1.3107 +                !(sptr->getCont() && ((needaffix && 
  1.3108 +                      TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
  1.3109 +                  (circumfix && 
  1.3110 +                      TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
  1.3111 +                  (onlyincompound && 
  1.3112 +                      TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))
  1.3113 +                ) {
  1.3114 +            char * newword = sptr->add(ts, wl);
  1.3115 +            if (newword) {
  1.3116 +                if (nh < maxn) {
  1.3117 +                    wlst[nh].word = newword;
  1.3118 +                    wlst[nh].allow = sptr->allowCross();
  1.3119 +                    wlst[nh].orig = NULL;
  1.3120 +                    nh++;
  1.3121 +                    // add special phonetic version
  1.3122 +    		    if (phon && (nh < maxn)) {
  1.3123 +    			char st[MAXWORDUTF8LEN];
  1.3124 +    			strcpy(st, phon);
  1.3125 +    			strcat(st, sptr->getKey());
  1.3126 +    			reverseword(st + strlen(phon));
  1.3127 +    			wlst[nh].word = mystrdup(st);
  1.3128 +    			if (!wlst[nh].word) return nh - 1;
  1.3129 +    			wlst[nh].allow = (1 == 0);
  1.3130 +    			wlst[nh].orig = mystrdup(newword);
  1.3131 +                        if (!wlst[nh].orig) return nh - 1;
  1.3132 +    			nh++;
  1.3133 +    		    }
  1.3134 +                } else {
  1.3135 +                    free(newword);
  1.3136 +                }
  1.3137 +            }
  1.3138 +         }
  1.3139 +         sptr = sptr->getFlgNxt();
  1.3140 +       }
  1.3141 +    }
  1.3142 +
  1.3143 +    int n = nh;
  1.3144 +
  1.3145 +    // handle cross products of prefixes and suffixes
  1.3146 +    for (int j=1;j<n ;j++)
  1.3147 +       if (wlst[j].allow) {
  1.3148 +          for (int k = 0; k < al; k++) {
  1.3149 +             const unsigned char c = (unsigned char) (ap[k] & 0x00FF);
  1.3150 +             PfxEntry * cptr = pFlag[c];
  1.3151 +             while (cptr) {
  1.3152 +                if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
  1.3153 +                        (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
  1.3154 +                    int l1 = strlen(wlst[j].word);
  1.3155 +                    char * newword = cptr->add(wlst[j].word, l1);
  1.3156 +                    if (newword) {
  1.3157 +                       if (nh < maxn) {
  1.3158 +                          wlst[nh].word = newword;
  1.3159 +                          wlst[nh].allow = cptr->allowCross();
  1.3160 +                          wlst[nh].orig = NULL;
  1.3161 +                          nh++;
  1.3162 +                       } else {
  1.3163 +                          free(newword);
  1.3164 +                       }
  1.3165 +                    }
  1.3166 +                }
  1.3167 +                cptr = cptr->getFlgNxt();
  1.3168 +             }
  1.3169 +          }
  1.3170 +       }
  1.3171 +
  1.3172 +
  1.3173 +    // now handle pure prefixes
  1.3174 +    for (int m = 0; m < al; m ++) {
  1.3175 +       const unsigned char c = (unsigned char) (ap[m] & 0x00FF);
  1.3176 +       PfxEntry * ptr = pFlag[c];
  1.3177 +       while (ptr) {
  1.3178 +         if ((ptr->getFlag() == ap[m]) && (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
  1.3179 +                (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
  1.3180 +                // check needaffix flag
  1.3181 +                !(ptr->getCont() && ((needaffix && 
  1.3182 +                      TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
  1.3183 +                     (circumfix && 
  1.3184 +                      TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||                      
  1.3185 +                  (onlyincompound && 
  1.3186 +                      TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))
  1.3187 +                ) {
  1.3188 +            char * newword = ptr->add(ts, wl);
  1.3189 +            if (newword) {
  1.3190 +                if (nh < maxn) {
  1.3191 +                    wlst[nh].word = newword;
  1.3192 +                    wlst[nh].allow = ptr->allowCross();
  1.3193 +                    wlst[nh].orig = NULL;
  1.3194 +                    nh++;
  1.3195 +                } else {
  1.3196 +                    free(newword);
  1.3197 +                } 
  1.3198 +            }
  1.3199 +         }
  1.3200 +         ptr = ptr->getFlgNxt();
  1.3201 +       }
  1.3202 +    }
  1.3203 +
  1.3204 +    return nh;
  1.3205 +}
  1.3206 +
  1.3207 +// return length of replacing table
  1.3208 +int AffixMgr::get_numrep() const
  1.3209 +{
  1.3210 +  return numrep;
  1.3211 +}
  1.3212 +
  1.3213 +// return replacing table
  1.3214 +struct replentry * AffixMgr::get_reptable() const
  1.3215 +{
  1.3216 +  if (! reptable ) return NULL;
  1.3217 +  return reptable;
  1.3218 +}
  1.3219 +
  1.3220 +// return iconv table
  1.3221 +RepList * AffixMgr::get_iconvtable() const
  1.3222 +{
  1.3223 +  if (! iconvtable ) return NULL;
  1.3224 +  return iconvtable;
  1.3225 +}
  1.3226 +
  1.3227 +// return oconv table
  1.3228 +RepList * AffixMgr::get_oconvtable() const
  1.3229 +{
  1.3230 +  if (! oconvtable ) return NULL;
  1.3231 +  return oconvtable;
  1.3232 +}
  1.3233 +
  1.3234 +// return replacing table
  1.3235 +struct phonetable * AffixMgr::get_phonetable() const
  1.3236 +{
  1.3237 +  if (! phone ) return NULL;
  1.3238 +  return phone;
  1.3239 +}
  1.3240 +
  1.3241 +// return length of character map table
  1.3242 +int AffixMgr::get_nummap() const
  1.3243 +{
  1.3244 +  return nummap;
  1.3245 +}
  1.3246 +
  1.3247 +// return character map table
  1.3248 +struct mapentry * AffixMgr::get_maptable() const
  1.3249 +{
  1.3250 +  if (! maptable ) return NULL;
  1.3251 +  return maptable;
  1.3252 +}
  1.3253 +
  1.3254 +// return length of word break table
  1.3255 +int AffixMgr::get_numbreak() const
  1.3256 +{
  1.3257 +  return numbreak;
  1.3258 +}
  1.3259 +
  1.3260 +// return character map table
  1.3261 +char ** AffixMgr::get_breaktable() const
  1.3262 +{
  1.3263 +  if (! breaktable ) return NULL;
  1.3264 +  return breaktable;
  1.3265 +}
  1.3266 +
  1.3267 +// return text encoding of dictionary
  1.3268 +char * AffixMgr::get_encoding()
  1.3269 +{
  1.3270 +  if (! encoding ) encoding = mystrdup(SPELL_ENCODING);
  1.3271 +  return mystrdup(encoding);
  1.3272 +}
  1.3273 +
  1.3274 +// return text encoding of dictionary
  1.3275 +int AffixMgr::get_langnum() const
  1.3276 +{
  1.3277 +  return langnum;
  1.3278 +}
  1.3279 +
  1.3280 +// return double prefix option
  1.3281 +int AffixMgr::get_complexprefixes() const
  1.3282 +{
  1.3283 +  return complexprefixes;
  1.3284 +}
  1.3285 +
  1.3286 +// return FULLSTRIP option
  1.3287 +int AffixMgr::get_fullstrip() const
  1.3288 +{
  1.3289 +  return fullstrip;
  1.3290 +}
  1.3291 +
  1.3292 +FLAG AffixMgr::get_keepcase() const
  1.3293 +{
  1.3294 +  return keepcase;
  1.3295 +}
  1.3296 +
  1.3297 +FLAG AffixMgr::get_forceucase() const
  1.3298 +{
  1.3299 +  return forceucase;
  1.3300 +}
  1.3301 +
  1.3302 +FLAG AffixMgr::get_warn() const
  1.3303 +{
  1.3304 +  return warn;
  1.3305 +}
  1.3306 +
  1.3307 +int AffixMgr::get_forbidwarn() const
  1.3308 +{
  1.3309 +  return forbidwarn;
  1.3310 +}
  1.3311 +
  1.3312 +int AffixMgr::get_checksharps() const
  1.3313 +{
  1.3314 +  return checksharps;
  1.3315 +}
  1.3316 +
  1.3317 +char * AffixMgr::encode_flag(unsigned short aflag) const
  1.3318 +{
  1.3319 +  return pHMgr->encode_flag(aflag);
  1.3320 +}
  1.3321 +
  1.3322 +
  1.3323 +// return the preferred ignore string for suggestions
  1.3324 +char * AffixMgr::get_ignore() const
  1.3325 +{
  1.3326 +  if (!ignorechars) return NULL;
  1.3327 +  return ignorechars;
  1.3328 +}
  1.3329 +
  1.3330 +// return the preferred ignore string for suggestions
  1.3331 +unsigned short * AffixMgr::get_ignore_utf16(int * len) const
  1.3332 +{
  1.3333 +  *len = ignorechars_utf16_len;
  1.3334 +  return ignorechars_utf16;
  1.3335 +}
  1.3336 +
  1.3337 +// return the keyboard string for suggestions
  1.3338 +char * AffixMgr::get_key_string()
  1.3339 +{
  1.3340 +  if (! keystring ) keystring = mystrdup(SPELL_KEYSTRING);
  1.3341 +  return mystrdup(keystring);
  1.3342 +}
  1.3343 +
  1.3344 +// return the preferred try string for suggestions
  1.3345 +char * AffixMgr::get_try_string() const
  1.3346 +{
  1.3347 +  if (! trystring ) return NULL;
  1.3348 +  return mystrdup(trystring);
  1.3349 +}
  1.3350 +
  1.3351 +// return the preferred try string for suggestions
  1.3352 +const char * AffixMgr::get_wordchars() const
  1.3353 +{
  1.3354 +  return wordchars;
  1.3355 +}
  1.3356 +
  1.3357 +unsigned short * AffixMgr::get_wordchars_utf16(int * len) const
  1.3358 +{
  1.3359 +  *len = wordchars_utf16_len;
  1.3360 +  return wordchars_utf16;
  1.3361 +}
  1.3362 +
  1.3363 +// is there compounding?
  1.3364 +int AffixMgr::get_compound() const
  1.3365 +{
  1.3366 +  return compoundflag || compoundbegin || numdefcpd;
  1.3367 +}
  1.3368 +
  1.3369 +// return the compound words control flag
  1.3370 +FLAG AffixMgr::get_compoundflag() const
  1.3371 +{
  1.3372 +  return compoundflag;
  1.3373 +}
  1.3374 +
  1.3375 +// return the forbidden words control flag
  1.3376 +FLAG AffixMgr::get_forbiddenword() const
  1.3377 +{
  1.3378 +  return forbiddenword;
  1.3379 +}
  1.3380 +
  1.3381 +// return the forbidden words control flag
  1.3382 +FLAG AffixMgr::get_nosuggest() const
  1.3383 +{
  1.3384 +  return nosuggest;
  1.3385 +}
  1.3386 +
  1.3387 +// return the forbidden words control flag
  1.3388 +FLAG AffixMgr::get_nongramsuggest() const
  1.3389 +{
  1.3390 +  return nongramsuggest;
  1.3391 +}
  1.3392 +
  1.3393 +// return the forbidden words flag modify flag
  1.3394 +FLAG AffixMgr::get_needaffix() const
  1.3395 +{
  1.3396 +  return needaffix;
  1.3397 +}
  1.3398 +
  1.3399 +// return the onlyincompound flag
  1.3400 +FLAG AffixMgr::get_onlyincompound() const
  1.3401 +{
  1.3402 +  return onlyincompound;
  1.3403 +}
  1.3404 +
  1.3405 +// return the compound word signal flag
  1.3406 +FLAG AffixMgr::get_compoundroot() const
  1.3407 +{
  1.3408 +  return compoundroot;
  1.3409 +}
  1.3410 +
  1.3411 +// return the compound begin signal flag
  1.3412 +FLAG AffixMgr::get_compoundbegin() const
  1.3413 +{
  1.3414 +  return compoundbegin;
  1.3415 +}
  1.3416 +
  1.3417 +// return the value of checknum
  1.3418 +int AffixMgr::get_checknum() const
  1.3419 +{
  1.3420 +  return checknum;
  1.3421 +}
  1.3422 +
  1.3423 +// return the value of prefix
  1.3424 +const char * AffixMgr::get_prefix() const
  1.3425 +{
  1.3426 +  if (pfx) return pfx->getKey();
  1.3427 +  return NULL;
  1.3428 +}
  1.3429 +
  1.3430 +// return the value of suffix
  1.3431 +const char * AffixMgr::get_suffix() const
  1.3432 +{
  1.3433 +  return sfxappnd;
  1.3434 +}
  1.3435 +
  1.3436 +// return the value of suffix
  1.3437 +const char * AffixMgr::get_version() const
  1.3438 +{
  1.3439 +  return version;
  1.3440 +}
  1.3441 +
  1.3442 +// return lemma_present flag
  1.3443 +FLAG AffixMgr::get_lemma_present() const
  1.3444 +{
  1.3445 +  return lemma_present;
  1.3446 +}
  1.3447 +
  1.3448 +// utility method to look up root words in hash table
  1.3449 +struct hentry * AffixMgr::lookup(const char * word)
  1.3450 +{
  1.3451 +  int i;
  1.3452 +  struct hentry * he = NULL;
  1.3453 +  for (i = 0; i < *maxdic && !he; i++) {
  1.3454 +    he = (alldic[i])->lookup(word);
  1.3455 +  }
  1.3456 +  return he;
  1.3457 +}
  1.3458 +
  1.3459 +// return the value of suffix
  1.3460 +int AffixMgr::have_contclass() const
  1.3461 +{
  1.3462 +  return havecontclass;
  1.3463 +}
  1.3464 +
  1.3465 +// return utf8
  1.3466 +int AffixMgr::get_utf8() const
  1.3467 +{
  1.3468 +  return utf8;
  1.3469 +}
  1.3470 +
  1.3471 +int AffixMgr::get_maxngramsugs(void) const
  1.3472 +{
  1.3473 +  return maxngramsugs;
  1.3474 +}
  1.3475 +
  1.3476 +int AffixMgr::get_maxcpdsugs(void) const
  1.3477 +{
  1.3478 +  return maxcpdsugs;
  1.3479 +}
  1.3480 +
  1.3481 +int AffixMgr::get_maxdiff(void) const
  1.3482 +{
  1.3483 +  return maxdiff;
  1.3484 +}
  1.3485 +
  1.3486 +int AffixMgr::get_onlymaxdiff(void) const
  1.3487 +{
  1.3488 +  return onlymaxdiff;
  1.3489 +}
  1.3490 +
  1.3491 +// return nosplitsugs
  1.3492 +int AffixMgr::get_nosplitsugs(void) const
  1.3493 +{
  1.3494 +  return nosplitsugs;
  1.3495 +}
  1.3496 +
  1.3497 +// return sugswithdots
  1.3498 +int AffixMgr::get_sugswithdots(void) const
  1.3499 +{
  1.3500 +  return sugswithdots;
  1.3501 +}
  1.3502 +
  1.3503 +/* parse flag */
  1.3504 +int AffixMgr::parse_flag(char * line, unsigned short * out, FileMgr * af) {
  1.3505 +   char * s = NULL;
  1.3506 +   if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
  1.3507 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
  1.3508 +      return 1;
  1.3509 +   }
  1.3510 +   if (parse_string(line, &s, af->getlinenum())) return 1;
  1.3511 +   *out = pHMgr->decode_flag(s);
  1.3512 +   free(s);
  1.3513 +   return 0;
  1.3514 +}
  1.3515 +
  1.3516 +/* parse num */
  1.3517 +int AffixMgr::parse_num(char * line, int * out, FileMgr * af) {
  1.3518 +   char * s = NULL;
  1.3519 +   if (*out != -1) {
  1.3520 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
  1.3521 +      return 1;
  1.3522 +   }
  1.3523 +   if (parse_string(line, &s, af->getlinenum())) return 1;
  1.3524 +   *out = atoi(s);
  1.3525 +   free(s);
  1.3526 +   return 0;
  1.3527 +}
  1.3528 +
  1.3529 +/* parse in the max syllablecount of compound words and  */
  1.3530 +int  AffixMgr::parse_cpdsyllable(char * line, FileMgr * af)
  1.3531 +{
  1.3532 +   char * tp = line;
  1.3533 +   char * piece;
  1.3534 +   int i = 0;
  1.3535 +   int np = 0;
  1.3536 +   w_char w[MAXWORDLEN];
  1.3537 +   piece = mystrsep(&tp, 0);
  1.3538 +   while (piece) {
  1.3539 +      if (*piece != '\0') {
  1.3540 +          switch(i) {
  1.3541 +             case 0: { np++; break; }
  1.3542 +             case 1: { cpdmaxsyllable = atoi(piece); np++; break; }
  1.3543 +             case 2: {
  1.3544 +                if (!utf8) {
  1.3545 +                    cpdvowels = mystrdup(piece);
  1.3546 +                } else {
  1.3547 +                    int n = u8_u16(w, MAXWORDLEN, piece);
  1.3548 +                    if (n > 0) {
  1.3549 +                        flag_qsort((unsigned short *) w, 0, n);
  1.3550 +                        cpdvowels_utf16 = (w_char *) malloc(n * sizeof(w_char));
  1.3551 +                        if (!cpdvowels_utf16) return 1;
  1.3552 +                        memcpy(cpdvowels_utf16, w, n * sizeof(w_char));
  1.3553 +                    }
  1.3554 +                    cpdvowels_utf16_len = n;
  1.3555 +                }
  1.3556 +                np++;
  1.3557 +                break;
  1.3558 +             }
  1.3559 +             default: break;
  1.3560 +          }
  1.3561 +          i++;
  1.3562 +      }
  1.3563 +      piece = mystrsep(&tp, 0);
  1.3564 +   }
  1.3565 +   if (np < 2) {
  1.3566 +      HUNSPELL_WARNING(stderr, "error: line %d: missing compoundsyllable information\n", af->getlinenum());
  1.3567 +      return 1;
  1.3568 +   }
  1.3569 +   if (np == 2) cpdvowels = mystrdup("aeiouAEIOU");
  1.3570 +   return 0;
  1.3571 +}
  1.3572 +
  1.3573 +/* parse in the typical fault correcting table */
  1.3574 +int  AffixMgr::parse_reptable(char * line, FileMgr * af)
  1.3575 +{
  1.3576 +   if (numrep != 0) {
  1.3577 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.3578 +      return 1;
  1.3579 +   }
  1.3580 +   char * tp = line;
  1.3581 +   char * piece;
  1.3582 +   int i = 0;
  1.3583 +   int np = 0;
  1.3584 +   piece = mystrsep(&tp, 0);
  1.3585 +   while (piece) {
  1.3586 +       if (*piece != '\0') {
  1.3587 +          switch(i) {
  1.3588 +             case 0: { np++; break; }
  1.3589 +             case 1: { 
  1.3590 +                       numrep = atoi(piece);
  1.3591 +                       if (numrep < 1) {
  1.3592 +                          HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
  1.3593 +                          return 1;
  1.3594 +                       }
  1.3595 +                       reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
  1.3596 +                       if (!reptable) return 1;
  1.3597 +                       np++;
  1.3598 +                       break;
  1.3599 +                     }
  1.3600 +             default: break;
  1.3601 +          }
  1.3602 +          i++;
  1.3603 +       }
  1.3604 +       piece = mystrsep(&tp, 0);
  1.3605 +   }
  1.3606 +   if (np != 2) {
  1.3607 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.3608 +      return 1;
  1.3609 +   } 
  1.3610 + 
  1.3611 +   /* now parse the numrep lines to read in the remainder of the table */
  1.3612 +   char * nl;
  1.3613 +   for (int j=0; j < numrep; j++) {
  1.3614 +        if (!(nl = af->getline())) return 1;
  1.3615 +        mychomp(nl);
  1.3616 +        tp = nl;
  1.3617 +        i = 0;
  1.3618 +        reptable[j].pattern = NULL;
  1.3619 +        reptable[j].pattern2 = NULL;
  1.3620 +        piece = mystrsep(&tp, 0);
  1.3621 +        while (piece) {
  1.3622 +           if (*piece != '\0') {
  1.3623 +               switch(i) {
  1.3624 +                  case 0: {
  1.3625 +                             if (strncmp(piece,"REP",3) != 0) {
  1.3626 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3627 +                                 numrep = 0;
  1.3628 +                                 return 1;
  1.3629 +                             }
  1.3630 +                             break;
  1.3631 +                          }
  1.3632 +                  case 1: {
  1.3633 +                            if (*piece == '^') reptable[j].start = true; else reptable[j].start = false;
  1.3634 +                            reptable[j].pattern = mystrrep(mystrdup(piece + int(reptable[j].start)),"_"," ");
  1.3635 +                            int lr = strlen(reptable[j].pattern) - 1;
  1.3636 +                            if (reptable[j].pattern[lr] == '$') {
  1.3637 +                                reptable[j].end = true;
  1.3638 +                                reptable[j].pattern[lr] = '\0';
  1.3639 +                            } else reptable[j].end = false;
  1.3640 +                            break;
  1.3641 +                          }
  1.3642 +                  case 2: { reptable[j].pattern2 = mystrrep(mystrdup(piece),"_"," "); break; }
  1.3643 +                  default: break;
  1.3644 +               }
  1.3645 +               i++;
  1.3646 +           }
  1.3647 +           piece = mystrsep(&tp, 0);
  1.3648 +        }
  1.3649 +        if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
  1.3650 +             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3651 +             numrep = 0;
  1.3652 +             return 1;
  1.3653 +        }
  1.3654 +   }
  1.3655 +   return 0;
  1.3656 +}
  1.3657 +
  1.3658 +/* parse in the typical fault correcting table */
  1.3659 +int  AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)
  1.3660 +{
  1.3661 +   if (*rl) {
  1.3662 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.3663 +      return 1;
  1.3664 +   }
  1.3665 +   char * tp = line;
  1.3666 +   char * piece;
  1.3667 +   int i = 0;
  1.3668 +   int np = 0;
  1.3669 +   int numrl = 0;
  1.3670 +   piece = mystrsep(&tp, 0);
  1.3671 +   while (piece) {
  1.3672 +       if (*piece != '\0') {
  1.3673 +          switch(i) {
  1.3674 +             case 0: { np++; break; }
  1.3675 +             case 1: { 
  1.3676 +                       numrl = atoi(piece);
  1.3677 +                       if (numrl < 1) {
  1.3678 +                          HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
  1.3679 +                          return 1;
  1.3680 +                       }
  1.3681 +                       *rl = new RepList(numrl);
  1.3682 +                       if (!*rl) return 1;
  1.3683 +                       np++;
  1.3684 +                       break;
  1.3685 +                     }
  1.3686 +             default: break;
  1.3687 +          }
  1.3688 +          i++;
  1.3689 +       }
  1.3690 +       piece = mystrsep(&tp, 0);
  1.3691 +   }
  1.3692 +   if (np != 2) {
  1.3693 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.3694 +      return 1;
  1.3695 +   } 
  1.3696 + 
  1.3697 +   /* now parse the num lines to read in the remainder of the table */
  1.3698 +   char * nl;
  1.3699 +   for (int j=0; j < numrl; j++) {
  1.3700 +        if (!(nl = af->getline())) return 1;
  1.3701 +        mychomp(nl);
  1.3702 +        tp = nl;
  1.3703 +        i = 0;
  1.3704 +        char * pattern = NULL;
  1.3705 +        char * pattern2 = NULL;
  1.3706 +        piece = mystrsep(&tp, 0);
  1.3707 +        while (piece) {
  1.3708 +           if (*piece != '\0') {
  1.3709 +               switch(i) {
  1.3710 +                  case 0: {
  1.3711 +                             if (strncmp(piece, keyword, strlen(keyword)) != 0) {
  1.3712 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3713 +                                 delete *rl;
  1.3714 +                                 *rl = NULL;
  1.3715 +                                 return 1;
  1.3716 +                             }
  1.3717 +                             break;
  1.3718 +                          }
  1.3719 +                  case 1: { pattern = mystrrep(mystrdup(piece),"_"," "); break; }
  1.3720 +                  case 2: { 
  1.3721 +                    pattern2 = mystrrep(mystrdup(piece),"_"," ");
  1.3722 +                    break; 
  1.3723 +                  }
  1.3724 +                  default: break;
  1.3725 +               }
  1.3726 +               i++;
  1.3727 +           }
  1.3728 +           piece = mystrsep(&tp, 0);
  1.3729 +        }
  1.3730 +        if (!pattern || !pattern2) {
  1.3731 +            if (pattern)
  1.3732 +                free(pattern);
  1.3733 +            if (pattern2)
  1.3734 +                free(pattern2);
  1.3735 +            HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3736 +            return 1;
  1.3737 +        }
  1.3738 +        (*rl)->add(pattern, pattern2);
  1.3739 +   }
  1.3740 +   return 0;
  1.3741 +}
  1.3742 +
  1.3743 +
  1.3744 +/* parse in the typical fault correcting table */
  1.3745 +int  AffixMgr::parse_phonetable(char * line, FileMgr * af)
  1.3746 +{
  1.3747 +   if (phone) {
  1.3748 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.3749 +      return 1;
  1.3750 +   }
  1.3751 +   char * tp = line;
  1.3752 +   char * piece;
  1.3753 +   int i = 0;
  1.3754 +   int np = 0;
  1.3755 +   piece = mystrsep(&tp, 0);
  1.3756 +   while (piece) {
  1.3757 +       if (*piece != '\0') {
  1.3758 +          switch(i) {
  1.3759 +             case 0: { np++; break; }
  1.3760 +             case 1: { 
  1.3761 +                       phone = (phonetable *) malloc(sizeof(struct phonetable));
  1.3762 +                       if (!phone) return 1;
  1.3763 +                       phone->num = atoi(piece);
  1.3764 +                       phone->rules = NULL;
  1.3765 +                       phone->utf8 = (char) utf8;
  1.3766 +                       if (phone->num < 1) {
  1.3767 +                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  1.3768 +                          return 1;
  1.3769 +                       }
  1.3770 +                       phone->rules = (char * *) malloc(2 * (phone->num + 1) * sizeof(char *));
  1.3771 +                       if (!phone->rules) {
  1.3772 +                          free(phone);
  1.3773 +                          phone = NULL;
  1.3774 +                          return 1;
  1.3775 +                       }
  1.3776 +                       np++;
  1.3777 +                       break;
  1.3778 +                     }
  1.3779 +             default: break;
  1.3780 +          }
  1.3781 +          i++;
  1.3782 +       }
  1.3783 +       piece = mystrsep(&tp, 0);
  1.3784 +   }
  1.3785 +   if (np != 2) {
  1.3786 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.3787 +      return 1;
  1.3788 +   } 
  1.3789 + 
  1.3790 +   /* now parse the phone->num lines to read in the remainder of the table */
  1.3791 +   char * nl;
  1.3792 +   for (int j=0; j < phone->num; j++) {
  1.3793 +        if (!(nl = af->getline())) return 1;
  1.3794 +        mychomp(nl);
  1.3795 +        tp = nl;
  1.3796 +        i = 0;
  1.3797 +        phone->rules[j * 2] = NULL;
  1.3798 +        phone->rules[j * 2 + 1] = NULL;
  1.3799 +        piece = mystrsep(&tp, 0);
  1.3800 +        while (piece) {
  1.3801 +           if (*piece != '\0') {
  1.3802 +               switch(i) {
  1.3803 +                  case 0: {
  1.3804 +                             if (strncmp(piece,"PHONE",5) != 0) {
  1.3805 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3806 +                                 phone->num = 0;
  1.3807 +                                 return 1;
  1.3808 +                             }
  1.3809 +                             break;
  1.3810 +                          }
  1.3811 +                  case 1: { phone->rules[j * 2] = mystrrep(mystrdup(piece),"_",""); break; }
  1.3812 +                  case 2: { phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece),"_",""); break; }
  1.3813 +                  default: break;
  1.3814 +               }
  1.3815 +               i++;
  1.3816 +           }
  1.3817 +           piece = mystrsep(&tp, 0);
  1.3818 +        }
  1.3819 +        if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) {
  1.3820 +             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3821 +             phone->num = 0;
  1.3822 +             return 1;
  1.3823 +        }
  1.3824 +   }
  1.3825 +   phone->rules[phone->num * 2] = mystrdup("");
  1.3826 +   phone->rules[phone->num * 2 + 1] = mystrdup("");
  1.3827 +   init_phonet_hash(*phone);
  1.3828 +   return 0;
  1.3829 +}
  1.3830 +
  1.3831 +/* parse in the checkcompoundpattern table */
  1.3832 +int  AffixMgr::parse_checkcpdtable(char * line, FileMgr * af)
  1.3833 +{
  1.3834 +   if (numcheckcpd != 0) {
  1.3835 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.3836 +      return 1;
  1.3837 +   }
  1.3838 +   char * tp = line;
  1.3839 +   char * piece;
  1.3840 +   int i = 0;
  1.3841 +   int np = 0;
  1.3842 +   piece = mystrsep(&tp, 0);
  1.3843 +   while (piece) {
  1.3844 +       if (*piece != '\0') {
  1.3845 +          switch(i) {
  1.3846 +             case 0: { np++; break; }
  1.3847 +             case 1: { 
  1.3848 +                       numcheckcpd = atoi(piece);
  1.3849 +                       if (numcheckcpd < 1) {
  1.3850 +                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  1.3851 +                          return 1;
  1.3852 +                       }
  1.3853 +                       checkcpdtable = (patentry *) malloc(numcheckcpd * sizeof(struct patentry));
  1.3854 +                       if (!checkcpdtable) return 1;
  1.3855 +                       np++;
  1.3856 +                       break;
  1.3857 +                     }
  1.3858 +             default: break;
  1.3859 +          }
  1.3860 +          i++;
  1.3861 +       }
  1.3862 +       piece = mystrsep(&tp, 0);
  1.3863 +   }
  1.3864 +   if (np != 2) {
  1.3865 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",  af->getlinenum());
  1.3866 +      return 1;
  1.3867 +   }
  1.3868 +
  1.3869 +   /* now parse the numcheckcpd lines to read in the remainder of the table */
  1.3870 +   char * nl;
  1.3871 +   for (int j=0; j < numcheckcpd; j++) {
  1.3872 +        if (!(nl = af->getline())) return 1;
  1.3873 +        mychomp(nl);
  1.3874 +        tp = nl;
  1.3875 +        i = 0;
  1.3876 +        checkcpdtable[j].pattern = NULL;
  1.3877 +        checkcpdtable[j].pattern2 = NULL;
  1.3878 +        checkcpdtable[j].pattern3 = NULL;
  1.3879 +        checkcpdtable[j].cond = FLAG_NULL;
  1.3880 +        checkcpdtable[j].cond2 = FLAG_NULL;
  1.3881 +        piece = mystrsep(&tp, 0);
  1.3882 +        while (piece) {
  1.3883 +           if (*piece != '\0') {
  1.3884 +               switch(i) {
  1.3885 +                  case 0: {
  1.3886 +                             if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
  1.3887 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3888 +                                 numcheckcpd = 0;
  1.3889 +                                 return 1;
  1.3890 +                             }
  1.3891 +                             break;
  1.3892 +                          }
  1.3893 +                  case 1: { 
  1.3894 +                    checkcpdtable[j].pattern = mystrdup(piece); 
  1.3895 +                    char * p = strchr(checkcpdtable[j].pattern, '/');
  1.3896 +                    if (p) {
  1.3897 +                      *p = '\0';
  1.3898 +                    checkcpdtable[j].cond = pHMgr->decode_flag(p + 1);
  1.3899 +                    }
  1.3900 +                    break; }
  1.3901 +                  case 2: { 
  1.3902 +                    checkcpdtable[j].pattern2 = mystrdup(piece);
  1.3903 +                    char * p = strchr(checkcpdtable[j].pattern2, '/');
  1.3904 +                    if (p) {
  1.3905 +                      *p = '\0';
  1.3906 +                      checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1);
  1.3907 +                    }
  1.3908 +                    break;
  1.3909 +                    }
  1.3910 +                  case 3: { checkcpdtable[j].pattern3 = mystrdup(piece); simplifiedcpd = 1; break; }
  1.3911 +                  default: break;
  1.3912 +               }
  1.3913 +               i++;
  1.3914 +           }
  1.3915 +           piece = mystrsep(&tp, 0);
  1.3916 +        }
  1.3917 +        if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
  1.3918 +             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3919 +             numcheckcpd = 0;
  1.3920 +             return 1;
  1.3921 +        }
  1.3922 +   }
  1.3923 +   return 0;
  1.3924 +}
  1.3925 +
  1.3926 +/* parse in the compound rule table */
  1.3927 +int  AffixMgr::parse_defcpdtable(char * line, FileMgr * af)
  1.3928 +{
  1.3929 +   if (numdefcpd != 0) {
  1.3930 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.3931 +      return 1;
  1.3932 +   }
  1.3933 +   char * tp = line;
  1.3934 +   char * piece;
  1.3935 +   int i = 0;
  1.3936 +   int np = 0;
  1.3937 +   piece = mystrsep(&tp, 0);
  1.3938 +   while (piece) {
  1.3939 +       if (*piece != '\0') {
  1.3940 +          switch(i) {
  1.3941 +             case 0: { np++; break; }
  1.3942 +             case 1: { 
  1.3943 +                       numdefcpd = atoi(piece);
  1.3944 +                       if (numdefcpd < 1) {
  1.3945 +                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  1.3946 +                          return 1;
  1.3947 +                       }
  1.3948 +                       defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
  1.3949 +                       if (!defcpdtable) return 1;
  1.3950 +                       np++;
  1.3951 +                       break;
  1.3952 +                     }
  1.3953 +             default: break;
  1.3954 +          }
  1.3955 +          i++;
  1.3956 +       }
  1.3957 +       piece = mystrsep(&tp, 0);
  1.3958 +   }
  1.3959 +   if (np != 2) {
  1.3960 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.3961 +      return 1;
  1.3962 +   } 
  1.3963 + 
  1.3964 +   /* now parse the numdefcpd lines to read in the remainder of the table */
  1.3965 +   char * nl;
  1.3966 +   for (int j=0; j < numdefcpd; j++) {
  1.3967 +        if (!(nl = af->getline())) return 1;
  1.3968 +        mychomp(nl);
  1.3969 +        tp = nl;
  1.3970 +        i = 0;
  1.3971 +        defcpdtable[j].def = NULL;
  1.3972 +        piece = mystrsep(&tp, 0);
  1.3973 +        while (piece) {
  1.3974 +           if (*piece != '\0') {
  1.3975 +               switch(i) {
  1.3976 +                  case 0: {
  1.3977 +                             if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
  1.3978 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.3979 +                                 numdefcpd = 0;
  1.3980 +                                 return 1;
  1.3981 +                             }
  1.3982 +                             break;
  1.3983 +                          }
  1.3984 +                  case 1: { // handle parenthesized flags
  1.3985 +                            if (strchr(piece, '(')) {
  1.3986 +                                defcpdtable[j].def = (FLAG *) malloc(strlen(piece) * sizeof(FLAG));
  1.3987 +                                defcpdtable[j].len = 0;
  1.3988 +                                int end = 0;
  1.3989 +                                FLAG * conv;
  1.3990 +                                while (!end) {
  1.3991 +                                    char * par = piece + 1;
  1.3992 +                                    while (*par != '(' && *par != ')' && *par != '\0') par++;
  1.3993 +                                    if (*par == '\0') end = 1; else *par = '\0';
  1.3994 +                                    if (*piece == '(') piece++;
  1.3995 +                                    if (*piece == '*' || *piece == '?') {
  1.3996 +                                        defcpdtable[j].def[defcpdtable[j].len++] = (FLAG) *piece;
  1.3997 +                                    } else if (*piece != '\0') {
  1.3998 +                                        int l = pHMgr->decode_flags(&conv, piece, af);
  1.3999 +                                        for (int k = 0; k < l; k++) defcpdtable[j].def[defcpdtable[j].len++] = conv[k];
  1.4000 +                                        free(conv);
  1.4001 +                                    }
  1.4002 +                                    piece = par + 1;
  1.4003 +                                }
  1.4004 +                            } else {
  1.4005 +                                defcpdtable[j].len = pHMgr->decode_flags(&(defcpdtable[j].def), piece, af);
  1.4006 +                            }
  1.4007 +                            break; 
  1.4008 +                           }
  1.4009 +                  default: break;
  1.4010 +               }
  1.4011 +               i++;
  1.4012 +           }
  1.4013 +           piece = mystrsep(&tp, 0);
  1.4014 +        }
  1.4015 +        if (!defcpdtable[j].len) {
  1.4016 +             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.4017 +             numdefcpd = 0;
  1.4018 +             return 1;
  1.4019 +        }
  1.4020 +   }
  1.4021 +   return 0;
  1.4022 +}
  1.4023 +
  1.4024 +
  1.4025 +/* parse in the character map table */
  1.4026 +int  AffixMgr::parse_maptable(char * line, FileMgr * af)
  1.4027 +{
  1.4028 +   if (nummap != 0) {
  1.4029 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.4030 +      return 1;
  1.4031 +   }
  1.4032 +   char * tp = line;
  1.4033 +   char * piece;
  1.4034 +   int i = 0;
  1.4035 +   int np = 0;
  1.4036 +   piece = mystrsep(&tp, 0);
  1.4037 +   while (piece) {
  1.4038 +       if (*piece != '\0') {
  1.4039 +          switch(i) {
  1.4040 +             case 0: { np++; break; }
  1.4041 +             case 1: { 
  1.4042 +                       nummap = atoi(piece);
  1.4043 +                       if (nummap < 1) {
  1.4044 +                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  1.4045 +                          return 1;
  1.4046 +                       }
  1.4047 +                       maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
  1.4048 +                       if (!maptable) return 1;
  1.4049 +                       np++;
  1.4050 +                       break;
  1.4051 +                     }
  1.4052 +             default: break;
  1.4053 +          }
  1.4054 +          i++;
  1.4055 +       }
  1.4056 +       piece = mystrsep(&tp, 0);
  1.4057 +   }
  1.4058 +   if (np != 2) {
  1.4059 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.4060 +      return 1;
  1.4061 +   } 
  1.4062 + 
  1.4063 +   /* now parse the nummap lines to read in the remainder of the table */
  1.4064 +   char * nl;
  1.4065 +   for (int j=0; j < nummap; j++) {
  1.4066 +        if (!(nl = af->getline())) return 1;
  1.4067 +        mychomp(nl);
  1.4068 +        tp = nl;
  1.4069 +        i = 0;
  1.4070 +        maptable[j].set = NULL;
  1.4071 +        maptable[j].len = 0;
  1.4072 +        piece = mystrsep(&tp, 0);
  1.4073 +        while (piece) {
  1.4074 +           if (*piece != '\0') {
  1.4075 +               switch(i) {
  1.4076 +                  case 0: {
  1.4077 +                             if (strncmp(piece,"MAP",3) != 0) {
  1.4078 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.4079 +                                 nummap = 0;
  1.4080 +                                 return 1;
  1.4081 +                             }
  1.4082 +                             break;
  1.4083 +                          }
  1.4084 +                  case 1: {
  1.4085 +			    int setn = 0;
  1.4086 +                            maptable[j].len = strlen(piece);
  1.4087 +                            maptable[j].set = (char **) malloc(maptable[j].len * sizeof(char*));
  1.4088 +                            if (!maptable[j].set) return 1;
  1.4089 +			    for (int k = 0; k < maptable[j].len; k++) {
  1.4090 +				int chl = 1;
  1.4091 +				int chb = k;
  1.4092 +			        if (piece[k] == '(') {
  1.4093 +				    char * parpos = strchr(piece + k, ')');
  1.4094 +				    if (parpos != NULL) {
  1.4095 +					chb = k + 1;
  1.4096 +					chl = (int)(parpos - piece) - k - 1;
  1.4097 +					k = k + chl + 1;
  1.4098 +				    }
  1.4099 +				} else {
  1.4100 +				    if (utf8 && (piece[k] & 0xc0) == 0xc0) {
  1.4101 +					for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++);
  1.4102 +					chl = k - chb;
  1.4103 +					k--;
  1.4104 +				    }
  1.4105 +				}
  1.4106 +				maptable[j].set[setn] = (char *) malloc(chl + 1);
  1.4107 +				if (!maptable[j].set[setn]) return 1;
  1.4108 +				strncpy(maptable[j].set[setn], piece + chb, chl);
  1.4109 +				maptable[j].set[setn][chl] = '\0';
  1.4110 +				setn++;
  1.4111 +			    }
  1.4112 +                            maptable[j].len = setn;
  1.4113 +                            break; }
  1.4114 +                  default: break;
  1.4115 +               }
  1.4116 +               i++;
  1.4117 +           }
  1.4118 +           piece = mystrsep(&tp, 0);
  1.4119 +        }
  1.4120 +        if (!maptable[j].set || !maptable[j].len) {
  1.4121 +             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.4122 +             nummap = 0;
  1.4123 +             return 1;
  1.4124 +        }
  1.4125 +   }
  1.4126 +   return 0;
  1.4127 +}
  1.4128 +
  1.4129 +/* parse in the word breakpoint table */
  1.4130 +int  AffixMgr::parse_breaktable(char * line, FileMgr * af)
  1.4131 +{
  1.4132 +   if (numbreak > -1) {
  1.4133 +      HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  1.4134 +      return 1;
  1.4135 +   }
  1.4136 +   char * tp = line;
  1.4137 +   char * piece;
  1.4138 +   int i = 0;
  1.4139 +   int np = 0;
  1.4140 +   piece = mystrsep(&tp, 0);
  1.4141 +   while (piece) {
  1.4142 +       if (*piece != '\0') {
  1.4143 +          switch(i) {
  1.4144 +             case 0: { np++; break; }
  1.4145 +             case 1: { 
  1.4146 +                       numbreak = atoi(piece);
  1.4147 +                       if (numbreak < 0) {
  1.4148 +                          HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  1.4149 +                          return 1;
  1.4150 +                       }
  1.4151 +                       if (numbreak == 0) return 0;
  1.4152 +                       breaktable = (char **) malloc(numbreak * sizeof(char *));
  1.4153 +                       if (!breaktable) return 1;
  1.4154 +                       np++;
  1.4155 +                       break;
  1.4156 +                     }
  1.4157 +             default: break;
  1.4158 +          }
  1.4159 +          i++;
  1.4160 +       }
  1.4161 +       piece = mystrsep(&tp, 0);
  1.4162 +   }
  1.4163 +   if (np != 2) {
  1.4164 +      HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.4165 +      return 1;
  1.4166 +   } 
  1.4167 + 
  1.4168 +   /* now parse the numbreak lines to read in the remainder of the table */
  1.4169 +   char * nl;
  1.4170 +   for (int j=0; j < numbreak; j++) {
  1.4171 +        if (!(nl = af->getline())) return 1;
  1.4172 +        mychomp(nl);
  1.4173 +        tp = nl;
  1.4174 +        i = 0;
  1.4175 +        piece = mystrsep(&tp, 0);
  1.4176 +        while (piece) {
  1.4177 +           if (*piece != '\0') {
  1.4178 +               switch(i) {
  1.4179 +                  case 0: {
  1.4180 +                             if (strncmp(piece,"BREAK",5) != 0) {
  1.4181 +                                 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.4182 +                                 numbreak = 0;
  1.4183 +                                 return 1;
  1.4184 +                             }
  1.4185 +                             break;
  1.4186 +                          }
  1.4187 +                  case 1: {
  1.4188 +                            breaktable[j] = mystrdup(piece);
  1.4189 +                            break;
  1.4190 +                          }
  1.4191 +                  default: break;
  1.4192 +               }
  1.4193 +               i++;
  1.4194 +           }
  1.4195 +           piece = mystrsep(&tp, 0);
  1.4196 +        }
  1.4197 +        if (!breaktable) {
  1.4198 +             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  1.4199 +             numbreak = 0;
  1.4200 +             return 1;
  1.4201 +        }
  1.4202 +   }
  1.4203 +   return 0;
  1.4204 +}
  1.4205 +
  1.4206 +void AffixMgr::reverse_condition(char * piece) {
  1.4207 +    int neg = 0;
  1.4208 +    for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
  1.4209 +        switch(*k) {
  1.4210 +          case '[': {
  1.4211 +                if (neg) *(k+1) = '['; else *k = ']';
  1.4212 +                    break;
  1.4213 +            }
  1.4214 +          case ']': {
  1.4215 +                *k = '[';
  1.4216 +                if (neg) *(k+1) = '^';
  1.4217 +                neg = 0;
  1.4218 +                break;
  1.4219 +            }
  1.4220 +          case '^': {
  1.4221 +               if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
  1.4222 +               break;
  1.4223 +                }
  1.4224 +          default: {
  1.4225 +            if (neg) *(k+1) = *k;
  1.4226 +          }
  1.4227 +       }
  1.4228 +    }
  1.4229 +}
  1.4230 +
  1.4231 +int  AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
  1.4232 +{
  1.4233 +   int numents = 0;      // number of affentry structures to parse
  1.4234 +
  1.4235 +   unsigned short aflag = 0;      // affix char identifier
  1.4236 +
  1.4237 +   char ff=0;
  1.4238 +   std::vector<affentry> affentries;
  1.4239 +
  1.4240 +   char * tp = line;
  1.4241 +   char * nl = line;
  1.4242 +   char * piece;
  1.4243 +   int i = 0;
  1.4244 +
  1.4245 +   // checking lines with bad syntax
  1.4246 +#ifdef DEBUG
  1.4247 +   int basefieldnum = 0;
  1.4248 +#endif
  1.4249 +
  1.4250 +   // split affix header line into pieces
  1.4251 +
  1.4252 +   int np = 0;
  1.4253 +
  1.4254 +   piece = mystrsep(&tp, 0);
  1.4255 +   while (piece) {
  1.4256 +      if (*piece != '\0') {
  1.4257 +          switch(i) {
  1.4258 +             // piece 1 - is type of affix
  1.4259 +             case 0: { np++; break; }
  1.4260 +          
  1.4261 +             // piece 2 - is affix char
  1.4262 +             case 1: { 
  1.4263 +                    np++;
  1.4264 +                    aflag = pHMgr->decode_flag(piece);
  1.4265 +                    if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
  1.4266 +                        ((at == 'P') && (dupflags[aflag] & dupPFX))) {
  1.4267 +                        HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",
  1.4268 +                            af->getlinenum());
  1.4269 +                        // return 1; XXX permissive mode for bad dictionaries
  1.4270 +                    }
  1.4271 +                    dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
  1.4272 +                    break; 
  1.4273 +                    }
  1.4274 +             // piece 3 - is cross product indicator 
  1.4275 +             case 2: { np++; if (*piece == 'Y') ff = aeXPRODUCT; break; }
  1.4276 +
  1.4277 +             // piece 4 - is number of affentries
  1.4278 +             case 3: { 
  1.4279 +                       np++;
  1.4280 +                       numents = atoi(piece); 
  1.4281 +                       if (numents == 0) {
  1.4282 +                           char * err = pHMgr->encode_flag(aflag);
  1.4283 +                           if (err) {
  1.4284 +                                HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
  1.4285 +                                   af->getlinenum());
  1.4286 +                                free(err);
  1.4287 +                           }
  1.4288 +                           return 1;
  1.4289 +                       }
  1.4290 +                       affentries.resize(numents);
  1.4291 +                       affentries[0].opts = ff;
  1.4292 +                       if (utf8) affentries[0].opts += aeUTF8;
  1.4293 +                       if (pHMgr->is_aliasf()) affentries[0].opts += aeALIASF;
  1.4294 +                       if (pHMgr->is_aliasm()) affentries[0].opts += aeALIASM;
  1.4295 +                       affentries[0].aflag = aflag;
  1.4296 +                     }
  1.4297 +
  1.4298 +             default: break;
  1.4299 +          }
  1.4300 +          i++;
  1.4301 +      }
  1.4302 +      piece = mystrsep(&tp, 0);
  1.4303 +   }
  1.4304 +   // check to make sure we parsed enough pieces
  1.4305 +   if (np != 4) {
  1.4306 +       char * err = pHMgr->encode_flag(aflag);
  1.4307 +       if (err) {
  1.4308 +            HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  1.4309 +            free(err);
  1.4310 +       }
  1.4311 +       return 1;
  1.4312 +   }
  1.4313 + 
  1.4314 +   // now parse numents affentries for this affix
  1.4315 +   std::vector<affentry>::iterator start = affentries.begin();
  1.4316 +   std::vector<affentry>::iterator end = affentries.end();
  1.4317 +   for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
  1.4318 +      if (!(nl = af->getline())) return 1;
  1.4319 +      mychomp(nl);
  1.4320 +      tp = nl;
  1.4321 +      i = 0;
  1.4322 +      np = 0;
  1.4323 +
  1.4324 +      // split line into pieces
  1.4325 +      piece = mystrsep(&tp, 0);
  1.4326 +      while (piece) {
  1.4327 +         if (*piece != '\0') {
  1.4328 +             switch(i) {
  1.4329 +                // piece 1 - is type
  1.4330 +                case 0: { 
  1.4331 +                          np++;
  1.4332 +                          if (entry != start) entry->opts = start->opts &
  1.4333 +                             (char) (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
  1.4334 +                          break;
  1.4335 +                        }
  1.4336 +
  1.4337 +                // piece 2 - is affix char
  1.4338 +                case 1: { 
  1.4339 +                          np++;
  1.4340 +                          if (pHMgr->decode_flag(piece) != aflag) {
  1.4341 +                              char * err = pHMgr->encode_flag(aflag);
  1.4342 +                              if (err) {
  1.4343 +                                HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
  1.4344 +                                    af->getlinenum(), err);
  1.4345 +                                free(err);
  1.4346 +                              }
  1.4347 +                              return 1;
  1.4348 +                          }
  1.4349 +
  1.4350 +                          if (entry != start) entry->aflag = start->aflag;
  1.4351 +                          break;
  1.4352 +                        }
  1.4353 +
  1.4354 +                // piece 3 - is string to strip or 0 for null 
  1.4355 +                case 2: { 
  1.4356 +                          np++;
  1.4357 +                          if (complexprefixes) {
  1.4358 +                            if (utf8) reverseword_utf(piece); else reverseword(piece);
  1.4359 +                          }
  1.4360 +                          entry->strip = mystrdup(piece);
  1.4361 +                          entry->stripl = (unsigned char) strlen(entry->strip);
  1.4362 +                          if (strcmp(entry->strip,"0") == 0) {
  1.4363 +                              free(entry->strip);
  1.4364 +                              entry->strip=mystrdup("");
  1.4365 +                              entry->stripl = 0;
  1.4366 +                          }   
  1.4367 +                          break; 
  1.4368 +                        }
  1.4369 +
  1.4370 +                // piece 4 - is affix string or 0 for null
  1.4371 +                case 3: { 
  1.4372 +                          char * dash;  
  1.4373 +                          entry->morphcode = NULL;
  1.4374 +                          entry->contclass = NULL;
  1.4375 +                          entry->contclasslen = 0;
  1.4376 +                          np++;
  1.4377 +                          dash = strchr(piece, '/');
  1.4378 +                          if (dash) {
  1.4379 +                            *dash = '\0';
  1.4380 +
  1.4381 +                            if (ignorechars) {
  1.4382 +                              if (utf8) {
  1.4383 +                                remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
  1.4384 +                              } else {
  1.4385 +                                remove_ignored_chars(piece,ignorechars);
  1.4386 +                              }
  1.4387 +                            }
  1.4388 +
  1.4389 +                            if (complexprefixes) {
  1.4390 +                                if (utf8) reverseword_utf(piece); else reverseword(piece);
  1.4391 +                            }
  1.4392 +                            entry->appnd = mystrdup(piece);
  1.4393 +
  1.4394 +                            if (pHMgr->is_aliasf()) {
  1.4395 +                                int index = atoi(dash + 1);
  1.4396 +                                entry->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(entry->contclass), af);
  1.4397 +                                if (!entry->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
  1.4398 +                            } else {
  1.4399 +                                entry->contclasslen = (unsigned short) pHMgr->decode_flags(&(entry->contclass), dash + 1, af);
  1.4400 +                                flag_qsort(entry->contclass, 0, entry->contclasslen);
  1.4401 +                            }
  1.4402 +                            *dash = '/';
  1.4403 +
  1.4404 +                            havecontclass = 1;
  1.4405 +                            for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
  1.4406 +                              contclasses[(entry->contclass)[_i]] = 1;
  1.4407 +                            }
  1.4408 +                          } else {
  1.4409 +                            if (ignorechars) {
  1.4410 +                              if (utf8) {
  1.4411 +                                remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
  1.4412 +                              } else {
  1.4413 +                                remove_ignored_chars(piece,ignorechars);
  1.4414 +                              }
  1.4415 +                            }
  1.4416 +
  1.4417 +                            if (complexprefixes) {
  1.4418 +                                if (utf8) reverseword_utf(piece); else reverseword(piece);
  1.4419 +                            }
  1.4420 +                            entry->appnd = mystrdup(piece);
  1.4421 +                          }
  1.4422 +
  1.4423 +                          entry->appndl = (unsigned char) strlen(entry->appnd);
  1.4424 +                          if (strcmp(entry->appnd,"0") == 0) {
  1.4425 +                              free(entry->appnd);
  1.4426 +                              entry->appnd=mystrdup("");
  1.4427 +                              entry->appndl = 0;
  1.4428 +                          }   
  1.4429 +                          break; 
  1.4430 +                        }
  1.4431 +
  1.4432 +                // piece 5 - is the conditions descriptions
  1.4433 +                case 4: { 
  1.4434 +                          np++;
  1.4435 +                          if (complexprefixes) {
  1.4436 +                            if (utf8) reverseword_utf(piece); else reverseword(piece);
  1.4437 +                            reverse_condition(piece);
  1.4438 +                          }
  1.4439 +                          if (entry->stripl && (strcmp(piece, ".") != 0) &&
  1.4440 +                            redundant_condition(at, entry->strip, entry->stripl, piece, af->getlinenum()))
  1.4441 +                                strcpy(piece, ".");
  1.4442 +                          if (at == 'S') {
  1.4443 +                            reverseword(piece);
  1.4444 +                            reverse_condition(piece);
  1.4445 +                          }
  1.4446 +                          if (encodeit(*entry, piece)) return 1;
  1.4447 +                         break;
  1.4448 +                }
  1.4449 +
  1.4450 +                case 5: {
  1.4451 +                          np++;
  1.4452 +                          if (pHMgr->is_aliasm()) {
  1.4453 +                            int index = atoi(piece);
  1.4454 +                            entry->morphcode = pHMgr->get_aliasm(index);
  1.4455 +                          } else {
  1.4456 +                            if (complexprefixes) { // XXX - fix me for morph. gen.
  1.4457 +                                if (utf8) reverseword_utf(piece); else reverseword(piece);
  1.4458 +                            }
  1.4459 +                            // add the remaining of the line
  1.4460 +                            if (*tp) {
  1.4461 +                                *(tp - 1) = ' ';
  1.4462 +                                tp = tp + strlen(tp);
  1.4463 +                            }
  1.4464 +                            entry->morphcode = mystrdup(piece);
  1.4465 +                            if (!entry->morphcode) return 1;
  1.4466 +                          }
  1.4467 +                          break; 
  1.4468 +                }
  1.4469 +                default: break;
  1.4470 +             }
  1.4471 +             i++;
  1.4472 +         }
  1.4473 +         piece = mystrsep(&tp, 0);
  1.4474 +      }
  1.4475 +      // check to make sure we parsed enough pieces
  1.4476 +      if (np < 4) {
  1.4477 +          char * err = pHMgr->encode_flag(aflag);
  1.4478 +          if (err) {
  1.4479 +            HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
  1.4480 +                af->getlinenum(), err);
  1.4481 +            free(err);
  1.4482 +          }
  1.4483 +          return 1;
  1.4484 +      }
  1.4485 +
  1.4486 +#ifdef DEBUG
  1.4487 +      // detect unnecessary fields, excepting comments
  1.4488 +      if (basefieldnum) {
  1.4489 +        int fieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
  1.4490 +          if (fieldnum != basefieldnum) 
  1.4491 +            HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", af->getlinenum());
  1.4492 +      } else {
  1.4493 +        basefieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
  1.4494 +      }
  1.4495 +#endif
  1.4496 +   }
  1.4497 + 
  1.4498 +   // now create SfxEntry or PfxEntry objects and use links to
  1.4499 +   // build an ordered (sorted by affix string) list
  1.4500 +   for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
  1.4501 +      if (at == 'P') {
  1.4502 +          PfxEntry * pfxptr = new PfxEntry(this,&(*entry));
  1.4503 +          build_pfxtree(pfxptr);
  1.4504 +      } else {
  1.4505 +          SfxEntry * sfxptr = new SfxEntry(this,&(*entry));
  1.4506 +          build_sfxtree(sfxptr); 
  1.4507 +      }
  1.4508 +   }
  1.4509 +   return 0;
  1.4510 +}
  1.4511 +
  1.4512 +int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, int linenum) {
  1.4513 +  int condl = strlen(cond);
  1.4514 +  int i;
  1.4515 +  int j;
  1.4516 +  int neg;
  1.4517 +  int in;
  1.4518 +  if (ft == 'P') { // prefix
  1.4519 +    if (strncmp(strip, cond, condl) == 0) return 1;
  1.4520 +    if (utf8) {
  1.4521 +    } else {
  1.4522 +      for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
  1.4523 +        if (cond[j] != '[') {
  1.4524 +          if (cond[j] != strip[i]) {
  1.4525 +            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  1.4526 +            return 0;
  1.4527 +          }
  1.4528 +        } else {
  1.4529 +          neg = (cond[j+1] == '^') ? 1 : 0;
  1.4530 +          in = 0;
  1.4531 +          do {
  1.4532 +            j++;
  1.4533 +            if (strip[i] == cond[j]) in = 1;
  1.4534 +          } while ((j < (condl - 1)) && (cond[j] != ']'));
  1.4535 +          if (j == (condl - 1) && (cond[j] != ']')) {
  1.4536 +            HUNSPELL_WARNING(stderr, "error: line %d: missing ] in condition:\n%s\n", linenum, cond);
  1.4537 +            return 0;
  1.4538 +          }
  1.4539 +          if ((!neg && !in) || (neg && in)) {
  1.4540 +            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  1.4541 +            return 0;
  1.4542 +          }
  1.4543 +        }
  1.4544 +      }
  1.4545 +      if (j >= condl) return 1;
  1.4546 +    }
  1.4547 +  } else { // suffix
  1.4548 +    if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) return 1;
  1.4549 +    if (utf8) {
  1.4550 +    } else {
  1.4551 +      for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
  1.4552 +        if (cond[j] != ']') {
  1.4553 +          if (cond[j] != strip[i]) {
  1.4554 +            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  1.4555 +            return 0;
  1.4556 +          }
  1.4557 +        } else {
  1.4558 +          in = 0;
  1.4559 +          do {
  1.4560 +            j--;
  1.4561 +            if (strip[i] == cond[j]) in = 1;
  1.4562 +          } while ((j > 0) && (cond[j] != '['));
  1.4563 +          if ((j == 0) && (cond[j] != '[')) {
  1.4564 +            HUNSPELL_WARNING(stderr, "error: line: %d: missing ] in condition:\n%s\n", linenum, cond);
  1.4565 +            return 0;
  1.4566 +          }
  1.4567 +          neg = (cond[j+1] == '^') ? 1 : 0;
  1.4568 +          if ((!neg && !in) || (neg && in)) {
  1.4569 +            HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  1.4570 +            return 0;
  1.4571 +          }
  1.4572 +        }
  1.4573 +      }
  1.4574 +      if (j < 0) return 1;
  1.4575 +    }
  1.4576 +  }
  1.4577 +  return 0;
  1.4578 +}

mercurial