extensions/spellcheck/hunspell/src/affixmgr.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /******* BEGIN LICENSE BLOCK *******
     2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     3  * 
     4  * The contents of this file are subject to the Mozilla Public License Version
     5  * 1.1 (the "License"); you may not use this file except in compliance with
     6  * the License. You may obtain a copy of the License at
     7  * http://www.mozilla.org/MPL/
     8  * 
     9  * Software distributed under the License is distributed on an "AS IS" basis,
    10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    11  * for the specific language governing rights and limitations under the
    12  * License.
    13  * 
    14  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    15  * and László Németh (Hunspell). Portions created by the Initial Developers
    16  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    17  * 
    18  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    19  *                 David Einstein (deinst@world.std.com)
    20  *                 László Németh (nemethl@gyorsposta.hu)
    21  *                 Caolan McNamara (caolanm@redhat.com)
    22  *                 Davide Prina
    23  *                 Giuseppe Modugno
    24  *                 Gianluca Turconi
    25  *                 Simon Brouwer
    26  *                 Noll Janos
    27  *                 Biro Arpad
    28  *                 Goldman Eleonora
    29  *                 Sarlos Tamas
    30  *                 Bencsath Boldizsar
    31  *                 Halacsy Peter
    32  *                 Dvornik Laszlo
    33  *                 Gefferth Andras
    34  *                 Nagy Viktor
    35  *                 Varga Daniel
    36  *                 Chris Halls
    37  *                 Rene Engelhard
    38  *                 Bram Moolenaar
    39  *                 Dafydd Jones
    40  *                 Harri Pitkanen
    41  *                 Andras Timar
    42  *                 Tor Lillqvist
    43  * 
    44  * Alternatively, the contents of this file may be used under the terms of
    45  * either the GNU General Public License Version 2 or later (the "GPL"), or
    46  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    47  * in which case the provisions of the GPL or the LGPL are applicable instead
    48  * of those above. If you wish to allow use of your version of this file only
    49  * under the terms of either the GPL or the LGPL, and not to allow others to
    50  * use your version of this file under the terms of the MPL, indicate your
    51  * decision by deleting the provisions above and replace them with the notice
    52  * and other provisions required by the GPL or the LGPL. If you do not delete
    53  * the provisions above, a recipient may use your version of this file under
    54  * the terms of any one of the MPL, the GPL or the LGPL.
    55  *
    56  ******* END LICENSE BLOCK *******/
    58 #include <stdlib.h>
    59 #include <string.h>
    60 #include <stdio.h>
    61 #include <ctype.h>
    63 #include <vector>
    65 #include "affixmgr.hxx"
    66 #include "affentry.hxx"
    67 #include "langnum.hxx"
    69 #include "csutil.hxx"
    71 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key) 
    72 {
    73   // register hash manager and load affix data from aff file
    74   pHMgr = ptr[0];
    75   alldic = ptr;
    76   maxdic = md;
    77   keystring = NULL;
    78   trystring = NULL;
    79   encoding=NULL;
    80   csconv=NULL;
    81   utf8 = 0;
    82   complexprefixes = 0;
    83   maptable = NULL;
    84   nummap = 0;
    85   breaktable = NULL;
    86   numbreak = -1;
    87   reptable = NULL;
    88   numrep = 0;
    89   iconvtable = NULL;
    90   oconvtable = NULL;
    91   checkcpdtable = NULL;
    92   // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
    93   simplifiedcpd = 0;
    94   numcheckcpd = 0;
    95   defcpdtable = NULL;
    96   numdefcpd = 0;
    97   phone = NULL;
    98   compoundflag = FLAG_NULL; // permits word in compound forms
    99   compoundbegin = FLAG_NULL; // may be first word in compound forms
   100   compoundmiddle = FLAG_NULL; // may be middle word in compound forms
   101   compoundend = FLAG_NULL; // may be last word in compound forms
   102   compoundroot = FLAG_NULL; // compound word signing flag
   103   compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
   104   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
   105   checkcompounddup = 0; // forbid double words in compounds
   106   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
   107   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
   108   checkcompoundtriple = 0; // forbid compounds with triple letters
   109   simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
   110   forbiddenword = FORBIDDENWORD; // forbidden word signing flag
   111   nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
   112   nongramsuggest = FLAG_NULL;
   113   lang = NULL; // language
   114   langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
   115   needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
   116   cpdwordmax = -1; // default: unlimited wordcount in compound words
   117   cpdmin = -1;  // undefined
   118   cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
   119   cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
   120   cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
   121   cpdvowels_utf16_len=0; // vowels
   122   pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
   123   sfxappnd=NULL; // previous suffix for counting a special syllables BUG
   124   cpdsyllablenum=NULL; // syllable count incrementing flag
   125   checknum=0; // checking numbers, and word with numbers
   126   wordchars=NULL; // letters + spec. word characters
   127   wordchars_utf16=NULL; // letters + spec. word characters
   128   wordchars_utf16_len=0; // letters + spec. word characters
   129   ignorechars=NULL; // letters + spec. word characters
   130   ignorechars_utf16=NULL; // letters + spec. word characters
   131   ignorechars_utf16_len=0; // letters + spec. word characters
   132   version=NULL; // affix and dictionary file version string
   133   havecontclass=0; // flags of possible continuing classes (double affix)
   134   // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
   135   // in morhological description in dictionary file. It's often combined with PSEUDOROOT.
   136   lemma_present = FLAG_NULL; 
   137   circumfix = FLAG_NULL; 
   138   onlyincompound = FLAG_NULL; 
   139   maxngramsugs = -1; // undefined
   140   maxdiff = -1; // undefined
   141   onlymaxdiff = 0;
   142   maxcpdsugs = -1; // undefined
   143   nosplitsugs = 0;
   144   sugswithdots = 0;
   145   keepcase = 0;
   146   forceucase = 0;
   147   warn = 0;
   148   forbidwarn = 0;
   149   checksharps = 0;
   150   substandard = FLAG_NULL;
   151   fullstrip = 0;
   153   sfx = NULL;
   154   pfx = NULL;
   156   for (int i=0; i < SETSIZE; i++) {
   157      pStart[i] = NULL;
   158      sStart[i] = NULL;
   159      pFlag[i] = NULL;
   160      sFlag[i] = NULL;
   161   }
   163   for (int j=0; j < CONTSIZE; j++) {
   164     contclasses[j] = 0;
   165   }
   167   if (parse_file(affpath, key)) {
   168      HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
   169   }
   171   if (cpdmin == -1) cpdmin = MINCPDLEN;
   173 }
   176 AffixMgr::~AffixMgr() 
   177 {
   178   // pass through linked prefix entries and clean up
   179   for (int i=0; i < SETSIZE ;i++) {
   180        pFlag[i] = NULL;
   181        PfxEntry * ptr = pStart[i];
   182        PfxEntry * nptr = NULL;
   183        while (ptr) {
   184             nptr = ptr->getNext();
   185             delete(ptr);
   186             ptr = nptr;
   187             nptr = NULL;
   188        }  
   189   }
   191   // pass through linked suffix entries and clean up
   192   for (int j=0; j < SETSIZE ; j++) {
   193        sFlag[j] = NULL;
   194        SfxEntry * ptr = sStart[j];
   195        SfxEntry * nptr = NULL;
   196        while (ptr) {
   197             nptr = ptr->getNext();
   198             delete(ptr);
   199             ptr = nptr;
   200             nptr = NULL;
   201        }
   202        sStart[j] = NULL;
   203   }
   205   if (keystring) free(keystring);
   206   keystring=NULL;
   207   if (trystring) free(trystring);
   208   trystring=NULL;
   209   if (encoding) free(encoding);
   210   encoding=NULL;
   211   if (maptable) {  
   212      for (int j=0; j < nummap; j++) {
   213         for (int k=0; k < maptable[j].len; k++) {
   214            if (maptable[j].set[k]) free(maptable[j].set[k]);
   215         }
   216         free(maptable[j].set);
   217         maptable[j].set = NULL;
   218         maptable[j].len = 0;
   219      }
   220      free(maptable);  
   221      maptable = NULL;
   222   }
   223   nummap = 0;
   224   if (breaktable) {
   225      for (int j=0; j < numbreak; j++) {
   226         if (breaktable[j]) free(breaktable[j]);
   227         breaktable[j] = NULL;
   228      }
   229      free(breaktable);  
   230      breaktable = NULL;
   231   }
   232   numbreak = 0;
   233   if (reptable) {
   234      for (int j=0; j < numrep; j++) {
   235         free(reptable[j].pattern);
   236         free(reptable[j].pattern2);
   237      }
   238      free(reptable);  
   239      reptable = NULL;
   240   }
   241   if (iconvtable) delete iconvtable;
   242   if (oconvtable) delete oconvtable;
   243   if (phone && phone->rules) {
   244      for (int j=0; j < phone->num + 1; j++) {
   245         free(phone->rules[j * 2]);
   246         free(phone->rules[j * 2 + 1]);
   247      }
   248      free(phone->rules);
   249      free(phone);  
   250      phone = NULL;
   251   }
   253   if (defcpdtable) {  
   254      for (int j=0; j < numdefcpd; j++) {
   255         free(defcpdtable[j].def);
   256         defcpdtable[j].def = NULL;
   257      }
   258      free(defcpdtable);  
   259      defcpdtable = NULL;
   260   }
   261   numrep = 0;
   262   if (checkcpdtable) {  
   263      for (int j=0; j < numcheckcpd; j++) {
   264         free(checkcpdtable[j].pattern);
   265         free(checkcpdtable[j].pattern2);
   266         free(checkcpdtable[j].pattern3);
   267         checkcpdtable[j].pattern = NULL;
   268         checkcpdtable[j].pattern2 = NULL;
   269         checkcpdtable[j].pattern3 = NULL;
   270      }
   271      free(checkcpdtable);  
   272      checkcpdtable = NULL;
   273   }
   274   numcheckcpd = 0;
   275   FREE_FLAG(compoundflag);
   276   FREE_FLAG(compoundbegin);
   277   FREE_FLAG(compoundmiddle);
   278   FREE_FLAG(compoundend);
   279   FREE_FLAG(compoundpermitflag);
   280   FREE_FLAG(compoundforbidflag);
   281   FREE_FLAG(compoundroot);
   282   FREE_FLAG(forbiddenword);
   283   FREE_FLAG(nosuggest);
   284   FREE_FLAG(nongramsuggest);
   285   FREE_FLAG(needaffix);
   286   FREE_FLAG(lemma_present);
   287   FREE_FLAG(circumfix);
   288   FREE_FLAG(onlyincompound);
   290   cpdwordmax = 0;
   291   pHMgr = NULL;
   292   cpdmin = 0;
   293   cpdmaxsyllable = 0;
   294   if (cpdvowels) free(cpdvowels);
   295   if (cpdvowels_utf16) free(cpdvowels_utf16);
   296   if (cpdsyllablenum) free(cpdsyllablenum);
   297   free_utf_tbl();
   298   if (lang) free(lang);
   299   if (wordchars) free(wordchars);
   300   if (wordchars_utf16) free(wordchars_utf16);
   301   if (ignorechars) free(ignorechars);
   302   if (ignorechars_utf16) free(ignorechars_utf16);
   303   if (version) free(version);
   304   checknum=0;
   305 #ifdef MOZILLA_CLIENT
   306   delete [] csconv;
   307 #endif
   308 }
   311 // read in aff file and build up prefix and suffix entry objects 
   312 int  AffixMgr::parse_file(const char * affpath, const char * key)
   313 {
   314   char * line; // io buffers
   315   char ft;     // affix type
   317   // checking flag duplication
   318   char dupflags[CONTSIZE];
   319   char dupflags_ini = 1;
   321   // first line indicator for removing byte order mark
   322   int firstline = 1;
   324   // open the affix file
   325   FileMgr * afflst = new FileMgr(affpath, key);
   326   if (!afflst) {
   327     HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
   328     return 1;
   329   }
   331   // step one is to parse the affix file building up the internal
   332   // affix data structures
   334     // read in each line ignoring any that do not
   335     // start with a known line type indicator
   336     while ((line = afflst->getline())) {
   337        mychomp(line);
   339        /* remove byte order mark */
   340        if (firstline) {
   341          firstline = 0;
   342          // Affix file begins with byte order mark: possible incompatibility with old Hunspell versions
   343          if (strncmp(line,"\xEF\xBB\xBF",3) == 0) {
   344             memmove(line, line+3, strlen(line+3)+1);
   345          }
   346        }
   348        /* parse in the keyboard string */
   349        if (strncmp(line,"KEY",3) == 0) {
   350           if (parse_string(line, &keystring, afflst->getlinenum())) {
   351              delete afflst;
   352              return 1;
   353           }
   354        }
   356        /* parse in the try string */
   357        if (strncmp(line,"TRY",3) == 0) {
   358           if (parse_string(line, &trystring, afflst->getlinenum())) {
   359              delete afflst;
   360              return 1;
   361           }
   362        }
   364        /* parse in the name of the character set used by the .dict and .aff */
   365        if (strncmp(line,"SET",3) == 0) {
   366           if (parse_string(line, &encoding, afflst->getlinenum())) {
   367              delete afflst;
   368              return 1;
   369           }
   370           if (strcmp(encoding, "UTF-8") == 0) {
   371              utf8 = 1;
   372 #ifndef OPENOFFICEORG
   373 #ifndef MOZILLA_CLIENT
   374              if (initialize_utf_tbl()) return 1;
   375 #endif
   376 #endif
   377           }
   378        }
   380        /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
   381        if (strncmp(line,"COMPLEXPREFIXES",15) == 0)
   382                    complexprefixes = 1;
   384        /* parse in the flag used by the controlled compound words */
   385        if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
   386           if (parse_flag(line, &compoundflag, afflst)) {
   387              delete afflst;
   388              return 1;
   389           }
   390        }
   392        /* parse in the flag used by compound words */
   393        if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
   394           if (complexprefixes) {
   395             if (parse_flag(line, &compoundend, afflst)) {
   396               delete afflst;
   397               return 1;
   398             }
   399           } else {
   400             if (parse_flag(line, &compoundbegin, afflst)) {
   401               delete afflst;
   402               return 1;
   403             }
   404           }
   405        }
   407        /* parse in the flag used by compound words */
   408        if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
   409           if (parse_flag(line, &compoundmiddle, afflst)) {
   410              delete afflst;
   411              return 1;
   412           }
   413        }
   414        /* parse in the flag used by compound words */
   415        if (strncmp(line,"COMPOUNDEND",11) == 0) {
   416           if (complexprefixes) {
   417             if (parse_flag(line, &compoundbegin, afflst)) {
   418               delete afflst;
   419               return 1;
   420             }
   421           } else {
   422             if (parse_flag(line, &compoundend, afflst)) {
   423               delete afflst;
   424               return 1;
   425             }
   426           }
   427        }
   429        /* parse in the data used by compound_check() method */
   430        if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
   431           if (parse_num(line, &cpdwordmax, afflst)) {
   432              delete afflst;
   433              return 1;
   434           }
   435        }
   437        /* parse in the flag sign compounds in dictionary */
   438        if (strncmp(line,"COMPOUNDROOT",12) == 0) {
   439           if (parse_flag(line, &compoundroot, afflst)) {
   440              delete afflst;
   441              return 1;
   442           }
   443        }
   445        /* parse in the flag used by compound_check() method */
   446        if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
   447           if (parse_flag(line, &compoundpermitflag, afflst)) {
   448              delete afflst;
   449              return 1;
   450           }
   451        }
   453        /* parse in the flag used by compound_check() method */
   454        if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
   455           if (parse_flag(line, &compoundforbidflag, afflst)) {
   456              delete afflst;
   457              return 1;
   458           }
   459        }
   461        if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
   462                    checkcompounddup = 1;
   463        }
   465        if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0) {
   466                    checkcompoundrep = 1;
   467        }
   469        if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0) {
   470                    checkcompoundtriple = 1;
   471        }
   473        if (strncmp(line,"SIMPLIFIEDTRIPLE",16) == 0) {
   474                    simplifiedtriple = 1;
   475        }
   477        if (strncmp(line,"CHECKCOMPOUNDCASE",17) == 0) {
   478                    checkcompoundcase = 1;
   479        }
   481        if (strncmp(line,"NOSUGGEST",9) == 0) {
   482           if (parse_flag(line, &nosuggest, afflst)) {
   483              delete afflst;
   484              return 1;
   485           }
   486        }
   488        if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
   489           if (parse_flag(line, &nongramsuggest, afflst)) {
   490              delete afflst;
   491              return 1;
   492           }
   493        }
   495        /* parse in the flag used by forbidden words */
   496        if (strncmp(line,"FORBIDDENWORD",13) == 0) {
   497           if (parse_flag(line, &forbiddenword, afflst)) {
   498              delete afflst;
   499              return 1;
   500           }
   501        }
   503        /* parse in the flag used by forbidden words */
   504        if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
   505           if (parse_flag(line, &lemma_present, afflst)) {
   506              delete afflst;
   507              return 1;
   508           }
   509        }
   511        /* parse in the flag used by circumfixes */
   512        if (strncmp(line,"CIRCUMFIX",9) == 0) {
   513           if (parse_flag(line, &circumfix, afflst)) {
   514              delete afflst;
   515              return 1;
   516           }
   517        }
   519        /* parse in the flag used by fogemorphemes */
   520        if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
   521           if (parse_flag(line, &onlyincompound, afflst)) {
   522              delete afflst;
   523              return 1;
   524           }
   525        }
   527        /* parse in the flag used by `needaffixs' */
   528        if (strncmp(line,"PSEUDOROOT",10) == 0) {
   529           if (parse_flag(line, &needaffix, afflst)) {
   530              delete afflst;
   531              return 1;
   532           }
   533        }
   535        /* parse in the flag used by `needaffixs' */
   536        if (strncmp(line,"NEEDAFFIX",9) == 0) {
   537           if (parse_flag(line, &needaffix, afflst)) {
   538              delete afflst;
   539              return 1;
   540           }
   541        }
   543        /* parse in the minimal length for words in compounds */
   544        if (strncmp(line,"COMPOUNDMIN",11) == 0) {
   545           if (parse_num(line, &cpdmin, afflst)) {
   546              delete afflst;
   547              return 1;
   548           }
   549           if (cpdmin < 1) cpdmin = 1;
   550        }
   552        /* parse in the max. words and syllables in compounds */
   553        if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
   554           if (parse_cpdsyllable(line, afflst)) {
   555              delete afflst;
   556              return 1;
   557           }
   558        }
   560        /* parse in the flag used by compound_check() method */
   561        if (strncmp(line,"SYLLABLENUM",11) == 0) {
   562           if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
   563              delete afflst;
   564              return 1;
   565           }
   566        }
   568        /* parse in the flag used by the controlled compound words */
   569        if (strncmp(line,"CHECKNUM",8) == 0) {
   570            checknum=1;
   571        }
   573        /* parse in the extra word characters */
   574        if (strncmp(line,"WORDCHARS",9) == 0) {
   575           if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
   576              delete afflst;
   577              return 1;
   578           }
   579        }
   581        /* parse in the ignored characters (for example, Arabic optional diacretics charachters */
   582        if (strncmp(line,"IGNORE",6) == 0) {
   583           if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
   584              delete afflst;
   585              return 1;
   586           }
   587        }
   589        /* parse in the typical fault correcting table */
   590        if (strncmp(line,"REP",3) == 0) {
   591           if (parse_reptable(line, afflst)) {
   592              delete afflst;
   593              return 1;
   594           }
   595        }
   597        /* parse in the input conversion table */
   598        if (strncmp(line,"ICONV",5) == 0) {
   599           if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
   600              delete afflst;
   601              return 1;
   602           }
   603        }
   605        /* parse in the input conversion table */
   606        if (strncmp(line,"OCONV",5) == 0) {
   607           if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
   608              delete afflst;
   609              return 1;
   610           }
   611        }
   613        /* parse in the phonetic translation table */
   614        if (strncmp(line,"PHONE",5) == 0) {
   615           if (parse_phonetable(line, afflst)) {
   616              delete afflst;
   617              return 1;
   618           }
   619        }
   621        /* parse in the checkcompoundpattern table */
   622        if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
   623           if (parse_checkcpdtable(line, afflst)) {
   624              delete afflst;
   625              return 1;
   626           }
   627        }
   629        /* parse in the defcompound table */
   630        if (strncmp(line,"COMPOUNDRULE",12) == 0) {
   631           if (parse_defcpdtable(line, afflst)) {
   632              delete afflst;
   633              return 1;
   634           }
   635        }
   637        /* parse in the related character map table */
   638        if (strncmp(line,"MAP",3) == 0) {
   639           if (parse_maptable(line, afflst)) {
   640              delete afflst;
   641              return 1;
   642           }
   643        }
   645        /* parse in the word breakpoints table */
   646        if (strncmp(line,"BREAK",5) == 0) {
   647           if (parse_breaktable(line, afflst)) {
   648              delete afflst;
   649              return 1;
   650           }
   651        }
   653        /* parse in the language for language specific codes */
   654        if (strncmp(line,"LANG",4) == 0) {
   655           if (parse_string(line, &lang, afflst->getlinenum())) {
   656              delete afflst;
   657              return 1;
   658           }
   659           langnum = get_lang_num(lang);
   660        }
   662        if (strncmp(line,"VERSION",7) == 0) {
   663           for(line = line + 7; *line == ' ' || *line == '\t'; line++);
   664           version = mystrdup(line);
   665        }
   667        if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
   668           if (parse_num(line, &maxngramsugs, afflst)) {
   669              delete afflst;
   670              return 1;
   671           }
   672        }
   674        if (strncmp(line,"ONLYMAXDIFF", 11) == 0)
   675                    onlymaxdiff = 1;
   677        if (strncmp(line,"MAXDIFF",7) == 0) {
   678           if (parse_num(line, &maxdiff, afflst)) {
   679              delete afflst;
   680              return 1;
   681           }
   682        }
   684        if (strncmp(line,"MAXCPDSUGS",10) == 0) {
   685           if (parse_num(line, &maxcpdsugs, afflst)) {
   686              delete afflst;
   687              return 1;
   688           }
   689        }
   691        if (strncmp(line,"NOSPLITSUGS",11) == 0) {
   692                    nosplitsugs=1;
   693        }
   695        if (strncmp(line,"FULLSTRIP",9) == 0) {
   696                    fullstrip=1;
   697        }
   699        if (strncmp(line,"SUGSWITHDOTS",12) == 0) {
   700                    sugswithdots=1;
   701        }
   703        /* parse in the flag used by forbidden words */
   704        if (strncmp(line,"KEEPCASE",8) == 0) {
   705           if (parse_flag(line, &keepcase, afflst)) {
   706              delete afflst;
   707              return 1;
   708           }
   709        }
   711        /* parse in the flag used by `forceucase' */
   712        if (strncmp(line,"FORCEUCASE",10) == 0) {
   713           if (parse_flag(line, &forceucase, afflst)) {
   714              delete afflst;
   715              return 1;
   716           }
   717        }
   719        /* parse in the flag used by `warn' */
   720        if (strncmp(line,"WARN",4) == 0) {
   721           if (parse_flag(line, &warn, afflst)) {
   722              delete afflst;
   723              return 1;
   724           }
   725        }
   727        if (strncmp(line,"FORBIDWARN",10) == 0) {
   728                    forbidwarn=1;
   729        }
   731        /* parse in the flag used by the affix generator */
   732        if (strncmp(line,"SUBSTANDARD",11) == 0) {
   733           if (parse_flag(line, &substandard, afflst)) {
   734              delete afflst;
   735              return 1;
   736           }
   737        }
   739        if (strncmp(line,"CHECKSHARPS",11) == 0) {
   740                    checksharps=1;
   741        }
   743        /* parse this affix: P - prefix, S - suffix */
   744        ft = ' ';
   745        if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
   746        if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
   747        if (ft != ' ') {
   748           if (dupflags_ini) {
   749             memset(dupflags, 0, sizeof(dupflags));
   750             dupflags_ini = 0;
   751           }
   752           if (parse_affix(line, ft, afflst, dupflags)) {
   753              delete afflst;
   754              process_pfx_tree_to_list();
   755              process_sfx_tree_to_list();
   756              return 1;
   757           }
   758        }
   760     }
   761     delete afflst;
   763     // convert affix trees to sorted list
   764     process_pfx_tree_to_list();
   765     process_sfx_tree_to_list();
   767     // now we can speed up performance greatly taking advantage of the 
   768     // relationship between the affixes and the idea of "subsets".
   770     // View each prefix as a potential leading subset of another and view
   771     // each suffix (reversed) as a potential trailing subset of another.
   773     // To illustrate this relationship if we know the prefix "ab" is found in the
   774     // word to examine, only prefixes that "ab" is a leading subset of need be examined.
   775     // Furthermore is "ab" is not present then none of the prefixes that "ab" is
   776     // is a subset need be examined.
   777     // The same argument goes for suffix string that are reversed.
   779     // Then to top this off why not examine the first char of the word to quickly
   780     // limit the set of prefixes to examine (i.e. the prefixes to examine must 
   781     // be leading supersets of the first character of the word (if they exist)
   783     // To take advantage of this "subset" relationship, we need to add two links
   784     // from entry.  One to take next if the current prefix is found (call it nexteq)
   785     // and one to take next if the current prefix is not found (call it nextne).
   787     // Since we have built ordered lists, all that remains is to properly initialize 
   788     // the nextne and nexteq pointers that relate them
   790     process_pfx_order();
   791     process_sfx_order();
   793     /* get encoding for CHECKCOMPOUNDCASE */
   794     if (!utf8) {
   795     char * enc = get_encoding();
   796     csconv = get_current_cs(enc);
   797     free(enc);
   798     enc = NULL;
   800     char expw[MAXLNLEN];
   801     if (wordchars) {
   802         strcpy(expw, wordchars);
   803         free(wordchars);
   804     } else *expw = '\0';
   806     for (int i = 0; i <= 255; i++) {
   807         if ( (csconv[i].cupper != csconv[i].clower) &&
   808             (! strchr(expw, (char) i))) {
   809                 *(expw + strlen(expw) + 1) = '\0';
   810                 *(expw + strlen(expw)) = (char) i;
   811         }
   812     }
   814     wordchars = mystrdup(expw);
   815     }
   817     // default BREAK definition
   818     if (numbreak == -1) {
   819         breaktable = (char **) malloc(sizeof(char *) * 3);
   820         if (!breaktable) return 1;
   821         breaktable[0] = mystrdup("-");
   822         breaktable[1] = mystrdup("^-");
   823         breaktable[2] = mystrdup("-$");
   824         if (breaktable[0] && breaktable[1] && breaktable[2]) numbreak = 3;
   825     }
   826     return 0;
   827 }
   830 // we want to be able to quickly access prefix information
   831 // both by prefix flag, and sorted by prefix string itself 
   832 // so we need to set up two indexes
   834 int AffixMgr::build_pfxtree(PfxEntry* pfxptr)
   835 {
   836   PfxEntry * ptr;
   837   PfxEntry * pptr;
   838   PfxEntry * ep = pfxptr;
   840   // get the right starting points
   841   const char * key = ep->getKey();
   842   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
   844   // first index by flag which must exist
   845   ptr = pFlag[flg];
   846   ep->setFlgNxt(ptr);
   847   pFlag[flg] = ep;
   850   // handle the special case of null affix string
   851   if (strlen(key) == 0) {
   852     // always inset them at head of list at element 0
   853      ptr = pStart[0];
   854      ep->setNext(ptr);
   855      pStart[0] = ep;
   856      return 0;
   857   }
   859   // now handle the normal case
   860   ep->setNextEQ(NULL);
   861   ep->setNextNE(NULL);
   863   unsigned char sp = *((const unsigned char *)key);
   864   ptr = pStart[sp];
   866   // handle the first insert 
   867   if (!ptr) {
   868      pStart[sp] = ep;
   869      return 0;
   870   }
   873   // otherwise use binary tree insertion so that a sorted
   874   // list can easily be generated later
   875   pptr = NULL;
   876   for (;;) {
   877     pptr = ptr;
   878     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
   879        ptr = ptr->getNextEQ();
   880        if (!ptr) {
   881           pptr->setNextEQ(ep);
   882           break;
   883        }
   884     } else {
   885        ptr = ptr->getNextNE();
   886        if (!ptr) {
   887           pptr->setNextNE(ep);
   888           break;
   889        }
   890     }
   891   }
   892   return 0;
   893 }
   895 // we want to be able to quickly access suffix information
   896 // both by suffix flag, and sorted by the reverse of the
   897 // suffix string itself; so we need to set up two indexes
   898 int AffixMgr::build_sfxtree(SfxEntry* sfxptr)
   899 {
   900   SfxEntry * ptr;
   901   SfxEntry * pptr;
   902   SfxEntry * ep = sfxptr;
   904   /* get the right starting point */
   905   const char * key = ep->getKey();
   906   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
   908   // first index by flag which must exist
   909   ptr = sFlag[flg];
   910   ep->setFlgNxt(ptr);
   911   sFlag[flg] = ep;
   913   // next index by affix string
   915   // handle the special case of null affix string
   916   if (strlen(key) == 0) {
   917     // always inset them at head of list at element 0
   918      ptr = sStart[0];
   919      ep->setNext(ptr);
   920      sStart[0] = ep;
   921      return 0;
   922   }
   924   // now handle the normal case
   925   ep->setNextEQ(NULL);
   926   ep->setNextNE(NULL);
   928   unsigned char sp = *((const unsigned char *)key);
   929   ptr = sStart[sp];
   931   // handle the first insert 
   932   if (!ptr) {
   933      sStart[sp] = ep;
   934      return 0;
   935   }
   937   // otherwise use binary tree insertion so that a sorted
   938   // list can easily be generated later
   939   pptr = NULL;
   940   for (;;) {
   941     pptr = ptr;
   942     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
   943        ptr = ptr->getNextEQ();
   944        if (!ptr) {
   945           pptr->setNextEQ(ep);
   946           break;
   947        }
   948     } else {
   949        ptr = ptr->getNextNE();
   950        if (!ptr) {
   951           pptr->setNextNE(ep);
   952           break;
   953        }
   954     }
   955   }
   956   return 0;
   957 }
   959 // convert from binary tree to sorted list
   960 int AffixMgr::process_pfx_tree_to_list()
   961 {
   962   for (int i=1; i< SETSIZE; i++) {
   963     pStart[i] = process_pfx_in_order(pStart[i],NULL);
   964   }
   965   return 0;
   966 }
   969 PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr)
   970 {
   971   if (ptr) {
   972     nptr = process_pfx_in_order(ptr->getNextNE(), nptr);
   973     ptr->setNext(nptr);
   974     nptr = process_pfx_in_order(ptr->getNextEQ(), ptr);
   975   }
   976   return nptr;
   977 }
   980 // convert from binary tree to sorted list
   981 int AffixMgr:: process_sfx_tree_to_list()
   982 {
   983   for (int i=1; i< SETSIZE; i++) {
   984     sStart[i] = process_sfx_in_order(sStart[i],NULL);
   985   }
   986   return 0;
   987 }
   989 SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr)
   990 {
   991   if (ptr) {
   992     nptr = process_sfx_in_order(ptr->getNextNE(), nptr);
   993     ptr->setNext(nptr);
   994     nptr = process_sfx_in_order(ptr->getNextEQ(), ptr);
   995   }
   996   return nptr;
   997 }
  1000 // reinitialize the PfxEntry links NextEQ and NextNE to speed searching
  1001 // using the idea of leading subsets this time
  1002 int AffixMgr::process_pfx_order()
  1004     PfxEntry* ptr;
  1006     // loop through each prefix list starting point
  1007     for (int i=1; i < SETSIZE; i++) {
  1009          ptr = pStart[i];
  1011          // look through the remainder of the list
  1012          //  and find next entry with affix that 
  1013          // the current one is not a subset of
  1014          // mark that as destination for NextNE
  1015          // use next in list that you are a subset
  1016          // of as NextEQ
  1018          for (; ptr != NULL; ptr = ptr->getNext()) {
  1020              PfxEntry * nptr = ptr->getNext();
  1021              for (; nptr != NULL; nptr = nptr->getNext()) {
  1022                  if (! isSubset( ptr->getKey() , nptr->getKey() )) break;
  1024              ptr->setNextNE(nptr);
  1025              ptr->setNextEQ(NULL);
  1026              if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey())) 
  1027                  ptr->setNextEQ(ptr->getNext());
  1030          // now clean up by adding smart search termination strings:
  1031          // if you are already a superset of the previous prefix
  1032          // but not a subset of the next, search can end here
  1033          // so set NextNE properly
  1035          ptr = pStart[i];
  1036          for (; ptr != NULL; ptr = ptr->getNext()) {
  1037              PfxEntry * nptr = ptr->getNext();
  1038              PfxEntry * mptr = NULL;
  1039              for (; nptr != NULL; nptr = nptr->getNext()) {
  1040                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
  1041                  mptr = nptr;
  1043              if (mptr) mptr->setNextNE(NULL);
  1046     return 0;
  1049 // initialize the SfxEntry links NextEQ and NextNE to speed searching
  1050 // using the idea of leading subsets this time
  1051 int AffixMgr::process_sfx_order()
  1053     SfxEntry* ptr;
  1055     // loop through each prefix list starting point
  1056     for (int i=1; i < SETSIZE; i++) {
  1058          ptr = sStart[i];
  1060          // look through the remainder of the list
  1061          //  and find next entry with affix that 
  1062          // the current one is not a subset of
  1063          // mark that as destination for NextNE
  1064          // use next in list that you are a subset
  1065          // of as NextEQ
  1067          for (; ptr != NULL; ptr = ptr->getNext()) {
  1068              SfxEntry * nptr = ptr->getNext();
  1069              for (; nptr != NULL; nptr = nptr->getNext()) {
  1070                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
  1072              ptr->setNextNE(nptr);
  1073              ptr->setNextEQ(NULL);
  1074              if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey())) 
  1075                  ptr->setNextEQ(ptr->getNext());
  1079          // now clean up by adding smart search termination strings:
  1080          // if you are already a superset of the previous suffix
  1081          // but not a subset of the next, search can end here
  1082          // so set NextNE properly
  1084          ptr = sStart[i];
  1085          for (; ptr != NULL; ptr = ptr->getNext()) {
  1086              SfxEntry * nptr = ptr->getNext();
  1087              SfxEntry * mptr = NULL;
  1088              for (; nptr != NULL; nptr = nptr->getNext()) {
  1089                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
  1090                  mptr = nptr;
  1092              if (mptr) mptr->setNextNE(NULL);
  1095     return 0;
  1098 // add flags to the result for dictionary debugging
  1099 void AffixMgr::debugflag(char * result, unsigned short flag) {
  1100     char * st = encode_flag(flag);
  1101     mystrcat(result, " ", MAXLNLEN);
  1102     mystrcat(result, MORPH_FLAG, MAXLNLEN);
  1103     if (st) {
  1104         mystrcat(result, st, MAXLNLEN);
  1105         free(st);
  1109 // calculate the character length of the condition
  1110 int AffixMgr::condlen(char * st)
  1112   int l = 0;
  1113   bool group = false;
  1114   for(; *st; st++) {
  1115     if (*st == '[') {
  1116         group = true;
  1117         l++;
  1118     } else if (*st == ']') group = false;
  1119     else if (!group && (!utf8 ||
  1120         (!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) l++;
  1122   return l;
  1125 int AffixMgr::encodeit(affentry &entry, char * cs)
  1127   if (strcmp(cs,".") != 0) {
  1128     entry.numconds = (char) condlen(cs);
  1129     strncpy(entry.c.conds, cs, MAXCONDLEN);
  1130     // long condition (end of conds padded by strncpy)
  1131     if (entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {
  1132       entry.opts += aeLONGCOND;
  1133       entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
  1134       if (!entry.c.l.conds2) return 1;
  1136   } else {
  1137     entry.numconds = 0;
  1138     entry.c.conds[0] = '\0';
  1140   return 0;
  1143 // return 1 if s1 is a leading subset of s2 (dots are for infixes)
  1144 inline int AffixMgr::isSubset(const char * s1, const char * s2)
  1146     while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) {
  1147         s1++;
  1148         s2++;
  1150     return (*s1 == '\0');
  1154 // check word for prefixes
  1155 struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compound,
  1156     const FLAG needflag)
  1158     struct hentry * rv= NULL;
  1160     pfx = NULL;
  1161     pfxappnd = NULL;
  1162     sfxappnd = NULL;
  1164     // first handle the special case of 0 length prefixes
  1165     PfxEntry * pe = pStart[0];
  1166     while (pe) {
  1167         if (
  1168             // fogemorpheme
  1169               ((in_compound != IN_CPD_NOT) || !(pe->getCont() &&
  1170                   (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
  1171             // permit prefixes in compounds
  1172               ((in_compound != IN_CPD_END) || (pe->getCont() &&
  1173                   (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))
  1174               ) {
  1175                     // check prefix
  1176                     rv = pe->checkword(word, len, in_compound, needflag);
  1177                     if (rv) {
  1178                         pfx=pe; // BUG: pfx not stateless
  1179                         return rv;
  1182        pe = pe->getNext();
  1185     // now handle the general case
  1186     unsigned char sp = *((const unsigned char *)word);
  1187     PfxEntry * pptr = pStart[sp];
  1189     while (pptr) {
  1190         if (isSubset(pptr->getKey(),word)) {
  1191              if (
  1192             // fogemorpheme
  1193               ((in_compound != IN_CPD_NOT) || !(pptr->getCont() &&
  1194                   (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
  1195             // permit prefixes in compounds
  1196               ((in_compound != IN_CPD_END) || (pptr->getCont() &&
  1197                   (TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen()))))
  1198               ) {
  1199             // check prefix
  1200                   rv = pptr->checkword(word, len, in_compound, needflag);
  1201                   if (rv) {
  1202                     pfx=pptr; // BUG: pfx not stateless
  1203                     return rv;
  1206              pptr = pptr->getNextEQ();
  1207         } else {
  1208              pptr = pptr->getNextNE();
  1212     return NULL;
  1215 // check word for prefixes
  1216 struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
  1217     char in_compound, const FLAG needflag)
  1219     struct hentry * rv= NULL;
  1221     pfx = NULL;
  1222     sfxappnd = NULL;
  1224     // first handle the special case of 0 length prefixes
  1225     PfxEntry * pe = pStart[0];
  1227     while (pe) {
  1228         rv = pe->check_twosfx(word, len, in_compound, needflag);
  1229         if (rv) return rv;
  1230         pe = pe->getNext();
  1233     // now handle the general case
  1234     unsigned char sp = *((const unsigned char *)word);
  1235     PfxEntry * pptr = pStart[sp];
  1237     while (pptr) {
  1238         if (isSubset(pptr->getKey(),word)) {
  1239             rv = pptr->check_twosfx(word, len, in_compound, needflag);
  1240             if (rv) {
  1241                 pfx = pptr;
  1242                 return rv;
  1244             pptr = pptr->getNextEQ();
  1245         } else {
  1246              pptr = pptr->getNextNE();
  1250     return NULL;
  1253 // check word for prefixes
  1254 char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
  1255     const FLAG needflag)
  1257     char * st;
  1259     char result[MAXLNLEN];
  1260     result[0] = '\0';
  1262     pfx = NULL;
  1263     sfxappnd = NULL;
  1265     // first handle the special case of 0 length prefixes
  1266     PfxEntry * pe = pStart[0];
  1267     while (pe) {
  1268        st = pe->check_morph(word,len,in_compound, needflag);
  1269        if (st) {
  1270             mystrcat(result, st, MAXLNLEN);
  1271             free(st);
  1273        // if (rv) return rv;
  1274        pe = pe->getNext();
  1277     // now handle the general case
  1278     unsigned char sp = *((const unsigned char *)word);
  1279     PfxEntry * pptr = pStart[sp];
  1281     while (pptr) {
  1282         if (isSubset(pptr->getKey(),word)) {
  1283             st = pptr->check_morph(word,len,in_compound, needflag);
  1284             if (st) {
  1285               // fogemorpheme
  1286               if ((in_compound != IN_CPD_NOT) || !((pptr->getCont() && 
  1287                         (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen()))))) {
  1288                     mystrcat(result, st, MAXLNLEN);
  1289                     pfx = pptr;
  1291                 free(st);
  1293             pptr = pptr->getNextEQ();
  1294         } else {
  1295             pptr = pptr->getNextNE();
  1299     if (*result) return mystrdup(result);
  1300     return NULL;
  1304 // check word for prefixes
  1305 char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
  1306     char in_compound, const FLAG needflag)
  1308     char * st;
  1310     char result[MAXLNLEN];
  1311     result[0] = '\0';
  1313     pfx = NULL;
  1314     sfxappnd = NULL;
  1316     // first handle the special case of 0 length prefixes
  1317     PfxEntry * pe = pStart[0];
  1318     while (pe) {
  1319         st = pe->check_twosfx_morph(word,len,in_compound, needflag);
  1320         if (st) {
  1321             mystrcat(result, st, MAXLNLEN);
  1322             free(st);
  1324         pe = pe->getNext();
  1327     // now handle the general case
  1328     unsigned char sp = *((const unsigned char *)word);
  1329     PfxEntry * pptr = pStart[sp];
  1331     while (pptr) {
  1332         if (isSubset(pptr->getKey(),word)) {
  1333             st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
  1334             if (st) {
  1335                 mystrcat(result, st, MAXLNLEN);
  1336                 free(st);
  1337                 pfx = pptr;
  1339             pptr = pptr->getNextEQ();
  1340         } else {
  1341             pptr = pptr->getNextNE();
  1345     if (*result) return mystrdup(result);
  1346     return NULL;
  1349 // Is word a non compound with a REP substitution (see checkcompoundrep)?
  1350 int AffixMgr::cpdrep_check(const char * word, int wl)
  1352   char candidate[MAXLNLEN];
  1353   const char * r;
  1354   int lenr, lenp;
  1356   if ((wl < 2) || !numrep) return 0;
  1358   for (int i=0; i < numrep; i++ ) {
  1359       r = word;
  1360       lenr = strlen(reptable[i].pattern2);
  1361       lenp = strlen(reptable[i].pattern);
  1362       // search every occurence of the pattern in the word
  1363       while ((r=strstr(r, reptable[i].pattern)) != NULL) {
  1364           strcpy(candidate, word);
  1365           if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
  1366           strcpy(candidate+(r-word),reptable[i].pattern2);
  1367           strcpy(candidate+(r-word)+lenr, r+lenp);
  1368           if (candidate_check(candidate,strlen(candidate))) return 1;
  1369           r++; // search for the next letter
  1372    return 0;
  1375 // forbid compoundings when there are special patterns at word bound
  1376 int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed)
  1378   int len;
  1379   for (int i = 0; i < numcheckcpd; i++) {
  1380       if (isSubset(checkcpdtable[i].pattern2, word + pos) &&
  1381         (!r1 || !checkcpdtable[i].cond ||
  1382           (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) &&
  1383         (!r2 || !checkcpdtable[i].cond2 ||
  1384           (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) &&
  1385         // zero length pattern => only TESTAFF
  1386         // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
  1387         (!*(checkcpdtable[i].pattern) || (
  1388             (*(checkcpdtable[i].pattern)=='0' && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
  1389             (*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) &&
  1390                 strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
  1391             return 1;
  1394   return 0;
  1397 // forbid compounding with neighbouring upper and lower case characters at word bounds
  1398 int AffixMgr::cpdcase_check(const char * word, int pos)
  1400   if (utf8) {
  1401       w_char u, w;
  1402       const char * p;
  1403       u8_u16(&u, 1, word + pos);
  1404       for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--);
  1405       u8_u16(&w, 1, p);
  1406       unsigned short a = (u.h << 8) + u.l;
  1407       unsigned short b = (w.h << 8) + w.l;
  1408       if (((unicodetoupper(a, langnum) == a) || (unicodetoupper(b, langnum) == b)) &&
  1409           (a != '-') && (b != '-')) return 1;
  1410   } else {
  1411       unsigned char a = *(word + pos - 1);
  1412       unsigned char b = *(word + pos);
  1413       if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) return 1;
  1415   return 0;
  1418 // check compound patterns
  1419 int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** def, char all)
  1421   signed short btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking
  1422   signed short btwp[MAXWORDLEN]; // word positions for metacharacters
  1423   int btnum[MAXWORDLEN]; // number of matched characters in metacharacter positions
  1424   short bt = 0;  
  1425   int i, j;
  1426   int ok;
  1427   int w = 0;
  1429   if (!*words) {
  1430     w = 1;
  1431     *words = def;
  1434   if (!*words) {
  1435     return 0;
  1438   (*words)[wnum] = rv;
  1440   // has the last word COMPOUNDRULE flag?
  1441   if (rv->alen == 0) {
  1442     (*words)[wnum] = NULL;
  1443     if (w) *words = NULL;
  1444     return 0;
  1446   ok = 0;
  1447   for (i = 0; i < numdefcpd; i++) {
  1448     for (j = 0; j < defcpdtable[i].len; j++) {
  1449        if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
  1450           TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1;
  1453   if (ok == 0) {
  1454     (*words)[wnum] = NULL;
  1455     if (w) *words = NULL;
  1456     return 0;
  1459   for (i = 0; i < numdefcpd; i++) {
  1460     signed short pp = 0; // pattern position
  1461     signed short wp = 0; // "words" position
  1462     int ok2;
  1463     ok = 1;
  1464     ok2 = 1;
  1465     do {
  1466       while ((pp < defcpdtable[i].len) && (wp <= wnum)) {
  1467         if (((pp+1) < defcpdtable[i].len) &&
  1468           ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) {
  1469             int wend = (defcpdtable[i].def[pp+1] == '?') ? wp : wnum;
  1470             ok2 = 1;
  1471             pp+=2;
  1472             btpp[bt] = pp;
  1473             btwp[bt] = wp;
  1474             while (wp <= wend) {
  1475                 if (!(*words)[wp]->alen || 
  1476                   !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp-2], (*words)[wp]->alen)) {
  1477                     ok2 = 0;
  1478                     break;
  1480                 wp++;
  1482             if (wp <= wnum) ok2 = 0;
  1483             btnum[bt] = wp - btwp[bt];
  1484             if (btnum[bt] > 0) bt++;
  1485             if (ok2) break;
  1486         } else {
  1487             ok2 = 1;
  1488             if (!(*words)[wp] || !(*words)[wp]->alen || 
  1489               !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], (*words)[wp]->alen)) {
  1490                 ok = 0;
  1491                 break;
  1493             pp++;
  1494             wp++;
  1495             if ((defcpdtable[i].len == pp) && !(wp > wnum)) ok = 0;
  1498     if (ok && ok2) { 
  1499         int r = pp;
  1500         while ((defcpdtable[i].len > r) && ((r+1) < defcpdtable[i].len) &&
  1501             ((defcpdtable[i].def[r+1] == '*') || (defcpdtable[i].def[r+1] == '?'))) r+=2;
  1502         if (defcpdtable[i].len <= r) return 1;
  1504     // backtrack
  1505     if (bt) do {
  1506         ok = 1;
  1507         btnum[bt - 1]--;
  1508         pp = btpp[bt - 1];
  1509         wp = btwp[bt - 1] + (signed short) btnum[bt - 1];
  1510     } while ((btnum[bt - 1] < 0) && --bt);
  1511   } while (bt);
  1513   if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) return 1;
  1515   // check zero ending
  1516   while (ok && ok2 && (defcpdtable[i].len > pp) && ((pp+1) < defcpdtable[i].len) &&
  1517     ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) pp+=2;
  1518   if (ok && ok2 && (defcpdtable[i].len <= pp)) return 1;
  1520   (*words)[wnum] = NULL;
  1521   if (w) *words = NULL;
  1522   return 0;
  1525 inline int AffixMgr::candidate_check(const char * word, int len)
  1527   struct hentry * rv=NULL;
  1529   rv = lookup(word);
  1530   if (rv) return 1;
  1532 //  rv = prefix_check(word,len,1);
  1533 //  if (rv) return 1;
  1535   rv = affix_check(word,len);
  1536   if (rv) return 1;
  1537   return 0;
  1540 // calculate number of syllable for compound-checking
  1541 short AffixMgr::get_syllable(const char * word, int wlen)
  1543     if (cpdmaxsyllable==0) return 0;
  1545     short num=0;
  1547     if (!utf8) {
  1548         for (int i=0; i<wlen; i++) {
  1549             if (strchr(cpdvowels, word[i])) num++;
  1551     } else if (cpdvowels_utf16) {
  1552         w_char w[MAXWORDUTF8LEN];
  1553         int i = u8_u16(w, MAXWORDUTF8LEN, word);
  1554         for (; i > 0; i--) {
  1555             if (flag_bsearch((unsigned short *) cpdvowels_utf16,
  1556                 ((unsigned short *) w)[i - 1], cpdvowels_utf16_len)) num++;
  1559     return num;
  1562 void AffixMgr::setcminmax(int * cmin, int * cmax, const char * word, int len) {
  1563     if (utf8) {
  1564         int i;
  1565         for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) {
  1566           for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++);
  1568         for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax; i++) {
  1569           for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--);
  1571     } else {
  1572         *cmin = cpdmin;
  1573         *cmax = len - cpdmin + 1;
  1578 // check if compound word is correctly spelled
  1579 // hu_mov_rule = spec. Hungarian rule (XXX)
  1580 struct hentry * AffixMgr::compound_check(const char * word, int len, 
  1581     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL,
  1582     char hu_mov_rule = 0, char is_sug = 0, int * info = NULL)
  1584     int i; 
  1585     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
  1586     struct hentry * rv = NULL;
  1587     struct hentry * rv_first;
  1588     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
  1589     char st [MAXWORDUTF8LEN + 4];
  1590     char ch = '\0';
  1591     int cmin;
  1592     int cmax;
  1593     int striple = 0;
  1594     int scpd = 0;
  1595     int soldi = 0;
  1596     int oldcmin = 0;
  1597     int oldcmax = 0;
  1598     int oldlen = 0;
  1599     int checkedstriple = 0;
  1600     int onlycpdrule;
  1601     int affixed = 0;
  1602     hentry ** oldwords = words;
  1604     int checked_prefix;
  1606     setcminmax(&cmin, &cmax, word, len);
  1608     strcpy(st, word);
  1610     for (i = cmin; i < cmax; i++) {
  1611         // go to end of the UTF-8 character
  1612         if (utf8) {
  1613             for (; (st[i] & 0xc0) == 0x80; i++);
  1614             if (i >= cmax) return NULL;
  1617         words = oldwords;
  1618         onlycpdrule = (words) ? 1 : 0;
  1620         do { // onlycpdrule loop
  1622         oldnumsyllable = numsyllable;
  1623         oldwordnum = wordnum;
  1624         checked_prefix = 0;
  1627         do { // simplified checkcompoundpattern loop
  1629         if (scpd > 0) {
  1630           for (; scpd <= numcheckcpd && (!checkcpdtable[scpd-1].pattern3 ||
  1631             strncmp(word + i, checkcpdtable[scpd-1].pattern3, strlen(checkcpdtable[scpd-1].pattern3)) != 0); scpd++);
  1633           if (scpd > numcheckcpd) break; // break simplified checkcompoundpattern loop
  1634           strcpy(st + i, checkcpdtable[scpd-1].pattern);
  1635           soldi = i;
  1636           i += strlen(checkcpdtable[scpd-1].pattern);
  1637           strcpy(st + i, checkcpdtable[scpd-1].pattern2);
  1638           strcpy(st + i + strlen(checkcpdtable[scpd-1].pattern2), word + soldi + strlen(checkcpdtable[scpd-1].pattern3));
  1640           oldlen = len;
  1641           len += strlen(checkcpdtable[scpd-1].pattern) + strlen(checkcpdtable[scpd-1].pattern2) - strlen(checkcpdtable[scpd-1].pattern3);
  1642           oldcmin = cmin;
  1643           oldcmax = cmax;
  1644           setcminmax(&cmin, &cmax, st, len);
  1646           cmax = len - cpdmin + 1;
  1649         ch = st[i];
  1650         st[i] = '\0';
  1652         sfx = NULL;
  1653         pfx = NULL;
  1655         // FIRST WORD
  1657         affixed = 1;
  1658         rv = lookup(st); // perhaps without prefix
  1660         // search homonym with compound flag
  1661         while ((rv) && !hu_mov_rule &&
  1662             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  1663                 !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1664                   (compoundbegin && !wordnum && !onlycpdrule && 
  1665                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  1666                   (compoundmiddle && wordnum && !words && !onlycpdrule &&
  1667                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
  1668                   (numdefcpd && onlycpdrule &&
  1669                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
  1670                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))) ||
  1671                   (scpd != 0 && checkcpdtable[scpd-1].cond != FLAG_NULL &&
  1672                     !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)))
  1673                   ) {
  1674             rv = rv->next_homonym;
  1677         if (rv) affixed = 0;
  1679         if (!rv) {
  1680             if (onlycpdrule) break;
  1681             if (compoundflag && 
  1682              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
  1683                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
  1684                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
  1685                     sfx->getCont() &&
  1686                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
  1687                             sfx->getContLen())) || (compoundend &&
  1688                         TESTAFF(sfx->getCont(), compoundend, 
  1689                             sfx->getContLen())))) {
  1690                         rv = NULL;
  1694             if (rv ||
  1695               (((wordnum == 0) && compoundbegin &&
  1696                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  1697                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
  1698               ((wordnum > 0) && compoundmiddle &&
  1699                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  1700                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
  1701               ) checked_prefix = 1;
  1702         // else check forbiddenwords and needaffix
  1703         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1704             TESTAFF(rv->astr, needaffix, rv->alen) ||
  1705             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1706             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
  1707              )) {
  1708                 st[i] = ch;
  1709                 //continue;
  1710                 break;
  1713             // check non_compound flag in suffix and prefix
  1714             if ((rv) && !hu_mov_rule &&
  1715                 ((pfx && pfx->getCont() &&
  1716                     TESTAFF(pfx->getCont(), compoundforbidflag, 
  1717                         pfx->getContLen())) ||
  1718                 (sfx && sfx->getCont() &&
  1719                     TESTAFF(sfx->getCont(), compoundforbidflag, 
  1720                         sfx->getContLen())))) {
  1721                     rv = NULL;
  1724             // check compoundend flag in suffix and prefix
  1725             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
  1726                 ((pfx && pfx->getCont() &&
  1727                     TESTAFF(pfx->getCont(), compoundend, 
  1728                         pfx->getContLen())) ||
  1729                 (sfx && sfx->getCont() &&
  1730                     TESTAFF(sfx->getCont(), compoundend, 
  1731                         sfx->getContLen())))) {
  1732                     rv = NULL;
  1735             // check compoundmiddle flag in suffix and prefix
  1736             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
  1737                 ((pfx && pfx->getCont() &&
  1738                     TESTAFF(pfx->getCont(), compoundmiddle, 
  1739                         pfx->getContLen())) ||
  1740                 (sfx && sfx->getCont() &&
  1741                     TESTAFF(sfx->getCont(), compoundmiddle, 
  1742                         sfx->getContLen())))) {
  1743                     rv = NULL;
  1746         // check forbiddenwords
  1747         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1748             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1749             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
  1750                 return NULL;
  1753         // increment word number, if the second root has a compoundroot flag
  1754         if ((rv) && compoundroot && 
  1755             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1756                 wordnum++;
  1759         // first word is acceptable in compound words?
  1760         if (((rv) && 
  1761           ( checked_prefix || (words && words[wnum]) ||
  1762             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1763             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  1764             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))// ||
  1765 //            (numdefcpd && )
  1767 // LANG_hu section: spec. Hungarian rule
  1768             || ((langnum == LANG_hu) && hu_mov_rule && (
  1769                     TESTAFF(rv->astr, 'F', rv->alen) || // XXX hardwired Hungarian dictionary codes
  1770                     TESTAFF(rv->astr, 'G', rv->alen) ||
  1771                     TESTAFF(rv->astr, 'H', rv->alen)
  1774 // END of LANG_hu section
  1775           ) &&
  1777              // test CHECKCOMPOUNDPATTERN conditions
  1778              scpd == 0 || checkcpdtable[scpd-1].cond == FLAG_NULL || 
  1779                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)
  1781           && ! (( checkcompoundtriple && scpd == 0 && !words && // test triple letters
  1782                    (word[i-1]==word[i]) && (
  1783                       ((i>1) && (word[i-1]==word[i-2])) ||
  1784                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
  1786                ) ||
  1788                  checkcompoundcase && scpd == 0 && !words && cpdcase_check(word, i)
  1789                ))
  1791 // LANG_hu section: spec. Hungarian rule
  1792          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
  1793               (sfx && sfx->getCont() && ( // XXX hardwired Hungarian dic. codes
  1794                         TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
  1795                         TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
  1799          ) { // first word is ok condition
  1801 // LANG_hu section: spec. Hungarian rule
  1802             if (langnum == LANG_hu) {
  1803                 // calculate syllable number of the word
  1804                 numsyllable += get_syllable(st, i);
  1805                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  1806                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  1808 // END of LANG_hu section
  1810             // NEXT WORD(S)
  1811             rv_first = rv;
  1812             st[i] = ch;
  1814         do { // striple loop
  1816             // check simplifiedtriple
  1817             if (simplifiedtriple) { 
  1818               if (striple) { 
  1819                 checkedstriple = 1;
  1820                 i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
  1821               } else if (i > 2 && *(word+i - 1) == *(word + i - 2)) striple = 1;
  1824             rv = lookup((st+i)); // perhaps without prefix
  1826         // search homonym with compound flag
  1827         while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  1828                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1829                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
  1830                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))) ||
  1831                              (scpd != 0 && checkcpdtable[scpd-1].cond2 != FLAG_NULL &&
  1832                                 !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
  1833                            )) {
  1834             rv = rv->next_homonym;
  1837             // check FORCEUCASE
  1838             if (rv && forceucase && (rv) &&
  1839                 (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
  1841             if (rv && words && words[wnum + 1]) return rv_first;
  1843             oldnumsyllable2 = numsyllable;
  1844             oldwordnum2 = wordnum;
  1847 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
  1848             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
  1849                 numsyllable--;
  1851 // END of LANG_hu section
  1853             // increment word number, if the second root has a compoundroot flag
  1854             if ((rv) && (compoundroot) && 
  1855                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1856                     wordnum++;
  1859             // check forbiddenwords
  1860             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1861                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1862                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
  1864             // second word is acceptable, as a root?
  1865             // hungarian conventions: compounding is acceptable,
  1866             // when compound forms consist of 2 words, or if more,
  1867             // then the syllable number of root words must be 6, or lesser.
  1869             if ((rv) && (
  1870                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  1871                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
  1873                 && (
  1874                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
  1875                       ((cpdmaxsyllable!=0) && 
  1876                           (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen)<=cpdmaxsyllable))
  1877                     ) &&
  1879                  // test CHECKCOMPOUNDPATTERN
  1880                  !numcheckcpd || scpd != 0 || !cpdpat_check(word, i, rv_first, rv, 0)
  1881                ) &&
  1883                      (!checkcompounddup || (rv != rv_first))
  1885             // test CHECKCOMPOUNDPATTERN conditions
  1886                 && (scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
  1887                       TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
  1890                       // forbid compound word, if it is a non compound word with typical fault
  1891                       if (checkcompoundrep && cpdrep_check(word,len)) return NULL;
  1892                       return rv_first;
  1895             numsyllable = oldnumsyllable2;
  1896             wordnum = oldwordnum2;
  1898             // perhaps second word has prefix or/and suffix
  1899             sfx = NULL;
  1900             sfxflag = FLAG_NULL;
  1901             rv = (compoundflag && !onlycpdrule) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
  1902             if (!rv && compoundend && !onlycpdrule) {
  1903                 sfx = NULL;
  1904                 pfx = NULL;
  1905                 rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
  1908             if (!rv && numdefcpd && words) {
  1909                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
  1910                 if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv_first;
  1911                 rv = NULL;
  1914             // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
  1915             if (rv && !(scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL || 
  1916                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))) rv = NULL;
  1918             // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
  1919             if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) rv = NULL;
  1921             // check non_compound flag in suffix and prefix
  1922             if ((rv) && 
  1923                 ((pfx && pfx->getCont() &&
  1924                     TESTAFF(pfx->getCont(), compoundforbidflag, 
  1925                         pfx->getContLen())) ||
  1926                 (sfx && sfx->getCont() &&
  1927                     TESTAFF(sfx->getCont(), compoundforbidflag, 
  1928                         sfx->getContLen())))) {
  1929                     rv = NULL;
  1932             // check FORCEUCASE
  1933             if (rv && forceucase && (rv) &&
  1934                 (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
  1936             // check forbiddenwords
  1937             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  1938                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  1939                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
  1941             // pfxappnd = prefix of word+i, or NULL
  1942             // calculate syllable number of prefix.
  1943             // hungarian convention: when syllable number of prefix is more,
  1944             // than 1, the prefix+word counts as two words.
  1946             if (langnum == LANG_hu) {
  1947                 // calculate syllable number of the word
  1948                 numsyllable += get_syllable(word + i, strlen(word + i));
  1950                 // - affix syllable num.
  1951                 // XXX only second suffix (inflections, not derivations)
  1952                 if (sfxappnd) {
  1953                     char * tmp = myrevstrdup(sfxappnd);
  1954                     numsyllable -= get_syllable(tmp, strlen(tmp));
  1955                     free(tmp);
  1958                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  1959                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  1961                 // increment syllable num, if last word has a SYLLABLENUM flag
  1962                 // and the suffix is beginning `s'
  1964                 if (cpdsyllablenum) {
  1965                     switch (sfxflag) {
  1966                         case 'c': { numsyllable+=2; break; }
  1967                         case 'J': { numsyllable += 1; break; }
  1968                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
  1973             // increment word number, if the second word has a compoundroot flag
  1974             if ((rv) && (compoundroot) && 
  1975                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  1976                     wordnum++;
  1979             // second word is acceptable, as a word with prefix or/and suffix?
  1980             // hungarian conventions: compounding is acceptable,
  1981             // when compound forms consist 2 word, otherwise
  1982             // the syllable number of root words is 6, or lesser.
  1983             if ((rv) && 
  1985                       ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || 
  1986                       ((cpdmaxsyllable != 0) && 
  1987                           (numsyllable <= cpdmaxsyllable))
  1989                 && (
  1990                    (!checkcompounddup || (rv != rv_first))
  1991                    )) {
  1992                     // forbid compound word, if it is a non compound word with typical fault
  1993                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
  1994                     return rv_first;
  1997             numsyllable = oldnumsyllable2;
  1998             wordnum = oldwordnum2;
  2000             // perhaps second word is a compound word (recursive call)
  2001             if (wordnum < maxwordnum) {
  2002                 rv = compound_check((st+i),strlen(st+i), wordnum+1,
  2003                      numsyllable, maxwordnum, wnum + 1, words, 0, is_sug, info);
  2005                 if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) ||
  2006                    (scpd != 0 && !cpdpat_check(word, i, rv_first, rv, affixed)))) rv = NULL;
  2007             } else {
  2008                 rv=NULL;
  2010             if (rv) {
  2011                 // forbid compound word, if it is a non compound word with typical fault
  2012                 if (checkcompoundrep || forbiddenword) {
  2013                     struct hentry * rv2 = NULL;
  2015                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
  2017                     // check first part
  2018                     if (strncmp(rv->word, word + i, rv->blen) == 0) {
  2019                         char r = *(st + i + rv->blen);
  2020                         *(st + i + rv->blen) = '\0';
  2022                         if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) {
  2023                             *(st + i + rv->blen) = r;
  2024                             continue;
  2027                         if (forbiddenword) {
  2028                             rv2 = lookup(word);
  2029                             if (!rv2) rv2 = affix_check(word, len);
  2030                             if (rv2 && rv2->astr && TESTAFF(rv2->astr, forbiddenword, rv2->alen) && 
  2031                                 (strncmp(rv2->word, st, i + rv->blen) == 0)) {
  2032                                     return NULL;
  2035                         *(st + i + rv->blen) = r;
  2038                 return rv_first;
  2040           } while (striple && !checkedstriple); // end of striple loop
  2042           if (checkedstriple) {
  2043             i++;
  2044             checkedstriple = 0;
  2045             striple = 0;
  2048         } // first word is ok condition
  2050         if (soldi != 0) {
  2051           i = soldi;
  2052           soldi = 0;
  2053           len = oldlen;
  2054           cmin = oldcmin;
  2055           cmax = oldcmax;
  2057         scpd++;
  2060         } while (!onlycpdrule && simplifiedcpd && scpd <= numcheckcpd); // end of simplifiedcpd loop
  2062         scpd = 0;
  2063         wordnum = oldwordnum;
  2064         numsyllable = oldnumsyllable;
  2066         if (soldi != 0) {
  2067           i = soldi;
  2068           strcpy(st, word); // XXX add more optim.
  2069           soldi = 0;
  2070         } else st[i] = ch;
  2072         } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
  2076     return NULL;
  2079 // check if compound word is correctly spelled; the morphological description of each accepted analysis is appended to *result
  2080 // hu_mov_rule = special Hungarian rule (XXX)
  2081 int AffixMgr::compound_check_morph(const char * word, int len, 
  2082     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
  2083     char hu_mov_rule = 0, char ** result = NULL, char * partresult = NULL)
  2085     int i;
  2086     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
  2087     int ok = 0;
  2089     struct hentry * rv = NULL;
  2090     struct hentry * rv_first;
  2091     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
  2092     char st [MAXWORDUTF8LEN + 4];
  2093     char ch;
  2095     int checked_prefix;
  2096     char presult[MAXLNLEN];
  2098     int cmin;
  2099     int cmax;
  2101     int onlycpdrule;
  2102     int affixed = 0;
  2103     hentry ** oldwords = words;
  2105     setcminmax(&cmin, &cmax, word, len);
  2107     strcpy(st, word);
  2109     for (i = cmin; i < cmax; i++) {
  2110         oldnumsyllable = numsyllable;
  2111         oldwordnum = wordnum;
  2112         checked_prefix = 0;
  2114         // go to end of the UTF-8 character
  2115         if (utf8) {
  2116             for (; (st[i] & 0xc0) == 0x80; i++);
  2117             if (i >= cmax) return 0;
  2120         words = oldwords;
  2121         onlycpdrule = (words) ? 1 : 0;
  2123         do { // onlycpdrule loop
  2125         oldnumsyllable = numsyllable;
  2126         oldwordnum = wordnum;
  2127         checked_prefix = 0;
  2129         ch = st[i];
  2130         st[i] = '\0';
  2131         sfx = NULL;
  2133         // FIRST WORD
  2135         affixed = 1;
  2137         *presult = '\0';
  2138         if (partresult) mystrcat(presult, partresult, MAXLNLEN);
  2140         rv = lookup(st); // perhaps without prefix
  2142         // search homonym with compound flag
  2143         while ((rv) && !hu_mov_rule && 
  2144             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  2145                 !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  2146                 (compoundbegin && !wordnum && !onlycpdrule &&
  2147                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  2148                 (compoundmiddle && wordnum && !words && !onlycpdrule &&
  2149                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
  2150                   (numdefcpd && onlycpdrule &&
  2151                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
  2152                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
  2153                   ))) {
  2154             rv = rv->next_homonym;
  2157         if (rv) affixed = 0;
  2159         if (rv)  {
  2160             sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st);
  2161             if (!HENTRY_FIND(rv, MORPH_STEM)) {
  2162                 sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, st);
  2164             // store the pointer of the hash entry
  2165 //            sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
  2166             if (HENTRY_DATA(rv)) {
  2167                 sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA2(rv));
  2171         if (!rv) {
  2172             if (onlycpdrule) break;
  2173             if (compoundflag &&
  2174              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
  2175                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
  2176                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
  2177                     sfx->getCont() &&
  2178                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
  2179                             sfx->getContLen())) || (compoundend &&
  2180                         TESTAFF(sfx->getCont(), compoundend, 
  2181                             sfx->getContLen())))) {
  2182                         rv = NULL;
  2186             if (rv ||
  2187               (((wordnum == 0) && compoundbegin &&
  2188                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  2189                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
  2190               ((wordnum > 0) && compoundmiddle &&
  2191                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
  2192                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
  2193               ) {
  2194                 // char * p = prefix_check_morph(st, i, 0, compound);
  2195                 char * p = NULL;
  2196                 if (compoundflag) p = affix_check_morph(st, i, compoundflag);
  2197                 if (!p || (*p == '\0')) {
  2198                    if (p) free(p);
  2199                    p = NULL;
  2200                    if ((wordnum == 0) && compoundbegin) {
  2201                      p = affix_check_morph(st, i, compoundbegin);
  2202                    } else if ((wordnum > 0) && compoundmiddle) {
  2203                      p = affix_check_morph(st, i, compoundmiddle);                   
  2206                 if (p && (*p != '\0')) {
  2207                     sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD,
  2208                         MORPH_PART, st, line_uniq_app(&p, MSEP_REC));
  2210                 if (p) free(p);
  2211                 checked_prefix = 1;
  2213         // else check forbiddenwords
  2214         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  2215             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
  2216             TESTAFF(rv->astr, needaffix, rv->alen))) {
  2217                 st[i] = ch;
  2218                 continue;
  2221             // check non_compound flag in suffix and prefix
  2222             if ((rv) && !hu_mov_rule &&
  2223                 ((pfx && pfx->getCont() &&
  2224                     TESTAFF(pfx->getCont(), compoundforbidflag, 
  2225                         pfx->getContLen())) ||
  2226                 (sfx && sfx->getCont() &&
  2227                     TESTAFF(sfx->getCont(), compoundforbidflag, 
  2228                         sfx->getContLen())))) {
  2229                     continue;
  2232             // check compoundend flag in suffix and prefix
  2233             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
  2234                 ((pfx && pfx->getCont() &&
  2235                     TESTAFF(pfx->getCont(), compoundend, 
  2236                         pfx->getContLen())) ||
  2237                 (sfx && sfx->getCont() &&
  2238                     TESTAFF(sfx->getCont(), compoundend, 
  2239                         sfx->getContLen())))) {
  2240                     continue;
  2243             // check compoundmiddle flag in suffix and prefix
  2244             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
  2245                 ((pfx && pfx->getCont() &&
  2246                     TESTAFF(pfx->getCont(), compoundmiddle, 
  2247                         pfx->getContLen())) ||
  2248                 (sfx && sfx->getCont() &&
  2249                     TESTAFF(sfx->getCont(), compoundmiddle, 
  2250                         sfx->getContLen())))) {
  2251                     rv = NULL;
  2254         // check forbiddenwords
  2255         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen)
  2256             || TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) continue;
  2258         // increment word number, if the second root has a compoundroot flag
  2259         if ((rv) && (compoundroot) && 
  2260             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  2261                 wordnum++;
  2264         // first word is acceptable in compound words?
  2265         if (((rv) && 
  2266           ( checked_prefix || (words && words[wnum]) ||
  2267             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  2268             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
  2269             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen)) 
  2270 // LANG_hu section: spec. Hungarian rule
  2271             || ((langnum == LANG_hu) && // hu_mov_rule
  2272                 hu_mov_rule && (
  2273                     TESTAFF(rv->astr, 'F', rv->alen) ||
  2274                     TESTAFF(rv->astr, 'G', rv->alen) ||
  2275                     TESTAFF(rv->astr, 'H', rv->alen)
  2278 // END of LANG_hu section
  2280           && ! (( checkcompoundtriple && !words && // test triple letters
  2281                    (word[i-1]==word[i]) && (
  2282                       ((i>1) && (word[i-1]==word[i-2])) || 
  2283                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
  2285                ) ||
  2287                    // test CHECKCOMPOUNDPATTERN
  2288                    numcheckcpd && !words && cpdpat_check(word, i, rv, NULL, affixed)
  2289                ) ||
  2291                  checkcompoundcase && !words && cpdcase_check(word, i)
  2292                ))
  2294 // LANG_hu section: spec. Hungarian rule
  2295          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
  2296               (sfx && sfx->getCont() && (
  2297                         TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
  2298                         TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
  2302 // END of LANG_hu section
  2303          ) {
  2305 // LANG_hu section: spec. Hungarian rule
  2306             if (langnum == LANG_hu) {
  2307                 // calculate syllable number of the word
  2308                 numsyllable += get_syllable(st, i);
  2310                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  2311                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  2313 // END of LANG_hu section
  2315             // NEXT WORD(S)
  2316             rv_first = rv;
  2317             rv = lookup((word+i)); // perhaps without prefix
  2319         // search homonym with compound flag
  2320         while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
  2321                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  2322                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
  2323                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
  2324             rv = rv->next_homonym;
  2327             if (rv && words && words[wnum + 1]) {
  2328                   mystrcat(*result, presult, MAXLNLEN);
  2329                   mystrcat(*result, " ", MAXLNLEN);
  2330                   mystrcat(*result, MORPH_PART, MAXLNLEN);
  2331                   mystrcat(*result, word+i, MAXLNLEN);
  2332                   if (complexprefixes && HENTRY_DATA(rv)) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  2333                   if (!HENTRY_FIND(rv, MORPH_STEM)) {
  2334                     mystrcat(*result, " ", MAXLNLEN);
  2335                     mystrcat(*result, MORPH_STEM, MAXLNLEN);
  2336                     mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
  2338                   // store the pointer of the hash entry
  2339 //                  sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
  2340                   if (!complexprefixes && HENTRY_DATA(rv)) {
  2341                     mystrcat(*result, " ", MAXLNLEN);
  2342                     mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  2344                   mystrcat(*result, "\n", MAXLNLEN);
  2345                   ok = 1;
  2346                   return 0;
  2349             oldnumsyllable2 = numsyllable;
  2350             oldwordnum2 = wordnum;
  2352 // LANG_hu section: spec. Hungarian rule
  2353             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
  2354                 numsyllable--;
  2356 // END of LANG_hu section
  2357             // increment word number, if the second root has a compoundroot flag
  2358             if ((rv) && (compoundroot) && 
  2359                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  2360                     wordnum++;
  2363             // check forbiddenwords
  2364             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
  2365                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
  2366                 st[i] = ch;
  2367                 continue;
  2370             // second word is acceptable, as a root?
  2371             // hungarian conventions: compounding is acceptable,
  2372             // when compound forms consist of 2 words, or if more,
  2373             // then the syllable number of root words must be 6, or lesser.
  2374             if ((rv) && (
  2375                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
  2376                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
  2378                 && (
  2379                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
  2380                       ((cpdmaxsyllable!=0) &&
  2381                           (numsyllable+get_syllable(HENTRY_WORD(rv),rv->blen)<=cpdmaxsyllable))
  2383                 && (
  2384                      (!checkcompounddup || (rv != rv_first))
  2388                       // bad compound word
  2389                       mystrcat(*result, presult, MAXLNLEN);
  2390                       mystrcat(*result, " ", MAXLNLEN);
  2391                       mystrcat(*result, MORPH_PART, MAXLNLEN);
  2392                       mystrcat(*result, word+i, MAXLNLEN);
  2394                       if (HENTRY_DATA(rv)) {
  2395                         if (complexprefixes) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  2396                         if (! HENTRY_FIND(rv, MORPH_STEM)) {
  2397                            mystrcat(*result, " ", MAXLNLEN);
  2398                            mystrcat(*result, MORPH_STEM, MAXLNLEN);
  2399                            mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
  2401                         // store the pointer of the hash entry
  2402 //                        sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
  2403                         if (!complexprefixes) {
  2404                             mystrcat(*result, " ", MAXLNLEN);
  2405                             mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
  2408                       mystrcat(*result, "\n", MAXLNLEN);
  2409                               ok = 1;
  2412             numsyllable = oldnumsyllable2 ;
  2413             wordnum = oldwordnum2;
  2415             // perhaps second word has prefix or/and suffix
  2416             sfx = NULL;
  2417             sfxflag = FLAG_NULL;
  2419             if (compoundflag && !onlycpdrule) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
  2421             if (!rv && compoundend && !onlycpdrule) {
  2422                 sfx = NULL;
  2423                 pfx = NULL;
  2424                 rv = affix_check((word+i),strlen(word+i), compoundend);
  2427             if (!rv && numdefcpd && words) {
  2428                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
  2429                 if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
  2430                       char * m = NULL;
  2431                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
  2432                       if ((!m || *m == '\0') && compoundend) {
  2433                             if (m) free(m);
  2434                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
  2436                       mystrcat(*result, presult, MAXLNLEN);
  2437                       if (m || (*m != '\0')) {
  2438                         sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
  2439                             MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
  2441                       if (m) free(m);
  2442                       mystrcat(*result, "\n", MAXLNLEN);
  2443                       ok = 1;
  2447             // check non_compound flag in suffix and prefix
  2448             if ((rv) && 
  2449                 ((pfx && pfx->getCont() &&
  2450                     TESTAFF(pfx->getCont(), compoundforbidflag, 
  2451                         pfx->getContLen())) ||
  2452                 (sfx && sfx->getCont() &&
  2453                     TESTAFF(sfx->getCont(), compoundforbidflag, 
  2454                         sfx->getContLen())))) {
  2455                     rv = NULL;
  2458             // check forbiddenwords
  2459             if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen) ||
  2460                     TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))
  2461                     && (! TESTAFF(rv->astr, needaffix, rv->alen))) {
  2462                         st[i] = ch;
  2463                         continue;
  2466             if (langnum == LANG_hu) {
  2467                 // calculate syllable number of the word
  2468                 numsyllable += get_syllable(word + i, strlen(word + i));
  2470                 // - affix syllable num.
  2471                 // XXX only second suffix (inflections, not derivations)
  2472                 if (sfxappnd) {
  2473                     char * tmp = myrevstrdup(sfxappnd);
  2474                     numsyllable -= get_syllable(tmp, strlen(tmp));
  2475                     free(tmp);
  2478                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
  2479                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
  2481                 // increment syllable num, if last word has a SYLLABLENUM flag
  2482                 // and the suffix is beginning `s'
  2484                 if (cpdsyllablenum) {
  2485                     switch (sfxflag) {
  2486                         case 'c': { numsyllable+=2; break; }
  2487                         case 'J': { numsyllable += 1; break; }
  2488                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
  2493             // increment word number, if the second word has a compoundroot flag
  2494             if ((rv) && (compoundroot) && 
  2495                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
  2496                     wordnum++;
  2498             // second word is acceptable, as a word with prefix or/and suffix?
  2499             // hungarian conventions: compounding is acceptable,
  2500             // when compound forms consist 2 word, otherwise
  2501             // the syllable number of root words is 6, or lesser.
  2502             if ((rv) && 
  2504                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
  2505                       ((cpdmaxsyllable!=0) &&
  2506                           (numsyllable <= cpdmaxsyllable))
  2508                 && (
  2509                    (!checkcompounddup || (rv != rv_first))
  2510                    )) {
  2511                       char * m = NULL;
  2512                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
  2513                       if ((!m || *m == '\0') && compoundend) {
  2514                             if (m) free(m);
  2515                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
  2517                       mystrcat(*result, presult, MAXLNLEN);
  2518                       if (m && (*m != '\0')) {
  2519                         sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
  2520                             MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
  2522                       if (m) free(m);
  2523                       sprintf(*result + strlen(*result), "%c", MSEP_REC);
  2524                       ok = 1;
  2527             numsyllable = oldnumsyllable2;
  2528             wordnum = oldwordnum2;
  2530             // perhaps second word is a compound word (recursive call)
  2531             if ((wordnum < maxwordnum) && (ok == 0)) {
  2532                         compound_check_morph((word+i),strlen(word+i), wordnum+1, 
  2533                              numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
  2534             } else {
  2535                 rv=NULL;
  2538         st[i] = ch;
  2539         wordnum = oldwordnum;
  2540         numsyllable = oldnumsyllable;
  2542         } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
  2545     return 0;
  2548  // return 1 if s1 (reversed) is a leading subset of end of s2
  2549 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
  2551     while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
  2552         s1++;
  2553         end_of_s2--;
  2554         len--;
  2556     return (*s1 == '\0');
  2558  */
  2560 inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
  2562     while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) {
  2563         s1++;
  2564         end_of_s2--;
  2565         len--;
  2567     return (*s1 == '\0');
  2570 // check word for suffixes
  2572 struct hentry * AffixMgr::suffix_check (const char * word, int len, 
  2573        int sfxopts, PfxEntry * ppfx, char ** wlst, int maxSug, int * ns, 
  2574        const FLAG cclass, const FLAG needflag, char in_compound)
  2576     struct hentry * rv = NULL;
  2577     PfxEntry* ep = ppfx;
  2579     // first handle the special case of 0 length suffixes
  2580     SfxEntry * se = sStart[0];
  2582     while (se) {
  2583         if (!cclass || se->getCont()) {
  2584             // suffixes are not allowed in beginning of compounds
  2585             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  2586              // except when signed with compoundpermitflag flag
  2587              (se->getCont() && compoundpermitflag &&
  2588                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
  2589               // no circumfix flag in prefix and suffix
  2590               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  2591                    circumfix, ep->getContLen())) &&
  2592                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
  2593               // circumfix flag in prefix AND suffix
  2594               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  2595                    circumfix, ep->getContLen())) &&
  2596                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
  2597             // fogemorpheme
  2598               (in_compound || 
  2599                  !(se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
  2600             // needaffix on prefix or first suffix
  2601               (cclass || 
  2602                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
  2603                    (ppfx && !((ep->getCont()) &&
  2604                      TESTAFF(ep->getCont(), needaffix,
  2605                        ep->getContLen())))
  2606               )) {
  2607                 rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass, 
  2608                     needflag, (in_compound ? 0 : onlyincompound));
  2609                 if (rv) {
  2610                     sfx=se; // BUG: sfx not stateless
  2611                     return rv;
  2615        se = se->getNext();
  2618     // now handle the general case
  2619     if (len == 0) return NULL; // FULLSTRIP
  2620     unsigned char sp= *((const unsigned char *)(word + len - 1));
  2621     SfxEntry * sptr = sStart[sp];
  2623     while (sptr) {
  2624         if (isRevSubset(sptr->getKey(), word + len - 1, len)
  2625         ) {
  2626             // suffixes are not allowed in beginning of compounds
  2627             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  2628              // except when signed with compoundpermitflag flag
  2629              (sptr->getCont() && compoundpermitflag &&
  2630                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
  2631               // no circumfix flag in prefix and suffix
  2632               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  2633                    circumfix, ep->getContLen())) &&
  2634                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
  2635               // circumfix flag in prefix AND suffix
  2636               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  2637                    circumfix, ep->getContLen())) &&
  2638                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
  2639             // fogemorpheme
  2640               (in_compound || 
  2641                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
  2642             // needaffix on prefix or first suffix
  2643               (cclass || 
  2644                   !(sptr->getCont() && TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
  2645                   (ppfx && !((ep->getCont()) &&
  2646                      TESTAFF(ep->getCont(), needaffix,
  2647                        ep->getContLen())))
  2649             ) if (in_compound != IN_CPD_END || ppfx || !(sptr->getCont() && TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) {
  2650                 rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
  2651                     maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound));
  2652                 if (rv) {
  2653                     sfx=sptr; // BUG: sfx not stateless
  2654                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
  2655                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
  2656                     return rv;
  2659              sptr = sptr->getNextEQ();
  2660         } else {
  2661              sptr = sptr->getNextNE();
  2665     return NULL;
  2668 // check word for two-level suffixes
  2670 struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len, 
  2671        int sfxopts, PfxEntry * ppfx, const FLAG needflag)
  2673     struct hentry * rv = NULL;
  2675     // first handle the special case of 0 length suffixes
  2676     SfxEntry * se = sStart[0];
  2677     while (se) {
  2678         if (contclasses[se->getFlag()])
  2680             rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
  2681             if (rv) return rv;
  2683         se = se->getNext();
  2686     // now handle the general case
  2687     if (len == 0) return NULL; // FULLSTRIP
  2688     unsigned char sp = *((const unsigned char *)(word + len - 1));
  2689     SfxEntry * sptr = sStart[sp];
  2691     while (sptr) {
  2692         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
  2693             if (contclasses[sptr->getFlag()])
  2695                 rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
  2696                 if (rv) {
  2697                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
  2698                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
  2699                     return rv;
  2702             sptr = sptr->getNextEQ();
  2703         } else {
  2704              sptr = sptr->getNextNE();
  2708     return NULL;
  2711 char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len, 
  2712        int sfxopts, PfxEntry * ppfx, const FLAG needflag)
  2714     char result[MAXLNLEN];
  2715     char result2[MAXLNLEN];
  2716     char result3[MAXLNLEN];
  2718     char * st;
  2720     result[0] = '\0';
  2721     result2[0] = '\0';
  2722     result3[0] = '\0';
  2724     // first handle the special case of 0 length suffixes
  2725     SfxEntry * se = sStart[0];
  2726     while (se) {
  2727         if (contclasses[se->getFlag()])
  2729             st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
  2730             if (st) {
  2731                 if (ppfx) {
  2732                     if (ppfx->getMorph()) {
  2733                         mystrcat(result, ppfx->getMorph(), MAXLNLEN);
  2734                         mystrcat(result, " ", MAXLNLEN);
  2735                     } else debugflag(result, ppfx->getFlag());
  2737                 mystrcat(result, st, MAXLNLEN);
  2738                 free(st);
  2739                 if (se->getMorph()) {
  2740                     mystrcat(result, " ", MAXLNLEN);
  2741                     mystrcat(result, se->getMorph(), MAXLNLEN);
  2742                 } else debugflag(result, se->getFlag());
  2743                 mystrcat(result, "\n", MAXLNLEN);
  2746         se = se->getNext();
  2749     // now handle the general case
  2750     if (len == 0) return NULL; // FULLSTRIP
  2751     unsigned char sp = *((const unsigned char *)(word + len - 1));
  2752     SfxEntry * sptr = sStart[sp];
  2754     while (sptr) {
  2755         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
  2756             if (contclasses[sptr->getFlag()]) 
  2758                 st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
  2759                 if (st) {
  2760                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
  2761                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
  2762                     strcpy(result2, st);
  2763                     free(st);
  2765                 result3[0] = '\0';
  2767                 if (sptr->getMorph()) {
  2768                     mystrcat(result3, " ", MAXLNLEN);
  2769                     mystrcat(result3, sptr->getMorph(), MAXLNLEN);
  2770                 } else debugflag(result3, sptr->getFlag());
  2771                 strlinecat(result2, result3);
  2772                 mystrcat(result2, "\n", MAXLNLEN);
  2773                 mystrcat(result,  result2, MAXLNLEN);
  2776             sptr = sptr->getNextEQ();
  2777         } else {
  2778              sptr = sptr->getNextNE();
  2781     if (*result) return mystrdup(result);
  2782     return NULL;
  2785 char * AffixMgr::suffix_check_morph(const char * word, int len, 
  2786        int sfxopts, PfxEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
  2788     char result[MAXLNLEN];
  2790     struct hentry * rv = NULL;
  2792     result[0] = '\0';
  2794     PfxEntry* ep = ppfx;
  2796     // first handle the special case of 0 length suffixes
  2797     SfxEntry * se = sStart[0];
  2798     while (se) {
  2799         if (!cclass || se->getCont()) {
  2800             // suffixes are not allowed in beginning of compounds
  2801             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  2802              // except when signed with compoundpermitflag flag
  2803              (se->getCont() && compoundpermitflag &&
  2804                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
  2805               // no circumfix flag in prefix and suffix
  2806               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  2807                    circumfix, ep->getContLen())) &&
  2808                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
  2809               // circumfix flag in prefix AND suffix
  2810               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  2811                    circumfix, ep->getContLen())) &&
  2812                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
  2813             // fogemorpheme
  2814               (in_compound || 
  2815                  !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
  2816             // needaffix on prefix or first suffix
  2817               (cclass || 
  2818                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
  2819                    (ppfx && !((ep->getCont()) &&
  2820                      TESTAFF(ep->getCont(), needaffix,
  2821                        ep->getContLen())))
  2823             ))
  2824             rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
  2825          while (rv) {
  2826            if (ppfx) {
  2827                 if (ppfx->getMorph()) {
  2828                     mystrcat(result, ppfx->getMorph(), MAXLNLEN);
  2829                     mystrcat(result, " ", MAXLNLEN);
  2830                 } else debugflag(result, ppfx->getFlag());
  2832             if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  2833             if (! HENTRY_FIND(rv, MORPH_STEM)) {
  2834                 mystrcat(result, " ", MAXLNLEN);                                
  2835                 mystrcat(result, MORPH_STEM, MAXLNLEN);
  2836                 mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
  2838             // store the pointer of the hash entry
  2839 //            sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
  2841             if (!complexprefixes && HENTRY_DATA(rv)) {
  2842                     mystrcat(result, " ", MAXLNLEN);                                
  2843                     mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  2845             if (se->getMorph()) {
  2846                 mystrcat(result, " ", MAXLNLEN);                                
  2847                 mystrcat(result, se->getMorph(), MAXLNLEN);
  2848             } else debugflag(result, se->getFlag());
  2849             mystrcat(result, "\n", MAXLNLEN);
  2850             rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
  2853        se = se->getNext();
  2856     // now handle the general case
  2857     if (len == 0) return NULL; // FULLSTRIP
  2858     unsigned char sp = *((const unsigned char *)(word + len - 1));
  2859     SfxEntry * sptr = sStart[sp];
  2861     while (sptr) {
  2862         if (isRevSubset(sptr->getKey(), word + len - 1, len)
  2863         ) {
  2864             // suffixes are not allowed in beginning of compounds
  2865             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
  2866              // except when signed with compoundpermitflag flag
  2867              (sptr->getCont() && compoundpermitflag &&
  2868                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
  2869               // no circumfix flag in prefix and suffix
  2870               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
  2871                    circumfix, ep->getContLen())) &&
  2872                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
  2873               // circumfix flag in prefix AND suffix
  2874               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
  2875                    circumfix, ep->getContLen())) &&
  2876                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
  2877             // fogemorpheme
  2878               (in_compound || 
  2879                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
  2880             // needaffix on first suffix
  2881               (cclass || !(sptr->getCont() && 
  2882                    TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))
  2883             )) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
  2884             while (rv) {
  2885                     if (ppfx) {
  2886                         if (ppfx->getMorph()) {
  2887                             mystrcat(result, ppfx->getMorph(), MAXLNLEN);
  2888                             mystrcat(result, " ", MAXLNLEN);
  2889                         } else debugflag(result, ppfx->getFlag());
  2891                     if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  2892                     if (! HENTRY_FIND(rv, MORPH_STEM)) {
  2893                             mystrcat(result, " ", MAXLNLEN);                                
  2894                             mystrcat(result, MORPH_STEM, MAXLNLEN);
  2895                             mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
  2897                     // store the pointer of the hash entry
  2898 //                    sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
  2900                     if (!complexprefixes && HENTRY_DATA(rv)) {
  2901                         mystrcat(result, " ", MAXLNLEN);                                
  2902                         mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
  2905                 if (sptr->getMorph()) {
  2906                     mystrcat(result, " ", MAXLNLEN);
  2907                     mystrcat(result, sptr->getMorph(), MAXLNLEN);
  2908                 } else debugflag(result, sptr->getFlag());
  2909                 mystrcat(result, "\n", MAXLNLEN);
  2910                 rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
  2912              sptr = sptr->getNextEQ();
  2913         } else {
  2914              sptr = sptr->getNextNE();
  2918     if (*result) return mystrdup(result);
  2919     return NULL;
  2922 // check if word with affixes is correctly spelled
  2923 struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
  2925     struct hentry * rv= NULL;
  2927     // check all prefixes (also crossed with suffixes if allowed) 
  2928     rv = prefix_check(word, len, in_compound, needflag);
  2929     if (rv) return rv;
  2931     // if still not found check all suffixes
  2932     rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
  2934     if (havecontclass) {
  2935         sfx = NULL;
  2936         pfx = NULL;
  2938         if (rv) return rv;
  2939         // if still not found check all two-level suffixes
  2940         rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
  2942         if (rv) return rv;
  2943         // if still not found check all two-level suffixes
  2944         rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
  2947     return rv;
  2950 // check if word with affixes is correctly spelled
  2951 char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
  2953     char result[MAXLNLEN];
  2954     char * st = NULL;
  2956     *result = '\0';
  2958     // check all prefixes (also crossed with suffixes if allowed) 
  2959     st = prefix_check_morph(word, len, in_compound);
  2960     if (st) {
  2961         mystrcat(result, st, MAXLNLEN);
  2962         free(st);
  2965     // if still not found check all suffixes    
  2966     st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
  2967     if (st) {
  2968         mystrcat(result, st, MAXLNLEN);
  2969         free(st);
  2972     if (havecontclass) {
  2973         sfx = NULL;
  2974         pfx = NULL;
  2975         // if still not found check all two-level suffixes
  2976         st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
  2977         if (st) {
  2978             mystrcat(result, st, MAXLNLEN);
  2979             free(st);
  2982         // if still not found check all two-level suffixes
  2983         st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
  2984         if (st) {
  2985             mystrcat(result, st, MAXLNLEN);
  2986             free(st);
  2990     return mystrdup(result);
  2993 char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
  2994     unsigned short al, char * morph, char * targetmorph, int level)
  2996     // handle suffixes
  2997     char * stemmorph;
  2998     char * stemmorphcatpos;
  2999     char mymorph[MAXLNLEN];
  3001     if (!morph) return NULL;
  3003     // check substandard flag
  3004     if (TESTAFF(ap, substandard, al)) return NULL;
  3006     if (morphcmp(morph, targetmorph) == 0) return mystrdup(ts);
  3008 //    int targetcount = get_sfxcount(targetmorph);
  3010     // use input suffix fields, if exist
  3011     if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
  3012         stemmorph = mymorph;
  3013         strcpy(stemmorph, morph);
  3014         mystrcat(stemmorph, " ", MAXLNLEN);
  3015         stemmorphcatpos = stemmorph + strlen(stemmorph);
  3016     } else {
  3017         stemmorph = morph;
  3018         stemmorphcatpos = NULL;
  3021     for (int i = 0; i < al; i++) {
  3022         const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
  3023         SfxEntry * sptr = sFlag[c];
  3024         while (sptr) {
  3025             if (sptr->getFlag() == ap[i] && sptr->getMorph() && ((sptr->getContLen() == 0) || 
  3026                 // don't generate forms with substandard affixes
  3027                 !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
  3029                 if (stemmorphcatpos) strcpy(stemmorphcatpos, sptr->getMorph());
  3030                 else stemmorph = (char *) sptr->getMorph();
  3032                 int cmp = morphcmp(stemmorph, targetmorph);
  3034                 if (cmp == 0) {
  3035                     char * newword = sptr->add(ts, wl);
  3036                     if (newword) {
  3037                         hentry * check = pHMgr->lookup(newword); // XXX extra dic
  3038                         if (!check || !check->astr || 
  3039                             !(TESTAFF(check->astr, forbiddenword, check->alen) || 
  3040                               TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
  3041                                 return newword;
  3043                         free(newword);
  3047                 // recursive call for secondary suffixes
  3048                 if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
  3049 //                    (get_sfxcount(stemmorph) < targetcount) &&
  3050                     !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
  3051                     char * newword = sptr->add(ts, wl);
  3052                     if (newword) {
  3053                         char * newword2 = morphgen(newword, strlen(newword), sptr->getCont(),
  3054                             sptr->getContLen(), stemmorph, targetmorph, 1);
  3056                         if (newword2) {
  3057                             free(newword);
  3058                             return newword2;
  3060                         free(newword);
  3061                         newword = NULL;
  3065             sptr = sptr->getFlgNxt();
  3068    return NULL;
  3072 int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
  3073     int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
  3074     char * phon)
  3076     int nh=0;
  3077     // first add root word to list
  3078     if ((nh < maxn) && !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
  3079          (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
  3080        wlst[nh].word = mystrdup(ts);
  3081        if (!wlst[nh].word) return 0;
  3082        wlst[nh].allow = (1 == 0);
  3083        wlst[nh].orig = NULL;
  3084        nh++;
  3085        // add special phonetic version
  3086        if (phon && (nh < maxn)) {
  3087     	    wlst[nh].word = mystrdup(phon);
  3088             if (!wlst[nh].word) return nh - 1;
  3089     	    wlst[nh].allow = (1 == 0);
  3090     	    wlst[nh].orig = mystrdup(ts);
  3091             if (!wlst[nh].orig) return nh - 1;
  3092     	    nh++;
  3096     // handle suffixes
  3097     for (int i = 0; i < al; i++) {
  3098        const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
  3099        SfxEntry * sptr = sFlag[c];
  3100        while (sptr) {
  3101          if ((sptr->getFlag() == ap[i]) && (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
  3102                 (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
  3103                 // check needaffix flag
  3104                 !(sptr->getCont() && ((needaffix && 
  3105                       TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
  3106                   (circumfix && 
  3107                       TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
  3108                   (onlyincompound && 
  3109                       TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))
  3110                 ) {
  3111             char * newword = sptr->add(ts, wl);
  3112             if (newword) {
  3113                 if (nh < maxn) {
  3114                     wlst[nh].word = newword;
  3115                     wlst[nh].allow = sptr->allowCross();
  3116                     wlst[nh].orig = NULL;
  3117                     nh++;
  3118                     // add special phonetic version
  3119     		    if (phon && (nh < maxn)) {
  3120     			char st[MAXWORDUTF8LEN];
  3121     			strcpy(st, phon);
  3122     			strcat(st, sptr->getKey());
  3123     			reverseword(st + strlen(phon));
  3124     			wlst[nh].word = mystrdup(st);
  3125     			if (!wlst[nh].word) return nh - 1;
  3126     			wlst[nh].allow = (1 == 0);
  3127     			wlst[nh].orig = mystrdup(newword);
  3128                         if (!wlst[nh].orig) return nh - 1;
  3129     			nh++;
  3131                 } else {
  3132                     free(newword);
  3136          sptr = sptr->getFlgNxt();
  3140     int n = nh;
  3142     // handle cross products of prefixes and suffixes
  3143     for (int j=1;j<n ;j++)
  3144        if (wlst[j].allow) {
  3145           for (int k = 0; k < al; k++) {
  3146              const unsigned char c = (unsigned char) (ap[k] & 0x00FF);
  3147              PfxEntry * cptr = pFlag[c];
  3148              while (cptr) {
  3149                 if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
  3150                         (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
  3151                     int l1 = strlen(wlst[j].word);
  3152                     char * newword = cptr->add(wlst[j].word, l1);
  3153                     if (newword) {
  3154                        if (nh < maxn) {
  3155                           wlst[nh].word = newword;
  3156                           wlst[nh].allow = cptr->allowCross();
  3157                           wlst[nh].orig = NULL;
  3158                           nh++;
  3159                        } else {
  3160                           free(newword);
  3164                 cptr = cptr->getFlgNxt();
  3170     // now handle pure prefixes
  3171     for (int m = 0; m < al; m ++) {
  3172        const unsigned char c = (unsigned char) (ap[m] & 0x00FF);
  3173        PfxEntry * ptr = pFlag[c];
  3174        while (ptr) {
  3175          if ((ptr->getFlag() == ap[m]) && (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
  3176                 (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
  3177                 // check needaffix flag
  3178                 !(ptr->getCont() && ((needaffix && 
  3179                       TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
  3180                      (circumfix && 
  3181                       TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||                      
  3182                   (onlyincompound && 
  3183                       TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))
  3184                 ) {
  3185             char * newword = ptr->add(ts, wl);
  3186             if (newword) {
  3187                 if (nh < maxn) {
  3188                     wlst[nh].word = newword;
  3189                     wlst[nh].allow = ptr->allowCross();
  3190                     wlst[nh].orig = NULL;
  3191                     nh++;
  3192                 } else {
  3193                     free(newword);
  3197          ptr = ptr->getFlgNxt();
  3201     return nh;
  3204 // return length of replacing table
  3205 int AffixMgr::get_numrep() const
  3207   return numrep;
  3210 // return replacing table
  3211 struct replentry * AffixMgr::get_reptable() const
  3213   if (! reptable ) return NULL;
  3214   return reptable;
  3217 // return iconv table
  3218 RepList * AffixMgr::get_iconvtable() const
  3220   if (! iconvtable ) return NULL;
  3221   return iconvtable;
  3224 // return oconv table
  3225 RepList * AffixMgr::get_oconvtable() const
  3227   if (! oconvtable ) return NULL;
  3228   return oconvtable;
  3231 // return replacing table
  3232 struct phonetable * AffixMgr::get_phonetable() const
  3234   if (! phone ) return NULL;
  3235   return phone;
  3238 // return length of character map table
  3239 int AffixMgr::get_nummap() const
  3241   return nummap;
  3244 // return character map table
  3245 struct mapentry * AffixMgr::get_maptable() const
  3247   if (! maptable ) return NULL;
  3248   return maptable;
  3251 // return length of word break table
  3252 int AffixMgr::get_numbreak() const
  3254   return numbreak;
  3257 // return character map table
  3258 char ** AffixMgr::get_breaktable() const
  3260   if (! breaktable ) return NULL;
  3261   return breaktable;
  3264 // return text encoding of dictionary
  3265 char * AffixMgr::get_encoding()
  3267   if (! encoding ) encoding = mystrdup(SPELL_ENCODING);
  3268   return mystrdup(encoding);
  3271 // return text encoding of dictionary
  3272 int AffixMgr::get_langnum() const
  3274   return langnum;
  3277 // return double prefix option
  3278 int AffixMgr::get_complexprefixes() const
  3280   return complexprefixes;
  3283 // return FULLSTRIP option
  3284 int AffixMgr::get_fullstrip() const
  3286   return fullstrip;
  3289 FLAG AffixMgr::get_keepcase() const
  3291   return keepcase;
  3294 FLAG AffixMgr::get_forceucase() const
  3296   return forceucase;
  3299 FLAG AffixMgr::get_warn() const
  3301   return warn;
  3304 int AffixMgr::get_forbidwarn() const
  3306   return forbidwarn;
  3309 int AffixMgr::get_checksharps() const
  3311   return checksharps;
  3314 char * AffixMgr::encode_flag(unsigned short aflag) const
  3316   return pHMgr->encode_flag(aflag);
  3320 // return the preferred ignore string for suggestions
  3321 char * AffixMgr::get_ignore() const
  3323   if (!ignorechars) return NULL;
  3324   return ignorechars;
  3327 // return the preferred ignore string for suggestions
  3328 unsigned short * AffixMgr::get_ignore_utf16(int * len) const
  3330   *len = ignorechars_utf16_len;
  3331   return ignorechars_utf16;
  3334 // return the keyboard string for suggestions
  3335 char * AffixMgr::get_key_string()
  3337   if (! keystring ) keystring = mystrdup(SPELL_KEYSTRING);
  3338   return mystrdup(keystring);
  3341 // return the preferred try string for suggestions
  3342 char * AffixMgr::get_try_string() const
  3344   if (! trystring ) return NULL;
  3345   return mystrdup(trystring);
  3348 // return the preferred try string for suggestions
  3349 const char * AffixMgr::get_wordchars() const
  3351   return wordchars;
  3354 unsigned short * AffixMgr::get_wordchars_utf16(int * len) const
  3356   *len = wordchars_utf16_len;
  3357   return wordchars_utf16;
  3360 // is there compounding?
  3361 int AffixMgr::get_compound() const
  3363   return compoundflag || compoundbegin || numdefcpd;
  3366 // return the compound words control flag
  3367 FLAG AffixMgr::get_compoundflag() const
  3369   return compoundflag;
  3372 // return the forbidden words control flag
  3373 FLAG AffixMgr::get_forbiddenword() const
  3375   return forbiddenword;
  3378 // return the forbidden words control flag
  3379 FLAG AffixMgr::get_nosuggest() const
  3381   return nosuggest;
  3384 // return the forbidden words control flag
  3385 FLAG AffixMgr::get_nongramsuggest() const
  3387   return nongramsuggest;
  3390 // return the forbidden words flag modify flag
  3391 FLAG AffixMgr::get_needaffix() const
  3393   return needaffix;
  3396 // return the onlyincompound flag
  3397 FLAG AffixMgr::get_onlyincompound() const
  3399   return onlyincompound;
  3402 // return the compound word signal flag
  3403 FLAG AffixMgr::get_compoundroot() const
  3405   return compoundroot;
  3408 // return the compound begin signal flag
  3409 FLAG AffixMgr::get_compoundbegin() const
  3411   return compoundbegin;
  3414 // return the value of checknum
  3415 int AffixMgr::get_checknum() const
  3417   return checknum;
  3420 // return the value of prefix
  3421 const char * AffixMgr::get_prefix() const
  3423   if (pfx) return pfx->getKey();
  3424   return NULL;
  3427 // return the value of suffix
  3428 const char * AffixMgr::get_suffix() const
  3430   return sfxappnd;
  3433 // return the value of suffix
  3434 const char * AffixMgr::get_version() const
  3436   return version;
  3439 // return lemma_present flag
  3440 FLAG AffixMgr::get_lemma_present() const
  3442   return lemma_present;
  3445 // utility method to look up root words in hash table
  3446 struct hentry * AffixMgr::lookup(const char * word)
  3448   int i;
  3449   struct hentry * he = NULL;
  3450   for (i = 0; i < *maxdic && !he; i++) {
  3451     he = (alldic[i])->lookup(word);
  3453   return he;
  3456 // return the value of suffix
  3457 int AffixMgr::have_contclass() const
  3459   return havecontclass;
  3462 // return utf8
  3463 int AffixMgr::get_utf8() const
  3465   return utf8;
  3468 int AffixMgr::get_maxngramsugs(void) const
  3470   return maxngramsugs;
  3473 int AffixMgr::get_maxcpdsugs(void) const
  3475   return maxcpdsugs;
  3478 int AffixMgr::get_maxdiff(void) const
  3480   return maxdiff;
  3483 int AffixMgr::get_onlymaxdiff(void) const
  3485   return onlymaxdiff;
  3488 // return nosplitsugs
  3489 int AffixMgr::get_nosplitsugs(void) const
  3491   return nosplitsugs;
  3494 // return sugswithdots
  3495 int AffixMgr::get_sugswithdots(void) const
  3497   return sugswithdots;
  3500 /* parse flag */
  3501 int AffixMgr::parse_flag(char * line, unsigned short * out, FileMgr * af) {
  3502    char * s = NULL;
  3503    if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
  3504       HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
  3505       return 1;
  3507    if (parse_string(line, &s, af->getlinenum())) return 1;
  3508    *out = pHMgr->decode_flag(s);
  3509    free(s);
  3510    return 0;
  3513 /* parse num */
  3514 int AffixMgr::parse_num(char * line, int * out, FileMgr * af) {
  3515    char * s = NULL;
  3516    if (*out != -1) {
  3517       HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
  3518       return 1;
  3520    if (parse_string(line, &s, af->getlinenum())) return 1;
  3521    *out = atoi(s);
  3522    free(s);
  3523    return 0;
  3526 /* parse in the max syllablecount of compound words and  */
  3527 int  AffixMgr::parse_cpdsyllable(char * line, FileMgr * af)
  3529    char * tp = line;
  3530    char * piece;
  3531    int i = 0;
  3532    int np = 0;
  3533    w_char w[MAXWORDLEN];
  3534    piece = mystrsep(&tp, 0);
  3535    while (piece) {
  3536       if (*piece != '\0') {
  3537           switch(i) {
  3538              case 0: { np++; break; }
  3539              case 1: { cpdmaxsyllable = atoi(piece); np++; break; }
  3540              case 2: {
  3541                 if (!utf8) {
  3542                     cpdvowels = mystrdup(piece);
  3543                 } else {
  3544                     int n = u8_u16(w, MAXWORDLEN, piece);
  3545                     if (n > 0) {
  3546                         flag_qsort((unsigned short *) w, 0, n);
  3547                         cpdvowels_utf16 = (w_char *) malloc(n * sizeof(w_char));
  3548                         if (!cpdvowels_utf16) return 1;
  3549                         memcpy(cpdvowels_utf16, w, n * sizeof(w_char));
  3551                     cpdvowels_utf16_len = n;
  3553                 np++;
  3554                 break;
  3556              default: break;
  3558           i++;
  3560       piece = mystrsep(&tp, 0);
  3562    if (np < 2) {
  3563       HUNSPELL_WARNING(stderr, "error: line %d: missing compoundsyllable information\n", af->getlinenum());
  3564       return 1;
  3566    if (np == 2) cpdvowels = mystrdup("aeiouAEIOU");
  3567    return 0;
  3570 /* parse in the typical fault correcting table */
  3571 int  AffixMgr::parse_reptable(char * line, FileMgr * af)
  3573    if (numrep != 0) {
  3574       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  3575       return 1;
  3577    char * tp = line;
  3578    char * piece;
  3579    int i = 0;
  3580    int np = 0;
  3581    piece = mystrsep(&tp, 0);
  3582    while (piece) {
  3583        if (*piece != '\0') {
  3584           switch(i) {
  3585              case 0: { np++; break; }
  3586              case 1: { 
  3587                        numrep = atoi(piece);
  3588                        if (numrep < 1) {
  3589                           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
  3590                           return 1;
  3592                        reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
  3593                        if (!reptable) return 1;
  3594                        np++;
  3595                        break;
  3597              default: break;
  3599           i++;
  3601        piece = mystrsep(&tp, 0);
  3603    if (np != 2) {
  3604       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  3605       return 1;
  3608    /* now parse the numrep lines to read in the remainder of the table */
  3609    char * nl;
  3610    for (int j=0; j < numrep; j++) {
  3611         if (!(nl = af->getline())) return 1;
  3612         mychomp(nl);
  3613         tp = nl;
  3614         i = 0;
  3615         reptable[j].pattern = NULL;
  3616         reptable[j].pattern2 = NULL;
  3617         piece = mystrsep(&tp, 0);
  3618         while (piece) {
  3619            if (*piece != '\0') {
  3620                switch(i) {
  3621                   case 0: {
  3622                              if (strncmp(piece,"REP",3) != 0) {
  3623                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3624                                  numrep = 0;
  3625                                  return 1;
  3627                              break;
  3629                   case 1: {
  3630                             if (*piece == '^') reptable[j].start = true; else reptable[j].start = false;
  3631                             reptable[j].pattern = mystrrep(mystrdup(piece + int(reptable[j].start)),"_"," ");
  3632                             int lr = strlen(reptable[j].pattern) - 1;
  3633                             if (reptable[j].pattern[lr] == '$') {
  3634                                 reptable[j].end = true;
  3635                                 reptable[j].pattern[lr] = '\0';
  3636                             } else reptable[j].end = false;
  3637                             break;
  3639                   case 2: { reptable[j].pattern2 = mystrrep(mystrdup(piece),"_"," "); break; }
  3640                   default: break;
  3642                i++;
  3644            piece = mystrsep(&tp, 0);
  3646         if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
  3647              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3648              numrep = 0;
  3649              return 1;
  3652    return 0;
  3655 /* parse in the typical fault correcting table */
  3656 int  AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)
  3658    if (*rl) {
  3659       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  3660       return 1;
  3662    char * tp = line;
  3663    char * piece;
  3664    int i = 0;
  3665    int np = 0;
  3666    int numrl = 0;
  3667    piece = mystrsep(&tp, 0);
  3668    while (piece) {
  3669        if (*piece != '\0') {
  3670           switch(i) {
  3671              case 0: { np++; break; }
  3672              case 1: { 
  3673                        numrl = atoi(piece);
  3674                        if (numrl < 1) {
  3675                           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
  3676                           return 1;
  3678                        *rl = new RepList(numrl);
  3679                        if (!*rl) return 1;
  3680                        np++;
  3681                        break;
  3683              default: break;
  3685           i++;
  3687        piece = mystrsep(&tp, 0);
  3689    if (np != 2) {
  3690       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  3691       return 1;
  3694    /* now parse the num lines to read in the remainder of the table */
  3695    char * nl;
  3696    for (int j=0; j < numrl; j++) {
  3697         if (!(nl = af->getline())) return 1;
  3698         mychomp(nl);
  3699         tp = nl;
  3700         i = 0;
  3701         char * pattern = NULL;
  3702         char * pattern2 = NULL;
  3703         piece = mystrsep(&tp, 0);
  3704         while (piece) {
  3705            if (*piece != '\0') {
  3706                switch(i) {
  3707                   case 0: {
  3708                              if (strncmp(piece, keyword, strlen(keyword)) != 0) {
  3709                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3710                                  delete *rl;
  3711                                  *rl = NULL;
  3712                                  return 1;
  3714                              break;
  3716                   case 1: { pattern = mystrrep(mystrdup(piece),"_"," "); break; }
  3717                   case 2: { 
  3718                     pattern2 = mystrrep(mystrdup(piece),"_"," ");
  3719                     break; 
  3721                   default: break;
  3723                i++;
  3725            piece = mystrsep(&tp, 0);
  3727         if (!pattern || !pattern2) {
  3728             if (pattern)
  3729                 free(pattern);
  3730             if (pattern2)
  3731                 free(pattern2);
  3732             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3733             return 1;
  3735         (*rl)->add(pattern, pattern2);
  3737    return 0;
  3741 /* parse in the typical fault correcting table */
  3742 int  AffixMgr::parse_phonetable(char * line, FileMgr * af)
  3744    if (phone) {
  3745       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  3746       return 1;
  3748    char * tp = line;
  3749    char * piece;
  3750    int i = 0;
  3751    int np = 0;
  3752    piece = mystrsep(&tp, 0);
  3753    while (piece) {
  3754        if (*piece != '\0') {
  3755           switch(i) {
  3756              case 0: { np++; break; }
  3757              case 1: { 
  3758                        phone = (phonetable *) malloc(sizeof(struct phonetable));
  3759                        if (!phone) return 1;
  3760                        phone->num = atoi(piece);
  3761                        phone->rules = NULL;
  3762                        phone->utf8 = (char) utf8;
  3763                        if (phone->num < 1) {
  3764                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  3765                           return 1;
  3767                        phone->rules = (char * *) malloc(2 * (phone->num + 1) * sizeof(char *));
  3768                        if (!phone->rules) {
  3769                           free(phone);
  3770                           phone = NULL;
  3771                           return 1;
  3773                        np++;
  3774                        break;
  3776              default: break;
  3778           i++;
  3780        piece = mystrsep(&tp, 0);
  3782    if (np != 2) {
  3783       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  3784       return 1;
  3787    /* now parse the phone->num lines to read in the remainder of the table */
  3788    char * nl;
  3789    for (int j=0; j < phone->num; j++) {
  3790         if (!(nl = af->getline())) return 1;
  3791         mychomp(nl);
  3792         tp = nl;
  3793         i = 0;
  3794         phone->rules[j * 2] = NULL;
  3795         phone->rules[j * 2 + 1] = NULL;
  3796         piece = mystrsep(&tp, 0);
  3797         while (piece) {
  3798            if (*piece != '\0') {
  3799                switch(i) {
  3800                   case 0: {
  3801                              if (strncmp(piece,"PHONE",5) != 0) {
  3802                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3803                                  phone->num = 0;
  3804                                  return 1;
  3806                              break;
  3808                   case 1: { phone->rules[j * 2] = mystrrep(mystrdup(piece),"_",""); break; }
  3809                   case 2: { phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece),"_",""); break; }
  3810                   default: break;
  3812                i++;
  3814            piece = mystrsep(&tp, 0);
  3816         if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) {
  3817              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3818              phone->num = 0;
  3819              return 1;
  3822    phone->rules[phone->num * 2] = mystrdup("");
  3823    phone->rules[phone->num * 2 + 1] = mystrdup("");
  3824    init_phonet_hash(*phone);
  3825    return 0;
  3828 /* parse in the checkcompoundpattern table */
  3829 int  AffixMgr::parse_checkcpdtable(char * line, FileMgr * af)
  3831    if (numcheckcpd != 0) {
  3832       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  3833       return 1;
  3835    char * tp = line;
  3836    char * piece;
  3837    int i = 0;
  3838    int np = 0;
  3839    piece = mystrsep(&tp, 0);
  3840    while (piece) {
  3841        if (*piece != '\0') {
  3842           switch(i) {
  3843              case 0: { np++; break; }
  3844              case 1: { 
  3845                        numcheckcpd = atoi(piece);
  3846                        if (numcheckcpd < 1) {
  3847                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  3848                           return 1;
  3850                        checkcpdtable = (patentry *) malloc(numcheckcpd * sizeof(struct patentry));
  3851                        if (!checkcpdtable) return 1;
  3852                        np++;
  3853                        break;
  3855              default: break;
  3857           i++;
  3859        piece = mystrsep(&tp, 0);
  3861    if (np != 2) {
  3862       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",  af->getlinenum());
  3863       return 1;
  3866    /* now parse the numcheckcpd lines to read in the remainder of the table */
  3867    char * nl;
  3868    for (int j=0; j < numcheckcpd; j++) {
  3869         if (!(nl = af->getline())) return 1;
  3870         mychomp(nl);
  3871         tp = nl;
  3872         i = 0;
  3873         checkcpdtable[j].pattern = NULL;
  3874         checkcpdtable[j].pattern2 = NULL;
  3875         checkcpdtable[j].pattern3 = NULL;
  3876         checkcpdtable[j].cond = FLAG_NULL;
  3877         checkcpdtable[j].cond2 = FLAG_NULL;
  3878         piece = mystrsep(&tp, 0);
  3879         while (piece) {
  3880            if (*piece != '\0') {
  3881                switch(i) {
  3882                   case 0: {
  3883                              if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
  3884                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3885                                  numcheckcpd = 0;
  3886                                  return 1;
  3888                              break;
  3890                   case 1: { 
  3891                     checkcpdtable[j].pattern = mystrdup(piece); 
  3892                     char * p = strchr(checkcpdtable[j].pattern, '/');
  3893                     if (p) {
  3894                       *p = '\0';
  3895                     checkcpdtable[j].cond = pHMgr->decode_flag(p + 1);
  3897                     break; }
  3898                   case 2: { 
  3899                     checkcpdtable[j].pattern2 = mystrdup(piece);
  3900                     char * p = strchr(checkcpdtable[j].pattern2, '/');
  3901                     if (p) {
  3902                       *p = '\0';
  3903                       checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1);
  3905                     break;
  3907                   case 3: { checkcpdtable[j].pattern3 = mystrdup(piece); simplifiedcpd = 1; break; }
  3908                   default: break;
  3910                i++;
  3912            piece = mystrsep(&tp, 0);
  3914         if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
  3915              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3916              numcheckcpd = 0;
  3917              return 1;
  3920    return 0;
  3923 /* parse in the compound rule table */
  3924 int  AffixMgr::parse_defcpdtable(char * line, FileMgr * af)
  3926    if (numdefcpd != 0) {
  3927       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  3928       return 1;
  3930    char * tp = line;
  3931    char * piece;
  3932    int i = 0;
  3933    int np = 0;
  3934    piece = mystrsep(&tp, 0);
  3935    while (piece) {
  3936        if (*piece != '\0') {
  3937           switch(i) {
  3938              case 0: { np++; break; }
  3939              case 1: { 
  3940                        numdefcpd = atoi(piece);
  3941                        if (numdefcpd < 1) {
  3942                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  3943                           return 1;
  3945                        defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
  3946                        if (!defcpdtable) return 1;
  3947                        np++;
  3948                        break;
  3950              default: break;
  3952           i++;
  3954        piece = mystrsep(&tp, 0);
  3956    if (np != 2) {
  3957       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  3958       return 1;
  3961    /* now parse the numdefcpd lines to read in the remainder of the table */
  3962    char * nl;
  3963    for (int j=0; j < numdefcpd; j++) {
  3964         if (!(nl = af->getline())) return 1;
  3965         mychomp(nl);
  3966         tp = nl;
  3967         i = 0;
  3968         defcpdtable[j].def = NULL;
  3969         piece = mystrsep(&tp, 0);
  3970         while (piece) {
  3971            if (*piece != '\0') {
  3972                switch(i) {
  3973                   case 0: {
  3974                              if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
  3975                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  3976                                  numdefcpd = 0;
  3977                                  return 1;
  3979                              break;
  3981                   case 1: { // handle parenthesized flags
  3982                             if (strchr(piece, '(')) {
  3983                                 defcpdtable[j].def = (FLAG *) malloc(strlen(piece) * sizeof(FLAG));
  3984                                 defcpdtable[j].len = 0;
  3985                                 int end = 0;
  3986                                 FLAG * conv;
  3987                                 while (!end) {
  3988                                     char * par = piece + 1;
  3989                                     while (*par != '(' && *par != ')' && *par != '\0') par++;
  3990                                     if (*par == '\0') end = 1; else *par = '\0';
  3991                                     if (*piece == '(') piece++;
  3992                                     if (*piece == '*' || *piece == '?') {
  3993                                         defcpdtable[j].def[defcpdtable[j].len++] = (FLAG) *piece;
  3994                                     } else if (*piece != '\0') {
  3995                                         int l = pHMgr->decode_flags(&conv, piece, af);
  3996                                         for (int k = 0; k < l; k++) defcpdtable[j].def[defcpdtable[j].len++] = conv[k];
  3997                                         free(conv);
  3999                                     piece = par + 1;
  4001                             } else {
  4002                                 defcpdtable[j].len = pHMgr->decode_flags(&(defcpdtable[j].def), piece, af);
  4004                             break; 
  4006                   default: break;
  4008                i++;
  4010            piece = mystrsep(&tp, 0);
  4012         if (!defcpdtable[j].len) {
  4013              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  4014              numdefcpd = 0;
  4015              return 1;
  4018    return 0;
  4022 /* parse in the character map table */
  4023 int  AffixMgr::parse_maptable(char * line, FileMgr * af)
  4025    if (nummap != 0) {
  4026       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  4027       return 1;
  4029    char * tp = line;
  4030    char * piece;
  4031    int i = 0;
  4032    int np = 0;
  4033    piece = mystrsep(&tp, 0);
  4034    while (piece) {
  4035        if (*piece != '\0') {
  4036           switch(i) {
  4037              case 0: { np++; break; }
  4038              case 1: { 
  4039                        nummap = atoi(piece);
  4040                        if (nummap < 1) {
  4041                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  4042                           return 1;
  4044                        maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
  4045                        if (!maptable) return 1;
  4046                        np++;
  4047                        break;
  4049              default: break;
  4051           i++;
  4053        piece = mystrsep(&tp, 0);
  4055    if (np != 2) {
  4056       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  4057       return 1;
  4060    /* now parse the nummap lines to read in the remainder of the table */
  4061    char * nl;
  4062    for (int j=0; j < nummap; j++) {
  4063         if (!(nl = af->getline())) return 1;
  4064         mychomp(nl);
  4065         tp = nl;
  4066         i = 0;
  4067         maptable[j].set = NULL;
  4068         maptable[j].len = 0;
  4069         piece = mystrsep(&tp, 0);
  4070         while (piece) {
  4071            if (*piece != '\0') {
  4072                switch(i) {
  4073                   case 0: {
  4074                              if (strncmp(piece,"MAP",3) != 0) {
  4075                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  4076                                  nummap = 0;
  4077                                  return 1;
  4079                              break;
  4081                   case 1: {
  4082 			    int setn = 0;
  4083                             maptable[j].len = strlen(piece);
  4084                             maptable[j].set = (char **) malloc(maptable[j].len * sizeof(char*));
  4085                             if (!maptable[j].set) return 1;
  4086 			    for (int k = 0; k < maptable[j].len; k++) {
  4087 				int chl = 1;
  4088 				int chb = k;
  4089 			        if (piece[k] == '(') {
  4090 				    char * parpos = strchr(piece + k, ')');
  4091 				    if (parpos != NULL) {
  4092 					chb = k + 1;
  4093 					chl = (int)(parpos - piece) - k - 1;
  4094 					k = k + chl + 1;
  4096 				} else {
  4097 				    if (utf8 && (piece[k] & 0xc0) == 0xc0) {
  4098 					for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++);
  4099 					chl = k - chb;
  4100 					k--;
  4103 				maptable[j].set[setn] = (char *) malloc(chl + 1);
  4104 				if (!maptable[j].set[setn]) return 1;
  4105 				strncpy(maptable[j].set[setn], piece + chb, chl);
  4106 				maptable[j].set[setn][chl] = '\0';
  4107 				setn++;
  4109                             maptable[j].len = setn;
  4110                             break; }
  4111                   default: break;
  4113                i++;
  4115            piece = mystrsep(&tp, 0);
  4117         if (!maptable[j].set || !maptable[j].len) {
  4118              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  4119              nummap = 0;
  4120              return 1;
  4123    return 0;
  4126 /* parse in the word breakpoint table */
  4127 int  AffixMgr::parse_breaktable(char * line, FileMgr * af)
  4129    if (numbreak > -1) {
  4130       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
  4131       return 1;
  4133    char * tp = line;
  4134    char * piece;
  4135    int i = 0;
  4136    int np = 0;
  4137    piece = mystrsep(&tp, 0);
  4138    while (piece) {
  4139        if (*piece != '\0') {
  4140           switch(i) {
  4141              case 0: { np++; break; }
  4142              case 1: { 
  4143                        numbreak = atoi(piece);
  4144                        if (numbreak < 0) {
  4145                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
  4146                           return 1;
  4148                        if (numbreak == 0) return 0;
  4149                        breaktable = (char **) malloc(numbreak * sizeof(char *));
  4150                        if (!breaktable) return 1;
  4151                        np++;
  4152                        break;
  4154              default: break;
  4156           i++;
  4158        piece = mystrsep(&tp, 0);
  4160    if (np != 2) {
  4161       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  4162       return 1;
  4165    /* now parse the numbreak lines to read in the remainder of the table */
  4166    char * nl;
  4167    for (int j=0; j < numbreak; j++) {
  4168         if (!(nl = af->getline())) return 1;
  4169         mychomp(nl);
  4170         tp = nl;
  4171         i = 0;
  4172         piece = mystrsep(&tp, 0);
  4173         while (piece) {
  4174            if (*piece != '\0') {
  4175                switch(i) {
  4176                   case 0: {
  4177                              if (strncmp(piece,"BREAK",5) != 0) {
  4178                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  4179                                  numbreak = 0;
  4180                                  return 1;
  4182                              break;
  4184                   case 1: {
  4185                             breaktable[j] = mystrdup(piece);
  4186                             break;
  4188                   default: break;
  4190                i++;
  4192            piece = mystrsep(&tp, 0);
  4194         if (!breaktable) {
  4195              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
  4196              numbreak = 0;
  4197              return 1;
  4200    return 0;
  4203 void AffixMgr::reverse_condition(char * piece) {
  4204     int neg = 0;
  4205     for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
  4206         switch(*k) {
  4207           case '[': {
  4208                 if (neg) *(k+1) = '['; else *k = ']';
  4209                     break;
  4211           case ']': {
  4212                 *k = '[';
  4213                 if (neg) *(k+1) = '^';
  4214                 neg = 0;
  4215                 break;
  4217           case '^': {
  4218                if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
  4219                break;
  4221           default: {
  4222             if (neg) *(k+1) = *k;
  4228 int  AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
  4230    int numents = 0;      // number of affentry structures to parse
  4232    unsigned short aflag = 0;      // affix char identifier
  4234    char ff=0;
  4235    std::vector<affentry> affentries;
  4237    char * tp = line;
  4238    char * nl = line;
  4239    char * piece;
  4240    int i = 0;
  4242    // checking lines with bad syntax
  4243 #ifdef DEBUG
  4244    int basefieldnum = 0;
  4245 #endif
  4247    // split affix header line into pieces
  4249    int np = 0;
  4251    piece = mystrsep(&tp, 0);
  4252    while (piece) {
  4253       if (*piece != '\0') {
  4254           switch(i) {
  4255              // piece 1 - is type of affix
  4256              case 0: { np++; break; }
  4258              // piece 2 - is affix char
  4259              case 1: { 
  4260                     np++;
  4261                     aflag = pHMgr->decode_flag(piece);
  4262                     if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
  4263                         ((at == 'P') && (dupflags[aflag] & dupPFX))) {
  4264                         HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",
  4265                             af->getlinenum());
  4266                         // return 1; XXX permissive mode for bad dictionaries
  4268                     dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
  4269                     break; 
  4271              // piece 3 - is cross product indicator 
  4272              case 2: { np++; if (*piece == 'Y') ff = aeXPRODUCT; break; }
  4274              // piece 4 - is number of affentries
  4275              case 3: { 
  4276                        np++;
  4277                        numents = atoi(piece); 
  4278                        if (numents == 0) {
  4279                            char * err = pHMgr->encode_flag(aflag);
  4280                            if (err) {
  4281                                 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
  4282                                    af->getlinenum());
  4283                                 free(err);
  4285                            return 1;
  4287                        affentries.resize(numents);
  4288                        affentries[0].opts = ff;
  4289                        if (utf8) affentries[0].opts += aeUTF8;
  4290                        if (pHMgr->is_aliasf()) affentries[0].opts += aeALIASF;
  4291                        if (pHMgr->is_aliasm()) affentries[0].opts += aeALIASM;
  4292                        affentries[0].aflag = aflag;
  4295              default: break;
  4297           i++;
  4299       piece = mystrsep(&tp, 0);
  4301    // check to make sure we parsed enough pieces
  4302    if (np != 4) {
  4303        char * err = pHMgr->encode_flag(aflag);
  4304        if (err) {
  4305             HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
  4306             free(err);
  4308        return 1;
  4311    // now parse numents affentries for this affix
  4312    std::vector<affentry>::iterator start = affentries.begin();
  4313    std::vector<affentry>::iterator end = affentries.end();
  4314    for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
  4315       if (!(nl = af->getline())) return 1;
  4316       mychomp(nl);
  4317       tp = nl;
  4318       i = 0;
  4319       np = 0;
  4321       // split line into pieces
  4322       piece = mystrsep(&tp, 0);
  4323       while (piece) {
  4324          if (*piece != '\0') {
  4325              switch(i) {
  4326                 // piece 1 - is type
  4327                 case 0: { 
  4328                           np++;
  4329                           if (entry != start) entry->opts = start->opts &
  4330                              (char) (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
  4331                           break;
  4334                 // piece 2 - is affix char
  4335                 case 1: { 
  4336                           np++;
  4337                           if (pHMgr->decode_flag(piece) != aflag) {
  4338                               char * err = pHMgr->encode_flag(aflag);
  4339                               if (err) {
  4340                                 HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
  4341                                     af->getlinenum(), err);
  4342                                 free(err);
  4344                               return 1;
  4347                           if (entry != start) entry->aflag = start->aflag;
  4348                           break;
  4351                 // piece 3 - is string to strip or 0 for null 
  4352                 case 2: { 
  4353                           np++;
  4354                           if (complexprefixes) {
  4355                             if (utf8) reverseword_utf(piece); else reverseword(piece);
  4357                           entry->strip = mystrdup(piece);
  4358                           entry->stripl = (unsigned char) strlen(entry->strip);
  4359                           if (strcmp(entry->strip,"0") == 0) {
  4360                               free(entry->strip);
  4361                               entry->strip=mystrdup("");
  4362                               entry->stripl = 0;
  4364                           break; 
  4367                 // piece 4 - is affix string or 0 for null
  4368                 case 3: { 
  4369                           char * dash;  
  4370                           entry->morphcode = NULL;
  4371                           entry->contclass = NULL;
  4372                           entry->contclasslen = 0;
  4373                           np++;
  4374                           dash = strchr(piece, '/');
  4375                           if (dash) {
  4376                             *dash = '\0';
  4378                             if (ignorechars) {
  4379                               if (utf8) {
  4380                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
  4381                               } else {
  4382                                 remove_ignored_chars(piece,ignorechars);
  4386                             if (complexprefixes) {
  4387                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
  4389                             entry->appnd = mystrdup(piece);
  4391                             if (pHMgr->is_aliasf()) {
  4392                                 int index = atoi(dash + 1);
  4393                                 entry->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(entry->contclass), af);
  4394                                 if (!entry->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
  4395                             } else {
  4396                                 entry->contclasslen = (unsigned short) pHMgr->decode_flags(&(entry->contclass), dash + 1, af);
  4397                                 flag_qsort(entry->contclass, 0, entry->contclasslen);
  4399                             *dash = '/';
  4401                             havecontclass = 1;
  4402                             for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
  4403                               contclasses[(entry->contclass)[_i]] = 1;
  4405                           } else {
  4406                             if (ignorechars) {
  4407                               if (utf8) {
  4408                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
  4409                               } else {
  4410                                 remove_ignored_chars(piece,ignorechars);
  4414                             if (complexprefixes) {
  4415                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
  4417                             entry->appnd = mystrdup(piece);
  4420                           entry->appndl = (unsigned char) strlen(entry->appnd);
  4421                           if (strcmp(entry->appnd,"0") == 0) {
  4422                               free(entry->appnd);
  4423                               entry->appnd=mystrdup("");
  4424                               entry->appndl = 0;
  4426                           break; 
  4429                 // piece 5 - is the conditions descriptions
  4430                 case 4: { 
  4431                           np++;
  4432                           if (complexprefixes) {
  4433                             if (utf8) reverseword_utf(piece); else reverseword(piece);
  4434                             reverse_condition(piece);
  4436                           if (entry->stripl && (strcmp(piece, ".") != 0) &&
  4437                             redundant_condition(at, entry->strip, entry->stripl, piece, af->getlinenum()))
  4438                                 strcpy(piece, ".");
  4439                           if (at == 'S') {
  4440                             reverseword(piece);
  4441                             reverse_condition(piece);
  4443                           if (encodeit(*entry, piece)) return 1;
  4444                          break;
  4447                 case 5: {
  4448                           np++;
  4449                           if (pHMgr->is_aliasm()) {
  4450                             int index = atoi(piece);
  4451                             entry->morphcode = pHMgr->get_aliasm(index);
  4452                           } else {
  4453                             if (complexprefixes) { // XXX - fix me for morph. gen.
  4454                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
  4456                             // add the remaining of the line
  4457                             if (*tp) {
  4458                                 *(tp - 1) = ' ';
  4459                                 tp = tp + strlen(tp);
  4461                             entry->morphcode = mystrdup(piece);
  4462                             if (!entry->morphcode) return 1;
  4464                           break; 
  4466                 default: break;
  4468              i++;
  4470          piece = mystrsep(&tp, 0);
  4472       // check to make sure we parsed enough pieces
  4473       if (np < 4) {
  4474           char * err = pHMgr->encode_flag(aflag);
  4475           if (err) {
  4476             HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
  4477                 af->getlinenum(), err);
  4478             free(err);
  4480           return 1;
  4483 #ifdef DEBUG
  4484       // detect unnecessary fields, excepting comments
  4485       if (basefieldnum) {
  4486         int fieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
  4487           if (fieldnum != basefieldnum) 
  4488             HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", af->getlinenum());
  4489       } else {
  4490         basefieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
  4492 #endif
  4495    // now create SfxEntry or PfxEntry objects and use links to
  4496    // build an ordered (sorted by affix string) list
  4497    for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
  4498       if (at == 'P') {
  4499           PfxEntry * pfxptr = new PfxEntry(this,&(*entry));
  4500           build_pfxtree(pfxptr);
  4501       } else {
  4502           SfxEntry * sfxptr = new SfxEntry(this,&(*entry));
  4503           build_sfxtree(sfxptr); 
  4506    return 0;
  4509 int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, int linenum) {
  4510   int condl = strlen(cond);
  4511   int i;
  4512   int j;
  4513   int neg;
  4514   int in;
  4515   if (ft == 'P') { // prefix
  4516     if (strncmp(strip, cond, condl) == 0) return 1;
  4517     if (utf8) {
  4518     } else {
  4519       for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
  4520         if (cond[j] != '[') {
  4521           if (cond[j] != strip[i]) {
  4522             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  4523             return 0;
  4525         } else {
  4526           neg = (cond[j+1] == '^') ? 1 : 0;
  4527           in = 0;
  4528           do {
  4529             j++;
  4530             if (strip[i] == cond[j]) in = 1;
  4531           } while ((j < (condl - 1)) && (cond[j] != ']'));
  4532           if (j == (condl - 1) && (cond[j] != ']')) {
  4533             HUNSPELL_WARNING(stderr, "error: line %d: missing ] in condition:\n%s\n", linenum, cond);
  4534             return 0;
  4536           if ((!neg && !in) || (neg && in)) {
  4537             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  4538             return 0;
  4542       if (j >= condl) return 1;
  4544   } else { // suffix
  4545     if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) return 1;
  4546     if (utf8) {
  4547     } else {
  4548       for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
  4549         if (cond[j] != ']') {
  4550           if (cond[j] != strip[i]) {
  4551             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  4552             return 0;
  4554         } else {
  4555           in = 0;
  4556           do {
  4557             j--;
  4558             if (strip[i] == cond[j]) in = 1;
  4559           } while ((j > 0) && (cond[j] != '['));
  4560           if ((j == 0) && (cond[j] != '[')) {
  4561             HUNSPELL_WARNING(stderr, "error: line: %d: missing ] in condition:\n%s\n", linenum, cond);
  4562             return 0;
  4564           neg = (cond[j+1] == '^') ? 1 : 0;
  4565           if ((!neg && !in) || (neg && in)) {
  4566             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
  4567             return 0;
  4571       if (j < 0) return 1;
  4574   return 0;

mercurial