extensions/spellcheck/hunspell/src/affixmgr.hxx

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /******* BEGIN LICENSE BLOCK *******
     2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     3  * 
     4  * The contents of this file are subject to the Mozilla Public License Version
     5  * 1.1 (the "License"); you may not use this file except in compliance with
     6  * the License. You may obtain a copy of the License at
     7  * http://www.mozilla.org/MPL/
     8  * 
     9  * Software distributed under the License is distributed on an "AS IS" basis,
    10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    11  * for the specific language governing rights and limitations under the
    12  * License.
    13  * 
    14  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    15  * and László Németh (Hunspell). Portions created by the Initial Developers
    16  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    17  * 
    18  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    19  *                 David Einstein (deinst@world.std.com)
    20  *                 László Németh (nemethl@gyorsposta.hu)
    21  *                 Caolan McNamara (caolanm@redhat.com)
    22  *                 Davide Prina
    23  *                 Giuseppe Modugno
    24  *                 Gianluca Turconi
    25  *                 Simon Brouwer
    26  *                 Noll Janos
    27  *                 Biro Arpad
    28  *                 Goldman Eleonora
    29  *                 Sarlos Tamas
    30  *                 Bencsath Boldizsar
    31  *                 Halacsy Peter
    32  *                 Dvornik Laszlo
    33  *                 Gefferth Andras
    34  *                 Nagy Viktor
    35  *                 Varga Daniel
    36  *                 Chris Halls
    37  *                 Rene Engelhard
    38  *                 Bram Moolenaar
    39  *                 Dafydd Jones
    40  *                 Harri Pitkanen
    41  *                 Andras Timar
    42  *                 Tor Lillqvist
    43  * 
    44  * Alternatively, the contents of this file may be used under the terms of
    45  * either the GNU General Public License Version 2 or later (the "GPL"), or
    46  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    47  * in which case the provisions of the GPL or the LGPL are applicable instead
    48  * of those above. If you wish to allow use of your version of this file only
    49  * under the terms of either the GPL or the LGPL, and not to allow others to
    50  * use your version of this file under the terms of the MPL, indicate your
    51  * decision by deleting the provisions above and replace them with the notice
    52  * and other provisions required by the GPL or the LGPL. If you do not delete
    53  * the provisions above, a recipient may use your version of this file under
    54  * the terms of any one of the MPL, the GPL or the LGPL.
    55  *
    56  ******* END LICENSE BLOCK *******/
    58 #ifndef _AFFIXMGR_HXX_
    59 #define _AFFIXMGR_HXX_
    61 #include "hunvisapi.h"
    63 #include <stdio.h>
    65 #include "atypes.hxx"
    66 #include "baseaffix.hxx"
    67 #include "hashmgr.hxx"
    68 #include "phonet.hxx"
    69 #include "replist.hxx"
    71 // Bit masks used when checking for duplicated affix flags: dupSFX is
       // bit 0 and dupPFX is bit 1, so both can be set in one value.
       // Presumably they are stored in the dupflags buffer that
       // AffixMgr::parse_affix() receives -- TODO confirm in the implementation.
    72 #define dupSFX        (1 << 0)
    73 #define dupPFX        (1 << 1)
       // Forward declarations: the AffixMgr declaration in this header refers to
       // these two types through pointers only.
    75 class PfxEntry;
    76 class SfxEntry;
    78 class LIBHUNSPELL_DLL_EXPORTED AffixMgr
    79 {
         // Declaration of the affix manager. The data members that follow are
         // private (no access specifier precedes them in a class); after them
         // come the public checking/query interface and the private parsing and
         // tree-building helpers. No member function is defined inside this
         // class body.
         //
         // NOTE(review): the class declares a destructor and holds many raw
         // pointers, but declares neither a copy constructor nor a copy
         // assignment operator. If the destructor releases those pointers,
         // copying an AffixMgr would be unsafe -- confirm against the
         // implementation file.

         // Prefix/suffix entry tables with SETSIZE slots each. Presumably
         // pStart/sStart are indexed by a character of the affix text and
         // pFlag/sFlag by affix flag -- TODO confirm.
    81   PfxEntry *          pStart[SETSIZE];
    82   SfxEntry *          sStart[SETSIZE];
    83   PfxEntry *          pFlag[SETSIZE];
    84   SfxEntry *          sFlag[SETSIZE];
         // Dictionary handles. Their types match the constructor's
         // (HashMgr** ptr, int * md) arguments; presumably alldic/maxdic keep
         // those arguments -- TODO confirm.
    85   HashMgr *           pHMgr;
    86   HashMgr **          alldic;
    87   int *               maxdic;
    88   char *              keystring;
    89   char *              trystring;
    90   char *              encoding;
    91   struct cs_info *    csconv;
    92   int                 utf8;
    93   int                 complexprefixes;
         // Flags and switches controlling compound-word handling.
    94   FLAG                compoundflag;
    95   FLAG                compoundbegin;
    96   FLAG                compoundmiddle;
    97   FLAG                compoundend;
    98   FLAG                compoundroot;
    99   FLAG                compoundforbidflag;
   100   FLAG                compoundpermitflag;
   101   int                 checkcompounddup;
   102   int                 checkcompoundrep;
   103   int                 checkcompoundcase;
   104   int                 checkcompoundtriple;
   105   int                 simplifiedtriple;
   106   FLAG                forbiddenword;
   107   FLAG                nosuggest;
   108   FLAG                nongramsuggest;
   109   FLAG                needaffix;
   110   int                 cpdmin;
         // Tables, each paired with an int that presumably holds its entry
         // count (numrep/reptable, nummap/maptable, numbreak/breaktable,
         // numcheckcpd/checkcpdtable, numdefcpd/defcpdtable) -- TODO confirm.
   111   int                 numrep;
   112   replentry *         reptable;
   113   RepList *           iconvtable;
   114   RepList *           oconvtable;
   115   int                 nummap;
   116   mapentry *          maptable;
   117   int                 numbreak;
   118   char **             breaktable;
   119   int                 numcheckcpd;
   120   patentry *          checkcpdtable;
   121   int                 simplifiedcpd;
   122   int                 numdefcpd;
   123   flagentry *         defcpdtable;
   124   phonetable *        phone;
         // Suggestion-related settings; each has a matching public get_*()
         // accessor declared below.
   125   int                 maxngramsugs;
   126   int                 maxcpdsugs;
   127   int                 maxdiff;
   128   int                 onlymaxdiff;
   129   int                 nosplitsugs;
   130   int                 sugswithdots;
   131   int                 cpdwordmax;
   132   int                 cpdmaxsyllable;
   133   char *              cpdvowels;
   134   w_char *            cpdvowels_utf16;
   135   int                 cpdvowels_utf16_len;
   136   char *              cpdsyllablenum;
         // The six members below are flagged by the original author as
         // breaking statelessness. Presumably they hold results of the most
         // recent check call (compare get_prefix()/get_suffix()/get_derived()
         // below), which would make concurrent use of one AffixMgr unsafe --
         // NOTE(review): verify in the implementation.
   137   const char *        pfxappnd; // BUG: not stateless
   138   const char *        sfxappnd; // BUG: not stateless
   139   FLAG                sfxflag;  // BUG: not stateless
   140   char *              derived;  // BUG: not stateless
   141   SfxEntry *          sfx;      // BUG: not stateless
   142   PfxEntry *          pfx;      // BUG: not stateless
   143   int                 checknum;
         // Character sets kept both as char strings and as unsigned short
         // arrays with an explicit length.
   144   char *              wordchars;
   145   unsigned short *    wordchars_utf16;
   146   int                 wordchars_utf16_len;
   147   char *              ignorechars;
   148   unsigned short *    ignorechars_utf16;
   149   int                 ignorechars_utf16_len;
   150   char *              version;
   151   char *              lang;
   152   int                 langnum;
   153   FLAG                lemma_present;
   154   FLAG                circumfix;
   155   FLAG                onlyincompound;
   156   FLAG                keepcase;
   157   FLAG                forceucase;
   158   FLAG                warn;
   159   int                 forbidwarn;
   160   FLAG                substandard;
   161   int                 checksharps;
   162   int                 fullstrip;
   164   int                 havecontclass; // boolean variable
   165   char                contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
   167 public:
         // affpath: path of the affix file; ptr/md: dictionary array and its
         // count pointer (names suggest this -- TODO confirm); key: optional,
         // defaults to NULL and is forwarded type-wise to parse_file().
   169   AffixMgr(const char * affpath, HashMgr** ptr, int * md,
   170     const char * key = NULL);
   171   ~AffixMgr();
         // Affix checking entry points, each returning a struct hentry *.
         // NOTE(review): affix_check() spells its needflag parameter as
         // "const unsigned short" while the sibling checks use "const FLAG";
         // presumably FLAG is a typedef for unsigned short -- confirm.
   172   struct hentry *     affix_check(const char * word, int len,
   173             const unsigned short needflag = (unsigned short) 0,
   174             char in_compound = IN_CPD_NOT);
   175   struct hentry *     prefix_check(const char * word, int len,
   176             char in_compound, const FLAG needflag = FLAG_NULL);
         // NOTE(review): isSubset(), isRevSubset() and candidate_check() are
         // declared inline but not defined inside this class body; their
         // definitions must be visible wherever they are called -- confirm
         // where they live.
   177   inline int isSubset(const char * s1, const char * s2);
   178   struct hentry *     prefix_check_twosfx(const char * word, int len,
   179             char in_compound, const FLAG needflag = FLAG_NULL);
   180   inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
   181   struct hentry *     suffix_check(const char * word, int len, int sfxopts,
   182             PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
   183             const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
   184             char in_compound = IN_CPD_NOT);
   185   struct hentry *     suffix_check_twosfx(const char * word, int len,
   186             int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
         // "_morph" counterparts of the checks above; they return char *
         // instead of struct hentry *. NOTE(review): ownership of the returned
         // buffer is not visible from this header -- check the implementation
         // before freeing or retaining it.
   188   char * affix_check_morph(const char * word, int len,
   189             const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
   190   char * prefix_check_morph(const char * word, int len,
   191             char in_compound, const FLAG needflag = FLAG_NULL);
   192   char * suffix_check_morph (const char * word, int len, int sfxopts,
   193             PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
   194             const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
   196   char * prefix_check_twosfx_morph(const char * word, int len,
   197             char in_compound, const FLAG needflag = FLAG_NULL);
   198   char * suffix_check_twosfx_morph(const char * word, int len,
   199             int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
   201   char * morphgen(char * ts, int wl, const unsigned short * ap,
   202             unsigned short al, char * morph, char * targetmorph, int level);
         // The last two parameters of expand_rootword() are unnamed here;
         // their meaning is not visible from this header.
   204   int    expand_rootword(struct guessword * wlst, int maxn, const char * ts,
   205             int wl, const unsigned short * ap, unsigned short al, char * bad,
   206             int, char *);
         // Compound-word checks.
   208   short       get_syllable (const char * word, int wlen);
   209   int         cpdrep_check(const char * word, int len);
   210   int         cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
   211                     const char affixed);
   212   int         defcpd_check(hentry *** words, short wnum, hentry * rv,
   213                     hentry ** rwords, char all);
   214   int         cpdcase_check(const char * word, int len);
   215   inline int  candidate_check(const char * word, int len);
   216   void        setcminmax(int * cmin, int * cmax, const char * word, int len);
   217   struct hentry * compound_check(const char * word, int len, short wordnum,
   218             short numsyllable, short maxwordnum, short wnum, hentry ** words,
   219             char hu_mov_rule, char is_sug, int * info);
   221   int compound_check_morph(const char * word, int len, short wordnum,
   222             short numsyllable, short maxwordnum, short wnum, hentry ** words,
   223             char hu_mov_rule, char ** result, char * partresult);
   225   struct hentry * lookup(const char * word);
         // Accessors. All get_*() members below are const except
         // get_encoding() and get_key_string(). NOTE(review): presumably those
         // two may modify the object (e.g. fill in a default) -- confirm
         // before adding const to them.
   226   int                 get_numrep() const;
   227   struct replentry *  get_reptable() const;
   228   RepList *           get_iconvtable() const;
   229   RepList *           get_oconvtable() const;
   230   struct phonetable * get_phonetable() const;
   231   int                 get_nummap() const;
   232   struct mapentry *   get_maptable() const;
   233   int                 get_numbreak() const;
   234   char **             get_breaktable() const;
   235   char *              get_encoding();
   236   int                 get_langnum() const;
   237   char *              get_key_string();
   238   char *              get_try_string() const;
   239   const char *        get_wordchars() const;
   240   unsigned short *    get_wordchars_utf16(int * len) const;
   241   char *              get_ignore() const;
   242   unsigned short *    get_ignore_utf16(int * len) const;
   243   int                 get_compound() const;
   244   FLAG                get_compoundflag() const;
   245   FLAG                get_compoundbegin() const;
   246   FLAG                get_forbiddenword() const;
   247   FLAG                get_nosuggest() const;
   248   FLAG                get_nongramsuggest() const;
   249   FLAG                get_needaffix() const;
   250   FLAG                get_onlyincompound() const;
   251   FLAG                get_compoundroot() const;
   252   FLAG                get_lemma_present() const;
   253   int                 get_checknum() const;
   254   const char *        get_prefix() const;
   255   const char *        get_suffix() const;
   256   const char *        get_derived() const;
   257   const char *        get_version() const;
   258   int                 have_contclass() const;
   259   int                 get_utf8() const;
   260   int                 get_complexprefixes() const;
   261   char *              get_suffixed(char ) const;
   262   int                 get_maxngramsugs() const;
   263   int                 get_maxcpdsugs() const;
   264   int                 get_maxdiff() const;
   265   int                 get_onlymaxdiff() const;
   266   int                 get_nosplitsugs() const;
   267   int                 get_sugswithdots(void) const;
   268   FLAG                get_keepcase(void) const;
   269   FLAG                get_forceucase(void) const;
   270   FLAG                get_warn(void) const;
   271   int                 get_forbidwarn(void) const;
   272   int                 get_checksharps(void) const;
   273   char *              encode_flag(unsigned short aflag) const;
   274   int                 get_fullstrip() const;
   276 private:
         // Affix-file parsing helpers. Most take the current line plus a
         // FileMgr * for reading further lines. NOTE(review): FileMgr is not
         // declared in this class body and no include in this file names it
         // directly; presumably it arrives through one of the project headers
         // included by this file -- confirm.
   277   int  parse_file(const char * affpath, const char * key);
   278   int  parse_flag(char * line, unsigned short * out, FileMgr * af);
   279   int  parse_num(char * line, int * out, FileMgr * af);
   280   int  parse_cpdsyllable(char * line, FileMgr * af);
   281   int  parse_reptable(char * line, FileMgr * af);
   282   int  parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
   283   int  parse_phonetable(char * line, FileMgr * af);
   284   int  parse_maptable(char * line, FileMgr * af);
   285   int  parse_breaktable(char * line, FileMgr * af);
   286   int  parse_checkcpdtable(char * line, FileMgr * af);
   287   int  parse_defcpdtable(char * line, FileMgr * af);
   288   int  parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
         // Condition handling and prefix/suffix tree construction helpers.
         // Several parameters below are unnamed; their meaning is not visible
         // from this header.
   290   void reverse_condition(char *);
   291   void debugflag(char * result, unsigned short flag);
   292   int condlen(char *);
   293   int encodeit(affentry &entry, char * cs);
   294   int build_pfxtree(PfxEntry* pfxptr);
   295   int build_sfxtree(SfxEntry* sfxptr);
   296   int process_pfx_order();
   297   int process_sfx_order();
   298   PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
   299   SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
   300   int process_pfx_tree_to_list();
   301   int process_sfx_tree_to_list();
   302   int redundant_condition(char, char * strip, int stripl,
   303       const char * cond, int);
   304 };
   306 #endif

mercurial