extensions/spellcheck/hunspell/src/affixmgr.hxx

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,307 @@
     1.4 +/******* BEGIN LICENSE BLOCK *******
     1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     1.6 + * 
     1.7 + * The contents of this file are subject to the Mozilla Public License Version
     1.8 + * 1.1 (the "License"); you may not use this file except in compliance with
     1.9 + * the License. You may obtain a copy of the License at
    1.10 + * http://www.mozilla.org/MPL/
    1.11 + * 
    1.12 + * Software distributed under the License is distributed on an "AS IS" basis,
    1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    1.14 + * for the specific language governing rights and limitations under the
    1.15 + * License.
    1.16 + * 
    1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers
    1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    1.20 + * 
    1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    1.22 + *                 David Einstein (deinst@world.std.com)
    1.23 + *                 László Németh (nemethl@gyorsposta.hu)
    1.24 + *                 Caolan McNamara (caolanm@redhat.com)
    1.25 + *                 Davide Prina
    1.26 + *                 Giuseppe Modugno
    1.27 + *                 Gianluca Turconi
    1.28 + *                 Simon Brouwer
    1.29 + *                 Noll Janos
    1.30 + *                 Biro Arpad
    1.31 + *                 Goldman Eleonora
    1.32 + *                 Sarlos Tamas
    1.33 + *                 Bencsath Boldizsar
    1.34 + *                 Halacsy Peter
    1.35 + *                 Dvornik Laszlo
    1.36 + *                 Gefferth Andras
    1.37 + *                 Nagy Viktor
    1.38 + *                 Varga Daniel
    1.39 + *                 Chris Halls
    1.40 + *                 Rene Engelhard
    1.41 + *                 Bram Moolenaar
    1.42 + *                 Dafydd Jones
    1.43 + *                 Harri Pitkanen
    1.44 + *                 Andras Timar
    1.45 + *                 Tor Lillqvist
    1.46 + * 
    1.47 + * Alternatively, the contents of this file may be used under the terms of
    1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or
    1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead
    1.51 + * of those above. If you wish to allow use of your version of this file only
    1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to
    1.53 + * use your version of this file under the terms of the MPL, indicate your
    1.54 + * decision by deleting the provisions above and replace them with the notice
    1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete
    1.56 + * the provisions above, a recipient may use your version of this file under
    1.57 + * the terms of any one of the MPL, the GPL or the LGPL.
    1.58 + *
    1.59 + ******* END LICENSE BLOCK *******/
    1.60 +
    1.61 +#ifndef _AFFIXMGR_HXX_
    1.62 +#define _AFFIXMGR_HXX_
    1.63 +
    1.64 +#include "hunvisapi.h"
    1.65 +
    1.66 +#include <stdio.h>
    1.67 +
    1.68 +#include "atypes.hxx"
    1.69 +#include "baseaffix.hxx"
    1.70 +#include "hashmgr.hxx"
    1.71 +#include "phonet.hxx"
    1.72 +#include "replist.hxx"
    1.73 +
    1.74 +// check flag duplication: one bit per affix kind (NOTE(review): presumably OR-ed into the dupflags buffer taken by AffixMgr::parse_affix — confirm)
    1.75 +#define dupSFX        (1 << 0) // bit 0: suffix
    1.76 +#define dupPFX        (1 << 1) // bit 1: prefix
    1.77 +
    1.78 +class PfxEntry; // forward declarations: this header only uses pointers to these types
    1.79 +class SfxEntry;
    1.80 +
    1.81 +class LIBHUNSPELL_DLL_EXPORTED AffixMgr // Manager for one affix file's settings; declares the affix- and compound-checking API (definitions are not in this header).
    1.82 +{
    1.83 +  // Everything before "public:" is private (default access for a class).
    1.84 +  PfxEntry *          pStart[SETSIZE]; // SETSIZE-slot table of prefix entries (NOTE(review): presumably list heads keyed by first byte — confirm)
    1.85 +  SfxEntry *          sStart[SETSIZE]; // same layout for suffix entries
    1.86 +  PfxEntry *          pFlag[SETSIZE]; // SETSIZE-slot table of prefix entries (NOTE(review): presumably keyed by affix flag — confirm)
    1.87 +  SfxEntry *          sFlag[SETSIZE]; // same layout for suffix entries
    1.88 +  HashMgr *           pHMgr;
    1.89 +  HashMgr **          alldic; // NOTE(review): alldic/maxdic presumably hold the constructor's ptr/md arguments — confirm
    1.90 +  int *               maxdic;
    1.91 +  char *              keystring;
    1.92 +  char *              trystring;
    1.93 +  char *              encoding;
    1.94 +  struct cs_info *    csconv; // struct cs_info is not defined in this header
    1.95 +  int                 utf8;
    1.96 +  int                 complexprefixes;
    1.97 +  FLAG                compoundflag; // compound* FLAG members: compounding settings (NOTE(review): presumably from the matching affix-file options — confirm)
    1.98 +  FLAG                compoundbegin;
    1.99 +  FLAG                compoundmiddle;
   1.100 +  FLAG                compoundend;
   1.101 +  FLAG                compoundroot;
   1.102 +  FLAG                compoundforbidflag;
   1.103 +  FLAG                compoundpermitflag;
   1.104 +  int                 checkcompounddup;
   1.105 +  int                 checkcompoundrep;
   1.106 +  int                 checkcompoundcase;
   1.107 +  int                 checkcompoundtriple;
   1.108 +  int                 simplifiedtriple;
   1.109 +  FLAG                forbiddenword;
   1.110 +  FLAG                nosuggest;
   1.111 +  FLAG                nongramsuggest;
   1.112 +  FLAG                needaffix;
   1.113 +  int                 cpdmin;
   1.114 +  int                 numrep; // NOTE(review): num* members presumably count the entries of the table declared next to them — confirm
   1.115 +  replentry *         reptable;
   1.116 +  RepList *           iconvtable;
   1.117 +  RepList *           oconvtable;
   1.118 +  int                 nummap;
   1.119 +  mapentry *          maptable;
   1.120 +  int                 numbreak;
   1.121 +  char **             breaktable;
   1.122 +  int                 numcheckcpd;
   1.123 +  patentry *          checkcpdtable;
   1.124 +  int                 simplifiedcpd;
   1.125 +  int                 numdefcpd;
   1.126 +  flagentry *         defcpdtable;
   1.127 +  phonetable *        phone;
   1.128 +  int                 maxngramsugs;
   1.129 +  int                 maxcpdsugs;
   1.130 +  int                 maxdiff;
   1.131 +  int                 onlymaxdiff;
   1.132 +  int                 nosplitsugs;
   1.133 +  int                 sugswithdots;
   1.134 +  int                 cpdwordmax;
   1.135 +  int                 cpdmaxsyllable;
   1.136 +  char *              cpdvowels;
   1.137 +  w_char *            cpdvowels_utf16;
   1.138 +  int                 cpdvowels_utf16_len;
   1.139 +  char *              cpdsyllablenum;
   1.140 +  const char *        pfxappnd; // BUG: not stateless (reassignable state kept on the manager; NOTE(review): presumably what get_prefix() returns — confirm)
   1.141 +  const char *        sfxappnd; // BUG: not stateless (NOTE(review): presumably what get_suffix() returns — confirm)
   1.142 +  FLAG                sfxflag;  // BUG: not stateless
   1.143 +  char *              derived;  // BUG: not stateless (NOTE(review): presumably what get_derived() returns — confirm)
   1.144 +  SfxEntry *          sfx;      // BUG: not stateless
   1.145 +  PfxEntry *          pfx;      // BUG: not stateless
   1.146 +  int                 checknum;
   1.147 +  char *              wordchars;
   1.148 +  unsigned short *    wordchars_utf16;
   1.149 +  int                 wordchars_utf16_len;
   1.150 +  char *              ignorechars;
   1.151 +  unsigned short *    ignorechars_utf16;
   1.152 +  int                 ignorechars_utf16_len;
   1.153 +  char *              version;
   1.154 +  char *              lang;
   1.155 +  int                 langnum;
   1.156 +  FLAG                lemma_present;
   1.157 +  FLAG                circumfix;
   1.158 +  FLAG                onlyincompound;
   1.159 +  FLAG                keepcase;
   1.160 +  FLAG                forceucase;
   1.161 +  FLAG                warn;
   1.162 +  int                 forbidwarn;
   1.163 +  FLAG                substandard;
   1.164 +  int                 checksharps;
   1.165 +  int                 fullstrip;
   1.166 +
   1.167 +  int                 havecontclass; // used as a boolean
   1.168 +  char                contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix); CONTSIZE slots
   1.169 +
   1.170 +public:
   1.171 +  // NOTE(review): affpath/key have the same names as parse_file()'s parameters and are presumably forwarded to it — confirm.
   1.172 +  AffixMgr(const char * affpath, HashMgr** ptr, int * md,
   1.173 +    const char * key = NULL); // key is optional (defaults to NULL)
   1.174 +  ~AffixMgr();
   1.175 +  struct hentry *     affix_check(const char * word, int len,
   1.176 +            const unsigned short needflag = (unsigned short) 0, // NOTE(review): spelled unsigned short here but FLAG in the sibling checks; presumably FLAG is a typedef for it — confirm
   1.177 +            char in_compound = IN_CPD_NOT);
   1.178 +  struct hentry *     prefix_check(const char * word, int len,
   1.179 +            char in_compound, const FLAG needflag = FLAG_NULL);
   1.180 +  inline int isSubset(const char * s1, const char * s2); // declared inline but not defined in this header
   1.181 +  struct hentry *     prefix_check_twosfx(const char * word, int len,
   1.182 +            char in_compound, const FLAG needflag = FLAG_NULL);
   1.183 +  inline int isRevSubset(const char * s1, const char * end_of_s2, int len); // declared inline but not defined in this header
   1.184 +  struct hentry *     suffix_check(const char * word, int len, int sfxopts,
   1.185 +            PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
   1.186 +            const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
   1.187 +            char in_compound = IN_CPD_NOT);
   1.188 +  struct hentry *     suffix_check_twosfx(const char * word, int len,
   1.189 +            int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
   1.190 +  // *_morph variants return char * instead of struct hentry * (ownership of the returned buffer is not visible here — check the definitions).
   1.191 +  char * affix_check_morph(const char * word, int len,
   1.192 +            const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
   1.193 +  char * prefix_check_morph(const char * word, int len,
   1.194 +            char in_compound, const FLAG needflag = FLAG_NULL);
   1.195 +  char * suffix_check_morph (const char * word, int len, int sfxopts,
   1.196 +            PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
   1.197 +            const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
   1.198 +  // NOTE(review): "twosfx" presumably refers to two stacked suffixes (cf. "twofold affix" in the contclasses comment) — confirm.
   1.199 +  char * prefix_check_twosfx_morph(const char * word, int len,
   1.200 +            char in_compound, const FLAG needflag = FLAG_NULL);
   1.201 +  char * suffix_check_twosfx_morph(const char * word, int len,
   1.202 +            int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
   1.203 +
   1.204 +  char * morphgen(char * ts, int wl, const unsigned short * ap,
   1.205 +            unsigned short al, char * morph, char * targetmorph, int level);
   1.206 +
   1.207 +  int    expand_rootword(struct guessword * wlst, int maxn, const char * ts,
   1.208 +            int wl, const unsigned short * ap, unsigned short al, char * bad,
   1.209 +            int, char *); // last two parameters are unnamed here; see the definition for their meaning
   1.210 +  // Syllable counting and compound-word checks (cpd* / compound_*).
   1.211 +  short       get_syllable (const char * word, int wlen);
   1.212 +  int         cpdrep_check(const char * word, int len);
   1.213 +  int         cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
   1.214 +                    const char affixed);
   1.215 +  int         defcpd_check(hentry *** words, short wnum, hentry * rv,
   1.216 +                    hentry ** rwords, char all);
   1.217 +  int         cpdcase_check(const char * word, int len);
   1.218 +  inline int  candidate_check(const char * word, int len); // declared inline but not defined in this header
   1.219 +  void        setcminmax(int * cmin, int * cmax, const char * word, int len); // cmin/cmax are out-pointers (non-const int *)
   1.220 +  struct hentry * compound_check(const char * word, int len, short wordnum,
   1.221 +            short numsyllable, short maxwordnum, short wnum, hentry ** words,
   1.222 +            char hu_mov_rule, char is_sug, int * info);
   1.223 +
   1.224 +  int compound_check_morph(const char * word, int len, short wordnum,
   1.225 +            short numsyllable, short maxwordnum, short wnum, hentry ** words,
   1.226 +            char hu_mov_rule, char ** result, char * partresult);
   1.227 +  // Dictionary lookup, followed by accessors (NOTE(review): presumably thin getters for the private members above — confirm).
   1.228 +  struct hentry * lookup(const char * word);
   1.229 +  int                 get_numrep() const;
   1.230 +  struct replentry *  get_reptable() const;
   1.231 +  RepList *           get_iconvtable() const;
   1.232 +  RepList *           get_oconvtable() const;
   1.233 +  struct phonetable * get_phonetable() const;
   1.234 +  int                 get_nummap() const;
   1.235 +  struct mapentry *   get_maptable() const;
   1.236 +  int                 get_numbreak() const;
   1.237 +  char **             get_breaktable() const;
   1.238 +  char *              get_encoding(); // not const, unlike most get_* accessors (NOTE(review): presumably may modify the object — confirm)
   1.239 +  int                 get_langnum() const;
   1.240 +  char *              get_key_string(); // not const, unlike most get_* accessors (NOTE(review): presumably may modify the object — confirm)
   1.241 +  char *              get_try_string() const;
   1.242 +  const char *        get_wordchars() const;
   1.243 +  unsigned short *    get_wordchars_utf16(int * len) const; // len is an out-pointer (non-const int *)
   1.244 +  char *              get_ignore() const;
   1.245 +  unsigned short *    get_ignore_utf16(int * len) const; // len is an out-pointer (non-const int *)
   1.246 +  int                 get_compound() const;
   1.247 +  FLAG                get_compoundflag() const;
   1.248 +  FLAG                get_compoundbegin() const;
   1.249 +  FLAG                get_forbiddenword() const;
   1.250 +  FLAG                get_nosuggest() const;
   1.251 +  FLAG                get_nongramsuggest() const;
   1.252 +  FLAG                get_needaffix() const;
   1.253 +  FLAG                get_onlyincompound() const;
   1.254 +  FLAG                get_compoundroot() const;
   1.255 +  FLAG                get_lemma_present() const;
   1.256 +  int                 get_checknum() const;
   1.257 +  const char *        get_prefix() const;
   1.258 +  const char *        get_suffix() const;
   1.259 +  const char *        get_derived() const;
   1.260 +  const char *        get_version() const;
   1.261 +  int                 have_contclass() const;
   1.262 +  int                 get_utf8() const;
   1.263 +  int                 get_complexprefixes() const;
   1.264 +  char *              get_suffixed(char ) const; // parameter is unnamed here; see the definition
   1.265 +  int                 get_maxngramsugs() const;
   1.266 +  int                 get_maxcpdsugs() const;
   1.267 +  int                 get_maxdiff() const;
   1.268 +  int                 get_onlymaxdiff() const;
   1.269 +  int                 get_nosplitsugs() const;
   1.270 +  int                 get_sugswithdots(void) const;
   1.271 +  FLAG                get_keepcase(void) const;
   1.272 +  FLAG                get_forceucase(void) const;
   1.273 +  FLAG                get_warn(void) const;
   1.274 +  int                 get_forbidwarn(void) const;
   1.275 +  int                 get_checksharps(void) const;
   1.276 +  char *              encode_flag(unsigned short aflag) const;
   1.277 +  int                 get_fullstrip() const;
   1.278 +
   1.279 +private: // parse_file() aside, each parse_* helper takes the current line plus the FileMgr being read (NOTE(review): presumably to pull follow-up lines — confirm)
   1.280 +  int  parse_file(const char * affpath, const char * key);
   1.281 +  int  parse_flag(char * line, unsigned short * out, FileMgr * af);
   1.282 +  int  parse_num(char * line, int * out, FileMgr * af);
   1.283 +  int  parse_cpdsyllable(char * line, FileMgr * af);
   1.284 +  int  parse_reptable(char * line, FileMgr * af);
   1.285 +  int  parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
   1.286 +  int  parse_phonetable(char * line, FileMgr * af);
   1.287 +  int  parse_maptable(char * line, FileMgr * af);
   1.288 +  int  parse_breaktable(char * line, FileMgr * af);
   1.289 +  int  parse_checkcpdtable(char * line, FileMgr * af);
   1.290 +  int  parse_defcpdtable(char * line, FileMgr * af);
   1.291 +  int  parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
   1.292 +  // Condition helpers and prefix/suffix structure construction (NOTE(review): names suggest tree building, ordering, then tree-to-list flattening — confirm).
   1.293 +  void reverse_condition(char *);
   1.294 +  void debugflag(char * result, unsigned short flag);
   1.295 +  int condlen(char *);
   1.296 +  int encodeit(affentry &entry, char * cs);
   1.297 +  int build_pfxtree(PfxEntry* pfxptr);
   1.298 +  int build_sfxtree(SfxEntry* sfxptr);
   1.299 +  int process_pfx_order();
   1.300 +  int process_sfx_order();
   1.301 +  PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
   1.302 +  SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
   1.303 +  int process_pfx_tree_to_list();
   1.304 +  int process_sfx_tree_to_list();
   1.305 +  int redundant_condition(char, char * strip, int stripl,
   1.306 +      const char * cond, int); // first and last parameters are unnamed here; see the definition
   1.307 +};
   1.308 +
   1.309 +#endif
   1.310 +

mercurial