extensions/spellcheck/hunspell/src/affixmgr.hxx

Wed, 31 Dec 2014 13:27:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 13:27:57 +0100
branch
TOR_BUG_3246
changeset 6
8bccb770b82d
permissions
-rw-r--r--

Ignore runtime configuration files generated during quality assurance.

michael@0 1 /******* BEGIN LICENSE BLOCK *******
michael@0 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
michael@0 3 *
michael@0 4 * The contents of this file are subject to the Mozilla Public License Version
michael@0 5 * 1.1 (the "License"); you may not use this file except in compliance with
michael@0 6 * the License. You may obtain a copy of the License at
michael@0 7 * http://www.mozilla.org/MPL/
michael@0 8 *
michael@0 9 * Software distributed under the License is distributed on an "AS IS" basis,
michael@0 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
michael@0 11 * for the specific language governing rights and limitations under the
michael@0 12 * License.
michael@0 13 *
michael@0 14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
michael@0 15 * and László Németh (Hunspell). Portions created by the Initial Developers
michael@0 16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
michael@0 17 *
michael@0 18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
michael@0 19 * David Einstein (deinst@world.std.com)
michael@0 20 * László Németh (nemethl@gyorsposta.hu)
michael@0 21 * Caolan McNamara (caolanm@redhat.com)
michael@0 22 * Davide Prina
michael@0 23 * Giuseppe Modugno
michael@0 24 * Gianluca Turconi
michael@0 25 * Simon Brouwer
michael@0 26 * Noll Janos
michael@0 27 * Biro Arpad
michael@0 28 * Goldman Eleonora
michael@0 29 * Sarlos Tamas
michael@0 30 * Bencsath Boldizsar
michael@0 31 * Halacsy Peter
michael@0 32 * Dvornik Laszlo
michael@0 33 * Gefferth Andras
michael@0 34 * Nagy Viktor
michael@0 35 * Varga Daniel
michael@0 36 * Chris Halls
michael@0 37 * Rene Engelhard
michael@0 38 * Bram Moolenaar
michael@0 39 * Dafydd Jones
michael@0 40 * Harri Pitkanen
michael@0 41 * Andras Timar
michael@0 42 * Tor Lillqvist
michael@0 43 *
michael@0 44 * Alternatively, the contents of this file may be used under the terms of
michael@0 45 * either the GNU General Public License Version 2 or later (the "GPL"), or
michael@0 46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
michael@0 47 * in which case the provisions of the GPL or the LGPL are applicable instead
michael@0 48 * of those above. If you wish to allow use of your version of this file only
michael@0 49 * under the terms of either the GPL or the LGPL, and not to allow others to
michael@0 50 * use your version of this file under the terms of the MPL, indicate your
michael@0 51 * decision by deleting the provisions above and replace them with the notice
michael@0 52 * and other provisions required by the GPL or the LGPL. If you do not delete
michael@0 53 * the provisions above, a recipient may use your version of this file under
michael@0 54 * the terms of any one of the MPL, the GPL or the LGPL.
michael@0 55 *
michael@0 56 ******* END LICENSE BLOCK *******/
michael@0 57
michael@0 58 #ifndef _AFFIXMGR_HXX_
michael@0 59 #define _AFFIXMGR_HXX_
michael@0 60
michael@0 61 #include "hunvisapi.h"
michael@0 62
michael@0 63 #include <stdio.h>
michael@0 64
michael@0 65 #include "atypes.hxx"
michael@0 66 #include "baseaffix.hxx"
michael@0 67 #include "hashmgr.hxx"
michael@0 68 #include "phonet.hxx"
michael@0 69 #include "replist.hxx"
michael@0 70
michael@0 71 // check flag duplication: two distinct single-bit masks, so both can be OR-ed into one value
michael@0 72 #define dupSFX (1 << 0) // bit 0; NOTE(review): presumably marks a suffix flag already seen -- confirm in parse_affix
michael@0 73 #define dupPFX (1 << 1) // bit 1; NOTE(review): presumably marks a prefix flag already seen -- confirm in parse_affix
michael@0 74
michael@0 75 class PfxEntry; // forward declaration: this header only uses PfxEntry through pointers
michael@0 76 class SfxEntry; // forward declaration: this header only uses SfxEntry through pointers
michael@0 77
michael@0 78 class LIBHUNSPELL_DLL_EXPORTED AffixMgr
michael@0 79 {
michael@0 80 // Layout: private state first (class default access), then the public checking/accessor API, then private parsing and tree-building helpers.
michael@0 81 PfxEntry * pStart[SETSIZE]; // prefix entry table, SETSIZE slots (NOTE(review): presumably keyed by first byte of the affix text -- confirm)
michael@0 82 SfxEntry * sStart[SETSIZE]; // suffix counterpart of pStart
michael@0 83 PfxEntry * pFlag[SETSIZE]; // prefix entry table, SETSIZE slots (NOTE(review): presumably keyed by affix flag -- confirm)
michael@0 84 SfxEntry * sFlag[SETSIZE]; // suffix counterpart of pFlag
michael@0 85 HashMgr * pHMgr; // NOTE(review): presumably the primary dictionary hash manager -- confirm
michael@0 86 HashMgr ** alldic; // NOTE(review): presumably the HashMgr** handed to the constructor -- confirm
michael@0 87 int * maxdic; // NOTE(review): presumably the int* handed to the constructor -- confirm
michael@0 88 char * keystring;
michael@0 89 char * trystring;
michael@0 90 char * encoding;
michael@0 91 struct cs_info * csconv;
michael@0 92 int utf8;
michael@0 93 int complexprefixes;
michael@0 94 FLAG compoundflag; // compound* members of type FLAG hold flag values; their meaning comes from the affix-file format, not from this header
michael@0 95 FLAG compoundbegin;
michael@0 96 FLAG compoundmiddle;
michael@0 97 FLAG compoundend;
michael@0 98 FLAG compoundroot;
michael@0 99 FLAG compoundforbidflag;
michael@0 100 FLAG compoundpermitflag;
michael@0 101 int checkcompounddup; // checkcompound* are plain ints; NOTE(review): presumably used as booleans like havecontclass -- confirm
michael@0 102 int checkcompoundrep;
michael@0 103 int checkcompoundcase;
michael@0 104 int checkcompoundtriple;
michael@0 105 int simplifiedtriple;
michael@0 106 FLAG forbiddenword;
michael@0 107 FLAG nosuggest;
michael@0 108 FLAG nongramsuggest;
michael@0 109 FLAG needaffix;
michael@0 110 int cpdmin;
michael@0 111 int numrep; // NOTE(review): each num* member looks like the entry count of the table declared right after it (numrep/reptable, nummap/maptable, ...) -- TODO confirm
michael@0 112 replentry * reptable;
michael@0 113 RepList * iconvtable;
michael@0 114 RepList * oconvtable;
michael@0 115 int nummap;
michael@0 116 mapentry * maptable;
michael@0 117 int numbreak;
michael@0 118 char ** breaktable;
michael@0 119 int numcheckcpd;
michael@0 120 patentry * checkcpdtable;
michael@0 121 int simplifiedcpd;
michael@0 122 int numdefcpd;
michael@0 123 flagentry * defcpdtable;
michael@0 124 phonetable * phone;
michael@0 125 int maxngramsugs;
michael@0 126 int maxcpdsugs;
michael@0 127 int maxdiff;
michael@0 128 int onlymaxdiff;
michael@0 129 int nosplitsugs;
michael@0 130 int sugswithdots;
michael@0 131 int cpdwordmax;
michael@0 132 int cpdmaxsyllable;
michael@0 133 char * cpdvowels;
michael@0 134 w_char * cpdvowels_utf16;
michael@0 135 int cpdvowels_utf16_len;
michael@0 136 char * cpdsyllablenum;
michael@0 137 const char * pfxappnd; // BUG: not stateless
michael@0 138 const char * sfxappnd; // BUG: not stateless
michael@0 139 FLAG sfxflag; // BUG: not stateless
michael@0 140 char * derived; // BUG: not stateless
michael@0 141 SfxEntry * sfx; // BUG: not stateless
michael@0 142 PfxEntry * pfx; // BUG: not stateless
michael@0 143 int checknum;
michael@0 144 char * wordchars;
michael@0 145 unsigned short * wordchars_utf16; // paired with wordchars_utf16_len below
michael@0 146 int wordchars_utf16_len;
michael@0 147 char * ignorechars;
michael@0 148 unsigned short * ignorechars_utf16; // paired with ignorechars_utf16_len below
michael@0 149 int ignorechars_utf16_len;
michael@0 150 char * version;
michael@0 151 char * lang;
michael@0 152 int langnum;
michael@0 153 FLAG lemma_present;
michael@0 154 FLAG circumfix;
michael@0 155 FLAG onlyincompound;
michael@0 156 FLAG keepcase;
michael@0 157 FLAG forceucase;
michael@0 158 FLAG warn;
michael@0 159 int forbidwarn;
michael@0 160 FLAG substandard;
michael@0 161 int checksharps;
michael@0 162 int fullstrip;
michael@0 163
michael@0 164 int havecontclass; // boolean variable
michael@0 165 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
michael@0 166
michael@0 167 public:
michael@0 168 // construction and destruction; the key argument is optional (defaults to NULL)
michael@0 169 AffixMgr(const char * affpath, HashMgr** ptr, int * md,
michael@0 170 const char * key = NULL);
michael@0 171 ~AffixMgr();
michael@0 172 struct hentry * affix_check(const char * word, int len,
michael@0 173 const unsigned short needflag = (unsigned short) 0, // NOTE(review): spelled unsigned short here but FLAG in the sibling checks -- presumably the same type, confirm
michael@0 174 char in_compound = IN_CPD_NOT);
michael@0 175 struct hentry * prefix_check(const char * word, int len,
michael@0 176 char in_compound, const FLAG needflag = FLAG_NULL);
michael@0 177 inline int isSubset(const char * s1, const char * s2);
michael@0 178 struct hentry * prefix_check_twosfx(const char * word, int len,
michael@0 179 char in_compound, const FLAG needflag = FLAG_NULL);
michael@0 180 inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
michael@0 181 struct hentry * suffix_check(const char * word, int len, int sfxopts,
michael@0 182 PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
michael@0 183 const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
michael@0 184 char in_compound = IN_CPD_NOT);
michael@0 185 struct hentry * suffix_check_twosfx(const char * word, int len,
michael@0 186 int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
michael@0 187 // *_morph variants mirror the checks above but return char *; suffix_check_morph drops the wlst/maxSug/ns parameters (NOTE(review): ownership of the returned buffer is not visible in this header)
michael@0 188 char * affix_check_morph(const char * word, int len,
michael@0 189 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
michael@0 190 char * prefix_check_morph(const char * word, int len,
michael@0 191 char in_compound, const FLAG needflag = FLAG_NULL);
michael@0 192 char * suffix_check_morph (const char * word, int len, int sfxopts,
michael@0 193 PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
michael@0 194 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
michael@0 195
michael@0 196 char * prefix_check_twosfx_morph(const char * word, int len,
michael@0 197 char in_compound, const FLAG needflag = FLAG_NULL);
michael@0 198 char * suffix_check_twosfx_morph(const char * word, int len,
michael@0 199 int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
michael@0 200
michael@0 201 char * morphgen(char * ts, int wl, const unsigned short * ap,
michael@0 202 unsigned short al, char * morph, char * targetmorph, int level);
michael@0 203
michael@0 204 int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
michael@0 205 int wl, const unsigned short * ap, unsigned short al, char * bad,
michael@0 206 int, char *); // NOTE(review): the last two parameters are unnamed here; take their meaning from the definition
michael@0 207 // compound-word related checks
michael@0 208 short get_syllable (const char * word, int wlen);
michael@0 209 int cpdrep_check(const char * word, int len);
michael@0 210 int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
michael@0 211 const char affixed);
michael@0 212 int defcpd_check(hentry *** words, short wnum, hentry * rv,
michael@0 213 hentry ** rwords, char all);
michael@0 214 int cpdcase_check(const char * word, int len);
michael@0 215 inline int candidate_check(const char * word, int len);
michael@0 216 void setcminmax(int * cmin, int * cmax, const char * word, int len);
michael@0 217 struct hentry * compound_check(const char * word, int len, short wordnum,
michael@0 218 short numsyllable, short maxwordnum, short wnum, hentry ** words,
michael@0 219 char hu_mov_rule, char is_sug, int * info);
michael@0 220 // same leading parameters as compound_check, but returns int and takes result/partresult in place of is_sug/info
michael@0 221 int compound_check_morph(const char * word, int len, short wordnum,
michael@0 222 short numsyllable, short maxwordnum, short wnum, hentry ** words,
michael@0 223 char hu_mov_rule, char ** result, char * partresult);
michael@0 224 // lookup plus accessors; only declarations appear here, the bodies are defined outside this header
michael@0 225 struct hentry * lookup(const char * word);
michael@0 226 int get_numrep() const;
michael@0 227 struct replentry * get_reptable() const;
michael@0 228 RepList * get_iconvtable() const;
michael@0 229 RepList * get_oconvtable() const;
michael@0 230 struct phonetable * get_phonetable() const;
michael@0 231 int get_nummap() const;
michael@0 232 struct mapentry * get_maptable() const;
michael@0 233 int get_numbreak() const;
michael@0 234 char ** get_breaktable() const;
michael@0 235 char * get_encoding(); // not const-qualified, unlike most accessors in this list
michael@0 236 int get_langnum() const;
michael@0 237 char * get_key_string(); // not const-qualified, unlike most accessors in this list
michael@0 238 char * get_try_string() const;
michael@0 239 const char * get_wordchars() const;
michael@0 240 unsigned short * get_wordchars_utf16(int * len) const; // len is an int* out-parameter by signature; NOTE(review): presumably receives the array length
michael@0 241 char * get_ignore() const;
michael@0 242 unsigned short * get_ignore_utf16(int * len) const; // len is an int* out-parameter by signature; NOTE(review): presumably receives the array length
michael@0 243 int get_compound() const;
michael@0 244 FLAG get_compoundflag() const;
michael@0 245 FLAG get_compoundbegin() const;
michael@0 246 FLAG get_forbiddenword() const;
michael@0 247 FLAG get_nosuggest() const;
michael@0 248 FLAG get_nongramsuggest() const;
michael@0 249 FLAG get_needaffix() const;
michael@0 250 FLAG get_onlyincompound() const;
michael@0 251 FLAG get_compoundroot() const;
michael@0 252 FLAG get_lemma_present() const;
michael@0 253 int get_checknum() const;
michael@0 254 const char * get_prefix() const;
michael@0 255 const char * get_suffix() const;
michael@0 256 const char * get_derived() const;
michael@0 257 const char * get_version() const;
michael@0 258 int have_contclass() const;
michael@0 259 int get_utf8() const;
michael@0 260 int get_complexprefixes() const;
michael@0 261 char * get_suffixed(char ) const;
michael@0 262 int get_maxngramsugs() const;
michael@0 263 int get_maxcpdsugs() const;
michael@0 264 int get_maxdiff() const;
michael@0 265 int get_onlymaxdiff() const;
michael@0 266 int get_nosplitsugs() const;
michael@0 267 int get_sugswithdots(void) const;
michael@0 268 FLAG get_keepcase(void) const;
michael@0 269 FLAG get_forceucase(void) const;
michael@0 270 FLAG get_warn(void) const;
michael@0 271 int get_forbidwarn(void) const;
michael@0 272 int get_checksharps(void) const;
michael@0 273 char * encode_flag(unsigned short aflag) const;
michael@0 274 int get_fullstrip() const;
michael@0 275
michael@0 276 private:
michael@0 277 int parse_file(const char * affpath, const char * key); // takes the same (affpath, key) pair as the constructor; NOTE(review): presumably called from it -- confirm
michael@0 278 int parse_flag(char * line, unsigned short * out, FileMgr * af); // parse_* helpers share the (char * line, ..., FileMgr * af) shape and return int; the return convention is not visible here
michael@0 279 int parse_num(char * line, int * out, FileMgr * af);
michael@0 280 int parse_cpdsyllable(char * line, FileMgr * af);
michael@0 281 int parse_reptable(char * line, FileMgr * af);
michael@0 282 int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
michael@0 283 int parse_phonetable(char * line, FileMgr * af);
michael@0 284 int parse_maptable(char * line, FileMgr * af);
michael@0 285 int parse_breaktable(char * line, FileMgr * af);
michael@0 286 int parse_checkcpdtable(char * line, FileMgr * af);
michael@0 287 int parse_defcpdtable(char * line, FileMgr * af);
michael@0 288 int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
michael@0 289 // remaining private helpers: condition handling, encoding, and pfx/sfx tree building and ordering
michael@0 290 void reverse_condition(char *);
michael@0 291 void debugflag(char * result, unsigned short flag);
michael@0 292 int condlen(char *);
michael@0 293 int encodeit(affentry &entry, char * cs);
michael@0 294 int build_pfxtree(PfxEntry* pfxptr);
michael@0 295 int build_sfxtree(SfxEntry* sfxptr);
michael@0 296 int process_pfx_order();
michael@0 297 int process_sfx_order();
michael@0 298 PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
michael@0 299 SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
michael@0 300 int process_pfx_tree_to_list();
michael@0 301 int process_sfx_tree_to_list();
michael@0 302 int redundant_condition(char, char * strip, int stripl,
michael@0 303 const char * cond, int);
michael@0 304 };
michael@0 305
michael@0 306 #endif
michael@0 307

mercurial