1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,307 @@ 1.4 +/******* BEGIN LICENSE BLOCK ******* 1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 1.6 + * 1.7 + * The contents of this file are subject to the Mozilla Public License Version 1.8 + * 1.1 (the "License"); you may not use this file except in compliance with 1.9 + * the License. You may obtain a copy of the License at 1.10 + * http://www.mozilla.org/MPL/ 1.11 + * 1.12 + * Software distributed under the License is distributed on an "AS IS" basis, 1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 1.14 + * for the specific language governing rights and limitations under the 1.15 + * License. 1.16 + * 1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) 1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers 1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. 1.20 + * 1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) 1.22 + * David Einstein (deinst@world.std.com) 1.23 + * László Németh (nemethl@gyorsposta.hu) 1.24 + * Caolan McNamara (caolanm@redhat.com) 1.25 + * Davide Prina 1.26 + * Giuseppe Modugno 1.27 + * Gianluca Turconi 1.28 + * Simon Brouwer 1.29 + * Noll Janos 1.30 + * Biro Arpad 1.31 + * Goldman Eleonora 1.32 + * Sarlos Tamas 1.33 + * Bencsath Boldizsar 1.34 + * Halacsy Peter 1.35 + * Dvornik Laszlo 1.36 + * Gefferth Andras 1.37 + * Nagy Viktor 1.38 + * Varga Daniel 1.39 + * Chris Halls 1.40 + * Rene Engelhard 1.41 + * Bram Moolenaar 1.42 + * Dafydd Jones 1.43 + * Harri Pitkanen 1.44 + * Andras Timar 1.45 + * Tor Lillqvist 1.46 + * 1.47 + * Alternatively, the contents of this file may be used under the terms of 1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or 1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead 1.51 + * of those above. If you wish to allow use of your version of this file only 1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to 1.53 + * use your version of this file under the terms of the MPL, indicate your 1.54 + * decision by deleting the provisions above and replace them with the notice 1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete 1.56 + * the provisions above, a recipient may use your version of this file under 1.57 + * the terms of any one of the MPL, the GPL or the LGPL. 1.58 + * 1.59 + ******* END LICENSE BLOCK *******/ 1.60 + 1.61 +#ifndef _AFFIXMGR_HXX_ 1.62 +#define _AFFIXMGR_HXX_ 1.63 + 1.64 +#include "hunvisapi.h" 1.65 + 1.66 +#include <stdio.h> 1.67 + 1.68 +#include "atypes.hxx" 1.69 +#include "baseaffix.hxx" 1.70 +#include "hashmgr.hxx" 1.71 +#include "phonet.hxx" 1.72 +#include "replist.hxx" 1.73 + 1.74 +// check flag duplication 1.75 +#define dupSFX (1 << 0) 1.76 +#define dupPFX (1 << 1) 1.77 + 1.78 +class PfxEntry; 1.79 +class SfxEntry; 1.80 + 1.81 +class LIBHUNSPELL_DLL_EXPORTED AffixMgr 1.82 +{ 1.83 + 1.84 + PfxEntry * pStart[SETSIZE]; 1.85 + SfxEntry * sStart[SETSIZE]; 1.86 + PfxEntry * pFlag[SETSIZE]; 1.87 + SfxEntry * sFlag[SETSIZE]; 1.88 + HashMgr * pHMgr; 1.89 + HashMgr ** alldic; 1.90 + int * maxdic; 1.91 + char * keystring; 1.92 + char * trystring; 1.93 + char * encoding; 1.94 + struct cs_info * csconv; 1.95 + int utf8; 1.96 + int complexprefixes; 1.97 + FLAG compoundflag; 1.98 + FLAG compoundbegin; 1.99 + FLAG compoundmiddle; 1.100 + FLAG compoundend; 1.101 + FLAG compoundroot; 1.102 + FLAG compoundforbidflag; 1.103 + FLAG compoundpermitflag; 1.104 + int checkcompounddup; 1.105 + int checkcompoundrep; 1.106 + int checkcompoundcase; 1.107 + int checkcompoundtriple; 1.108 + int simplifiedtriple; 1.109 + FLAG forbiddenword; 1.110 + FLAG nosuggest; 1.111 + FLAG nongramsuggest; 1.112 + FLAG needaffix; 1.113 + int cpdmin; 1.114 + int numrep; 1.115 + replentry * reptable; 1.116 + RepList * iconvtable; 1.117 + RepList * oconvtable; 1.118 + int nummap; 1.119 + mapentry * maptable; 1.120 + int numbreak; 1.121 + char ** breaktable; 1.122 + int numcheckcpd; 1.123 + patentry * checkcpdtable; 1.124 + int simplifiedcpd; 1.125 + int numdefcpd; 1.126 + flagentry * defcpdtable; 1.127 + phonetable * phone; 1.128 + int maxngramsugs; 1.129 + int maxcpdsugs; 1.130 + int maxdiff; 1.131 + int onlymaxdiff; 1.132 + int nosplitsugs; 1.133 + int sugswithdots; 1.134 + int cpdwordmax; 1.135 + int cpdmaxsyllable; 1.136 + char * cpdvowels; 1.137 + w_char * cpdvowels_utf16; 1.138 + int cpdvowels_utf16_len; 1.139 + char * cpdsyllablenum; 1.140 + const char * pfxappnd; // BUG: not stateless 1.141 + const char * sfxappnd; // BUG: not stateless 1.142 + FLAG sfxflag; // BUG: not stateless 1.143 + char * derived; // BUG: not stateless 1.144 + SfxEntry * sfx; // BUG: not stateless 1.145 + PfxEntry * pfx; // BUG: not stateless 1.146 + int checknum; 1.147 + char * wordchars; 1.148 + unsigned short * wordchars_utf16; 1.149 + int wordchars_utf16_len; 1.150 + char * ignorechars; 1.151 + unsigned short * ignorechars_utf16; 1.152 + int ignorechars_utf16_len; 1.153 + char * version; 1.154 + char * lang; 1.155 + int langnum; 1.156 + FLAG lemma_present; 1.157 + FLAG circumfix; 1.158 + FLAG onlyincompound; 1.159 + FLAG keepcase; 1.160 + FLAG forceucase; 1.161 + FLAG warn; 1.162 + int forbidwarn; 1.163 + FLAG substandard; 1.164 + int checksharps; 1.165 + int fullstrip; 1.166 + 1.167 + int havecontclass; // boolean variable 1.168 + char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) 1.169 + 1.170 +public: 1.171 + 1.172 + AffixMgr(const char * affpath, HashMgr** ptr, int * md, 1.173 + const char * key = NULL); 1.174 + ~AffixMgr(); 1.175 + struct hentry * affix_check(const char * word, int len, 1.176 + const unsigned short needflag = (unsigned short) 0, 1.177 + char in_compound = IN_CPD_NOT); 1.178 + struct hentry * prefix_check(const char * word, int len, 1.179 + char in_compound, const FLAG needflag = FLAG_NULL); 1.180 + inline int isSubset(const char * s1, const char * s2); 1.181 + struct hentry * prefix_check_twosfx(const char * word, int len, 1.182 + char in_compound, const FLAG needflag = FLAG_NULL); 1.183 + inline int isRevSubset(const char * s1, const char * end_of_s2, int len); 1.184 + struct hentry * suffix_check(const char * word, int len, int sfxopts, 1.185 + PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, 1.186 + const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, 1.187 + char in_compound = IN_CPD_NOT); 1.188 + struct hentry * suffix_check_twosfx(const char * word, int len, 1.189 + int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); 1.190 + 1.191 + char * affix_check_morph(const char * word, int len, 1.192 + const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 1.193 + char * prefix_check_morph(const char * word, int len, 1.194 + char in_compound, const FLAG needflag = FLAG_NULL); 1.195 + char * suffix_check_morph (const char * word, int len, int sfxopts, 1.196 + PfxEntry * ppfx, const FLAG cclass = FLAG_NULL, 1.197 + const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 1.198 + 1.199 + char * prefix_check_twosfx_morph(const char * word, int len, 1.200 + char in_compound, const FLAG needflag = FLAG_NULL); 1.201 + char * suffix_check_twosfx_morph(const char * word, int len, 1.202 + int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL); 1.203 + 1.204 + char * morphgen(char * ts, int wl, const unsigned short * ap, 1.205 + unsigned short al, char * morph, char * targetmorph, int level); 1.206 + 1.207 + int expand_rootword(struct guessword * wlst, int maxn, const char * ts, 1.208 + int wl, const unsigned short * ap, unsigned short al, char * bad, 1.209 + int, char *); 1.210 + 1.211 + short get_syllable (const char * word, int wlen); 1.212 + int cpdrep_check(const char * word, int len); 1.213 + int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2, 1.214 + const char affixed); 1.215 + int defcpd_check(hentry *** words, short wnum, hentry * rv, 1.216 + hentry ** rwords, char all); 1.217 + int cpdcase_check(const char * word, int len); 1.218 + inline int candidate_check(const char * word, int len); 1.219 + void setcminmax(int * cmin, int * cmax, const char * word, int len); 1.220 + struct hentry * compound_check(const char * word, int len, short wordnum, 1.221 + short numsyllable, short maxwordnum, short wnum, hentry ** words, 1.222 + char hu_mov_rule, char is_sug, int * info); 1.223 + 1.224 + int compound_check_morph(const char * word, int len, short wordnum, 1.225 + short numsyllable, short maxwordnum, short wnum, hentry ** words, 1.226 + char hu_mov_rule, char ** result, char * partresult); 1.227 + 1.228 + struct hentry * lookup(const char * word); 1.229 + int get_numrep() const; 1.230 + struct replentry * get_reptable() const; 1.231 + RepList * get_iconvtable() const; 1.232 + RepList * get_oconvtable() const; 1.233 + struct phonetable * get_phonetable() const; 1.234 + int get_nummap() const; 1.235 + struct mapentry * get_maptable() const; 1.236 + int get_numbreak() const; 1.237 + char ** get_breaktable() const; 1.238 + char * get_encoding(); 1.239 + int get_langnum() const; 1.240 + char * get_key_string(); 1.241 + char * get_try_string() const; 1.242 + const char * get_wordchars() const; 1.243 + unsigned short * get_wordchars_utf16(int * len) const; 1.244 + char * get_ignore() const; 1.245 + unsigned short * get_ignore_utf16(int * len) const; 1.246 + int get_compound() const; 1.247 + FLAG get_compoundflag() const; 1.248 + FLAG get_compoundbegin() const; 1.249 + FLAG get_forbiddenword() const; 1.250 + FLAG get_nosuggest() const; 1.251 + FLAG get_nongramsuggest() const; 1.252 + FLAG get_needaffix() const; 1.253 + FLAG get_onlyincompound() const; 1.254 + FLAG get_compoundroot() const; 1.255 + FLAG get_lemma_present() const; 1.256 + int get_checknum() const; 1.257 + const char * get_prefix() const; 1.258 + const char * get_suffix() const; 1.259 + const char * get_derived() const; 1.260 + const char * get_version() const; 1.261 + int have_contclass() const; 1.262 + int get_utf8() const; 1.263 + int get_complexprefixes() const; 1.264 + char * get_suffixed(char ) const; 1.265 + int get_maxngramsugs() const; 1.266 + int get_maxcpdsugs() const; 1.267 + int get_maxdiff() const; 1.268 + int get_onlymaxdiff() const; 1.269 + int get_nosplitsugs() const; 1.270 + int get_sugswithdots(void) const; 1.271 + FLAG get_keepcase(void) const; 1.272 + FLAG get_forceucase(void) const; 1.273 + FLAG get_warn(void) const; 1.274 + int get_forbidwarn(void) const; 1.275 + int get_checksharps(void) const; 1.276 + char * encode_flag(unsigned short aflag) const; 1.277 + int get_fullstrip() const; 1.278 + 1.279 +private: 1.280 + int parse_file(const char * affpath, const char * key); 1.281 + int parse_flag(char * line, unsigned short * out, FileMgr * af); 1.282 + int parse_num(char * line, int * out, FileMgr * af); 1.283 + int parse_cpdsyllable(char * line, FileMgr * af); 1.284 + int parse_reptable(char * line, FileMgr * af); 1.285 + int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword); 1.286 + int parse_phonetable(char * line, FileMgr * af); 1.287 + int parse_maptable(char * line, FileMgr * af); 1.288 + int parse_breaktable(char * line, FileMgr * af); 1.289 + int parse_checkcpdtable(char * line, FileMgr * af); 1.290 + int parse_defcpdtable(char * line, FileMgr * af); 1.291 + int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); 1.292 + 1.293 + void reverse_condition(char *); 1.294 + void debugflag(char * result, unsigned short flag); 1.295 + int condlen(char *); 1.296 + int encodeit(affentry &entry, char * cs); 1.297 + int build_pfxtree(PfxEntry* pfxptr); 1.298 + int build_sfxtree(SfxEntry* sfxptr); 1.299 + int process_pfx_order(); 1.300 + int process_sfx_order(); 1.301 + PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr); 1.302 + SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr); 1.303 + int process_pfx_tree_to_list(); 1.304 + int process_sfx_tree_to_list(); 1.305 + int redundant_condition(char, char * strip, int stripl, 1.306 + const char * cond, int); 1.307 +}; 1.308 + 1.309 +#endif 1.310 +