Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 /******* BEGIN LICENSE BLOCK *******
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
15 * and László Németh (Hunspell). Portions created by the Initial Developers
16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
17 *
18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
19 * David Einstein (deinst@world.std.com)
20 * László Németh (nemethl@gyorsposta.hu)
21 * Caolan McNamara (caolanm@redhat.com)
22 * Davide Prina
23 * Giuseppe Modugno
24 * Gianluca Turconi
25 * Simon Brouwer
26 * Noll Janos
27 * Biro Arpad
28 * Goldman Eleonora
29 * Sarlos Tamas
30 * Bencsath Boldizsar
31 * Halacsy Peter
32 * Dvornik Laszlo
33 * Gefferth Andras
34 * Nagy Viktor
35 * Varga Daniel
36 * Chris Halls
37 * Rene Engelhard
38 * Bram Moolenaar
39 * Dafydd Jones
40 * Harri Pitkanen
41 * Andras Timar
42 * Tor Lillqvist
43 *
44 * Alternatively, the contents of this file may be used under the terms of
45 * either the GNU General Public License Version 2 or later (the "GPL"), or
46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
47 * in which case the provisions of the GPL or the LGPL are applicable instead
48 * of those above. If you wish to allow use of your version of this file only
49 * under the terms of either the GPL or the LGPL, and not to allow others to
50 * use your version of this file under the terms of the MPL, indicate your
51 * decision by deleting the provisions above and replace them with the notice
52 * and other provisions required by the GPL or the LGPL. If you do not delete
53 * the provisions above, a recipient may use your version of this file under
54 * the terms of any one of the MPL, the GPL or the LGPL.
55 *
56 ******* END LICENSE BLOCK *******/
58 #ifndef _AFFIXMGR_HXX_
59 #define _AFFIXMGR_HXX_
61 #include "hunvisapi.h"
63 #include <stdio.h>
65 #include "atypes.hxx"
66 #include "baseaffix.hxx"
67 #include "hashmgr.hxx"
68 #include "phonet.hxx"
69 #include "replist.hxx"
71 // check flag duplication
// dupSFX is bit 0 and dupPFX is bit 1, so the two masks are disjoint and can
// be OR-ed together into a single value.
// NOTE(review): presumably these bits are recorded in the `dupflags` buffer
// taken by AffixMgr::parse_affix() -- confirm against the implementation.
72 #define dupSFX (1 << 0)
73 #define dupPFX (1 << 1)
// Forward declarations of the prefix and suffix entry classes. AffixMgr only
// stores and passes pointers to these types in this header, which an
// incomplete type is sufficient for.
75 class PfxEntry;
76 class SfxEntry;
// AffixMgr: class declaration, exported through LIBHUNSPELL_DLL_EXPORTED.
// This header only declares the interface; no member function has a body
// here. Every member before `public:` is private (class default access).
// NOTE(review): the group comments below are derived from the declared names
// and types only -- where they say "presumably", confirm against the
// implementation file before relying on them.
78 class LIBHUNSPELL_DLL_EXPORTED AffixMgr
79 {
// Four tables of SETSIZE slots each: pStart/pFlag hold PfxEntry pointers,
// sStart/sFlag hold SfxEntry pointers.
// NOTE(review): presumably *Start is keyed by a character of the affix and
// *Flag by the affix flag -- TODO confirm.
81 PfxEntry * pStart[SETSIZE];
82 SfxEntry * sStart[SETSIZE];
83 PfxEntry * pFlag[SETSIZE];
84 SfxEntry * sFlag[SETSIZE];
// Dictionary handles. The constructor below receives a HashMgr** and an
// int*; presumably those are kept in alldic and maxdic -- TODO confirm.
85 HashMgr * pHMgr;
86 HashMgr ** alldic;
87 int * maxdic;
// String settings and encoding state. Presumably returned by
// get_key_string(), get_try_string(), get_encoding(), get_utf8() and
// get_complexprefixes() declared below -- TODO confirm.
88 char * keystring;
89 char * trystring;
90 char * encoding;
91 struct cs_info * csconv;
92 int utf8;
93 int complexprefixes;
// Compound-related flags (FLAG) and switches (int).
// NOTE(review): names suggest these mirror compound options read from the
// affix file -- confirm in the parser.
94 FLAG compoundflag;
95 FLAG compoundbegin;
96 FLAG compoundmiddle;
97 FLAG compoundend;
98 FLAG compoundroot;
99 FLAG compoundforbidflag;
100 FLAG compoundpermitflag;
101 int checkcompounddup;
102 int checkcompoundrep;
103 int checkcompoundcase;
104 int checkcompoundtriple;
105 int simplifiedtriple;
106 FLAG forbiddenword;
107 FLAG nosuggest;
108 FLAG nongramsuggest;
109 FLAG needaffix;
110 int cpdmin;
// Count/table pairs: each num* int sits next to a table pointer
// (numrep/reptable, nummap/maptable, numbreak/breaktable,
// numcheckcpd/checkcpdtable, numdefcpd/defcpdtable).
// NOTE(review): presumably each count is the number of entries in the
// neighbouring table -- TODO confirm.
111 int numrep;
112 replentry * reptable;
113 RepList * iconvtable;
114 RepList * oconvtable;
115 int nummap;
116 mapentry * maptable;
117 int numbreak;
118 char ** breaktable;
119 int numcheckcpd;
120 patentry * checkcpdtable;
121 int simplifiedcpd;
122 int numdefcpd;
123 flagentry * defcpdtable;
124 phonetable * phone;
// Integer settings; several have a same-named get_*() accessor in the public
// section (maxngramsugs, maxcpdsugs, maxdiff, onlymaxdiff, nosplitsugs,
// sugswithdots).
125 int maxngramsugs;
126 int maxcpdsugs;
127 int maxdiff;
128 int onlymaxdiff;
129 int nosplitsugs;
130 int sugswithdots;
131 int cpdwordmax;
132 int cpdmaxsyllable;
// cpdvowels exists in a char* form and a w_char* form with an explicit
// length field.
133 char * cpdvowels;
134 w_char * cpdvowels_utf16;
135 int cpdvowels_utf16_len;
136 char * cpdsyllablenum;
// The six members below are flagged by the original authors as making the
// class "not stateless".
// NOTE(review): presumably they carry results of the most recent check call
// and are what get_prefix()/get_suffix()/get_derived() return, which would
// make concurrent use of one AffixMgr unsafe -- TODO confirm.
137 const char * pfxappnd; // BUG: not stateless
138 const char * sfxappnd; // BUG: not stateless
139 FLAG sfxflag; // BUG: not stateless
140 char * derived; // BUG: not stateless
141 SfxEntry * sfx; // BUG: not stateless
142 PfxEntry * pfx; // BUG: not stateless
143 int checknum;
// wordchars and ignorechars each have a char* form plus an unsigned short*
// form with a separate *_len field. Note the element type differs from
// cpdvowels_utf16 above, which is declared as w_char*.
144 char * wordchars;
145 unsigned short * wordchars_utf16;
146 int wordchars_utf16_len;
147 char * ignorechars;
148 unsigned short * ignorechars_utf16;
149 int ignorechars_utf16_len;
150 char * version;
151 char * lang;
152 int langnum;
// Further FLAG-typed and int-typed settings.
153 FLAG lemma_present;
154 FLAG circumfix;
155 FLAG onlyincompound;
156 FLAG keepcase;
157 FLAG forceucase;
158 FLAG warn;
159 int forbidwarn;
160 FLAG substandard;
161 int checksharps;
162 int fullstrip;
164 int havecontclass; // boolean variable
165 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
167 public:
// Constructor: takes the affix file path, a HashMgr**, an int*, and an
// optional key that defaults to NULL.
169 AffixMgr(const char * affpath, HashMgr** ptr, int * md,
170 const char * key = NULL);
171 ~AffixMgr();
// Word checks returning struct hentry *. All take the word and its length;
// needflag defaults to FLAG_NULL (or 0) and in_compound, where it has a
// default, defaults to IN_CPD_NOT.
// NOTE(review): what a null return means is not visible in this header.
// NOTE(review): affix_check declares needflag as `const unsigned short`
// while the sibling checks use `const FLAG`; presumably FLAG is an alias of
// unsigned short -- confirm in the included headers.
172 struct hentry * affix_check(const char * word, int len,
173 const unsigned short needflag = (unsigned short) 0,
174 char in_compound = IN_CPD_NOT);
175 struct hentry * prefix_check(const char * word, int len,
176 char in_compound, const FLAG needflag = FLAG_NULL);
177 inline int isSubset(const char * s1, const char * s2);
178 struct hentry * prefix_check_twosfx(const char * word, int len,
179 char in_compound, const FLAG needflag = FLAG_NULL);
180 inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
181 struct hentry * suffix_check(const char * word, int len, int sfxopts,
182 PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
183 const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
184 char in_compound = IN_CPD_NOT);
185 struct hentry * suffix_check_twosfx(const char * word, int len,
186 int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
// *_morph counterparts of the checks above; these return char * instead of
// struct hentry *.
// NOTE(review): ownership of the returned buffer (who frees it) is not
// visible from this header -- TODO confirm.
188 char * affix_check_morph(const char * word, int len,
189 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
190 char * prefix_check_morph(const char * word, int len,
191 char in_compound, const FLAG needflag = FLAG_NULL);
192 char * suffix_check_morph (const char * word, int len, int sfxopts,
193 PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
194 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
196 char * prefix_check_twosfx_morph(const char * word, int len,
197 char in_compound, const FLAG needflag = FLAG_NULL);
198 char * suffix_check_twosfx_morph(const char * word, int len,
199 int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
201 char * morphgen(char * ts, int wl, const unsigned short * ap,
202 unsigned short al, char * morph, char * targetmorph, int level);
// NOTE(review): the last two parameters of expand_rootword are unnamed in
// this declaration; their meaning has to be read from the definition.
204 int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
205 int wl, const unsigned short * ap, unsigned short al, char * bad,
206 int, char *);
// Compound-word helpers. compound_check returns struct hentry *, while
// compound_check_morph returns int and takes `result` / `partresult` buffers.
208 short get_syllable (const char * word, int wlen);
209 int cpdrep_check(const char * word, int len);
210 int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
211 const char affixed);
212 int defcpd_check(hentry *** words, short wnum, hentry * rv,
213 hentry ** rwords, char all);
214 int cpdcase_check(const char * word, int len);
215 inline int candidate_check(const char * word, int len);
216 void setcminmax(int * cmin, int * cmax, const char * word, int len);
217 struct hentry * compound_check(const char * word, int len, short wordnum,
218 short numsyllable, short maxwordnum, short wnum, hentry ** words,
219 char hu_mov_rule, char is_sug, int * info);
221 int compound_check_morph(const char * word, int len, short wordnum,
222 short numsyllable, short maxwordnum, short wnum, hentry ** words,
223 char hu_mov_rule, char ** result, char * partresult);
// Accessors. All are const-qualified except get_encoding() and
// get_key_string().
// NOTE(review): presumably those two may modify the object (e.g. fill in a
// default on first use) -- confirm in the implementation.
// NOTE(review): several accessors return non-const pointers
// (char *, char **, table pointers); presumably these point at the private
// members above and must not be freed by the caller -- TODO confirm.
225 struct hentry * lookup(const char * word);
226 int get_numrep() const;
227 struct replentry * get_reptable() const;
228 RepList * get_iconvtable() const;
229 RepList * get_oconvtable() const;
230 struct phonetable * get_phonetable() const;
231 int get_nummap() const;
232 struct mapentry * get_maptable() const;
233 int get_numbreak() const;
234 char ** get_breaktable() const;
235 char * get_encoding();
236 int get_langnum() const;
237 char * get_key_string();
238 char * get_try_string() const;
239 const char * get_wordchars() const;
// The *_utf16 accessors take an int* `len` parameter in addition to
// returning the pointer; presumably the length is written through it.
240 unsigned short * get_wordchars_utf16(int * len) const;
241 char * get_ignore() const;
242 unsigned short * get_ignore_utf16(int * len) const;
243 int get_compound() const;
244 FLAG get_compoundflag() const;
245 FLAG get_compoundbegin() const;
246 FLAG get_forbiddenword() const;
247 FLAG get_nosuggest() const;
248 FLAG get_nongramsuggest() const;
249 FLAG get_needaffix() const;
250 FLAG get_onlyincompound() const;
251 FLAG get_compoundroot() const;
252 FLAG get_lemma_present() const;
253 int get_checknum() const;
254 const char * get_prefix() const;
255 const char * get_suffix() const;
256 const char * get_derived() const;
257 const char * get_version() const;
258 int have_contclass() const;
259 int get_utf8() const;
260 int get_complexprefixes() const;
261 char * get_suffixed(char ) const;
262 int get_maxngramsugs() const;
263 int get_maxcpdsugs() const;
264 int get_maxdiff() const;
265 int get_onlymaxdiff() const;
266 int get_nosplitsugs() const;
// The `(void)` parameter lists below are equivalent to `()` in C++.
267 int get_sugswithdots(void) const;
268 FLAG get_keepcase(void) const;
269 FLAG get_forceucase(void) const;
270 FLAG get_warn(void) const;
271 int get_forbidwarn(void) const;
272 int get_checksharps(void) const;
273 char * encode_flag(unsigned short aflag) const;
274 int get_fullstrip() const;
276 private:
// Parsing helpers, all returning int. parse_file takes the affix path and
// key; the other parse_* helpers take a mutable line buffer plus a FileMgr *
// (parse_affix additionally takes `at` and a `dupflags` buffer).
// NOTE(review): FileMgr is not declared in this header itself; presumably it
// comes in through one of the project includes -- confirm.
// NOTE(review): the meaning of the int return values is not visible here.
277 int parse_file(const char * affpath, const char * key);
278 int parse_flag(char * line, unsigned short * out, FileMgr * af);
279 int parse_num(char * line, int * out, FileMgr * af);
280 int parse_cpdsyllable(char * line, FileMgr * af);
281 int parse_reptable(char * line, FileMgr * af);
282 int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
283 int parse_phonetable(char * line, FileMgr * af);
284 int parse_maptable(char * line, FileMgr * af);
285 int parse_breaktable(char * line, FileMgr * af);
286 int parse_checkcpdtable(char * line, FileMgr * af);
287 int parse_defcpdtable(char * line, FileMgr * af);
288 int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
// Condition-string and flag utilities; reverse_condition and condlen take a
// mutable char * with an unnamed parameter.
290 void reverse_condition(char *);
291 void debugflag(char * result, unsigned short flag);
292 int condlen(char *);
293 int encodeit(affentry &entry, char * cs);
// Helpers operating on PfxEntry / SfxEntry structures; the pfx and sfx
// variants have parallel signatures.
// NOTE(review): names suggest entries are first collected in a tree and then
// flattened to an ordered list -- confirm in the implementation.
294 int build_pfxtree(PfxEntry* pfxptr);
295 int build_sfxtree(SfxEntry* sfxptr);
296 int process_pfx_order();
297 int process_sfx_order();
298 PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
299 SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
300 int process_pfx_tree_to_list();
301 int process_sfx_tree_to_list();
// NOTE(review): the first and last parameters are unnamed in this
// declaration; see the definition for their meaning.
302 int redundant_condition(char, char * strip, int stripl,
303 const char * cond, int);
304 };
306 #endif