Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /******* BEGIN LICENSE BLOCK ******* |
michael@0 | 2 | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
michael@0 | 3 | * |
michael@0 | 4 | * The contents of this file are subject to the Mozilla Public License Version |
michael@0 | 5 | * 1.1 (the "License"); you may not use this file except in compliance with |
michael@0 | 6 | * the License. You may obtain a copy of the License at |
michael@0 | 7 | * http://www.mozilla.org/MPL/ |
michael@0 | 8 | * |
michael@0 | 9 | * Software distributed under the License is distributed on an "AS IS" basis, |
michael@0 | 10 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
michael@0 | 11 | * for the specific language governing rights and limitations under the |
michael@0 | 12 | * License. |
michael@0 | 13 | * |
michael@0 | 14 | * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) |
michael@0 | 15 | * and László Németh (Hunspell). Portions created by the Initial Developers |
michael@0 | 16 | * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. |
michael@0 | 17 | * |
michael@0 | 18 | * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) |
michael@0 | 19 | * David Einstein (deinst@world.std.com) |
michael@0 | 20 | * László Németh (nemethl@gyorsposta.hu) |
michael@0 | 21 | * Caolan McNamara (caolanm@redhat.com) |
michael@0 | 22 | * Davide Prina |
michael@0 | 23 | * Giuseppe Modugno |
michael@0 | 24 | * Gianluca Turconi |
michael@0 | 25 | * Simon Brouwer |
michael@0 | 26 | * Noll Janos |
michael@0 | 27 | * Biro Arpad |
michael@0 | 28 | * Goldman Eleonora |
michael@0 | 29 | * Sarlos Tamas |
michael@0 | 30 | * Bencsath Boldizsar |
michael@0 | 31 | * Halacsy Peter |
michael@0 | 32 | * Dvornik Laszlo |
michael@0 | 33 | * Gefferth Andras |
michael@0 | 34 | * Nagy Viktor |
michael@0 | 35 | * Varga Daniel |
michael@0 | 36 | * Chris Halls |
michael@0 | 37 | * Rene Engelhard |
michael@0 | 38 | * Bram Moolenaar |
michael@0 | 39 | * Dafydd Jones |
michael@0 | 40 | * Harri Pitkanen |
michael@0 | 41 | * Andras Timar |
michael@0 | 42 | * Tor Lillqvist |
michael@0 | 43 | * |
michael@0 | 44 | * Alternatively, the contents of this file may be used under the terms of |
michael@0 | 45 | * either the GNU General Public License Version 2 or later (the "GPL"), or |
michael@0 | 46 | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
michael@0 | 47 | * in which case the provisions of the GPL or the LGPL are applicable instead |
michael@0 | 48 | * of those above. If you wish to allow use of your version of this file only |
michael@0 | 49 | * under the terms of either the GPL or the LGPL, and not to allow others to |
michael@0 | 50 | * use your version of this file under the terms of the MPL, indicate your |
michael@0 | 51 | * decision by deleting the provisions above and replace them with the notice |
michael@0 | 52 | * and other provisions required by the GPL or the LGPL. If you do not delete |
michael@0 | 53 | * the provisions above, a recipient may use your version of this file under |
michael@0 | 54 | * the terms of any one of the MPL, the GPL or the LGPL. |
michael@0 | 55 | * |
michael@0 | 56 | ******* END LICENSE BLOCK *******/ |
michael@0 | 57 | |
michael@0 | 58 | #ifndef _AFFIXMGR_HXX_ |
michael@0 | 59 | #define _AFFIXMGR_HXX_ |
michael@0 | 60 | |
michael@0 | 61 | #include "hunvisapi.h" |
michael@0 | 62 | |
michael@0 | 63 | #include <stdio.h> |
michael@0 | 64 | |
michael@0 | 65 | #include "atypes.hxx" |
michael@0 | 66 | #include "baseaffix.hxx" |
michael@0 | 67 | #include "hashmgr.hxx" |
michael@0 | 68 | #include "phonet.hxx" |
michael@0 | 69 | #include "replist.hxx" |
michael@0 | 70 | |
michael@0 | 71 | // bit masks used to check flag duplication: dupSFX is bit 0, dupPFX is bit 1 |
michael@0 | 72 | #define dupSFX (1 << 0) |
michael@0 | 73 | #define dupPFX (1 << 1) |
michael@0 | 74 | /* Forward declarations: this header only uses PfxEntry and SfxEntry through pointers. */ |
michael@0 | 75 | class PfxEntry; |
michael@0 | 76 | class SfxEntry; |
michael@0 | 77 | |
michael@0 | 78 | class LIBHUNSPELL_DLL_EXPORTED AffixMgr |
michael@0 | 79 | { |
michael@0 | 80 | /* Exported class declaration. It declares the prefix/suffix/compound check methods, their _morph variants, morphgen, expand_rootword, const get_ accessors and private parse_ helpers. No access specifier precedes the data members below, so they are private by default. */ |
michael@0 | 81 | PfxEntry * pStart[SETSIZE]; |
michael@0 | 82 | SfxEntry * sStart[SETSIZE]; |
michael@0 | 83 | PfxEntry * pFlag[SETSIZE]; |
michael@0 | 84 | SfxEntry * sFlag[SETSIZE]; |
michael@0 | 85 | HashMgr * pHMgr; |
michael@0 | 86 | HashMgr ** alldic; |
michael@0 | 87 | int * maxdic; |
michael@0 | 88 | char * keystring; |
michael@0 | 89 | char * trystring; |
michael@0 | 90 | char * encoding; |
michael@0 | 91 | struct cs_info * csconv; |
michael@0 | 92 | int utf8; |
michael@0 | 93 | int complexprefixes; |
michael@0 | 94 | FLAG compoundflag; |
michael@0 | 95 | FLAG compoundbegin; |
michael@0 | 96 | FLAG compoundmiddle; |
michael@0 | 97 | FLAG compoundend; |
michael@0 | 98 | FLAG compoundroot; |
michael@0 | 99 | FLAG compoundforbidflag; |
michael@0 | 100 | FLAG compoundpermitflag; |
michael@0 | 101 | int checkcompounddup; |
michael@0 | 102 | int checkcompoundrep; |
michael@0 | 103 | int checkcompoundcase; |
michael@0 | 104 | int checkcompoundtriple; |
michael@0 | 105 | int simplifiedtriple; |
michael@0 | 106 | FLAG forbiddenword; |
michael@0 | 107 | FLAG nosuggest; |
michael@0 | 108 | FLAG nongramsuggest; |
michael@0 | 109 | FLAG needaffix; |
michael@0 | 110 | int cpdmin; |
michael@0 | 111 | int numrep; |
michael@0 | 112 | replentry * reptable; |
michael@0 | 113 | RepList * iconvtable; |
michael@0 | 114 | RepList * oconvtable; |
michael@0 | 115 | int nummap; |
michael@0 | 116 | mapentry * maptable; |
michael@0 | 117 | int numbreak; |
michael@0 | 118 | char ** breaktable; |
michael@0 | 119 | int numcheckcpd; |
michael@0 | 120 | patentry * checkcpdtable; |
michael@0 | 121 | int simplifiedcpd; |
michael@0 | 122 | int numdefcpd; |
michael@0 | 123 | flagentry * defcpdtable; |
michael@0 | 124 | phonetable * phone; |
michael@0 | 125 | int maxngramsugs; |
michael@0 | 126 | int maxcpdsugs; |
michael@0 | 127 | int maxdiff; |
michael@0 | 128 | int onlymaxdiff; |
michael@0 | 129 | int nosplitsugs; |
michael@0 | 130 | int sugswithdots; |
michael@0 | 131 | int cpdwordmax; |
michael@0 | 132 | int cpdmaxsyllable; |
michael@0 | 133 | char * cpdvowels; |
michael@0 | 134 | w_char * cpdvowels_utf16; |
michael@0 | 135 | int cpdvowels_utf16_len; |
michael@0 | 136 | char * cpdsyllablenum; |
michael@0 | 137 | const char * pfxappnd; // BUG: not stateless. NOTE(review): this and the five members below are presumably scratch state written while checking a word; confirm before sharing one instance across threads |
michael@0 | 138 | const char * sfxappnd; // BUG: not stateless |
michael@0 | 139 | FLAG sfxflag; // BUG: not stateless |
michael@0 | 140 | char * derived; // BUG: not stateless |
michael@0 | 141 | SfxEntry * sfx; // BUG: not stateless |
michael@0 | 142 | PfxEntry * pfx; // BUG: not stateless |
michael@0 | 143 | int checknum; |
michael@0 | 144 | char * wordchars; |
michael@0 | 145 | unsigned short * wordchars_utf16; |
michael@0 | 146 | int wordchars_utf16_len; |
michael@0 | 147 | char * ignorechars; |
michael@0 | 148 | unsigned short * ignorechars_utf16; |
michael@0 | 149 | int ignorechars_utf16_len; |
michael@0 | 150 | char * version; |
michael@0 | 151 | char * lang; |
michael@0 | 152 | int langnum; |
michael@0 | 153 | FLAG lemma_present; |
michael@0 | 154 | FLAG circumfix; |
michael@0 | 155 | FLAG onlyincompound; |
michael@0 | 156 | FLAG keepcase; |
michael@0 | 157 | FLAG forceucase; |
michael@0 | 158 | FLAG warn; |
michael@0 | 159 | int forbidwarn; |
michael@0 | 160 | FLAG substandard; |
michael@0 | 161 | int checksharps; |
michael@0 | 162 | int fullstrip; |
michael@0 | 163 | |
michael@0 | 164 | int havecontclass; // boolean variable |
michael@0 | 165 | char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) |
michael@0 | 166 | |
michael@0 | 167 | public: |
michael@0 | 168 | /* Construction and destruction. The key argument of the constructor is optional and defaults to NULL. */ |
michael@0 | 169 | AffixMgr(const char * affpath, HashMgr** ptr, int * md, |
michael@0 | 170 | const char * key = NULL); |
michael@0 | 171 | ~AffixMgr(); |
michael@0 | 172 | struct hentry * affix_check(const char * word, int len, |
michael@0 | 173 | const unsigned short needflag = (unsigned short) 0, /* NOTE(review): spelled unsigned short here but FLAG in the sibling declarations; presumably FLAG is a typedef of unsigned short, confirm in the included headers */ |
michael@0 | 174 | char in_compound = IN_CPD_NOT); |
michael@0 | 175 | struct hentry * prefix_check(const char * word, int len, |
michael@0 | 176 | char in_compound, const FLAG needflag = FLAG_NULL); |
michael@0 | 177 | inline int isSubset(const char * s1, const char * s2); |
michael@0 | 178 | struct hentry * prefix_check_twosfx(const char * word, int len, |
michael@0 | 179 | char in_compound, const FLAG needflag = FLAG_NULL); |
michael@0 | 180 | inline int isRevSubset(const char * s1, const char * end_of_s2, int len); |
michael@0 | 181 | struct hentry * suffix_check(const char * word, int len, int sfxopts, |
michael@0 | 182 | PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, |
michael@0 | 183 | const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, |
michael@0 | 184 | char in_compound = IN_CPD_NOT); |
michael@0 | 185 | struct hentry * suffix_check_twosfx(const char * word, int len, |
michael@0 | 186 | int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); |
michael@0 | 187 | /* The _morph variants below return char * rather than struct hentry *. NOTE(review): ownership of the returned buffer is not visible in this header; check the implementation before freeing or retaining it. */ |
michael@0 | 188 | char * affix_check_morph(const char * word, int len, |
michael@0 | 189 | const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); |
michael@0 | 190 | char * prefix_check_morph(const char * word, int len, |
michael@0 | 191 | char in_compound, const FLAG needflag = FLAG_NULL); |
michael@0 | 192 | char * suffix_check_morph (const char * word, int len, int sfxopts, |
michael@0 | 193 | PfxEntry * ppfx, const FLAG cclass = FLAG_NULL, |
michael@0 | 194 | const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); |
michael@0 | 195 | /* char * returning counterparts of prefix_check_twosfx and suffix_check_twosfx, with the same parameter lists. */ |
michael@0 | 196 | char * prefix_check_twosfx_morph(const char * word, int len, |
michael@0 | 197 | char in_compound, const FLAG needflag = FLAG_NULL); |
michael@0 | 198 | char * suffix_check_twosfx_morph(const char * word, int len, |
michael@0 | 199 | int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL); |
michael@0 | 200 | |
michael@0 | 201 | char * morphgen(char * ts, int wl, const unsigned short * ap, |
michael@0 | 202 | unsigned short al, char * morph, char * targetmorph, int level); |
michael@0 | 203 | /* The last two parameters of expand_rootword are unnamed in this declaration; see the definition for their meaning. */ |
michael@0 | 204 | int expand_rootword(struct guessword * wlst, int maxn, const char * ts, |
michael@0 | 205 | int wl, const unsigned short * ap, unsigned short al, char * bad, |
michael@0 | 206 | int, char *); |
michael@0 | 207 | |
michael@0 | 208 | short get_syllable (const char * word, int wlen); |
michael@0 | 209 | int cpdrep_check(const char * word, int len); |
michael@0 | 210 | int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2, |
michael@0 | 211 | const char affixed); |
michael@0 | 212 | int defcpd_check(hentry *** words, short wnum, hentry * rv, |
michael@0 | 213 | hentry ** rwords, char all); |
michael@0 | 214 | int cpdcase_check(const char * word, int len); |
michael@0 | 215 | inline int candidate_check(const char * word, int len); |
michael@0 | 216 | void setcminmax(int * cmin, int * cmax, const char * word, int len); |
michael@0 | 217 | struct hentry * compound_check(const char * word, int len, short wordnum, |
michael@0 | 218 | short numsyllable, short maxwordnum, short wnum, hentry ** words, |
michael@0 | 219 | char hu_mov_rule, char is_sug, int * info); |
michael@0 | 220 | |
michael@0 | 221 | int compound_check_morph(const char * word, int len, short wordnum, |
michael@0 | 222 | short numsyllable, short maxwordnum, short wnum, hentry ** words, |
michael@0 | 223 | char hu_mov_rule, char ** result, char * partresult); |
michael@0 | 224 | /* lookup() followed by the accessors. Every accessor below is const-qualified except get_encoding() and get_key_string(); the reason is not visible in this header. */ |
michael@0 | 225 | struct hentry * lookup(const char * word); |
michael@0 | 226 | int get_numrep() const; |
michael@0 | 227 | struct replentry * get_reptable() const; |
michael@0 | 228 | RepList * get_iconvtable() const; |
michael@0 | 229 | RepList * get_oconvtable() const; |
michael@0 | 230 | struct phonetable * get_phonetable() const; |
michael@0 | 231 | int get_nummap() const; |
michael@0 | 232 | struct mapentry * get_maptable() const; |
michael@0 | 233 | int get_numbreak() const; |
michael@0 | 234 | char ** get_breaktable() const; |
michael@0 | 235 | char * get_encoding(); |
michael@0 | 236 | int get_langnum() const; |
michael@0 | 237 | char * get_key_string(); |
michael@0 | 238 | char * get_try_string() const; |
michael@0 | 239 | const char * get_wordchars() const; |
michael@0 | 240 | unsigned short * get_wordchars_utf16(int * len) const; |
michael@0 | 241 | char * get_ignore() const; |
michael@0 | 242 | unsigned short * get_ignore_utf16(int * len) const; |
michael@0 | 243 | int get_compound() const; |
michael@0 | 244 | FLAG get_compoundflag() const; |
michael@0 | 245 | FLAG get_compoundbegin() const; |
michael@0 | 246 | FLAG get_forbiddenword() const; |
michael@0 | 247 | FLAG get_nosuggest() const; |
michael@0 | 248 | FLAG get_nongramsuggest() const; |
michael@0 | 249 | FLAG get_needaffix() const; |
michael@0 | 250 | FLAG get_onlyincompound() const; |
michael@0 | 251 | FLAG get_compoundroot() const; |
michael@0 | 252 | FLAG get_lemma_present() const; |
michael@0 | 253 | int get_checknum() const; |
michael@0 | 254 | const char * get_prefix() const; |
michael@0 | 255 | const char * get_suffix() const; |
michael@0 | 256 | const char * get_derived() const; |
michael@0 | 257 | const char * get_version() const; |
michael@0 | 258 | int have_contclass() const; |
michael@0 | 259 | int get_utf8() const; |
michael@0 | 260 | int get_complexprefixes() const; |
michael@0 | 261 | char * get_suffixed(char ) const; |
michael@0 | 262 | int get_maxngramsugs() const; |
michael@0 | 263 | int get_maxcpdsugs() const; |
michael@0 | 264 | int get_maxdiff() const; |
michael@0 | 265 | int get_onlymaxdiff() const; |
michael@0 | 266 | int get_nosplitsugs() const; |
michael@0 | 267 | int get_sugswithdots(void) const; /* (void) and () declare the same empty parameter list in C++; only the spelling differs from the accessors above */ |
michael@0 | 268 | FLAG get_keepcase(void) const; |
michael@0 | 269 | FLAG get_forceucase(void) const; |
michael@0 | 270 | FLAG get_warn(void) const; |
michael@0 | 271 | int get_forbidwarn(void) const; |
michael@0 | 272 | int get_checksharps(void) const; |
michael@0 | 273 | char * encode_flag(unsigned short aflag) const; |
michael@0 | 274 | int get_fullstrip() const; |
michael@0 | 275 | |
michael@0 | 276 | private: /* The parse_ helpers below take a mutable line buffer and, except parse_file, a FileMgr pointer. FileMgr is not declared in this header; NOTE(review): presumably it arrives through one of the includes, confirm. */ |
michael@0 | 277 | int parse_file(const char * affpath, const char * key); |
michael@0 | 278 | int parse_flag(char * line, unsigned short * out, FileMgr * af); |
michael@0 | 279 | int parse_num(char * line, int * out, FileMgr * af); |
michael@0 | 280 | int parse_cpdsyllable(char * line, FileMgr * af); |
michael@0 | 281 | int parse_reptable(char * line, FileMgr * af); |
michael@0 | 282 | int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword); |
michael@0 | 283 | int parse_phonetable(char * line, FileMgr * af); |
michael@0 | 284 | int parse_maptable(char * line, FileMgr * af); |
michael@0 | 285 | int parse_breaktable(char * line, FileMgr * af); |
michael@0 | 286 | int parse_checkcpdtable(char * line, FileMgr * af); |
michael@0 | 287 | int parse_defcpdtable(char * line, FileMgr * af); |
michael@0 | 288 | int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); |
michael@0 | 289 | /* Remaining private helpers. reverse_condition, condlen and redundant_condition leave some parameters unnamed in these declarations. */ |
michael@0 | 290 | void reverse_condition(char *); |
michael@0 | 291 | void debugflag(char * result, unsigned short flag); |
michael@0 | 292 | int condlen(char *); |
michael@0 | 293 | int encodeit(affentry &entry, char * cs); |
michael@0 | 294 | int build_pfxtree(PfxEntry* pfxptr); |
michael@0 | 295 | int build_sfxtree(SfxEntry* sfxptr); |
michael@0 | 296 | int process_pfx_order(); |
michael@0 | 297 | int process_sfx_order(); |
michael@0 | 298 | PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr); |
michael@0 | 299 | SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr); |
michael@0 | 300 | int process_pfx_tree_to_list(); |
michael@0 | 301 | int process_sfx_tree_to_list(); |
michael@0 | 302 | int redundant_condition(char, char * strip, int stripl, |
michael@0 | 303 | const char * cond, int); |
michael@0 | 304 | }; |
michael@0 | 305 | |
michael@0 | 306 | #endif |
michael@0 | 307 |