michael@0: /******* BEGIN LICENSE BLOCK ******* michael@0: * Version: MPL 1.1/GPL 2.0/LGPL 2.1 michael@0: * michael@0: * The contents of this file are subject to the Mozilla Public License Version michael@0: * 1.1 (the "License"); you may not use this file except in compliance with michael@0: * the License. You may obtain a copy of the License at michael@0: * http://www.mozilla.org/MPL/ michael@0: * michael@0: * Software distributed under the License is distributed on an "AS IS" basis, michael@0: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License michael@0: * for the specific language governing rights and limitations under the michael@0: * License. michael@0: * michael@0: * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) michael@0: * and László Németh (Hunspell). Portions created by the Initial Developers michael@0: * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. michael@0: * michael@0: * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) michael@0: * David Einstein (deinst@world.std.com) michael@0: * László Németh (nemethl@gyorsposta.hu) michael@0: * Caolan McNamara (caolanm@redhat.com) michael@0: * Davide Prina michael@0: * Giuseppe Modugno michael@0: * Gianluca Turconi michael@0: * Simon Brouwer michael@0: * Noll Janos michael@0: * Biro Arpad michael@0: * Goldman Eleonora michael@0: * Sarlos Tamas michael@0: * Bencsath Boldizsar michael@0: * Halacsy Peter michael@0: * Dvornik Laszlo michael@0: * Gefferth Andras michael@0: * Nagy Viktor michael@0: * Varga Daniel michael@0: * Chris Halls michael@0: * Rene Engelhard michael@0: * Bram Moolenaar michael@0: * Dafydd Jones michael@0: * Harri Pitkanen michael@0: * Andras Timar michael@0: * Tor Lillqvist michael@0: * michael@0: * Alternatively, the contents of this file may be used under the terms of michael@0: * either the GNU General Public License Version 2 or later (the "GPL"), or michael@0: * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), michael@0: * in which case the provisions of the GPL or the LGPL are applicable instead michael@0: * of those above. If you wish to allow use of your version of this file only michael@0: * under the terms of either the GPL or the LGPL, and not to allow others to michael@0: * use your version of this file under the terms of the MPL, indicate your michael@0: * decision by deleting the provisions above and replace them with the notice michael@0: * and other provisions required by the GPL or the LGPL. If you do not delete michael@0: * the provisions above, a recipient may use your version of this file under michael@0: * the terms of any one of the MPL, the GPL or the LGPL. michael@0: * michael@0: ******* END LICENSE BLOCK *******/ michael@0: michael@0: #ifndef _AFFIXMGR_HXX_ michael@0: #define _AFFIXMGR_HXX_ michael@0: michael@0: #include "hunvisapi.h" michael@0: michael@0: #include michael@0: michael@0: #include "atypes.hxx" michael@0: #include "baseaffix.hxx" michael@0: #include "hashmgr.hxx" michael@0: #include "phonet.hxx" michael@0: #include "replist.hxx" michael@0: michael@0: // check flag duplication michael@0: #define dupSFX (1 << 0) michael@0: #define dupPFX (1 << 1) michael@0: michael@0: class PfxEntry; michael@0: class SfxEntry; michael@0: michael@0: class LIBHUNSPELL_DLL_EXPORTED AffixMgr michael@0: { michael@0: michael@0: PfxEntry * pStart[SETSIZE]; michael@0: SfxEntry * sStart[SETSIZE]; michael@0: PfxEntry * pFlag[SETSIZE]; michael@0: SfxEntry * sFlag[SETSIZE]; michael@0: HashMgr * pHMgr; michael@0: HashMgr ** alldic; michael@0: int * maxdic; michael@0: char * keystring; michael@0: char * trystring; michael@0: char * encoding; michael@0: struct cs_info * csconv; michael@0: int utf8; michael@0: int complexprefixes; michael@0: FLAG compoundflag; michael@0: FLAG compoundbegin; michael@0: FLAG compoundmiddle; michael@0: FLAG compoundend; michael@0: FLAG compoundroot; michael@0: FLAG compoundforbidflag; michael@0: FLAG compoundpermitflag; michael@0: int checkcompounddup; michael@0: int checkcompoundrep; michael@0: int checkcompoundcase; michael@0: int checkcompoundtriple; michael@0: int simplifiedtriple; michael@0: FLAG forbiddenword; michael@0: FLAG nosuggest; michael@0: FLAG nongramsuggest; michael@0: FLAG needaffix; michael@0: int cpdmin; michael@0: int numrep; michael@0: replentry * reptable; michael@0: RepList * iconvtable; michael@0: RepList * oconvtable; michael@0: int nummap; michael@0: mapentry * maptable; michael@0: int numbreak; michael@0: char ** breaktable; michael@0: int numcheckcpd; michael@0: patentry * checkcpdtable; michael@0: int simplifiedcpd; michael@0: int numdefcpd; michael@0: flagentry * defcpdtable; michael@0: phonetable * phone; michael@0: int maxngramsugs; michael@0: int maxcpdsugs; michael@0: int maxdiff; michael@0: int onlymaxdiff; michael@0: int nosplitsugs; michael@0: int sugswithdots; michael@0: int cpdwordmax; michael@0: int cpdmaxsyllable; michael@0: char * cpdvowels; michael@0: w_char * cpdvowels_utf16; michael@0: int cpdvowels_utf16_len; michael@0: char * cpdsyllablenum; michael@0: const char * pfxappnd; // BUG: not stateless michael@0: const char * sfxappnd; // BUG: not stateless michael@0: FLAG sfxflag; // BUG: not stateless michael@0: char * derived; // BUG: not stateless michael@0: SfxEntry * sfx; // BUG: not stateless michael@0: PfxEntry * pfx; // BUG: not stateless michael@0: int checknum; michael@0: char * wordchars; michael@0: unsigned short * wordchars_utf16; michael@0: int wordchars_utf16_len; michael@0: char * ignorechars; michael@0: unsigned short * ignorechars_utf16; michael@0: int ignorechars_utf16_len; michael@0: char * version; michael@0: char * lang; michael@0: int langnum; michael@0: FLAG lemma_present; michael@0: FLAG circumfix; michael@0: FLAG onlyincompound; michael@0: FLAG keepcase; michael@0: FLAG forceucase; michael@0: FLAG warn; michael@0: int forbidwarn; michael@0: FLAG substandard; michael@0: int checksharps; michael@0: int fullstrip; michael@0: michael@0: int havecontclass; // boolean variable michael@0: char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) michael@0: michael@0: public: michael@0: michael@0: AffixMgr(const char * affpath, HashMgr** ptr, int * md, michael@0: const char * key = NULL); michael@0: ~AffixMgr(); michael@0: struct hentry * affix_check(const char * word, int len, michael@0: const unsigned short needflag = (unsigned short) 0, michael@0: char in_compound = IN_CPD_NOT); michael@0: struct hentry * prefix_check(const char * word, int len, michael@0: char in_compound, const FLAG needflag = FLAG_NULL); michael@0: inline int isSubset(const char * s1, const char * s2); michael@0: struct hentry * prefix_check_twosfx(const char * word, int len, michael@0: char in_compound, const FLAG needflag = FLAG_NULL); michael@0: inline int isRevSubset(const char * s1, const char * end_of_s2, int len); michael@0: struct hentry * suffix_check(const char * word, int len, int sfxopts, michael@0: PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, michael@0: const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, michael@0: char in_compound = IN_CPD_NOT); michael@0: struct hentry * suffix_check_twosfx(const char * word, int len, michael@0: int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); michael@0: michael@0: char * affix_check_morph(const char * word, int len, michael@0: const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); michael@0: char * prefix_check_morph(const char * word, int len, michael@0: char in_compound, const FLAG needflag = FLAG_NULL); michael@0: char * suffix_check_morph (const char * word, int len, int sfxopts, michael@0: PfxEntry * ppfx, const FLAG cclass = FLAG_NULL, michael@0: const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); michael@0: michael@0: char * prefix_check_twosfx_morph(const char * word, int len, michael@0: char in_compound, const FLAG needflag = FLAG_NULL); michael@0: char * suffix_check_twosfx_morph(const char * word, int len, michael@0: int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL); michael@0: michael@0: char * morphgen(char * ts, int wl, const unsigned short * ap, michael@0: unsigned short al, char * morph, char * targetmorph, int level); michael@0: michael@0: int expand_rootword(struct guessword * wlst, int maxn, const char * ts, michael@0: int wl, const unsigned short * ap, unsigned short al, char * bad, michael@0: int, char *); michael@0: michael@0: short get_syllable (const char * word, int wlen); michael@0: int cpdrep_check(const char * word, int len); michael@0: int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2, michael@0: const char affixed); michael@0: int defcpd_check(hentry *** words, short wnum, hentry * rv, michael@0: hentry ** rwords, char all); michael@0: int cpdcase_check(const char * word, int len); michael@0: inline int candidate_check(const char * word, int len); michael@0: void setcminmax(int * cmin, int * cmax, const char * word, int len); michael@0: struct hentry * compound_check(const char * word, int len, short wordnum, michael@0: short numsyllable, short maxwordnum, short wnum, hentry ** words, michael@0: char hu_mov_rule, char is_sug, int * info); michael@0: michael@0: int compound_check_morph(const char * word, int len, short wordnum, michael@0: short numsyllable, short maxwordnum, short wnum, hentry ** words, michael@0: char hu_mov_rule, char ** result, char * partresult); michael@0: michael@0: struct hentry * lookup(const char * word); michael@0: int get_numrep() const; michael@0: struct replentry * get_reptable() const; michael@0: RepList * get_iconvtable() const; michael@0: RepList * get_oconvtable() const; michael@0: struct phonetable * get_phonetable() const; michael@0: int get_nummap() const; michael@0: struct mapentry * get_maptable() const; michael@0: int get_numbreak() const; michael@0: char ** get_breaktable() const; michael@0: char * get_encoding(); michael@0: int get_langnum() const; michael@0: char * get_key_string(); michael@0: char * get_try_string() const; michael@0: const char * get_wordchars() const; michael@0: unsigned short * get_wordchars_utf16(int * len) const; michael@0: char * get_ignore() const; michael@0: unsigned short * get_ignore_utf16(int * len) const; michael@0: int get_compound() const; michael@0: FLAG get_compoundflag() const; michael@0: FLAG get_compoundbegin() const; michael@0: FLAG get_forbiddenword() const; michael@0: FLAG get_nosuggest() const; michael@0: FLAG get_nongramsuggest() const; michael@0: FLAG get_needaffix() const; michael@0: FLAG get_onlyincompound() const; michael@0: FLAG get_compoundroot() const; michael@0: FLAG get_lemma_present() const; michael@0: int get_checknum() const; michael@0: const char * get_prefix() const; michael@0: const char * get_suffix() const; michael@0: const char * get_derived() const; michael@0: const char * get_version() const; michael@0: int have_contclass() const; michael@0: int get_utf8() const; michael@0: int get_complexprefixes() const; michael@0: char * get_suffixed(char ) const; michael@0: int get_maxngramsugs() const; michael@0: int get_maxcpdsugs() const; michael@0: int get_maxdiff() const; michael@0: int get_onlymaxdiff() const; michael@0: int get_nosplitsugs() const; michael@0: int get_sugswithdots(void) const; michael@0: FLAG get_keepcase(void) const; michael@0: FLAG get_forceucase(void) const; michael@0: FLAG get_warn(void) const; michael@0: int get_forbidwarn(void) const; michael@0: int get_checksharps(void) const; michael@0: char * encode_flag(unsigned short aflag) const; michael@0: int get_fullstrip() const; michael@0: michael@0: private: michael@0: int parse_file(const char * affpath, const char * key); michael@0: int parse_flag(char * line, unsigned short * out, FileMgr * af); michael@0: int parse_num(char * line, int * out, FileMgr * af); michael@0: int parse_cpdsyllable(char * line, FileMgr * af); michael@0: int parse_reptable(char * line, FileMgr * af); michael@0: int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword); michael@0: int parse_phonetable(char * line, FileMgr * af); michael@0: int parse_maptable(char * line, FileMgr * af); michael@0: int parse_breaktable(char * line, FileMgr * af); michael@0: int parse_checkcpdtable(char * line, FileMgr * af); michael@0: int parse_defcpdtable(char * line, FileMgr * af); michael@0: int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); michael@0: michael@0: void reverse_condition(char *); michael@0: void debugflag(char * result, unsigned short flag); michael@0: int condlen(char *); michael@0: int encodeit(affentry &entry, char * cs); michael@0: int build_pfxtree(PfxEntry* pfxptr); michael@0: int build_sfxtree(SfxEntry* sfxptr); michael@0: int process_pfx_order(); michael@0: int process_sfx_order(); michael@0: PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr); michael@0: SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr); michael@0: int process_pfx_tree_to_list(); michael@0: int process_sfx_tree_to_list(); michael@0: int redundant_condition(char, char * strip, int stripl, michael@0: const char * cond, int); michael@0: }; michael@0: michael@0: #endif michael@0: