1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/spellcheck/hunspell/src/hunspell.hxx Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,229 @@ 1.4 +/******* BEGIN LICENSE BLOCK ******* 1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 1.6 + * 1.7 + * The contents of this file are subject to the Mozilla Public License Version 1.8 + * 1.1 (the "License"); you may not use this file except in compliance with 1.9 + * the License. You may obtain a copy of the License at 1.10 + * http://www.mozilla.org/MPL/ 1.11 + * 1.12 + * Software distributed under the License is distributed on an "AS IS" basis, 1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 1.14 + * for the specific language governing rights and limitations under the 1.15 + * License. 1.16 + * 1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) 1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers 1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. 1.20 + * 1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) 1.22 + * David Einstein (deinst@world.std.com) 1.23 + * László Németh (nemethl@gyorsposta.hu) 1.24 + * Caolan McNamara (caolanm@redhat.com) 1.25 + * Davide Prina 1.26 + * Giuseppe Modugno 1.27 + * Gianluca Turconi 1.28 + * Simon Brouwer 1.29 + * Noll Janos 1.30 + * Biro Arpad 1.31 + * Goldman Eleonora 1.32 + * Sarlos Tamas 1.33 + * Bencsath Boldizsar 1.34 + * Halacsy Peter 1.35 + * Dvornik Laszlo 1.36 + * Gefferth Andras 1.37 + * Nagy Viktor 1.38 + * Varga Daniel 1.39 + * Chris Halls 1.40 + * Rene Engelhard 1.41 + * Bram Moolenaar 1.42 + * Dafydd Jones 1.43 + * Harri Pitkanen 1.44 + * Andras Timar 1.45 + * Tor Lillqvist 1.46 + * 1.47 + * Alternatively, the contents of this file may be used under the terms of 1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or 1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead 1.51 + * of those above. If you wish to allow use of your version of this file only 1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to 1.53 + * use your version of this file under the terms of the MPL, indicate your 1.54 + * decision by deleting the provisions above and replace them with the notice 1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete 1.56 + * the provisions above, a recipient may use your version of this file under 1.57 + * the terms of any one of the MPL, the GPL or the LGPL. 1.58 + * 1.59 + ******* END LICENSE BLOCK *******/ 1.60 + 1.61 +#include "hunvisapi.h" 1.62 + 1.63 +#include "hashmgr.hxx" 1.64 +#include "affixmgr.hxx" 1.65 +#include "suggestmgr.hxx" 1.66 +#include "langnum.hxx" 1.67 + 1.68 +#define SPELL_XML "<?xml?>" 1.69 + 1.70 +#define MAXDIC 20 1.71 +#define MAXSUGGESTION 15 1.72 +#define MAXSHARPS 5 1.73 + 1.74 +#define HUNSPELL_OK (1 << 0) 1.75 +#define HUNSPELL_OK_WARN (1 << 1) 1.76 + 1.77 +#ifndef _MYSPELLMGR_HXX_ 1.78 +#define _MYSPELLMGR_HXX_ 1.79 + 1.80 +class LIBHUNSPELL_DLL_EXPORTED Hunspell 1.81 +{ 1.82 + AffixMgr* pAMgr; 1.83 + HashMgr* pHMgr[MAXDIC]; 1.84 + int maxdic; 1.85 + SuggestMgr* pSMgr; 1.86 + char * affixpath; 1.87 + char * encoding; 1.88 + struct cs_info * csconv; 1.89 + int langnum; 1.90 + int utf8; 1.91 + int complexprefixes; 1.92 + char** wordbreak; 1.93 + 1.94 +public: 1.95 + 1.96 + /* Hunspell(aff, dic) - constructor of Hunspell class 1.97 + * input: path of affix file and dictionary file 1.98 + */ 1.99 + 1.100 + Hunspell(const char * affpath, const char * dpath, const char * key = NULL); 1.101 + ~Hunspell(); 1.102 + 1.103 + /* load extra dictionaries (only dic files) */ 1.104 + int add_dic(const char * dpath, const char * key = NULL); 1.105 + 1.106 + /* spell(word) - spellcheck word 1.107 + * output: 0 = bad word, not 0 = good word 1.108 + * 1.109 + * plus output: 1.110 + * info: information bit array, fields: 1.111 + * SPELL_COMPOUND = a compound word 1.112 + * SPELL_FORBIDDEN = an explicit forbidden word 1.113 + * root: root (stem), when input is a word with affix(es) 1.114 + */ 1.115 + 1.116 + int spell(const char * word, int * info = NULL, char ** root = NULL); 1.117 + 1.118 + /* suggest(suggestions, word) - search suggestions 1.119 + * input: pointer to an array of strings pointer and the (bad) word 1.120 + * array of strings pointer (here *slst) may not be initialized 1.121 + * output: number of suggestions in string array, and suggestions in 1.122 + * a newly allocated array of strings (*slts will be NULL when number 1.123 + * of suggestion equals 0.) 1.124 + */ 1.125 + 1.126 + int suggest(char*** slst, const char * word); 1.127 + 1.128 + /* deallocate suggestion lists */ 1.129 + 1.130 + void free_list(char *** slst, int n); 1.131 + 1.132 + char * get_dic_encoding(); 1.133 + 1.134 + /* morphological functions */ 1.135 + 1.136 + /* analyze(result, word) - morphological analysis of the word */ 1.137 + 1.138 + int analyze(char*** slst, const char * word); 1.139 + 1.140 + /* stem(result, word) - stemmer function */ 1.141 + 1.142 + int stem(char*** slst, const char * word); 1.143 + 1.144 + /* stem(result, analysis, n) - get stems from a morph. analysis 1.145 + * example: 1.146 + * char ** result, result2; 1.147 + * int n1 = analyze(&result, "words"); 1.148 + * int n2 = stem(&result2, result, n1); 1.149 + */ 1.150 + 1.151 + int stem(char*** slst, char ** morph, int n); 1.152 + 1.153 + /* generate(result, word, word2) - morphological generation by example(s) */ 1.154 + 1.155 + int generate(char*** slst, const char * word, const char * word2); 1.156 + 1.157 + /* generate(result, word, desc, n) - generation by morph. description(s) 1.158 + * example: 1.159 + * char ** result; 1.160 + * char * affix = "is:plural"; // description depends from dictionaries, too 1.161 + * int n = generate(&result, "word", &affix, 1); 1.162 + * for (int i = 0; i < n; i++) printf("%s\n", result[i]); 1.163 + */ 1.164 + 1.165 + int generate(char*** slst, const char * word, char ** desc, int n); 1.166 + 1.167 + /* functions for run-time modification of the dictionary */ 1.168 + 1.169 + /* add word to the run-time dictionary */ 1.170 + 1.171 + int add(const char * word); 1.172 + 1.173 + /* add word to the run-time dictionary with affix flags of 1.174 + * the example (a dictionary word): Hunspell will recognize 1.175 + * affixed forms of the new word, too. 1.176 + */ 1.177 + 1.178 + int add_with_affix(const char * word, const char * example); 1.179 + 1.180 + /* remove word from the run-time dictionary */ 1.181 + 1.182 + int remove(const char * word); 1.183 + 1.184 + /* other */ 1.185 + 1.186 + /* get extra word characters definied in affix file for tokenization */ 1.187 + const char * get_wordchars(); 1.188 + unsigned short * get_wordchars_utf16(int * len); 1.189 + 1.190 + struct cs_info * get_csconv(); 1.191 + const char * get_version(); 1.192 + 1.193 + int get_langnum() const; 1.194 + 1.195 + /* experimental and deprecated functions */ 1.196 + 1.197 +#ifdef HUNSPELL_EXPERIMENTAL 1.198 + /* suffix is an affix flag string, similarly in dictionary files */ 1.199 + int put_word_suffix(const char * word, const char * suffix); 1.200 + char * morph_with_correction(const char * word); 1.201 + 1.202 + /* spec. suggestions */ 1.203 + int suggest_auto(char*** slst, const char * word); 1.204 + int suggest_pos_stems(char*** slst, const char * word); 1.205 +#endif 1.206 + 1.207 +private: 1.208 + int cleanword(char *, const char *, int * pcaptype, int * pabbrev); 1.209 + int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev); 1.210 + void mkinitcap(char *); 1.211 + int mkinitcap2(char * p, w_char * u, int nc); 1.212 + int mkinitsmall2(char * p, w_char * u, int nc); 1.213 + void mkallcap(char *); 1.214 + int mkallcap2(char * p, w_char * u, int nc); 1.215 + void mkallsmall(char *); 1.216 + int mkallsmall2(char * p, w_char * u, int nc); 1.217 + struct hentry * checkword(const char *, int * info, char **root); 1.218 + char * sharps_u8_l1(char * dest, char * source); 1.219 + hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root); 1.220 + int is_keepcase(const hentry * rv); 1.221 + int insert_sug(char ***slst, char * word, int ns); 1.222 + void cat_result(char * result, char * st); 1.223 + char * stem_description(const char * desc); 1.224 + int spellml(char*** slst, const char * word); 1.225 + int get_xml_par(char * dest, const char * par, int maxl); 1.226 + const char * get_xml_pos(const char * s, const char * attr); 1.227 + int get_xml_list(char ***slst, char * list, const char * tag); 1.228 + int check_xml_par(const char * q, const char * attr, const char * value); 1.229 + 1.230 +}; 1.231 + 1.232 +#endif