1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/spellcheck/hunspell/src/csutil.hxx Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,277 @@ 1.4 +/******* BEGIN LICENSE BLOCK ******* 1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 1.6 + * 1.7 + * The contents of this file are subject to the Mozilla Public License Version 1.8 + * 1.1 (the "License"); you may not use this file except in compliance with 1.9 + * the License. You may obtain a copy of the License at 1.10 + * http://www.mozilla.org/MPL/ 1.11 + * 1.12 + * Software distributed under the License is distributed on an "AS IS" basis, 1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 1.14 + * for the specific language governing rights and limitations under the 1.15 + * License. 1.16 + * 1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) 1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers 1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. 1.20 + * 1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) 1.22 + * David Einstein (deinst@world.std.com) 1.23 + * László Németh (nemethl@gyorsposta.hu) 1.24 + * Caolan McNamara (caolanm@redhat.com) 1.25 + * Davide Prina 1.26 + * Giuseppe Modugno 1.27 + * Gianluca Turconi 1.28 + * Simon Brouwer 1.29 + * Noll Janos 1.30 + * Biro Arpad 1.31 + * Goldman Eleonora 1.32 + * Sarlos Tamas 1.33 + * Bencsath Boldizsar 1.34 + * Halacsy Peter 1.35 + * Dvornik Laszlo 1.36 + * Gefferth Andras 1.37 + * Nagy Viktor 1.38 + * Varga Daniel 1.39 + * Chris Halls 1.40 + * Rene Engelhard 1.41 + * Bram Moolenaar 1.42 + * Dafydd Jones 1.43 + * Harri Pitkanen 1.44 + * Andras Timar 1.45 + * Tor Lillqvist 1.46 + * 1.47 + * Alternatively, the contents of this file may be used under the terms of 1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or 1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead 1.51 + * of those above. If you wish to allow use of your version of this file only 1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to 1.53 + * use your version of this file under the terms of the MPL, indicate your 1.54 + * decision by deleting the provisions above and replace them with the notice 1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete 1.56 + * the provisions above, a recipient may use your version of this file under 1.57 + * the terms of any one of the MPL, the GPL or the LGPL. 1.58 + * 1.59 + ******* END LICENSE BLOCK *******/ 1.60 + 1.61 +#ifndef __CSUTILHXX__ 1.62 +#define __CSUTILHXX__ 1.63 + 1.64 +#include "hunvisapi.h" 1.65 + 1.66 +// First some base level utility routines 1.67 + 1.68 +#include <string.h> 1.69 +#include "w_char.hxx" 1.70 +#include "htypes.hxx" 1.71 + 1.72 +#ifdef MOZILLA_CLIENT 1.73 +#include "nscore.h" // for mozalloc headers 1.74 +#endif 1.75 + 1.76 +// casing 1.77 +#define NOCAP 0 1.78 +#define INITCAP 1 1.79 +#define ALLCAP 2 1.80 +#define HUHCAP 3 1.81 +#define HUHINITCAP 4 1.82 + 1.83 +// default encoding and keystring 1.84 +#define SPELL_ENCODING "ISO8859-1" 1.85 +#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" 1.86 + 1.87 +// default morphological fields 1.88 +#define MORPH_STEM "st:" 1.89 +#define MORPH_ALLOMORPH "al:" 1.90 +#define MORPH_POS "po:" 1.91 +#define MORPH_DERI_PFX "dp:" 1.92 +#define MORPH_INFL_PFX "ip:" 1.93 +#define MORPH_TERM_PFX "tp:" 1.94 +#define MORPH_DERI_SFX "ds:" 1.95 +#define MORPH_INFL_SFX "is:" 1.96 +#define MORPH_TERM_SFX "ts:" 1.97 +#define MORPH_SURF_PFX "sp:" 1.98 +#define MORPH_FREQ "fr:" 1.99 +#define MORPH_PHON "ph:" 1.100 +#define MORPH_HYPH "hy:" 1.101 +#define MORPH_PART "pa:" 1.102 +#define MORPH_FLAG "fl:" 1.103 +#define MORPH_HENTRY "_H:" 1.104 +#define MORPH_TAG_LEN strlen(MORPH_STEM) 1.105 + 1.106 +#define MSEP_FLD ' ' 1.107 +#define MSEP_REC '\n' 1.108 +#define MSEP_ALT '\v' 1.109 + 1.110 +// default flags 1.111 +#define DEFAULTFLAGS 65510 1.112 +#define FORBIDDENWORD 65510 1.113 +#define ONLYUPCASEFLAG 65511 1.114 + 1.115 +// convert UTF-16 characters to UTF-8 1.116 +LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen); 1.117 + 1.118 +// convert UTF-8 characters to UTF-16 1.119 +LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src); 1.120 + 1.121 +// sort 2-byte vector 1.122 +LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end); 1.123 + 1.124 +// binary search in 2-byte vector 1.125 +LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right); 1.126 + 1.127 +// remove end of line char(s) 1.128 +LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s); 1.129 + 1.130 +// duplicate string 1.131 +LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s); 1.132 + 1.133 +// strcat for limited length destination string 1.134 +LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max); 1.135 + 1.136 +// duplicate reverse of string 1.137 +LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s); 1.138 + 1.139 +// parse into tokens with char delimiter 1.140 +LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim); 1.141 +// parse into tokens with char delimiter 1.142 +LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim); 1.143 + 1.144 +// parse into tokens with char delimiter 1.145 +LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *); 1.146 + 1.147 +// append s to ends of every lines in text 1.148 +LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s); 1.149 + 1.150 +// tokenize into lines with new line 1.151 +LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar); 1.152 + 1.153 +// tokenize into lines with new line and uniq in place 1.154 +LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar); 1.155 +LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar); 1.156 + 1.157 +// change oldchar to newchar in place 1.158 +LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc); 1.159 + 1.160 +// reverse word 1.161 +LIBHUNSPELL_DLL_EXPORTED int reverseword(char *); 1.162 + 1.163 +// reverse word 1.164 +LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *); 1.165 + 1.166 +// remove duplicates 1.167 +LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n); 1.168 + 1.169 +// free character array list 1.170 +LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n); 1.171 + 1.172 +// character encoding information 1.173 +struct cs_info { 1.174 + unsigned char ccase; 1.175 + unsigned char clower; 1.176 + unsigned char cupper; 1.177 +}; 1.178 + 1.179 +LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl(); 1.180 +LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl(); 1.181 +LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum); 1.182 +LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum); 1.183 +LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c); 1.184 + 1.185 +LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es); 1.186 + 1.187 +// get language identifiers of language codes 1.188 +LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang); 1.189 + 1.190 +// get characters of the given 8bit encoding with lower- and uppercase forms 1.191 +LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc); 1.192 + 1.193 +// convert null terminated string to all caps using encoding 1.194 +LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding); 1.195 + 1.196 +// convert null terminated string to all little using encoding 1.197 +LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding); 1.198 + 1.199 +// convert null terminated string to have initial capital using encoding 1.200 +LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding); 1.201 + 1.202 +// convert null terminated string to all caps 1.203 +LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv); 1.204 + 1.205 +// convert null terminated string to all little 1.206 +LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv); 1.207 + 1.208 +// convert null terminated string to have initial capital 1.209 +LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv); 1.210 + 1.211 +// convert first nc characters of UTF-8 string to little 1.212 +LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum); 1.213 + 1.214 +// convert first nc characters of UTF-8 string to capital 1.215 +LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum); 1.216 + 1.217 +// get type of capitalization 1.218 +LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *); 1.219 + 1.220 +// get type of capitalization (UTF-8) 1.221 +LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum); 1.222 + 1.223 +// strip all ignored characters in the string 1.224 +LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len); 1.225 + 1.226 +// strip all ignored characters in the string 1.227 +LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars); 1.228 + 1.229 +LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln); 1.230 + 1.231 +LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16, 1.232 + int * out_utf16_len, int utf8, int ln); 1.233 + 1.234 +LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r); 1.235 +LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var); 1.236 + 1.237 +LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t); 1.238 + 1.239 +LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph); 1.240 + 1.241 +// conversion function for protected memory 1.242 +LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source); 1.243 + 1.244 +// conversion function for protected memory 1.245 +LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s); 1.246 + 1.247 +// hash entry macros 1.248 +LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h) 1.249 +{ 1.250 + char *ret; 1.251 + if (!h->var) 1.252 + ret = NULL; 1.253 + else if (h->var & H_OPT_ALIASM) 1.254 + ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); 1.255 + else 1.256 + ret = HENTRY_WORD(h) + h->blen + 1; 1.257 + return ret; 1.258 +} 1.259 + 1.260 +// NULL-free version for warning-free OOo build 1.261 +LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h) 1.262 +{ 1.263 + const char *ret; 1.264 + if (!h->var) 1.265 + ret = ""; 1.266 + else if (h->var & H_OPT_ALIASM) 1.267 + ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); 1.268 + else 1.269 + ret = HENTRY_WORD(h) + h->blen + 1; 1.270 + return ret; 1.271 +} 1.272 + 1.273 +LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p) 1.274 +{ 1.275 + return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); 1.276 +} 1.277 + 1.278 +#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h)) 1.279 + 1.280 +#endif