extensions/spellcheck/hunspell/src/csutil.hxx

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /******* BEGIN LICENSE BLOCK *******
michael@0 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
michael@0 3 *
michael@0 4 * The contents of this file are subject to the Mozilla Public License Version
michael@0 5 * 1.1 (the "License"); you may not use this file except in compliance with
michael@0 6 * the License. You may obtain a copy of the License at
michael@0 7 * http://www.mozilla.org/MPL/
michael@0 8 *
michael@0 9 * Software distributed under the License is distributed on an "AS IS" basis,
michael@0 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
michael@0 11 * for the specific language governing rights and limitations under the
michael@0 12 * License.
michael@0 13 *
michael@0 14 * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
michael@0 15 * and László Németh (Hunspell). Portions created by the Initial Developers
michael@0 16 * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
michael@0 17 *
michael@0 18 * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
michael@0 19 * David Einstein (deinst@world.std.com)
michael@0 20 * László Németh (nemethl@gyorsposta.hu)
michael@0 21 * Caolan McNamara (caolanm@redhat.com)
michael@0 22 * Davide Prina
michael@0 23 * Giuseppe Modugno
michael@0 24 * Gianluca Turconi
michael@0 25 * Simon Brouwer
michael@0 26 * Noll Janos
michael@0 27 * Biro Arpad
michael@0 28 * Goldman Eleonora
michael@0 29 * Sarlos Tamas
michael@0 30 * Bencsath Boldizsar
michael@0 31 * Halacsy Peter
michael@0 32 * Dvornik Laszlo
michael@0 33 * Gefferth Andras
michael@0 34 * Nagy Viktor
michael@0 35 * Varga Daniel
michael@0 36 * Chris Halls
michael@0 37 * Rene Engelhard
michael@0 38 * Bram Moolenaar
michael@0 39 * Dafydd Jones
michael@0 40 * Harri Pitkanen
michael@0 41 * Andras Timar
michael@0 42 * Tor Lillqvist
michael@0 43 *
michael@0 44 * Alternatively, the contents of this file may be used under the terms of
michael@0 45 * either the GNU General Public License Version 2 or later (the "GPL"), or
michael@0 46 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
michael@0 47 * in which case the provisions of the GPL or the LGPL are applicable instead
michael@0 48 * of those above. If you wish to allow use of your version of this file only
michael@0 49 * under the terms of either the GPL or the LGPL, and not to allow others to
michael@0 50 * use your version of this file under the terms of the MPL, indicate your
michael@0 51 * decision by deleting the provisions above and replace them with the notice
michael@0 52 * and other provisions required by the GPL or the LGPL. If you do not delete
michael@0 53 * the provisions above, a recipient may use your version of this file under
michael@0 54 * the terms of any one of the MPL, the GPL or the LGPL.
michael@0 55 *
michael@0 56 ******* END LICENSE BLOCK *******/
michael@0 57
michael@0 58 #ifndef __CSUTILHXX__
michael@0 59 #define __CSUTILHXX__
michael@0 60
michael@0 61 #include "hunvisapi.h"
michael@0 62
michael@0 63 // First some base level utility routines
michael@0 64
michael@0 65 #include <string.h>
michael@0 66 #include "w_char.hxx"
michael@0 67 #include "htypes.hxx"
michael@0 68
michael@0 69 #ifdef MOZILLA_CLIENT
michael@0 70 #include "nscore.h" // for mozalloc headers
michael@0 71 #endif
michael@0 72
michael@0 73 // casing: capitalization type codes (presumably the values returned by get_captype() and get_captype_utf8() -- confirm in csutil.cxx)
michael@0 74 #define NOCAP 0
michael@0 75 #define INITCAP 1
michael@0 76 #define ALLCAP 2
michael@0 77 #define HUHCAP 3
michael@0 78 #define HUHINITCAP 4
michael@0 79
michael@0 80 // default encoding and keystring
michael@0 81 #define SPELL_ENCODING "ISO8859-1"
michael@0 82 #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
michael@0 83
michael@0 84 // default morphological fields: each tag is two characters followed by ':'
michael@0 85 #define MORPH_STEM "st:"
michael@0 86 #define MORPH_ALLOMORPH "al:"
michael@0 87 #define MORPH_POS "po:"
michael@0 88 #define MORPH_DERI_PFX "dp:"
michael@0 89 #define MORPH_INFL_PFX "ip:"
michael@0 90 #define MORPH_TERM_PFX "tp:"
michael@0 91 #define MORPH_DERI_SFX "ds:"
michael@0 92 #define MORPH_INFL_SFX "is:"
michael@0 93 #define MORPH_TERM_SFX "ts:"
michael@0 94 #define MORPH_SURF_PFX "sp:"
michael@0 95 #define MORPH_FREQ "fr:"
michael@0 96 #define MORPH_PHON "ph:"
michael@0 97 #define MORPH_HYPH "hy:"
michael@0 98 #define MORPH_PART "pa:"
michael@0 99 #define MORPH_FLAG "fl:"
michael@0 100 #define MORPH_HENTRY "_H:"
michael@0 101 #define MORPH_TAG_LEN strlen(MORPH_STEM) // length of MORPH_STEM; every tag above has the same length, so it applies to all of them. Expands to a strlen() call at each use.
michael@0 102
michael@0 103 #define MSEP_FLD ' '
michael@0 104 #define MSEP_REC '\n'
michael@0 105 #define MSEP_ALT '\v'
michael@0 106
michael@0 107 // default flags (note: DEFAULTFLAGS and FORBIDDENWORD are defined to the same value)
michael@0 108 #define DEFAULTFLAGS 65510
michael@0 109 #define FORBIDDENWORD 65510
michael@0 110 #define ONLYUPCASEFLAG 65511
michael@0 111
michael@0 112 // convert UTF-16 characters to UTF-8
michael@0 113 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
michael@0 114
michael@0 115 // convert UTF-8 characters to UTF-16
michael@0 116 LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
michael@0 117
michael@0 118 // sort 2-byte vector
michael@0 119 LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
michael@0 120
michael@0 121 // binary search in 2-byte vector
michael@0 122 LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
michael@0 123
michael@0 124 // remove end of line char(s)
michael@0 125 LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
michael@0 126
michael@0 127 // duplicate string
michael@0 128 LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
michael@0 129
michael@0 130 // strcat for limited length destination string
michael@0 131 LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
michael@0 132
michael@0 133 // duplicate reverse of string
michael@0 134 LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
michael@0 135
michael@0 136 // parse into tokens with char delimiter
michael@0 137 LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
michael@0 138 // parse into tokens with char delimiter
michael@0 139 LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
michael@0 140
michael@0 141 // replace occurrences of a pattern string with a replacement string (see csutil.cxx for the exact semantics)
michael@0 142 LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
michael@0 143
michael@0 144 // append s to the end of every line in text
michael@0 145 LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
michael@0 146
michael@0 147 // tokenize text into lines, splitting at breakchar
michael@0 148 LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
michael@0 149
michael@0 150 // tokenize text into lines at breakchar and remove duplicate lines (uniq) in place
michael@0 151 LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
michael@0 152 LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
michael@0 153
michael@0 154 // change oldchar to newchar in place
michael@0 155 LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
michael@0 156
michael@0 157 // reverse word
michael@0 158 LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
michael@0 159
michael@0 160 // reverse word (UTF-8 variant)
michael@0 161 LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
michael@0 162
michael@0 163 // remove duplicates
michael@0 164 LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
michael@0 165
michael@0 166 // free character array list
michael@0 167 LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
michael@0 168
michael@0 169 // character encoding information: one entry of per-character case data (presumably one entry per byte value of an 8-bit encoding, see get_current_cs -- confirm)
michael@0 170 struct cs_info {
michael@0 171 unsigned char ccase; // case indicator (presumably nonzero for uppercase characters -- confirm in csutil.cxx)
michael@0 172 unsigned char clower; // presumably the lowercase form of the character
michael@0 173 unsigned char cupper; // presumably the uppercase form of the character
michael@0 174 };
michael@0 175
michael@0 176 LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
michael@0 177 LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
michael@0 178 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
michael@0 179 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
michael@0 180 LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
michael@0 181
michael@0 182 LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
michael@0 183
michael@0 184 // get language identifiers of language codes
michael@0 185 LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
michael@0 186
michael@0 187 // get characters of the given 8bit encoding with lower- and uppercase forms
michael@0 188 LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
michael@0 189
michael@0 190 // convert null terminated string to all caps using encoding
michael@0 191 LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
michael@0 192
michael@0 193 // convert null terminated string to all lowercase using encoding
michael@0 194 LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
michael@0 195
michael@0 196 // convert null terminated string to have initial capital using encoding
michael@0 197 LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
michael@0 198
michael@0 199 // convert null terminated string to all caps
michael@0 200 LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
michael@0 201
michael@0 202 // convert null terminated string to all lowercase
michael@0 203 LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
michael@0 204
michael@0 205 // convert null terminated string to have initial capital
michael@0 206 LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
michael@0 207
michael@0 208 // convert first nc characters of UTF-16 (w_char) string to lowercase
michael@0 209 LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
michael@0 210
michael@0 211 // convert first nc characters of UTF-16 (w_char) string to uppercase
michael@0 212 LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
michael@0 213
michael@0 214 // get type of capitalization
michael@0 215 LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
michael@0 216
michael@0 217 // get type of capitalization (UTF-8 mode; the word is passed as w_char characters)
michael@0 218 LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
michael@0 219
michael@0 220 // strip all ignored characters in the string
michael@0 221 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
michael@0 222
michael@0 223 // strip all ignored characters in the string
michael@0 224 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
michael@0 225
michael@0 226 LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
michael@0 227
michael@0 228 LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
michael@0 229 int * out_utf16_len, int utf8, int ln);
michael@0 230
michael@0 231 LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
michael@0 232 LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
michael@0 233
michael@0 234 LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
michael@0 235
michael@0 236 LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
michael@0 237
michael@0 238 // conversion function for protected memory
michael@0 239 LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
michael@0 240
michael@0 241 // conversion function for protected memory
michael@0 242 LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
michael@0 243
michael@0 244 // hash entry accessors. HENTRY_DATA returns the data stored after the entry's word, or NULL when h->var is zero; h itself is dereferenced unconditionally and must not be NULL.
michael@0 245 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
michael@0 246 {
michael@0 247 char *ret;
michael@0 248 if (!h->var) // var is zero: the entry carries no data
michael@0 249 ret = NULL;
michael@0 250 else if (h->var & H_OPT_ALIASM) // H_OPT_ALIASM bit set: the bytes after the word hold a stored pointer, decoded by get_stored_pointer()
michael@0 251 ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
michael@0 252 else // otherwise the data itself starts blen + 1 bytes past the start of the word (presumably right after the word's terminating NUL)
michael@0 253 ret = HENTRY_WORD(h) + h->blen + 1;
michael@0 254 return ret;
michael@0 255 }
michael@0 256
michael@0 257 // NULL-free version for warning-free OOo build: same lookup as HENTRY_DATA, but const-qualified and returning an empty string instead of NULL when h->var is zero
michael@0 258 LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
michael@0 259 {
michael@0 260 const char *ret;
michael@0 261 if (!h->var) // var is zero: the entry carries no data
michael@0 262 ret = ""; // never NULL, so callers can use the result without a check
michael@0 263 else if (h->var & H_OPT_ALIASM) // H_OPT_ALIASM bit set: the bytes after the word hold a stored pointer, decoded by get_stored_pointer()
michael@0 264 ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
michael@0 265 else // otherwise the data itself starts blen + 1 bytes past the start of the word
michael@0 266 ret = HENTRY_WORD(h) + h->blen + 1;
michael@0 267 return ret;
michael@0 268 }
michael@0 269 // find the first occurrence of p in the entry's data using strstr(); returns NULL when the entry has no data or p is not found
michael@0 270 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
michael@0 271 {
michael@0 272 return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); // note: HENTRY_DATA(h) is evaluated twice when the entry has data
michael@0 273 }
michael@0 274 // true when two w_char values have equal l and h members; this is a macro, so each argument may be evaluated twice -- avoid arguments with side effects
michael@0 275 #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
michael@0 276
michael@0 277 #endif

mercurial