extensions/spellcheck/hunspell/src/csutil.hxx

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/spellcheck/hunspell/src/csutil.hxx	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,277 @@
     1.4 +/******* BEGIN LICENSE BLOCK *******
     1.5 + * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     1.6 + * 
     1.7 + * The contents of this file are subject to the Mozilla Public License Version
     1.8 + * 1.1 (the "License"); you may not use this file except in compliance with
     1.9 + * the License. You may obtain a copy of the License at
    1.10 + * http://www.mozilla.org/MPL/
    1.11 + * 
    1.12 + * Software distributed under the License is distributed on an "AS IS" basis,
    1.13 + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    1.14 + * for the specific language governing rights and limitations under the
    1.15 + * License.
    1.16 + * 
    1.17 + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    1.18 + * and László Németh (Hunspell). Portions created by the Initial Developers
    1.19 + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    1.20 + * 
    1.21 + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    1.22 + *                 David Einstein (deinst@world.std.com)
    1.23 + *                 László Németh (nemethl@gyorsposta.hu)
    1.24 + *                 Caolan McNamara (caolanm@redhat.com)
    1.25 + *                 Davide Prina
    1.26 + *                 Giuseppe Modugno
    1.27 + *                 Gianluca Turconi
    1.28 + *                 Simon Brouwer
    1.29 + *                 Noll Janos
    1.30 + *                 Biro Arpad
    1.31 + *                 Goldman Eleonora
    1.32 + *                 Sarlos Tamas
    1.33 + *                 Bencsath Boldizsar
    1.34 + *                 Halacsy Peter
    1.35 + *                 Dvornik Laszlo
    1.36 + *                 Gefferth Andras
    1.37 + *                 Nagy Viktor
    1.38 + *                 Varga Daniel
    1.39 + *                 Chris Halls
    1.40 + *                 Rene Engelhard
    1.41 + *                 Bram Moolenaar
    1.42 + *                 Dafydd Jones
    1.43 + *                 Harri Pitkanen
    1.44 + *                 Andras Timar
    1.45 + *                 Tor Lillqvist
    1.46 + * 
    1.47 + * Alternatively, the contents of this file may be used under the terms of
    1.48 + * either the GNU General Public License Version 2 or later (the "GPL"), or
    1.49 + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    1.50 + * in which case the provisions of the GPL or the LGPL are applicable instead
    1.51 + * of those above. If you wish to allow use of your version of this file only
    1.52 + * under the terms of either the GPL or the LGPL, and not to allow others to
    1.53 + * use your version of this file under the terms of the MPL, indicate your
    1.54 + * decision by deleting the provisions above and replace them with the notice
    1.55 + * and other provisions required by the GPL or the LGPL. If you do not delete
    1.56 + * the provisions above, a recipient may use your version of this file under
    1.57 + * the terms of any one of the MPL, the GPL or the LGPL.
    1.58 + *
    1.59 + ******* END LICENSE BLOCK *******/
    1.60 +
    1.61 +#ifndef __CSUTILHXX__
    1.62 +#define __CSUTILHXX__
    1.63 +
    1.64 +#include "hunvisapi.h"
    1.65 +
    1.66 +// First some base level utility routines
    1.67 +
    1.68 +#include <string.h>
    1.69 +#include "w_char.hxx"
    1.70 +#include "htypes.hxx"
    1.71 +
    1.72 +#ifdef MOZILLA_CLIENT
    1.73 +#include "nscore.h" // for mozalloc headers
    1.74 +#endif
    1.75 +
    1.76 +// casing: capitalization-type codes (presumably the values returned by get_captype()/get_captype_utf8() declared below -- confirm)
    1.77 +#define NOCAP   0
    1.78 +#define INITCAP 1
    1.79 +#define ALLCAP  2
    1.80 +#define HUHCAP  3
    1.81 +#define HUHINITCAP  4
    1.82 +
    1.83 +// default encoding name and keystring (the keystring lists the three QWERTY letter rows, separated by '|')
    1.84 +#define SPELL_ENCODING  "ISO8859-1"
    1.85 +#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" 
    1.86 +
    1.87 +// default morphological fields: every tag below is two characters followed by ':'
    1.88 +#define MORPH_STEM        "st:"
    1.89 +#define MORPH_ALLOMORPH   "al:"
    1.90 +#define MORPH_POS         "po:"
    1.91 +#define MORPH_DERI_PFX    "dp:"
    1.92 +#define MORPH_INFL_PFX    "ip:"
    1.93 +#define MORPH_TERM_PFX    "tp:"
    1.94 +#define MORPH_DERI_SFX    "ds:"
    1.95 +#define MORPH_INFL_SFX    "is:"
    1.96 +#define MORPH_TERM_SFX    "ts:"
    1.97 +#define MORPH_SURF_PFX    "sp:"
    1.98 +#define MORPH_FREQ        "fr:"
    1.99 +#define MORPH_PHON        "ph:"
   1.100 +#define MORPH_HYPH        "hy:"
   1.101 +#define MORPH_PART        "pa:"
   1.102 +#define MORPH_FLAG        "fl:"
   1.103 +#define MORPH_HENTRY      "_H:"
   1.104 +#define MORPH_TAG_LEN     strlen(MORPH_STEM)  // length of one tag (3 for every tag above); expands to a strlen() call at each use
   1.105 +
   1.106 +#define MSEP_FLD ' '  // NOTE(review): presumably separates fields inside one morphological record -- confirm
   1.107 +#define MSEP_REC '\n'  // NOTE(review): presumably separates records -- confirm
   1.108 +#define MSEP_ALT '\v'  // NOTE(review): presumably separates alternatives -- confirm
   1.109 +
   1.110 +// default flags (values near the top of the unsigned 16-bit range)
   1.111 +#define DEFAULTFLAGS   65510
   1.112 +#define FORBIDDENWORD  65510  // NOTE: same numeric value as DEFAULTFLAGS
   1.113 +#define ONLYUPCASEFLAG 65511
   1.114 +
   1.115 +// convert srclen UTF-16 characters from src to UTF-8 in dest (size is presumably the capacity of dest -- confirm)
   1.116 +LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
   1.117 +
   1.118 +// convert UTF-8 characters in src to UTF-16 in dest (size is presumably the capacity of dest, the result presumably a character count -- confirm)
   1.119 +LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
   1.120 +
   1.121 +// sort 2-byte (unsigned short) vector of flags over the index range begin..end (whether end is inclusive is not visible in this header)
   1.122 +LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
   1.123 +
   1.124 +// binary search for flag in 2-byte vector (presumably requires flags to be sorted, e.g. by flag_qsort -- confirm)
   1.125 +LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
   1.126 +
   1.127 +// remove end-of-line char(s) from s
   1.128 +LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
   1.129 +
   1.130 +// duplicate string (the copy is presumably heap-allocated and owned by the caller -- confirm)
   1.131 +LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
   1.132 +
   1.133 +// strcat for a limited-length destination string (max is presumably the capacity of dest -- confirm)
   1.134 +LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
   1.135 +
   1.136 +// duplicate string with its characters in reverse order
   1.137 +LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
   1.138 +
   1.139 +// parse into tokens with char delimiter (sptr is presumably the in/out scan position -- confirm)
   1.140 +LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
   1.141 +// variant of mystrsep with the same signature; how it differs is not visible in this header
   1.142 +LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
   1.143 +
   1.144 +// string replacement helper -- NOTE(review): presumably replaces the second argument with the third inside the first; the previous comment here was a copy of mystrsep's
   1.145 +LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
   1.146 +
   1.147 +// append s to the end of every line in lines
   1.148 +LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
   1.149 +
   1.150 +// tokenize text into lines at breakchar; the array is returned through lines (the int result is presumably the line count -- confirm)
   1.151 +LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
   1.152 +
   1.153 +// tokenize into lines at breakchar and remove duplicate lines in place
   1.154 +LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
   1.155 +LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);  // NOTE(review): variant taking char **; exact semantics not visible in this header
   1.156 +
   1.157 +// change every oldc to newc in text, in place
   1.158 +LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
   1.159 +
   1.160 +// reverse word (the argument is non-const, so presumably reversed in place -- confirm)
   1.161 +LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
   1.162 +
   1.163 +// reverse word (UTF-8 variant, judging by the name)
   1.164 +LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
   1.165 +
   1.166 +// remove duplicates from a list of n strings (the int result is presumably the new element count -- confirm)
   1.167 +LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
   1.168 +
   1.169 +// free character array list of n entries
   1.170 +LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
   1.171 +
   1.172 +// character encoding information: case data for one character (presumably one entry per 8-bit character code -- confirm)
   1.173 +struct cs_info {
   1.174 +  unsigned char ccase;  // NOTE(review): presumably a case indicator for the character -- confirm
   1.175 +  unsigned char clower;  // presumably the lowercase form -- confirm
   1.176 +  unsigned char cupper;  // presumably the uppercase form -- confirm
   1.177 +};
   1.178 +
   1.179 +LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();  // NOTE(review): presumably sets up the table used by the unicode* helpers below; pair with free_utf_tbl -- confirm
   1.180 +LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
   1.181 +LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);  // langnum: presumably a value from get_lang_num -- confirm
   1.182 +LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
   1.183 +LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
   1.184 +
   1.185 +LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);  // es: presumably an encoding name such as SPELL_ENCODING -- confirm
   1.186 +
   1.187 +// get the numeric language identifier for a language code
   1.188 +LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
   1.189 +
   1.190 +// get characters of the given 8bit encoding with lower- and uppercase forms
   1.191 +LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
   1.192 +
   1.193 +// convert null terminated string p to all caps using encoding (result presumably written to d -- confirm)
   1.194 +LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
   1.195 +
   1.196 +// convert null terminated string p to all lowercase using encoding (result presumably written to d -- confirm)
   1.197 +LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
   1.198 +
   1.199 +// convert null terminated string p to have initial capital using encoding (result presumably written to d -- confirm)
   1.200 +LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
   1.201 +
   1.202 +// convert null terminated string to all caps, using the csconv table
   1.203 +LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
   1.204 +
   1.205 +// convert null terminated string to all lowercase, using the csconv table
   1.206 +LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
   1.207 +
   1.208 +// convert null terminated string to have initial capital, using the csconv table
   1.209 +LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
   1.210 +
   1.211 +// convert first nc characters of a w_char string to lowercase (NOTE: takes w_char, i.e. UTF-16 as used by u16_u8/u8_u16, not UTF-8 bytes)
   1.212 +LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
   1.213 +
   1.214 +// convert first nc characters of a w_char string to uppercase (NOTE: takes w_char, i.e. UTF-16 as used by u16_u8/u8_u16, not UTF-8 bytes)
   1.215 +LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
   1.216 +
   1.217 +// get type of capitalization of q (nl is presumably its length; result presumably one of NOCAP..HUHINITCAP -- confirm)
   1.218 +LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
   1.219 +
   1.220 +// get type of capitalization (Unicode variant; despite the name it takes w_char characters, not UTF-8 bytes)
   1.221 +LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
   1.222 +
   1.223 +// strip all ignored characters in the string; ignored_chars holds ignored_len 2-byte character codes
   1.224 +LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
   1.225 +
   1.226 +// strip all ignored characters in the string; ignored_chars is a char string of the characters to drop
   1.227 +LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
   1.228 +
   1.229 +LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);  // NOTE(review): ln is presumably a line number for diagnostics -- confirm
   1.230 +
   1.231 +LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
   1.232 +    int * out_utf16_len, int utf8, int ln);
   1.233 +
   1.234 +LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
   1.235 +LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);  // var: presumably one of the MORPH_* tags -- confirm
   1.236 +
   1.237 +LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
   1.238 +
   1.239 +LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
   1.240 +
   1.241 +// conversion function for protected memory: stores the pointer source at dest (presumably read back with get_stored_pointer -- confirm)
   1.242 +LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
   1.243 +
   1.244 +// conversion function for protected memory: returns the pointer stored at s
   1.245 +LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
   1.246 +
   1.247 +// hash entry accessors (inline functions, not macros). HENTRY_DATA returns the data attached to entry h, or NULL when h->var is 0; h itself must not be NULL
   1.248 +LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
   1.249 +{
   1.250 +    char *ret;
   1.251 +    if (!h->var)  // no option bits set: nothing is attached to this entry
   1.252 +        ret = NULL;
   1.253 +    else if (h->var & H_OPT_ALIASM)  // aliased case: the slot after the word holds a stored pointer, not the data itself
   1.254 +        ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
   1.255 +    else 
   1.256 +        ret = HENTRY_WORD(h) + h->blen + 1;  // data lies blen + 1 bytes past the word start (presumably just after its terminator -- confirm)
   1.257 +    return ret;
   1.258 +}
   1.259 +
   1.260 +// NULL-free version for warning-free OOo build: same lookup as HENTRY_DATA, but const and returning "" instead of NULL when h->var is 0
   1.261 +LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
   1.262 +{
   1.263 +    const char *ret;
   1.264 +    if (!h->var)  // no option bits set: report an empty string rather than NULL
   1.265 +        ret = "";
   1.266 +    else if (h->var & H_OPT_ALIASM)  // aliased case: fetch the pointer stored after the word
   1.267 +        ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
   1.268 +    else
   1.269 +        ret = HENTRY_WORD(h) + h->blen + 1;  // data lies blen + 1 bytes past the word start
   1.270 +    return ret;
   1.271 +}
   1.272 +
   1.273 +LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)  // first occurrence of p in the entry's data, or NULL
   1.274 +{
   1.275 +    return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);  // HENTRY_DATA is called twice; NULL when the entry has no data or strstr finds no match
   1.276 +}
   1.277 +
   1.278 +#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))  // true when both the l and h members match; each argument is expanded twice, so avoid side effects
   1.279 +
   1.280 +#endif

mercurial