extensions/spellcheck/hunspell/src/csutil.hxx

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /******* BEGIN LICENSE BLOCK *******
     2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
     3  * 
     4  * The contents of this file are subject to the Mozilla Public License Version
     5  * 1.1 (the "License"); you may not use this file except in compliance with
     6  * the License. You may obtain a copy of the License at
     7  * http://www.mozilla.org/MPL/
     8  * 
     9  * Software distributed under the License is distributed on an "AS IS" basis,
    10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
    11  * for the specific language governing rights and limitations under the
    12  * License.
    13  * 
    14  * The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
    15  * and László Németh (Hunspell). Portions created by the Initial Developers
    16  * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
    17  * 
    18  * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
    19  *                 David Einstein (deinst@world.std.com)
    20  *                 László Németh (nemethl@gyorsposta.hu)
    21  *                 Caolan McNamara (caolanm@redhat.com)
    22  *                 Davide Prina
    23  *                 Giuseppe Modugno
    24  *                 Gianluca Turconi
    25  *                 Simon Brouwer
    26  *                 Noll Janos
    27  *                 Biro Arpad
    28  *                 Goldman Eleonora
    29  *                 Sarlos Tamas
    30  *                 Bencsath Boldizsar
    31  *                 Halacsy Peter
    32  *                 Dvornik Laszlo
    33  *                 Gefferth Andras
    34  *                 Nagy Viktor
    35  *                 Varga Daniel
    36  *                 Chris Halls
    37  *                 Rene Engelhard
    38  *                 Bram Moolenaar
    39  *                 Dafydd Jones
    40  *                 Harri Pitkanen
    41  *                 Andras Timar
    42  *                 Tor Lillqvist
    43  * 
    44  * Alternatively, the contents of this file may be used under the terms of
    45  * either the GNU General Public License Version 2 or later (the "GPL"), or
    46  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
    47  * in which case the provisions of the GPL or the LGPL are applicable instead
    48  * of those above. If you wish to allow use of your version of this file only
    49  * under the terms of either the GPL or the LGPL, and not to allow others to
    50  * use your version of this file under the terms of the MPL, indicate your
    51  * decision by deleting the provisions above and replace them with the notice
    52  * and other provisions required by the GPL or the LGPL. If you do not delete
    53  * the provisions above, a recipient may use your version of this file under
    54  * the terms of any one of the MPL, the GPL or the LGPL.
    55  *
    56  ******* END LICENSE BLOCK *******/
    58 #ifndef __CSUTILHXX__
    59 #define __CSUTILHXX__
    61 #include "hunvisapi.h"
    63 // First some base level utility routines
    65 #include <string.h>
    66 #include "w_char.hxx"
    67 #include "htypes.hxx"
    69 #ifdef MOZILLA_CLIENT
    70 #include "nscore.h" // for mozalloc headers
    71 #endif
    73 // casing
    74 #define NOCAP   0
    75 #define INITCAP 1
    76 #define ALLCAP  2
    77 #define HUHCAP  3
    78 #define HUHINITCAP  4
    80 // default encoding and keystring
    81 #define SPELL_ENCODING  "ISO8859-1"
    82 #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" 
    84 // default morphological fields
    85 #define MORPH_STEM        "st:"
    86 #define MORPH_ALLOMORPH   "al:"
    87 #define MORPH_POS         "po:"
    88 #define MORPH_DERI_PFX    "dp:"
    89 #define MORPH_INFL_PFX    "ip:"
    90 #define MORPH_TERM_PFX    "tp:"
    91 #define MORPH_DERI_SFX    "ds:"
    92 #define MORPH_INFL_SFX    "is:"
    93 #define MORPH_TERM_SFX    "ts:"
    94 #define MORPH_SURF_PFX    "sp:"
    95 #define MORPH_FREQ        "fr:"
    96 #define MORPH_PHON        "ph:"
    97 #define MORPH_HYPH        "hy:"
    98 #define MORPH_PART        "pa:"
    99 #define MORPH_FLAG        "fl:"
   100 #define MORPH_HENTRY      "_H:"
   101 #define MORPH_TAG_LEN     strlen(MORPH_STEM)
   103 #define MSEP_FLD ' '
   104 #define MSEP_REC '\n'
   105 #define MSEP_ALT '\v'
   107 // default flags
   108 #define DEFAULTFLAGS   65510
   109 #define FORBIDDENWORD  65510
   110 #define ONLYUPCASEFLAG 65511
   112 // convert UTF-16 characters to UTF-8
   113 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
   115 // convert UTF-8 characters to UTF-16
   116 LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
   118 // sort 2-byte vector
   119 LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
   121 // binary search in 2-byte vector
   122 LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
   124 // remove end of line char(s)
   125 LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
   127 // duplicate string
   128 LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
   130 // strcat for limited length destination string
   131 LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
   133 // duplicate reverse of string
   134 LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
   136 // parse into tokens with char delimiter
   137 LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
   138 // parse into tokens with char delimiter
   139 LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
   141 // replace occurrences of one string with another (NOTE(review): inferred from the name; confirm against the definition)
   142 LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
   144 // append s to the end of every line in text
   145 LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
   147 // tokenize into lines with new line
   148 LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
   150 // tokenize into lines with new line and uniq in place
   151 LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
   152 LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
   154 // change oldchar to newchar in place
   155 LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
   157 // reverse word
   158 LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
   160 // reverse word
   161 LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
   163 // remove duplicates
   164 LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
   166 // free character array list
   167 LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
   169 // character encoding information
   170 struct cs_info {
   171   unsigned char ccase;
   172   unsigned char clower;
   173   unsigned char cupper;
   174 };
   176 LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
   177 LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
   178 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
   179 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
   180 LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
   182 LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
   184 // get language identifiers of language codes
   185 LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
   187 // get characters of the given 8bit encoding with lower- and uppercase forms
   188 LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
   190 // convert null terminated string to all caps using encoding
   191 LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
   193 // convert null terminated string to all lowercase using encoding
   194 LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
   196 // convert null terminated string to have initial capital using encoding
   197 LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
   199 // convert null terminated string to all caps
   200 LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
   202 // convert null terminated string to all lowercase
   203 LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
   205 // convert null terminated string to have initial capital
   206 LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
   208 // convert first nc characters of UTF-16 (w_char) string to lowercase
   209 LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
   211 // convert first nc characters of UTF-16 (w_char) string to uppercase
   212 LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
   214 // get type of capitalization
   215 LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
   217 // get type of capitalization (UTF-8)
   218 LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
   220 // strip all ignored characters in the string
   221 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
   223 // strip all ignored characters in the string
   224 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
   226 LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
   228 LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
   229     int * out_utf16_len, int utf8, int ln);
   231 LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
   232 LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
   234 LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
   236 LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
   238 // conversion function for protected memory
   239 LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
   241 // conversion function for protected memory
   242 LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
   244 // hash entry macros
   245 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
   246 {
   247     char *ret;
   248     if (!h->var)
   249         ret = NULL;
   250     else if (h->var & H_OPT_ALIASM)
   251         ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
   252     else 
   253         ret = HENTRY_WORD(h) + h->blen + 1;
   254     return ret;
   255 }
   257 // NULL-free version for warning-free OOo build
   258 LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
   259 {
   260     const char *ret;
   261     if (!h->var)
   262         ret = "";
   263     else if (h->var & H_OPT_ALIASM)
   264         ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
   265     else
   266         ret = HENTRY_WORD(h) + h->blen + 1;
   267     return ret;
   268 }
   270 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
   271 {
   272     return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
   273 }
   275 #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
   277 #endif

mercurial