intl/icu/source/i18n/csrsbcs.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/csrsbcs.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,287 @@
     1.4 +/*
     1.5 + **********************************************************************
     1.6 + *   Copyright (C) 2005-2013, International Business Machines
     1.7 + *   Corporation and others.  All Rights Reserved.
     1.8 + **********************************************************************
     1.9 + */
    1.10 +
    1.11 +#ifndef __CSRSBCS_H
    1.12 +#define __CSRSBCS_H
    1.13 +
    1.14 +#include "unicode/uobject.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_CONVERSION
    1.17 +
    1.18 +#include "csrecog.h"
    1.19 +
    1.20 +U_NAMESPACE_BEGIN
    1.21 +
    1.22 +class NGramParser : public UMemory /* N-gram parser over an InputText; built from an n-gram list and a char map. */
    1.23 +{ /* Polymorphic: nextByte()/parseCharacters() are virtual and NGramParser_IBM420 derives from this class. */
    1.24 +private:
    1.25 +    int32_t ngram; /* NOTE(review): presumably the n-gram currently being assembled - confirm in the .cpp */
    1.26 +    const int32_t *ngramList; /* n-gram table; the constructor receives it as theNgramList */
    1.27 +
    1.28 +    int32_t ngramCount; /* NOTE(review): presumably the number of n-grams seen - confirm in the .cpp */
    1.29 +    int32_t hitCount; /* NOTE(review): presumably the number of n-grams found in ngramList - confirm */
    1.30 +
    1.31 +protected: /* visible to subclasses such as NGramParser_IBM420 */
    1.32 +    int32_t byteIndex;
    1.33 +    const uint8_t *charMap; /* byte map; the constructor receives it as theCharMap */
    1.34 +
    1.35 +    void addByte(int32_t b);
    1.36 +
    1.37 +public:
    1.38 +    NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
    1.39 +    virtual ~NGramParser() {} /* virtual: keeps destruction through an NGramParser pointer well-defined for derived parsers */
    1.40 +private:
    1.41 +    /*
    1.42 +    * Binary search for value in table, which must have exactly 64 entries.
    1.43 +    */
    1.44 +    int32_t search(const int32_t *table, int32_t value);
    1.45 +
    1.46 +    void lookup(int32_t thisNgram);
    1.47 +
    1.48 +    virtual int32_t nextByte(InputText *det); /* private virtual hook; NGramParser_IBM420 declares an override */
    1.49 +    virtual void parseCharacters(InputText *det); /* private virtual hook; NGramParser_IBM420 declares an override */
    1.50 +
    1.51 +public:
    1.52 +    int32_t parse(InputText *det); /* public entry point; NOTE(review): meaning of the returned value is defined in the .cpp - confirm */
    1.53 +
    1.54 +};
    1.55 +
    1.56 +class NGramParser_IBM420 : public NGramParser /* NGramParser subclass that replaces the two private virtual hooks of its base. */
    1.57 +{
    1.58 +private:
    1.59 +	int32_t alef; /* NOTE(review): presumably state used for lam-alef handling (see isLamAlef) - confirm in the .cpp */
    1.60 +	int32_t isLamAlef(int32_t b);
    1.61 +	int32_t nextByte(InputText *det); /* same signature as virtual NGramParser::nextByte, so it overrides it */
    1.62 +	void parseCharacters(InputText *det); /* same signature as virtual NGramParser::parseCharacters, so it overrides it */
    1.63 +
    1.64 +public:
    1.65 +    NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); /* same parameter list as the NGramParser constructor */
    1.66 +};
    1.67 +
    1.68 +
    1.69 +class CharsetRecog_sbcs : public CharsetRecognizer /* Abstract base of every CharsetRecog_* class declared in this header. */
    1.70 +{
    1.71 +public:
    1.72 +    CharsetRecog_sbcs();
    1.73 +    virtual ~CharsetRecog_sbcs();
    1.74 +    virtual const char *getName() const = 0; /* pure virtual: a subclass must declare it before it can be instantiated */
    1.75 +    virtual UBool match(InputText *det, CharsetMatch *results) const = 0; /* pure virtual: supplied by the concrete subclasses */
    1.76 +    virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; /* virtual helper; CharsetRecog_IBM420_ar declares an override */
    1.77 +};
    1.78 +
    1.79 +class CharsetRecog_8859_1 : public CharsetRecog_sbcs /* Declares both pure virtuals of CharsetRecog_sbcs (getName and match). */
    1.80 +{
    1.81 +public:
    1.82 +    virtual ~CharsetRecog_8859_1();
    1.83 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
    1.84 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
    1.85 +};
    1.86 +
    1.87 +class CharsetRecog_8859_2 : public CharsetRecog_sbcs /* Declares both pure virtuals of CharsetRecog_sbcs (getName and match). */
    1.88 +{
    1.89 +public:
    1.90 +    virtual ~CharsetRecog_8859_2();
    1.91 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
    1.92 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
    1.93 +};
    1.94 +
    1.95 +class CharsetRecog_8859_5 : public CharsetRecog_sbcs /* Intermediate base: declares getName() only, so match() stays pure virtual here. */
    1.96 +{
    1.97 +public:
    1.98 +    virtual ~CharsetRecog_8859_5();
    1.99 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName; match() is declared by CharsetRecog_8859_5_ru */
   1.100 +};
   1.101 +
   1.102 +class CharsetRecog_8859_6 : public CharsetRecog_sbcs /* Intermediate base: declares getName() only, so match() stays pure virtual here. */
   1.103 +{
   1.104 +public:
   1.105 +    virtual ~CharsetRecog_8859_6();
   1.106 +
   1.107 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName; match() is declared by CharsetRecog_8859_6_ar */
   1.108 +};
   1.109 +
   1.110 +class CharsetRecog_8859_7 : public CharsetRecog_sbcs /* Intermediate base: declares getName() only, so match() stays pure virtual here. */
   1.111 +{
   1.112 +public:
   1.113 +    virtual ~CharsetRecog_8859_7();
   1.114 +
   1.115 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName; match() is declared by CharsetRecog_8859_7_el */
   1.116 +};
   1.117 +
   1.118 +class CharsetRecog_8859_8 : public CharsetRecog_sbcs /* Intermediate base: declares getName() only, so match() stays pure virtual here. */
   1.119 +{
   1.120 +public:
   1.121 +    virtual ~CharsetRecog_8859_8();
   1.122 +	
   1.123 +    virtual const char *getName() const; /* overrides CharsetRecog_sbcs::getName; CharsetRecog_8859_8_I_he declares its own getName() again */
   1.124 +};
   1.125 +
   1.126 +class CharsetRecog_8859_9 : public CharsetRecog_sbcs /* Intermediate base: declares getName() only, so match() stays pure virtual here. */
   1.127 +{
   1.128 +public:
   1.129 +    virtual ~CharsetRecog_8859_9();
   1.130 +
   1.131 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName; match() is declared by CharsetRecog_8859_9_tr */
   1.132 +};
   1.133 +
   1.134 +
   1.135 +
   1.136 +class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 /* Language-specific subclass: adds getLanguage() and the match() its base leaves pure virtual. */
   1.137 +{
   1.138 +public:
   1.139 +    virtual ~CharsetRecog_8859_5_ru();
   1.140 +
   1.141 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.142 +
   1.143 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.144 +};
   1.145 +
   1.146 +class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 /* Language-specific subclass: adds getLanguage() and the match() its base leaves pure virtual. */
   1.147 +{
   1.148 +public:
   1.149 +    virtual ~CharsetRecog_8859_6_ar();
   1.150 +
   1.151 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.152 +
   1.153 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.154 +};
   1.155 +
   1.156 +class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 /* Language-specific subclass: adds getLanguage() and the match() its base leaves pure virtual. */
   1.157 +{
   1.158 +public:
   1.159 +    virtual ~CharsetRecog_8859_7_el();
   1.160 +
   1.161 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.162 +
   1.163 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.164 +};
   1.165 +
   1.166 +class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 /* Subclass that declares its own getName() in addition to getLanguage() and match(). */
   1.167 +{
   1.168 +public:
   1.169 +    virtual ~CharsetRecog_8859_8_I_he();
   1.170 +	
   1.171 +    const char *getName() const; /* overrides the virtual getName() already declared by CharsetRecog_8859_8 */
   1.172 +
   1.173 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.174 +
   1.175 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.176 +};
   1.177 +
   1.178 +class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 /* Language-specific subclass: inherits getName() from CharsetRecog_8859_8, adds getLanguage() and match(). */
   1.179 +{
   1.180 +public:
   1.181 +    virtual ~CharsetRecog_8859_8_he ();
   1.182 +
   1.183 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.184 +
   1.185 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.186 +};
   1.187 +
   1.188 +class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 /* Language-specific subclass: adds getLanguage() and the match() its base leaves pure virtual. */
   1.189 +{
   1.190 +public:
   1.191 +    virtual ~CharsetRecog_8859_9_tr ();
   1.192 +
   1.193 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.194 +
   1.195 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.196 +};
   1.197 +
   1.198 +class CharsetRecog_windows_1256 : public CharsetRecog_sbcs /* Derives directly from CharsetRecog_sbcs; declares getName(), getLanguage() and match(). */
   1.199 +{
   1.200 +public:
   1.201 +    virtual ~CharsetRecog_windows_1256();
   1.202 +
   1.203 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.204 +
   1.205 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.206 +
   1.207 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.208 +};
   1.209 +
   1.210 +class CharsetRecog_windows_1251 : public CharsetRecog_sbcs /* Derives directly from CharsetRecog_sbcs; declares getName(), getLanguage() and match(). */
   1.211 +{
   1.212 +public:
   1.213 +    virtual ~CharsetRecog_windows_1251();
   1.214 +
   1.215 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.216 +
   1.217 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.218 +
   1.219 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.220 +};
   1.221 +
   1.222 +
   1.223 +class CharsetRecog_KOI8_R : public CharsetRecog_sbcs /* Derives directly from CharsetRecog_sbcs; declares getName(), getLanguage() and match(). */
   1.224 +{
   1.225 +public:
   1.226 +    virtual ~CharsetRecog_KOI8_R();
   1.227 +
   1.228 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.229 +
   1.230 +    const char *getLanguage() const; /* NOTE(review): not declared by any base in this header; presumably overrides a CharsetRecognizer virtual (csrecog.h) - confirm */
   1.231 +
   1.232 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.233 +};
   1.234 +
   1.235 +class CharsetRecog_IBM424_he : public CharsetRecog_sbcs /* Intermediate base: declares getLanguage() only; getName() and match() stay pure virtual here. */
   1.236 +{
   1.237 +public:
   1.238 +    virtual ~CharsetRecog_IBM424_he();
   1.239 +
   1.240 +    const char *getLanguage() const; /* shared by the _rtl and _ltr subclasses, which declare getName() and match() */
   1.241 +};
   1.242 +
   1.243 +class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { /* Declares the getName() and match() that CharsetRecog_IBM424_he leaves pure virtual. */
   1.244 +public:
   1.245 +    virtual ~CharsetRecog_IBM424_he_rtl();
   1.246 +    
   1.247 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.248 +    
   1.249 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.250 +};
   1.251 +
   1.252 +class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { /* Declares the getName() and match() that CharsetRecog_IBM424_he leaves pure virtual. */
   1.253 +public: /* explicit access: without it the destructor, getName() and match() default to private, unlike CharsetRecog_IBM424_he_rtl */
   1.254 +    virtual ~CharsetRecog_IBM424_he_ltr();
   1.255 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.256 +
   1.257 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.258 +};
   1.259 +
   1.260 +class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs /* Intermediate base: declares getLanguage() and its own match_sbcs(); getName() and match() stay pure virtual here. */
   1.261 +{
   1.262 +public:
   1.263 +    virtual ~CharsetRecog_IBM420_ar();
   1.264 +
   1.265 +    const char *getLanguage() const; /* shared by the _rtl and _ltr subclasses, which declare getName() and match() */
   1.266 +	int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; /* same signature as virtual CharsetRecog_sbcs::match_sbcs, so it overrides it */
   1.267 +    
   1.268 +};
   1.269 +
   1.270 +class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar { /* Declares the getName() and match() that CharsetRecog_IBM420_ar leaves pure virtual. */
   1.271 +public:
   1.272 +    virtual ~CharsetRecog_IBM420_ar_rtl();
   1.273 +    
   1.274 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.275 +    
   1.276 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.277 +};
   1.278 +
   1.279 +class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { /* Declares the getName() and match() that CharsetRecog_IBM420_ar leaves pure virtual. */
   1.280 +public: /* explicit access: without it the destructor, getName() and match() default to private, unlike CharsetRecog_IBM420_ar_rtl */
   1.281 +    virtual ~CharsetRecog_IBM420_ar_ltr();
   1.282 +    const char *getName() const; /* overrides CharsetRecog_sbcs::getName */
   1.283 +
   1.284 +    virtual UBool match(InputText *det, CharsetMatch *results) const; /* overrides CharsetRecog_sbcs::match */
   1.285 +};
   1.286 +
   1.287 +U_NAMESPACE_END
   1.288 +
   1.289 +#endif /* !UCONFIG_NO_CONVERSION */
   1.290 +#endif /* __CSRSBCS_H */

mercurial