intl/chardet/src/nsCyrillicDetector.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/chardet/src/nsCyrillicDetector.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,153 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#ifndef nsCyrillicDetector_h__
     1.9 +#define nsCyrillicDetector_h__
    1.10 +
    1.11 +#include "nsCyrillicClass.h"
    1.12 +
    1.13 +
    1.14 +
    1.15 +// Four ID initializers follow; their values differ only in the first field (0x2002f781 through 0x2002f784).
    1.16 +// {2002F781-3960-11d3-B3C3-00805F8A6670} -- presumably the class ID of nsRUProbDetector; the registration is not in this header (confirm).
    1.17 +#define NS_RU_PROBDETECTOR_CID \
    1.18 +{ 0x2002f781, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    1.19 +
    1.20 +
    1.21 +// {2002F782-3960-11d3-B3C3-00805F8A6670} -- presumably the class ID of nsUKProbDetector (confirm).
    1.22 +#define NS_UK_PROBDETECTOR_CID \
    1.23 +{ 0x2002f782, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    1.24 +
    1.25 +// {2002F783-3960-11d3-B3C3-00805F8A6670} -- presumably the class ID of nsRUStringProbDetector (confirm).
    1.26 +#define NS_RU_STRING_PROBDETECTOR_CID \
    1.27 +{ 0x2002f783, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    1.28 +
    1.29 +// {2002F784-3960-11d3-B3C3-00805F8A6670} -- presumably the class ID of nsUKStringProbDetector (confirm).
    1.30 +#define NS_UK_STRING_PROBDETECTOR_CID \
    1.31 +{ 0x2002f784, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    1.32 +
    1.33 +#define NUM_CYR_CHARSET 5  // number of candidate encodings; sizes the three tables below, which share one index order
    1.34 +
    1.35 +static const uint8_t *gCyrillicCls[NUM_CYR_CHARSET] =  // one byte map per encoding; the maps come from nsCyrillicClass.h
    1.36 +{
    1.37 +   CP1251Map,
    1.38 +   KOI8Map,
    1.39 +   ISO88595Map,
    1.40 +   MacCyrillicMap,
    1.41 +   IBM866Map
    1.42 +};
    1.43 +
    1.44 +static const char * gRussian[NUM_CYR_CHARSET] = {  // charset names for the Russian detectors, listed in gCyrillicCls order
    1.45 +  "windows-1251",
    1.46 +  "KOI8-R",
    1.47 +  "ISO-8859-5",
    1.48 +  "x-mac-cyrillic",
    1.49 +  "IBM866"
    1.50 +};
    1.51 +
    1.52 +static const char * gUkrainian[NUM_CYR_CHARSET] = {  // same as gRussian except index 1, which is "KOI8-U"
    1.53 +  "windows-1251",
    1.54 +  "KOI8-U",
    1.55 +  "ISO-8859-5",
    1.56 +  "x-mac-cyrillic",
    1.57 +  "IBM866"
    1.58 +};
    1.59 +
    1.60 +class nsCyrillicDetector  // Abstract base: holds per-encoding state for up to NUM_CYR_CHARSET encodings; subclasses implement Report().
    1.61 +{
    1.62 +  public:
    1.63 +    nsCyrillicDetector(uint8_t aItems,  // number of entries in the two arrays; values above NUM_CYR_CHARSET are clamped
    1.64 +                      const uint8_t ** aCyrillicClass,  // array of byte maps, one per encoding (pointer is stored, not copied)
    1.65 +                      const char **aCharsets) {  // array of charset names (pointer is stored, not copied)
    1.66 +      mItems = (aItems < NUM_CYR_CHARSET) ? aItems : static_cast<uint8_t>(NUM_CYR_CHARSET);  // mProb/mLastCls have only NUM_CYR_CHARSET slots
    1.67 +      mCyrillicClass = aCyrillicClass;
    1.68 +      mCharsets = aCharsets;
    1.69 +      for(unsigned i=0;i<NUM_CYR_CHARSET;i++)  // zero every slot, not just the first mItems, so none is left uninitialized
    1.70 +        mProb[i] = mLastCls[i] =0;
    1.71 +      mDone = false;
    1.72 +    }
    1.73 +    virtual ~nsCyrillicDetector() {}
    1.74 +    virtual void HandleData(const char* aBuf, uint32_t aLen);  // takes aLen bytes at aBuf; defined outside this header
    1.75 +    virtual void   DataEnd();  // defined outside this header; presumably finishes detection and reports -- confirm
    1.76 +  protected:
    1.77 +    virtual void Report(const char* aCharset) = 0;  // receives a charset name; must be implemented by subclasses
    1.78 +    bool    mDone;  // starts out false; accessible to subclasses
    1.79 +
    1.80 +  private:
    1.81 +    uint8_t  mItems;  // number of encodings in use, never more than NUM_CYR_CHARSET
    1.82 +    const uint8_t ** mCyrillicClass;  // caller-supplied byte maps
    1.83 +    const char** mCharsets;  // caller-supplied charset names
    1.84 +    uint32_t mProb[NUM_CYR_CHARSET];  // one counter per encoding, zeroed at construction; presumably a running score -- confirm
    1.85 +    uint8_t mLastCls[NUM_CYR_CHARSET];  // one byte per encoding, zeroed at construction; presumably the last class seen -- confirm
    1.86 +};
    1.87 +
    1.88 +class nsCyrXPCOMDetector :  // nsCyrillicDetector combined with the nsICharsetDetector interface.
    1.89 +      public nsCyrillicDetector,
    1.90 +      public nsICharsetDetector
    1.91 +{
    1.92 +  public:
    1.93 +    // nsISupports interface (declarations are supplied by the NS_DECL_ISUPPORTS macro)
    1.94 +    NS_DECL_ISUPPORTS
    1.95 +    nsCyrXPCOMDetector(uint8_t aItems,  // same three parameters as the nsCyrillicDetector constructor
    1.96 +                      const uint8_t ** aCyrillicClass, 
    1.97 +                      const char **aCharsets);  // defined outside this header
    1.98 +    virtual ~nsCyrXPCOMDetector();
    1.99 +    NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver);  // takes the observer; presumably stored in mObserver -- confirm
   1.100 +    NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool *oDontFeedMe);  // aBuf/aLen: input bytes; oDontFeedMe: out flag, presumably "no more data wanted" -- confirm
   1.101 +    NS_IMETHOD Done();  // presumably signals end of input -- confirm
   1.102 +  protected:
   1.103 +    virtual void Report(const char* aCharset);  // implements the pure virtual declared in nsCyrillicDetector
   1.104 +  private:
   1.105 +    nsCOMPtr<nsICharsetDetectionObserver> mObserver;  // observer held through nsCOMPtr
   1.106 +};
   1.107 +
   1.108 +class nsCyrXPCOMStringDetector :  // nsCyrillicDetector combined with the nsIStringCharsetDetector interface.
   1.109 +      public nsCyrillicDetector,
   1.110 +      public nsIStringCharsetDetector
   1.111 +{
   1.112 +  public:
   1.113 +    // nsISupports interface (declarations are supplied by the NS_DECL_ISUPPORTS macro)
   1.114 +    NS_DECL_ISUPPORTS
   1.115 +    nsCyrXPCOMStringDetector(uint8_t aItems,  // same three parameters as the nsCyrillicDetector constructor
   1.116 +                      const uint8_t ** aCyrillicClass, 
   1.117 +                      const char **aCharsets);  // defined outside this header
   1.118 +    virtual ~nsCyrXPCOMStringDetector();
   1.119 +    NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen,  // aBuf/aLen: input bytes
   1.120 +                     const char** oCharset, nsDetectionConfident &oConf);  // oCharset/oConf: outputs for the charset name and a confidence value
   1.121 +  protected:
   1.122 +    virtual void Report(const char* aCharset);  // implements the pure virtual declared in nsCyrillicDetector
   1.123 +  private:
   1.124 +    nsCOMPtr<nsICharsetDetectionObserver> mObserver;  // NOTE(review): no method declared in this class accepts an observer; check whether this member is used
   1.125 +    const char* mResult;  // presumably the name captured by Report() and returned through oCharset -- confirm
   1.126 +};
   1.127 +
   1.128 +class nsRUProbDetector : public nsCyrXPCOMDetector  // nsCyrXPCOMDetector set up with the shared byte maps and the gRussian names
   1.129 +{
   1.130 +  public:
   1.131 +    nsRUProbDetector()
   1.132 +      : nsCyrXPCOMDetector(NUM_CYR_CHARSET, gCyrillicCls, gRussian) {}  // item count taken from the same constant that bounds the base-class arrays
   1.133 +};
   1.134 +
   1.135 +class nsRUStringProbDetector : public nsCyrXPCOMStringDetector  // nsCyrXPCOMStringDetector set up with the shared byte maps and the gRussian names
   1.136 +{
   1.137 +  public:
   1.138 +    nsRUStringProbDetector()
   1.139 +      : nsCyrXPCOMStringDetector(NUM_CYR_CHARSET, gCyrillicCls, gRussian) {}  // item count taken from the same constant that bounds the base-class arrays
   1.140 +};
   1.141 +
   1.142 +class nsUKProbDetector : public nsCyrXPCOMDetector  // nsCyrXPCOMDetector set up with the shared byte maps and the gUkrainian names
   1.143 +{
   1.144 +  public:
   1.145 +    nsUKProbDetector()
   1.146 +      : nsCyrXPCOMDetector(NUM_CYR_CHARSET, gCyrillicCls, gUkrainian) {}  // item count taken from the same constant that bounds the base-class arrays
   1.147 +};
   1.148 +
   1.149 +class nsUKStringProbDetector : public nsCyrXPCOMStringDetector  // nsCyrXPCOMStringDetector set up with the shared byte maps and the gUkrainian names
   1.150 +{
   1.151 +  public:
   1.152 +    nsUKStringProbDetector()
   1.153 +      : nsCyrXPCOMStringDetector(NUM_CYR_CHARSET, gCyrillicCls, gUkrainian) {}  // item count taken from the same constant that bounds the base-class arrays
   1.154 +};
   1.155 +
   1.156 +#endif

mercurial