intl/chardet/src/nsCyrillicDetector.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #ifndef nsCyrillicDetector_h__
     6 #define nsCyrillicDetector_h__
     8 #include "nsCyrillicClass.h"
    13 // {2002F781-3960-11d3-B3C3-00805F8A6670}
    14 #define NS_RU_PROBDETECTOR_CID \
    15 { 0x2002f781, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    18 // {2002F782-3960-11d3-B3C3-00805F8A6670}
    19 #define NS_UK_PROBDETECTOR_CID \
    20 { 0x2002f782, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    22 // {2002F783-3960-11d3-B3C3-00805F8A6670}
    23 #define NS_RU_STRING_PROBDETECTOR_CID \
    24 { 0x2002f783, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    26 // {2002F784-3960-11d3-B3C3-00805F8A6670}
    27 #define NS_UK_STRING_PROBDETECTOR_CID \
    28 { 0x2002f784, 0x3960, 0x11d3, { 0xb3, 0xc3, 0x0, 0x80, 0x5f, 0x8a, 0x66, 0x70 } }
    30 static const uint8_t *gCyrillicCls[5] =
    31 {
    32    CP1251Map,
    33    KOI8Map,
    34    ISO88595Map,
    35    MacCyrillicMap,
    36    IBM866Map
    37 };
    39 static const char * gRussian[5] = {
    40   "windows-1251", 
    41   "KOI8-R", 
    42   "ISO-8859-5", 
    43   "x-mac-cyrillic",
    44   "IBM866"
    45 };
    47 static const char * gUkrainian[5] = {
    48   "windows-1251", 
    49   "KOI8-U", 
    50   "ISO-8859-5", 
    51   "x-mac-cyrillic",
    52   "IBM866"
    53 };
    55 #define NUM_CYR_CHARSET 5
    57 class nsCyrillicDetector 
    58 {
    59   public:
    60     nsCyrillicDetector(uint8_t aItems, 
    61                       const uint8_t ** aCyrillicClass, 
    62                       const char **aCharsets) {
    63       mItems = aItems;
    64       mCyrillicClass = aCyrillicClass;
    65       mCharsets = aCharsets;
    66       for(unsigned i=0;i<mItems;i++)
    67         mProb[i] = mLastCls[i] =0;
    68       mDone = false;
    69     }
    70     virtual ~nsCyrillicDetector() {}
    71     virtual void HandleData(const char* aBuf, uint32_t aLen);
    72     virtual void   DataEnd();
    73   protected:
    74     virtual void Report(const char* aCharset) = 0;
    75     bool    mDone;
    77   private:
    78     uint8_t  mItems;
    79     const uint8_t ** mCyrillicClass;
    80     const char** mCharsets;
    81     uint32_t mProb[NUM_CYR_CHARSET];
    82     uint8_t mLastCls[NUM_CYR_CHARSET];
    83 };
    85 class nsCyrXPCOMDetector :  
    86       public nsCyrillicDetector,
    87       public nsICharsetDetector
    88 {
    89   public:
    90     // nsISupports interface
    91     NS_DECL_ISUPPORTS
    92     nsCyrXPCOMDetector(uint8_t aItems, 
    93                       const uint8_t ** aCyrillicClass, 
    94                       const char **aCharsets);
    95     virtual ~nsCyrXPCOMDetector();
    96     NS_IMETHOD Init(nsICharsetDetectionObserver* aObserver);
    97     NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, bool *oDontFeedMe);
    98     NS_IMETHOD Done();
    99   protected:
   100     virtual void Report(const char* aCharset);
   101   private:
   102     nsCOMPtr<nsICharsetDetectionObserver> mObserver;
   103 };
   105 class nsCyrXPCOMStringDetector :  
   106       public nsCyrillicDetector,
   107       public nsIStringCharsetDetector
   108 {
   109   public:
   110     // nsISupports interface
   111     NS_DECL_ISUPPORTS
   112     nsCyrXPCOMStringDetector(uint8_t aItems, 
   113                       const uint8_t ** aCyrillicClass, 
   114                       const char **aCharsets);
   115     virtual ~nsCyrXPCOMStringDetector();
   116     NS_IMETHOD DoIt(const char* aBuf, uint32_t aLen, 
   117                      const char** oCharset, nsDetectionConfident &oConf);
   118   protected:
   119     virtual void Report(const char* aCharset);
   120   private:
   121     nsCOMPtr<nsICharsetDetectionObserver> mObserver;
   122     const char* mResult;
   123 };
   125 class nsRUProbDetector : public nsCyrXPCOMDetector
   126 {
   127   public:
   128     nsRUProbDetector() 
   129       : nsCyrXPCOMDetector(5, gCyrillicCls, gRussian) {}
   130 };
   132 class nsRUStringProbDetector : public nsCyrXPCOMStringDetector
   133 {
   134   public:
   135     nsRUStringProbDetector() 
   136       : nsCyrXPCOMStringDetector(5, gCyrillicCls, gRussian) {}
   137 };
   139 class nsUKProbDetector : public nsCyrXPCOMDetector
   140 {
   141   public:
   142     nsUKProbDetector() 
   143       : nsCyrXPCOMDetector(5, gCyrillicCls, gUkrainian) {}
   144 };
   146 class nsUKStringProbDetector : public nsCyrXPCOMStringDetector
   147 {
   148   public:
   149     nsUKStringProbDetector() 
   150       : nsCyrXPCOMStringDetector(5, gCyrillicCls, gUkrainian) {}
   151 };
   153 #endif

mercurial