intl/chardet/src/nsCyrillicDetector.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/chardet/src/nsCyrillicDetector.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,160 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#include "nscore.h"
     1.9 +#include "nsCyrillicProb.h"
    1.10 +#include <stdio.h>
    1.11 +
    1.12 +#include "nsCOMPtr.h"
    1.13 +#include "nsISupports.h"
    1.14 +#include "nsICharsetDetector.h"
    1.15 +#include "nsICharsetDetectionObserver.h"
    1.16 +#include "nsIStringCharsetDetector.h"
    1.17 +#include "nsCyrillicDetector.h"
    1.18 +
    1.19 +//----------------------------------------------------------------------
// nsISupports implementation for the two XPCOM wrapper classes. Each
// NS_IMPL_ISUPPORTS invocation names a class followed by the one interface
// listed for it.
NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
    1.23 +
    1.24 +void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen)
    1.25 +{
    1.26 +   uint8_t cls;
    1.27 +   const char* b;
    1.28 +   uint32_t i;
    1.29 +   if(mDone) 
    1.30 +      return;
    1.31 +   for(i=0, b=aBuf;i<aLen;i++,b++)
    1.32 +   {
    1.33 +     for(unsigned j=0;j<mItems;j++)
    1.34 +     {
    1.35 +        if( 0x80 & *b)
    1.36 +           cls = mCyrillicClass[j][(*b) & 0x7F];
    1.37 +        else 
    1.38 +           cls = 0;
    1.39 +        NS_ASSERTION( cls <= 32 , "illegal character class");
    1.40 +        mProb[j] += gCyrillicProb[mLastCls[j]][cls];
    1.41 +        mLastCls[j] = cls;
    1.42 +     } 
    1.43 +   }
    1.44 +   // We now only based on the first block we receive
    1.45 +   DataEnd();
    1.46 +}
    1.47 +
    1.48 +//---------------------------------------------------------------------
    1.49 +#define THRESHOLD_RATIO 1.5f
    1.50 +void nsCyrillicDetector::DataEnd()
    1.51 +{
    1.52 +   uint32_t max=0;
    1.53 +   uint8_t  maxIdx=0;
    1.54 +   uint8_t j;
    1.55 +   if(mDone) 
    1.56 +      return;
    1.57 +   for(j=0;j<mItems;j++) {
    1.58 +      if(mProb[j] > max)
    1.59 +      {
    1.60 +           max = mProb[j];
    1.61 +           maxIdx= j;
    1.62 +      }
    1.63 +   }
    1.64 +
    1.65 +   if( 0 == max ) // if we didn't get any 8 bits data 
    1.66 +     return;
    1.67 +
    1.68 +#ifdef DEBUG
    1.69 +   for(j=0;j<mItems;j++) 
    1.70 +      printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
    1.71 +#endif
    1.72 +   this->Report(mCharsets[maxIdx]);
    1.73 +   mDone = true;
    1.74 +}
    1.75 +
    1.76 +//---------------------------------------------------------------------
    1.77 +nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems, 
    1.78 +                      const uint8_t ** aCyrillicClass, 
    1.79 +                      const char **aCharsets)
    1.80 +	     : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
    1.81 +{
    1.82 +    mObserver = nullptr;
    1.83 +}
    1.84 +
    1.85 +//---------------------------------------------------------------------
    1.86 +nsCyrXPCOMDetector::~nsCyrXPCOMDetector() 
    1.87 +{
    1.88 +}
    1.89 +
    1.90 +//---------------------------------------------------------------------
    1.91 +NS_IMETHODIMP nsCyrXPCOMDetector::Init(
    1.92 +  nsICharsetDetectionObserver* aObserver)
    1.93 +{
    1.94 +  NS_ASSERTION(mObserver == nullptr , "Init twice");
    1.95 +  if(nullptr == aObserver)
    1.96 +     return NS_ERROR_ILLEGAL_VALUE;
    1.97 +
    1.98 +  mObserver = aObserver;
    1.99 +  return NS_OK;
   1.100 +}
   1.101 +
   1.102 +//----------------------------------------------------------
   1.103 +NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
   1.104 +  const char* aBuf, uint32_t aLen, bool* oDontFeedMe)
   1.105 +{
   1.106 +  NS_ASSERTION(mObserver != nullptr , "have not init yet");
   1.107 +
   1.108 +  if((nullptr == aBuf) || (nullptr == oDontFeedMe))
   1.109 +     return NS_ERROR_ILLEGAL_VALUE;
   1.110 +
   1.111 +  this->HandleData(aBuf, aLen);
   1.112 +  *oDontFeedMe = false;
   1.113 +  return NS_OK;
   1.114 +}
   1.115 +
   1.116 +//----------------------------------------------------------
   1.117 +NS_IMETHODIMP nsCyrXPCOMDetector::Done()
   1.118 +{
   1.119 +  NS_ASSERTION(mObserver != nullptr , "have not init yet");
   1.120 +  this->DataEnd();
   1.121 +  return NS_OK;
   1.122 +}
   1.123 +
   1.124 +//----------------------------------------------------------
   1.125 +void nsCyrXPCOMDetector::Report(const char* aCharset)
   1.126 +{
   1.127 +  NS_ASSERTION(mObserver != nullptr , "have not init yet");
   1.128 +  mObserver->Notify(aCharset, eBestAnswer);
   1.129 +}
   1.130 +
   1.131 +//---------------------------------------------------------------------
   1.132 +nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems, 
   1.133 +                      const uint8_t ** aCyrillicClass, 
   1.134 +                      const char **aCharsets)
   1.135 +	     : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
   1.136 +{
   1.137 +}
   1.138 +
   1.139 +//---------------------------------------------------------------------
   1.140 +nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() 
   1.141 +{
   1.142 +}
   1.143 +
   1.144 +//---------------------------------------------------------------------
   1.145 +void nsCyrXPCOMStringDetector::Report(const char *aCharset) 
   1.146 +{
   1.147 +   mResult = aCharset;
   1.148 +}
   1.149 +
   1.150 +//---------------------------------------------------------------------
   1.151 +NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen, 
   1.152 +                     const char** oCharset, nsDetectionConfident &oConf)
   1.153 +{
   1.154 +   mResult = nullptr;
   1.155 +   mDone = false;
   1.156 +   this->HandleData(aBuf, aLen); 
   1.157 +   this->DataEnd();
   1.158 +   *oCharset=mResult;
   1.159 +   oConf = eBestAnswer;
   1.160 +   return NS_OK;
   1.161 +}
   1.162 +       
   1.163 +

mercurial