1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/chardet/src/nsCyrillicDetector.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,160 @@ 1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 +#include "nscore.h" 1.9 +#include "nsCyrillicProb.h" 1.10 +#include <stdio.h> 1.11 + 1.12 +#include "nsCOMPtr.h" 1.13 +#include "nsISupports.h" 1.14 +#include "nsICharsetDetector.h" 1.15 +#include "nsICharsetDetectionObserver.h" 1.16 +#include "nsIStringCharsetDetector.h" 1.17 +#include "nsCyrillicDetector.h" 1.18 + 1.19 +//---------------------------------------------------------------------- 1.20 +// Interface nsISupports [implementation] 1.21 +NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector) 1.22 +NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector) 1.23 + 1.24 +void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen) 1.25 +{ 1.26 + uint8_t cls; 1.27 + const char* b; 1.28 + uint32_t i; 1.29 + if(mDone) 1.30 + return; 1.31 + for(i=0, b=aBuf;i<aLen;i++,b++) 1.32 + { 1.33 + for(unsigned j=0;j<mItems;j++) 1.34 + { 1.35 + if( 0x80 & *b) 1.36 + cls = mCyrillicClass[j][(*b) & 0x7F]; 1.37 + else 1.38 + cls = 0; 1.39 + NS_ASSERTION( cls <= 32 , "illegal character class"); 1.40 + mProb[j] += gCyrillicProb[mLastCls[j]][cls]; 1.41 + mLastCls[j] = cls; 1.42 + } 1.43 + } 1.44 + // We now only based on the first block we receive 1.45 + DataEnd(); 1.46 +} 1.47 + 1.48 +//--------------------------------------------------------------------- 1.49 +#define THRESHOLD_RATIO 1.5f 1.50 +void nsCyrillicDetector::DataEnd() 1.51 +{ 1.52 + uint32_t max=0; 1.53 + uint8_t maxIdx=0; 1.54 + uint8_t j; 1.55 + if(mDone) 1.56 + return; 1.57 + for(j=0;j<mItems;j++) { 1.58 + if(mProb[j] > max) 1.59 + { 1.60 + max = mProb[j]; 1.61 + maxIdx= j; 1.62 + } 1.63 + } 1.64 + 1.65 + if( 0 == max ) // if we didn't get any 8 bits data 1.66 + return; 1.67 + 1.68 +#ifdef DEBUG 1.69 + for(j=0;j<mItems;j++) 1.70 + printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]); 1.71 +#endif 1.72 + this->Report(mCharsets[maxIdx]); 1.73 + mDone = true; 1.74 +} 1.75 + 1.76 +//--------------------------------------------------------------------- 1.77 +nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems, 1.78 + const uint8_t ** aCyrillicClass, 1.79 + const char **aCharsets) 1.80 + : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) 1.81 +{ 1.82 + mObserver = nullptr; 1.83 +} 1.84 + 1.85 +//--------------------------------------------------------------------- 1.86 +nsCyrXPCOMDetector::~nsCyrXPCOMDetector() 1.87 +{ 1.88 +} 1.89 + 1.90 +//--------------------------------------------------------------------- 1.91 +NS_IMETHODIMP nsCyrXPCOMDetector::Init( 1.92 + nsICharsetDetectionObserver* aObserver) 1.93 +{ 1.94 + NS_ASSERTION(mObserver == nullptr , "Init twice"); 1.95 + if(nullptr == aObserver) 1.96 + return NS_ERROR_ILLEGAL_VALUE; 1.97 + 1.98 + mObserver = aObserver; 1.99 + return NS_OK; 1.100 +} 1.101 + 1.102 +//---------------------------------------------------------- 1.103 +NS_IMETHODIMP nsCyrXPCOMDetector::DoIt( 1.104 + const char* aBuf, uint32_t aLen, bool* oDontFeedMe) 1.105 +{ 1.106 + NS_ASSERTION(mObserver != nullptr , "have not init yet"); 1.107 + 1.108 + if((nullptr == aBuf) || (nullptr == oDontFeedMe)) 1.109 + return NS_ERROR_ILLEGAL_VALUE; 1.110 + 1.111 + this->HandleData(aBuf, aLen); 1.112 + *oDontFeedMe = false; 1.113 + return NS_OK; 1.114 +} 1.115 + 1.116 +//---------------------------------------------------------- 1.117 +NS_IMETHODIMP nsCyrXPCOMDetector::Done() 1.118 +{ 1.119 + NS_ASSERTION(mObserver != nullptr , "have not init yet"); 1.120 + this->DataEnd(); 1.121 + return NS_OK; 1.122 +} 1.123 + 1.124 +//---------------------------------------------------------- 1.125 +void nsCyrXPCOMDetector::Report(const char* aCharset) 1.126 +{ 1.127 + NS_ASSERTION(mObserver != nullptr , "have not init yet"); 1.128 + mObserver->Notify(aCharset, eBestAnswer); 1.129 +} 1.130 + 1.131 +//--------------------------------------------------------------------- 1.132 +nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems, 1.133 + const uint8_t ** aCyrillicClass, 1.134 + const char **aCharsets) 1.135 + : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) 1.136 +{ 1.137 +} 1.138 + 1.139 +//--------------------------------------------------------------------- 1.140 +nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() 1.141 +{ 1.142 +} 1.143 + 1.144 +//--------------------------------------------------------------------- 1.145 +void nsCyrXPCOMStringDetector::Report(const char *aCharset) 1.146 +{ 1.147 + mResult = aCharset; 1.148 +} 1.149 + 1.150 +//--------------------------------------------------------------------- 1.151 +NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen, 1.152 + const char** oCharset, nsDetectionConfident &oConf) 1.153 +{ 1.154 + mResult = nullptr; 1.155 + mDone = false; 1.156 + this->HandleData(aBuf, aLen); 1.157 + this->DataEnd(); 1.158 + *oCharset=mResult; 1.159 + oConf = eBestAnswer; 1.160 + return NS_OK; 1.161 +} 1.162 + 1.163 +