1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/universalchardet/src/base/CharDistribution.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,78 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "CharDistribution.h" 1.10 + 1.11 +#include "JISFreq.tab" 1.12 +#include "Big5Freq.tab" 1.13 +#include "EUCKRFreq.tab" 1.14 +#include "EUCTWFreq.tab" 1.15 +#include "GB2312Freq.tab" 1.16 +#include "mozilla/ArrayUtils.h" 1.17 + 1.18 +#define SURE_YES 0.99f 1.19 +#define SURE_NO 0.01f 1.20 + 1.21 +//return confidence base on received data 1.22 +float CharDistributionAnalysis::GetConfidence(void) 1.23 +{ 1.24 + //if we didn't receive any character in our consideration range, or the 1.25 + // number of frequent characters is below the minimum threshold, return 1.26 + // negative answer 1.27 + if (mTotalChars <= 0 || mFreqChars <= mDataThreshold) 1.28 + return SURE_NO; 1.29 + 1.30 + if (mTotalChars != mFreqChars) { 1.31 + float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); 1.32 + 1.33 + if (r < SURE_YES) 1.34 + return r; 1.35 + } 1.36 + //normalize confidence, (we don't want to be 100% sure) 1.37 + return SURE_YES; 1.38 +} 1.39 + 1.40 +EUCTWDistributionAnalysis::EUCTWDistributionAnalysis() 1.41 +{ 1.42 + mCharToFreqOrder = EUCTWCharToFreqOrder; 1.43 + mTableSize = mozilla::ArrayLength(EUCTWCharToFreqOrder); 1.44 + mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO; 1.45 +} 1.46 + 1.47 +EUCKRDistributionAnalysis::EUCKRDistributionAnalysis() 1.48 +{ 1.49 + mCharToFreqOrder = EUCKRCharToFreqOrder; 1.50 + mTableSize = mozilla::ArrayLength(EUCKRCharToFreqOrder); 1.51 + mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; 1.52 +} 1.53 + 1.54 +GB2312DistributionAnalysis::GB2312DistributionAnalysis() 1.55 +{ 1.56 + mCharToFreqOrder = GB2312CharToFreqOrder; 1.57 + mTableSize = mozilla::ArrayLength(GB2312CharToFreqOrder); 1.58 + mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; 1.59 +} 1.60 + 1.61 +Big5DistributionAnalysis::Big5DistributionAnalysis() 1.62 +{ 1.63 + mCharToFreqOrder = Big5CharToFreqOrder; 1.64 + mTableSize = mozilla::ArrayLength(Big5CharToFreqOrder); 1.65 + mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; 1.66 +} 1.67 + 1.68 +SJISDistributionAnalysis::SJISDistributionAnalysis() 1.69 +{ 1.70 + mCharToFreqOrder = JISCharToFreqOrder; 1.71 + mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); 1.72 + mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 1.73 +} 1.74 + 1.75 +EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() 1.76 +{ 1.77 + mCharToFreqOrder = JISCharToFreqOrder; 1.78 + mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); 1.79 + mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 1.80 +} 1.81 +