extensions/universalchardet/src/base/CharDistribution.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "CharDistribution.h"
michael@0 7
michael@0 8 #include "JISFreq.tab"
michael@0 9 #include "Big5Freq.tab"
michael@0 10 #include "EUCKRFreq.tab"
michael@0 11 #include "EUCTWFreq.tab"
michael@0 12 #include "GB2312Freq.tab"
michael@0 13 #include "mozilla/ArrayUtils.h"
michael@0 14
michael@0 15 #define SURE_YES 0.99f
michael@0 16 #define SURE_NO 0.01f
michael@0 17
michael@0 18 //return confidence base on received data
michael@0 19 float CharDistributionAnalysis::GetConfidence(void)
michael@0 20 {
michael@0 21 //if we didn't receive any character in our consideration range, or the
michael@0 22 // number of frequent characters is below the minimum threshold, return
michael@0 23 // negative answer
michael@0 24 if (mTotalChars <= 0 || mFreqChars <= mDataThreshold)
michael@0 25 return SURE_NO;
michael@0 26
michael@0 27 if (mTotalChars != mFreqChars) {
michael@0 28 float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
michael@0 29
michael@0 30 if (r < SURE_YES)
michael@0 31 return r;
michael@0 32 }
michael@0 33 //normalize confidence, (we don't want to be 100% sure)
michael@0 34 return SURE_YES;
michael@0 35 }
michael@0 36
michael@0 37 EUCTWDistributionAnalysis::EUCTWDistributionAnalysis()
michael@0 38 {
michael@0 39 mCharToFreqOrder = EUCTWCharToFreqOrder;
michael@0 40 mTableSize = mozilla::ArrayLength(EUCTWCharToFreqOrder);
michael@0 41 mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO;
michael@0 42 }
michael@0 43
michael@0 44 EUCKRDistributionAnalysis::EUCKRDistributionAnalysis()
michael@0 45 {
michael@0 46 mCharToFreqOrder = EUCKRCharToFreqOrder;
michael@0 47 mTableSize = mozilla::ArrayLength(EUCKRCharToFreqOrder);
michael@0 48 mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
michael@0 49 }
michael@0 50
michael@0 51 GB2312DistributionAnalysis::GB2312DistributionAnalysis()
michael@0 52 {
michael@0 53 mCharToFreqOrder = GB2312CharToFreqOrder;
michael@0 54 mTableSize = mozilla::ArrayLength(GB2312CharToFreqOrder);
michael@0 55 mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
michael@0 56 }
michael@0 57
michael@0 58 Big5DistributionAnalysis::Big5DistributionAnalysis()
michael@0 59 {
michael@0 60 mCharToFreqOrder = Big5CharToFreqOrder;
michael@0 61 mTableSize = mozilla::ArrayLength(Big5CharToFreqOrder);
michael@0 62 mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
michael@0 63 }
michael@0 64
michael@0 65 SJISDistributionAnalysis::SJISDistributionAnalysis()
michael@0 66 {
michael@0 67 mCharToFreqOrder = JISCharToFreqOrder;
michael@0 68 mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
michael@0 69 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
michael@0 70 }
michael@0 71
michael@0 72 EUCJPDistributionAnalysis::EUCJPDistributionAnalysis()
michael@0 73 {
michael@0 74 mCharToFreqOrder = JISCharToFreqOrder;
michael@0 75 mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
michael@0 76 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
michael@0 77 }
michael@0 78

mercurial