extensions/universalchardet/src/base/CharDistribution.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "CharDistribution.h"
     8 #include "JISFreq.tab"
     9 #include "Big5Freq.tab"
    10 #include "EUCKRFreq.tab"
    11 #include "EUCTWFreq.tab"
    12 #include "GB2312Freq.tab"
    13 #include "mozilla/ArrayUtils.h"
    15 #define SURE_YES 0.99f
    16 #define SURE_NO  0.01f
    18 //return confidence base on received data
    19 float CharDistributionAnalysis::GetConfidence(void)
    20 { 
    21   //if we didn't receive any character in our consideration range, or the
    22   // number of frequent characters is below the minimum threshold, return
    23   // negative answer
    24   if (mTotalChars <= 0 || mFreqChars <= mDataThreshold)
    25     return SURE_NO;
    27   if (mTotalChars != mFreqChars) {
    28     float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
    30     if (r < SURE_YES)
    31       return r;
    32   }
    33   //normalize confidence, (we don't want to be 100% sure)
    34   return SURE_YES;
    35 }
    37 EUCTWDistributionAnalysis::EUCTWDistributionAnalysis()
    38 {
    39   mCharToFreqOrder = EUCTWCharToFreqOrder;
    40   mTableSize = mozilla::ArrayLength(EUCTWCharToFreqOrder);
    41   mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO;
    42 }
    44 EUCKRDistributionAnalysis::EUCKRDistributionAnalysis()
    45 {
    46   mCharToFreqOrder = EUCKRCharToFreqOrder;
    47   mTableSize = mozilla::ArrayLength(EUCKRCharToFreqOrder);
    48   mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
    49 }
    51 GB2312DistributionAnalysis::GB2312DistributionAnalysis()
    52 {
    53   mCharToFreqOrder = GB2312CharToFreqOrder;
    54   mTableSize = mozilla::ArrayLength(GB2312CharToFreqOrder);
    55   mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
    56 }
    58 Big5DistributionAnalysis::Big5DistributionAnalysis()
    59 {
    60   mCharToFreqOrder = Big5CharToFreqOrder;
    61   mTableSize = mozilla::ArrayLength(Big5CharToFreqOrder);
    62   mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
    63 }
    65 SJISDistributionAnalysis::SJISDistributionAnalysis()
    66 {
    67   mCharToFreqOrder = JISCharToFreqOrder;
    68   mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
    69   mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
    70 }
    72 EUCJPDistributionAnalysis::EUCJPDistributionAnalysis()
    73 {
    74   mCharToFreqOrder = JISCharToFreqOrder;
    75   mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
    76   mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
    77 }

mercurial