extensions/universalchardet/src/base/CharDistribution.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/universalchardet/src/base/CharDistribution.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,78 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "CharDistribution.h"
    1.10 +
    1.11 +#include "JISFreq.tab"
    1.12 +#include "Big5Freq.tab"
    1.13 +#include "EUCKRFreq.tab"
    1.14 +#include "EUCTWFreq.tab"
    1.15 +#include "GB2312Freq.tab"
    1.16 +#include "mozilla/ArrayUtils.h"
    1.17 +
    1.18 +#define SURE_YES 0.99f
    1.19 +#define SURE_NO  0.01f
    1.20 +
    1.21 +// Return a confidence value computed from the frequent-character count and
    1.22 +// the total character count gathered so far, capped at SURE_YES.
    1.23 +float CharDistributionAnalysis::GetConfidence(void) {
    1.24 +  // Nothing in range was counted, or the frequent-character count has not
    1.25 +  // risen above mDataThreshold: give the negative answer.
    1.26 +  const bool tooLittleData = mTotalChars <= 0 || mFreqChars <= mDataThreshold;
    1.27 +  if (tooLittleData) {
    1.28 +    return SURE_NO;
    1.29 +  }
    1.30 +
    1.31 +  // When every counted character is a frequent one the divisor below would
    1.32 +  // be zero, so skip the ratio and report the capped maximum.
    1.33 +  if (mTotalChars == mFreqChars) {
    1.34 +    return SURE_YES;
    1.35 +  }
    1.36 +  const float ratio = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
    1.37 +  return ratio < SURE_YES ? ratio : SURE_YES;  // never report 100% certainty
    1.38 +}
    1.39 +
    1.40 +// Select the EUC-TW frequency-order table, its length and its typical ratio.
    1.41 +EUCTWDistributionAnalysis::EUCTWDistributionAnalysis() {
    1.42 +  mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO;
    1.43 +  mTableSize = mozilla::ArrayLength(EUCTWCharToFreqOrder);
    1.44 +  mCharToFreqOrder = EUCTWCharToFreqOrder;
    1.45 +}
    1.46 +
    1.47 +// Select the EUC-KR frequency-order table, its length and its typical ratio.
    1.48 +EUCKRDistributionAnalysis::EUCKRDistributionAnalysis() {
    1.49 +  mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO;
    1.50 +  mTableSize = mozilla::ArrayLength(EUCKRCharToFreqOrder);
    1.51 +  mCharToFreqOrder = EUCKRCharToFreqOrder;
    1.52 +}
    1.53 +
    1.54 +// Select the GB2312 frequency-order table, its length and its typical ratio.
    1.55 +GB2312DistributionAnalysis::GB2312DistributionAnalysis() {
    1.56 +  mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO;
    1.57 +  mTableSize = mozilla::ArrayLength(GB2312CharToFreqOrder);
    1.58 +  mCharToFreqOrder = GB2312CharToFreqOrder;
    1.59 +}
    1.60 +
    1.61 +// Select the Big5 frequency-order table, its length and its typical ratio.
    1.62 +Big5DistributionAnalysis::Big5DistributionAnalysis() {
    1.63 +  mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
    1.64 +  mTableSize = mozilla::ArrayLength(Big5CharToFreqOrder);
    1.65 +  mCharToFreqOrder = Big5CharToFreqOrder;
    1.66 +}
    1.67 +
    1.68 +// Select the JIS frequency-order table, its length and the JIS typical ratio.
    1.69 +SJISDistributionAnalysis::SJISDistributionAnalysis() {
    1.70 +  mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
    1.71 +  mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
    1.72 +  mCharToFreqOrder = JISCharToFreqOrder;
    1.73 +}
    1.74 +
    1.75 +// Select the JIS frequency-order table, its length and the JIS typical ratio.
    1.76 +EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() {
    1.77 +  mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO;
    1.78 +  mTableSize = mozilla::ArrayLength(JISCharToFreqOrder);
    1.79 +  mCharToFreqOrder = JISCharToFreqOrder;
    1.80 +}
    1.81 +

mercurial