|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "CharDistribution.h" |
|
7 |
|
8 #include "JISFreq.tab" |
|
9 #include "Big5Freq.tab" |
|
10 #include "EUCKRFreq.tab" |
|
11 #include "EUCTWFreq.tab" |
|
12 #include "GB2312Freq.tab" |
|
13 #include "mozilla/ArrayUtils.h" |
|
14 |
|
// Confidence bounds used by CharDistributionAnalysis::GetConfidence.
// SURE_YES is the highest confidence that function reports; it deliberately
// stays below 1.0 so the result is never "100% sure".
15 #define SURE_YES 0.99f |
|
// SURE_NO is the confidence reported when too little data has been seen.
16 #define SURE_NO 0.01f |
|
17 |
|
18 //return confidence base on received data |
|
19 float CharDistributionAnalysis::GetConfidence(void) |
|
20 { |
|
21 //if we didn't receive any character in our consideration range, or the |
|
22 // number of frequent characters is below the minimum threshold, return |
|
23 // negative answer |
|
24 if (mTotalChars <= 0 || mFreqChars <= mDataThreshold) |
|
25 return SURE_NO; |
|
26 |
|
27 if (mTotalChars != mFreqChars) { |
|
28 float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); |
|
29 |
|
30 if (r < SURE_YES) |
|
31 return r; |
|
32 } |
|
33 //normalize confidence, (we don't want to be 100% sure) |
|
34 return SURE_YES; |
|
35 } |
|
36 |
|
37 EUCTWDistributionAnalysis::EUCTWDistributionAnalysis() |
|
38 { |
|
39 mCharToFreqOrder = EUCTWCharToFreqOrder; |
|
40 mTableSize = mozilla::ArrayLength(EUCTWCharToFreqOrder); |
|
41 mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO; |
|
42 } |
|
43 |
|
44 EUCKRDistributionAnalysis::EUCKRDistributionAnalysis() |
|
45 { |
|
46 mCharToFreqOrder = EUCKRCharToFreqOrder; |
|
47 mTableSize = mozilla::ArrayLength(EUCKRCharToFreqOrder); |
|
48 mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; |
|
49 } |
|
50 |
|
51 GB2312DistributionAnalysis::GB2312DistributionAnalysis() |
|
52 { |
|
53 mCharToFreqOrder = GB2312CharToFreqOrder; |
|
54 mTableSize = mozilla::ArrayLength(GB2312CharToFreqOrder); |
|
55 mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; |
|
56 } |
|
57 |
|
58 Big5DistributionAnalysis::Big5DistributionAnalysis() |
|
59 { |
|
60 mCharToFreqOrder = Big5CharToFreqOrder; |
|
61 mTableSize = mozilla::ArrayLength(Big5CharToFreqOrder); |
|
62 mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; |
|
63 } |
|
64 |
|
65 SJISDistributionAnalysis::SJISDistributionAnalysis() |
|
66 { |
|
67 mCharToFreqOrder = JISCharToFreqOrder; |
|
68 mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); |
|
69 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; |
|
70 } |
|
71 |
|
72 EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() |
|
73 { |
|
74 mCharToFreqOrder = JISCharToFreqOrder; |
|
75 mTableSize = mozilla::ArrayLength(JISCharToFreqOrder); |
|
76 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; |
|
77 } |
|
78 |