Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include <stdio.h>
7 #include "nsMBCSGroupProber.h"
8 #include "nsUniversalDetector.h"
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
// Debug-only display names for the probers, indexed by prober slot.
// The debug helpers in this file look a name up with the same index they
// use for mProbers[], so the order here must match the slot assignments
// made in the nsMBCSGroupProber constructor.
const char *ProberName[] = {
  "UTF8",     // slot 0
  "SJIS",     // slot 1
  "EUCJP",    // slot 2
  "GB18030",  // slot 3
  "EUCKR",    // slot 4
  "Big5",     // slot 5
  "EUCTW",    // slot 6
};
#endif
24 nsMBCSGroupProber::nsMBCSGroupProber(uint32_t aLanguageFilter)
25 {
26 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
27 mProbers[i] = nullptr;
29 mProbers[0] = new nsUTF8Prober();
30 if (aLanguageFilter & NS_FILTER_JAPANESE)
31 {
32 mProbers[1] = new nsSJISProber(aLanguageFilter == NS_FILTER_JAPANESE);
33 mProbers[2] = new nsEUCJPProber(aLanguageFilter == NS_FILTER_JAPANESE);
34 }
35 if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED)
36 mProbers[3] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
37 if (aLanguageFilter & NS_FILTER_KOREAN)
38 mProbers[4] = new nsEUCKRProber(aLanguageFilter == NS_FILTER_KOREAN);
39 if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL)
40 {
41 mProbers[5] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
42 mProbers[6] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
43 }
44 Reset();
45 }
47 nsMBCSGroupProber::~nsMBCSGroupProber()
48 {
49 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
50 {
51 delete mProbers[i];
52 }
53 }
55 const char* nsMBCSGroupProber::GetCharSetName()
56 {
57 if (mBestGuess == -1)
58 {
59 GetConfidence();
60 if (mBestGuess == -1)
61 mBestGuess = 0;
62 }
63 return mProbers[mBestGuess]->GetCharSetName();
64 }
66 void nsMBCSGroupProber::Reset(void)
67 {
68 mActiveNum = 0;
69 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
70 {
71 if (mProbers[i])
72 {
73 mProbers[i]->Reset();
74 mIsActive[i] = true;
75 ++mActiveNum;
76 }
77 else
78 mIsActive[i] = false;
79 }
80 mBestGuess = -1;
81 mState = eDetecting;
82 mKeepNext = 0;
83 }
85 nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen)
86 {
87 nsProbingState st;
88 uint32_t start = 0;
89 uint32_t keepNext = mKeepNext;
91 //do filtering to reduce load to probers
92 for (uint32_t pos = 0; pos < aLen; ++pos)
93 {
94 if (aBuf[pos] & 0x80)
95 {
96 if (!keepNext)
97 start = pos;
98 keepNext = 2;
99 }
100 else if (keepNext)
101 {
102 if (--keepNext == 0)
103 {
104 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
105 {
106 if (!mIsActive[i])
107 continue;
108 st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start);
109 if (st == eFoundIt)
110 {
111 mBestGuess = i;
112 mState = eFoundIt;
113 return mState;
114 }
115 }
116 }
117 }
118 }
120 if (keepNext) {
121 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
122 {
123 if (!mIsActive[i])
124 continue;
125 st = mProbers[i]->HandleData(aBuf + start, aLen - start);
126 if (st == eFoundIt)
127 {
128 mBestGuess = i;
129 mState = eFoundIt;
130 return mState;
131 }
132 }
133 }
134 mKeepNext = keepNext;
136 return mState;
137 }
139 float nsMBCSGroupProber::GetConfidence(void)
140 {
141 uint32_t i;
142 float bestConf = 0.0, cf;
144 switch (mState)
145 {
146 case eFoundIt:
147 return (float)0.99;
148 case eNotMe:
149 return (float)0.01;
150 default:
151 for (i = 0; i < NUM_OF_PROBERS; i++)
152 {
153 if (!mIsActive[i])
154 continue;
155 cf = mProbers[i]->GetConfidence();
156 if (bestConf < cf)
157 {
158 bestConf = cf;
159 mBestGuess = i;
160 }
161 }
162 }
163 return bestConf;
164 }
#ifdef DEBUG_chardet
/**
 * Debug helper: prints one line per prober slot to stdout.
 *
 * GetConfidence() is called first so the best guess is up to date. Active
 * probers are printed with their current confidence; inactive slots are
 * printed as inactive. Names come from ProberName[], indexed by slot.
 */
void nsMBCSGroupProber::DumpStatus()
{
  GetConfidence();

  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot)
  {
    if (mIsActive[slot])
    {
      const float confidence = mProbers[slot]->GetConfidence();
      printf(" MBCS %1.3f: [%s]\r\n", confidence, ProberName[slot]);
      continue;
    }

    printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[slot]);
  }
}
#endif
#ifdef DEBUG_jgmyers
/**
 * Debug helper: appends one DetectorState entry per prober slot.
 *
 * Entries are written to states[] starting at index `offset`, and `offset`
 * is advanced by one for every slot, so on return it points just past the
 * last entry written. Inactive slots are reported with confidence 0.
 * No bounds check against the size of states[] is performed here.
 *
 * @param states  Output array of detector states.
 * @param offset  In/out index of the next free entry in states[].
 */
void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], uint32_t &offset)
{
  for (uint32_t slot = 0; slot < NUM_OF_PROBERS; ++slot, ++offset)
  {
    nsUniversalDetector::DetectorState &entry = states[offset];
    entry.name = ProberName[slot];
    entry.isActive = mIsActive[slot];
    entry.confidence = mIsActive[slot] ? mProbers[slot]->GetConfidence() : 0.0;
  }
}
#endif /* DEBUG_jgmyers */