|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 #include <stdio.h> |
|
6 |
|
7 #include "nsMBCSGroupProber.h" |
|
8 #include "nsUniversalDetector.h" |
|
9 |
|
10 #if defined(DEBUG_chardet) || defined(DEBUG_jgmyers) |
|
// Display names used by the debug dumps in this file. Entries are indexed by
// prober slot, i.e. the same index the constructor uses when it fills
// mProbers[].
const char *ProberName[] = {
  "UTF8",     // slot 0
  "SJIS",     // slot 1
  "EUCJP",    // slot 2
  "GB18030",  // slot 3
  "EUCKR",    // slot 4
  "Big5",     // slot 5
  "EUCTW",    // slot 6
};
|
21 |
|
22 #endif |
|
23 |
|
24 nsMBCSGroupProber::nsMBCSGroupProber(uint32_t aLanguageFilter) |
|
25 { |
|
26 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) |
|
27 mProbers[i] = nullptr; |
|
28 |
|
29 mProbers[0] = new nsUTF8Prober(); |
|
30 if (aLanguageFilter & NS_FILTER_JAPANESE) |
|
31 { |
|
32 mProbers[1] = new nsSJISProber(aLanguageFilter == NS_FILTER_JAPANESE); |
|
33 mProbers[2] = new nsEUCJPProber(aLanguageFilter == NS_FILTER_JAPANESE); |
|
34 } |
|
35 if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED) |
|
36 mProbers[3] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED); |
|
37 if (aLanguageFilter & NS_FILTER_KOREAN) |
|
38 mProbers[4] = new nsEUCKRProber(aLanguageFilter == NS_FILTER_KOREAN); |
|
39 if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL) |
|
40 { |
|
41 mProbers[5] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL); |
|
42 mProbers[6] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL); |
|
43 } |
|
44 Reset(); |
|
45 } |
|
46 |
|
47 nsMBCSGroupProber::~nsMBCSGroupProber() |
|
48 { |
|
49 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) |
|
50 { |
|
51 delete mProbers[i]; |
|
52 } |
|
53 } |
|
54 |
|
55 const char* nsMBCSGroupProber::GetCharSetName() |
|
56 { |
|
57 if (mBestGuess == -1) |
|
58 { |
|
59 GetConfidence(); |
|
60 if (mBestGuess == -1) |
|
61 mBestGuess = 0; |
|
62 } |
|
63 return mProbers[mBestGuess]->GetCharSetName(); |
|
64 } |
|
65 |
|
66 void nsMBCSGroupProber::Reset(void) |
|
67 { |
|
68 mActiveNum = 0; |
|
69 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) |
|
70 { |
|
71 if (mProbers[i]) |
|
72 { |
|
73 mProbers[i]->Reset(); |
|
74 mIsActive[i] = true; |
|
75 ++mActiveNum; |
|
76 } |
|
77 else |
|
78 mIsActive[i] = false; |
|
79 } |
|
80 mBestGuess = -1; |
|
81 mState = eDetecting; |
|
82 mKeepNext = 0; |
|
83 } |
|
84 |
|
85 nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) |
|
86 { |
|
87 nsProbingState st; |
|
88 uint32_t start = 0; |
|
89 uint32_t keepNext = mKeepNext; |
|
90 |
|
91 //do filtering to reduce load to probers |
|
92 for (uint32_t pos = 0; pos < aLen; ++pos) |
|
93 { |
|
94 if (aBuf[pos] & 0x80) |
|
95 { |
|
96 if (!keepNext) |
|
97 start = pos; |
|
98 keepNext = 2; |
|
99 } |
|
100 else if (keepNext) |
|
101 { |
|
102 if (--keepNext == 0) |
|
103 { |
|
104 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) |
|
105 { |
|
106 if (!mIsActive[i]) |
|
107 continue; |
|
108 st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start); |
|
109 if (st == eFoundIt) |
|
110 { |
|
111 mBestGuess = i; |
|
112 mState = eFoundIt; |
|
113 return mState; |
|
114 } |
|
115 } |
|
116 } |
|
117 } |
|
118 } |
|
119 |
|
120 if (keepNext) { |
|
121 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) |
|
122 { |
|
123 if (!mIsActive[i]) |
|
124 continue; |
|
125 st = mProbers[i]->HandleData(aBuf + start, aLen - start); |
|
126 if (st == eFoundIt) |
|
127 { |
|
128 mBestGuess = i; |
|
129 mState = eFoundIt; |
|
130 return mState; |
|
131 } |
|
132 } |
|
133 } |
|
134 mKeepNext = keepNext; |
|
135 |
|
136 return mState; |
|
137 } |
|
138 |
|
139 float nsMBCSGroupProber::GetConfidence(void) |
|
140 { |
|
141 uint32_t i; |
|
142 float bestConf = 0.0, cf; |
|
143 |
|
144 switch (mState) |
|
145 { |
|
146 case eFoundIt: |
|
147 return (float)0.99; |
|
148 case eNotMe: |
|
149 return (float)0.01; |
|
150 default: |
|
151 for (i = 0; i < NUM_OF_PROBERS; i++) |
|
152 { |
|
153 if (!mIsActive[i]) |
|
154 continue; |
|
155 cf = mProbers[i]->GetConfidence(); |
|
156 if (bestConf < cf) |
|
157 { |
|
158 bestConf = cf; |
|
159 mBestGuess = i; |
|
160 } |
|
161 } |
|
162 } |
|
163 return bestConf; |
|
164 } |
|
165 |
|
166 #ifdef DEBUG_chardet |
|
167 void nsMBCSGroupProber::DumpStatus() |
|
168 { |
|
169 uint32_t i; |
|
170 float cf; |
|
171 |
|
172 GetConfidence(); |
|
173 for (i = 0; i < NUM_OF_PROBERS; i++) |
|
174 { |
|
175 if (!mIsActive[i]) |
|
176 printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]); |
|
177 else |
|
178 { |
|
179 cf = mProbers[i]->GetConfidence(); |
|
180 printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); |
|
181 } |
|
182 } |
|
183 } |
|
184 #endif |
|
185 |
|
186 #ifdef DEBUG_jgmyers |
|
187 void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], uint32_t &offset) |
|
188 { |
|
189 for (uint32_t i = 0; i < NUM_OF_PROBERS; ++i) { |
|
190 states[offset].name = ProberName[i]; |
|
191 states[offset].isActive = mIsActive[i]; |
|
192 states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0; |
|
193 ++offset; |
|
194 } |
|
195 } |
|
196 #endif /* DEBUG_jgmyers */ |