|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include <stdio.h> |
|
7 #include "prmem.h" |
|
8 |
|
9 #include "nsSBCharSetProber.h" |
|
10 #include "nsSBCSGroupProber.h" |
|
11 |
|
12 #include "nsHebrewProber.h" |
|
13 |
|
14 nsSBCSGroupProber::nsSBCSGroupProber() |
|
15 { |
|
16 mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); |
|
17 mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); |
|
18 mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); |
|
19 mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); |
|
20 mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); |
|
21 mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); |
|
22 mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); |
|
23 mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); |
|
24 mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); |
|
25 mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); |
|
26 mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel); |
|
27 |
|
28 nsHebrewProber *hebprober = new nsHebrewProber(); |
|
29 // Notice: Any change in these indexes - 10,11,12 must be reflected |
|
30 // in the code below as well. |
|
31 mProbers[11] = hebprober; |
|
32 mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew |
|
33 mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew |
|
34 // Tell the Hebrew prober about the logical and visual probers |
|
35 if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null |
|
36 { |
|
37 hebprober->SetModelProbers(mProbers[12], mProbers[13]); |
|
38 } |
|
39 else // One or more is null. avoid any Hebrew probing, null them all |
|
40 { |
|
41 for (uint32_t i = 11; i <= 13; ++i) |
|
42 { |
|
43 delete mProbers[i]; |
|
44 mProbers[i] = 0; |
|
45 } |
|
46 } |
|
47 |
|
48 // disable latin2 before latin1 is available, otherwise all latin1 |
|
49 // will be detected as latin2 because of their similarity. |
|
50 //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); |
|
51 //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); |
|
52 |
|
53 Reset(); |
|
54 } |
|
55 |
|
56 nsSBCSGroupProber::~nsSBCSGroupProber() |
|
57 { |
|
58 for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
|
59 { |
|
60 delete mProbers[i]; |
|
61 } |
|
62 } |
|
63 |
|
64 |
|
65 const char* nsSBCSGroupProber::GetCharSetName() |
|
66 { |
|
67 //if we have no answer yet |
|
68 if (mBestGuess == -1) |
|
69 { |
|
70 GetConfidence(); |
|
71 //no charset seems positive |
|
72 if (mBestGuess == -1) |
|
73 //we will use default. |
|
74 mBestGuess = 0; |
|
75 } |
|
76 return mProbers[mBestGuess]->GetCharSetName(); |
|
77 } |
|
78 |
|
79 void nsSBCSGroupProber::Reset(void) |
|
80 { |
|
81 mActiveNum = 0; |
|
82 for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
|
83 { |
|
84 if (mProbers[i]) // not null |
|
85 { |
|
86 mProbers[i]->Reset(); |
|
87 mIsActive[i] = true; |
|
88 ++mActiveNum; |
|
89 } |
|
90 else |
|
91 mIsActive[i] = false; |
|
92 } |
|
93 mBestGuess = -1; |
|
94 mState = eDetecting; |
|
95 } |
|
96 |
|
97 |
|
98 nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) |
|
99 { |
|
100 nsProbingState st; |
|
101 uint32_t i; |
|
102 char *newBuf1 = 0; |
|
103 uint32_t newLen1 = 0; |
|
104 |
|
105 //apply filter to original buffer, and we got new buffer back |
|
106 //depend on what script it is, we will feed them the new buffer |
|
107 //we got after applying proper filter |
|
108 //this is done without any consideration to KeepEnglishLetters |
|
109 //of each prober since as of now, there are no probers here which |
|
110 //recognize languages with English characters. |
|
111 if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) |
|
112 goto done; |
|
113 |
|
114 if (newLen1 == 0) |
|
115 goto done; // Nothing to see here, move on. |
|
116 |
|
117 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
|
118 { |
|
119 if (!mIsActive[i]) |
|
120 continue; |
|
121 st = mProbers[i]->HandleData(newBuf1, newLen1); |
|
122 if (st == eFoundIt) |
|
123 { |
|
124 mBestGuess = i; |
|
125 mState = eFoundIt; |
|
126 break; |
|
127 } |
|
128 else if (st == eNotMe) |
|
129 { |
|
130 mIsActive[i] = false; |
|
131 mActiveNum--; |
|
132 if (mActiveNum <= 0) |
|
133 { |
|
134 mState = eNotMe; |
|
135 break; |
|
136 } |
|
137 } |
|
138 } |
|
139 |
|
140 done: |
|
141 PR_FREEIF(newBuf1); |
|
142 |
|
143 return mState; |
|
144 } |
|
145 |
|
146 float nsSBCSGroupProber::GetConfidence(void) |
|
147 { |
|
148 uint32_t i; |
|
149 float bestConf = 0.0, cf; |
|
150 |
|
151 switch (mState) |
|
152 { |
|
153 case eFoundIt: |
|
154 return (float)0.99; //sure yes |
|
155 case eNotMe: |
|
156 return (float)0.01; //sure no |
|
157 default: |
|
158 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
|
159 { |
|
160 if (!mIsActive[i]) |
|
161 continue; |
|
162 cf = mProbers[i]->GetConfidence(); |
|
163 if (bestConf < cf) |
|
164 { |
|
165 bestConf = cf; |
|
166 mBestGuess = i; |
|
167 } |
|
168 } |
|
169 } |
|
170 return bestConf; |
|
171 } |
|
172 |
|
173 #ifdef DEBUG_chardet |
|
174 void nsSBCSGroupProber::DumpStatus() |
|
175 { |
|
176 uint32_t i; |
|
177 float cf; |
|
178 |
|
179 cf = GetConfidence(); |
|
180 printf(" SBCS Group Prober --------begin status \r\n"); |
|
181 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
|
182 { |
|
183 if (!mIsActive[i]) |
|
184 printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName()); |
|
185 else |
|
186 mProbers[i]->DumpStatus(); |
|
187 } |
|
188 printf(" SBCS Group found best match [%s] confidence %f.\r\n", |
|
189 mProbers[mBestGuess]->GetCharSetName(), cf); |
|
190 } |
|
191 #endif |