Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include <stdio.h>
7 #include "prmem.h"
9 #include "nsSBCharSetProber.h"
10 #include "nsSBCSGroupProber.h"
12 #include "nsHebrewProber.h"
14 nsSBCSGroupProber::nsSBCSGroupProber()
15 {
16 mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
17 mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
18 mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
19 mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
20 mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
21 mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
22 mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
23 mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
24 mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
25 mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
26 mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel);
28 nsHebrewProber *hebprober = new nsHebrewProber();
29 // Notice: Any change in these indexes - 10,11,12 must be reflected
30 // in the code below as well.
31 mProbers[11] = hebprober;
32 mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew
33 mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew
34 // Tell the Hebrew prober about the logical and visual probers
35 if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null
36 {
37 hebprober->SetModelProbers(mProbers[12], mProbers[13]);
38 }
39 else // One or more is null. avoid any Hebrew probing, null them all
40 {
41 for (uint32_t i = 11; i <= 13; ++i)
42 {
43 delete mProbers[i];
44 mProbers[i] = 0;
45 }
46 }
48 // disable latin2 before latin1 is available, otherwise all latin1
49 // will be detected as latin2 because of their similarity.
50 //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
51 //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
53 Reset();
54 }
56 nsSBCSGroupProber::~nsSBCSGroupProber()
57 {
58 for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++)
59 {
60 delete mProbers[i];
61 }
62 }
65 const char* nsSBCSGroupProber::GetCharSetName()
66 {
67 //if we have no answer yet
68 if (mBestGuess == -1)
69 {
70 GetConfidence();
71 //no charset seems positive
72 if (mBestGuess == -1)
73 //we will use default.
74 mBestGuess = 0;
75 }
76 return mProbers[mBestGuess]->GetCharSetName();
77 }
79 void nsSBCSGroupProber::Reset(void)
80 {
81 mActiveNum = 0;
82 for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++)
83 {
84 if (mProbers[i]) // not null
85 {
86 mProbers[i]->Reset();
87 mIsActive[i] = true;
88 ++mActiveNum;
89 }
90 else
91 mIsActive[i] = false;
92 }
93 mBestGuess = -1;
94 mState = eDetecting;
95 }
98 nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen)
99 {
100 nsProbingState st;
101 uint32_t i;
102 char *newBuf1 = 0;
103 uint32_t newLen1 = 0;
105 //apply filter to original buffer, and we got new buffer back
106 //depend on what script it is, we will feed them the new buffer
107 //we got after applying proper filter
108 //this is done without any consideration to KeepEnglishLetters
109 //of each prober since as of now, there are no probers here which
110 //recognize languages with English characters.
111 if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
112 goto done;
114 if (newLen1 == 0)
115 goto done; // Nothing to see here, move on.
117 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
118 {
119 if (!mIsActive[i])
120 continue;
121 st = mProbers[i]->HandleData(newBuf1, newLen1);
122 if (st == eFoundIt)
123 {
124 mBestGuess = i;
125 mState = eFoundIt;
126 break;
127 }
128 else if (st == eNotMe)
129 {
130 mIsActive[i] = false;
131 mActiveNum--;
132 if (mActiveNum <= 0)
133 {
134 mState = eNotMe;
135 break;
136 }
137 }
138 }
140 done:
141 PR_FREEIF(newBuf1);
143 return mState;
144 }
146 float nsSBCSGroupProber::GetConfidence(void)
147 {
148 uint32_t i;
149 float bestConf = 0.0, cf;
151 switch (mState)
152 {
153 case eFoundIt:
154 return (float)0.99; //sure yes
155 case eNotMe:
156 return (float)0.01; //sure no
157 default:
158 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
159 {
160 if (!mIsActive[i])
161 continue;
162 cf = mProbers[i]->GetConfidence();
163 if (bestConf < cf)
164 {
165 bestConf = cf;
166 mBestGuess = i;
167 }
168 }
169 }
170 return bestConf;
171 }
#ifdef DEBUG_chardet
/**
 * Debug helper: prints the status of every prober in the group followed by
 * the current best match and its confidence.
 *
 * Empty prober slots are skipped, because the constructor may null the
 * Hebrew slots and Reset() marks null slots inactive. When no best guess
 * has been recorded (mBestGuess is still -1, e.g. no active prober reported
 * a positive confidence) the match name is printed as "(none)" instead of
 * indexing mProbers out of bounds.
 */
void nsSBCSGroupProber::DumpStatus()
{
  uint32_t i;
  float cf;

  cf = GetConfidence();
  printf(" SBCS Group Prober --------begin status \r\n");
  for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    // An empty slot has nothing to report and must not be dereferenced.
    if (!mProbers[i])
      continue;

    if (!mIsActive[i])
      printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
    else
      mProbers[i]->DumpStatus();
  }

  // GetConfidence() does not always select a prober, so guard the lookup.
  const char* bestName = "(none)";
  if (mBestGuess != -1 && mProbers[mBestGuess])
    bestName = mProbers[mBestGuess]->GetCharSetName();

  printf(" SBCS Group found best match [%s] confidence %f.\r\n",
         bestName, cf);
}
#endif