|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 // For Japanese encodings, observe these characteristics: |
|
7 // 1, kana characters (or hankaku?) often have a high frequency of appearance |
|
8 // 2, kana characters often exist in groups |
|
9 // 3, certain combinations of kana are never used in the Japanese language |
|
10 |
|
11 #include "nsEUCJPProber.h" |
|
12 #include "nsDebug.h" |
|
13 |
|
14 void nsEUCJPProber::Reset(void) |
|
15 { |
|
16 mCodingSM->Reset(); |
|
17 mState = eDetecting; |
|
18 mContextAnalyser.Reset(mIsPreferredLanguage); |
|
19 mDistributionAnalyser.Reset(mIsPreferredLanguage); |
|
20 } |
|
21 |
|
22 nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) |
|
23 { |
|
24 NS_ASSERTION(aLen, "HandleData called with empty buffer"); |
|
25 nsSMState codingState; |
|
26 |
|
27 for (uint32_t i = 0; i < aLen; i++) |
|
28 { |
|
29 codingState = mCodingSM->NextState(aBuf[i]); |
|
30 if (codingState == eItsMe) |
|
31 { |
|
32 mState = eFoundIt; |
|
33 break; |
|
34 } |
|
35 if (codingState == eStart) |
|
36 { |
|
37 uint32_t charLen = mCodingSM->GetCurrentCharLen(); |
|
38 |
|
39 if (i == 0) |
|
40 { |
|
41 mLastChar[1] = aBuf[0]; |
|
42 mContextAnalyser.HandleOneChar(mLastChar, charLen); |
|
43 mDistributionAnalyser.HandleOneChar(mLastChar, charLen); |
|
44 } |
|
45 else |
|
46 { |
|
47 mContextAnalyser.HandleOneChar(aBuf+i-1, charLen); |
|
48 mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); |
|
49 } |
|
50 } |
|
51 } |
|
52 |
|
53 mLastChar[0] = aBuf[aLen-1]; |
|
54 |
|
55 if (mState == eDetecting) |
|
56 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) |
|
57 mState = eFoundIt; |
|
58 |
|
59 return mState; |
|
60 } |
|
61 |
|
62 float nsEUCJPProber::GetConfidence(void) |
|
63 { |
|
64 float contxtCf = mContextAnalyser.GetConfidence(); |
|
65 float distribCf = mDistributionAnalyser.GetConfidence(); |
|
66 |
|
67 return (contxtCf > distribCf ? contxtCf : distribCf); |
|
68 } |
|
69 |