|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
// For S-JIS encoding, observe these characteristics:
// 1. Kana characters (or hankaku?) often have a high frequency of appearance.
// 2. Kana characters often appear in groups.
// 3. Certain combinations of kana are never used in the Japanese language.
|
10 |
|
11 #include "nsSJISProber.h" |
|
12 #include "nsDebug.h" |
|
13 |
|
14 void nsSJISProber::Reset(void) |
|
15 { |
|
16 mCodingSM->Reset(); |
|
17 mState = eDetecting; |
|
18 mContextAnalyser.Reset(mIsPreferredLanguage); |
|
19 mDistributionAnalyser.Reset(mIsPreferredLanguage); |
|
20 } |
|
21 |
|
22 nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen) |
|
23 { |
|
24 NS_ASSERTION(aLen, "HandleData called with empty buffer"); |
|
25 nsSMState codingState; |
|
26 |
|
27 for (uint32_t i = 0; i < aLen; i++) |
|
28 { |
|
29 codingState = mCodingSM->NextState(aBuf[i]); |
|
30 if (codingState == eItsMe) |
|
31 { |
|
32 mState = eFoundIt; |
|
33 break; |
|
34 } |
|
35 if (codingState == eStart) |
|
36 { |
|
37 uint32_t charLen = mCodingSM->GetCurrentCharLen(); |
|
38 if (i == 0) |
|
39 { |
|
40 mLastChar[1] = aBuf[0]; |
|
41 mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen); |
|
42 mDistributionAnalyser.HandleOneChar(mLastChar, charLen); |
|
43 } |
|
44 else |
|
45 { |
|
46 mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen); |
|
47 mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); |
|
48 } |
|
49 } |
|
50 } |
|
51 |
|
52 mLastChar[0] = aBuf[aLen-1]; |
|
53 |
|
54 if (mState == eDetecting) |
|
55 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) |
|
56 mState = eFoundIt; |
|
57 |
|
58 return mState; |
|
59 } |
|
60 |
|
61 float nsSJISProber::GetConfidence(void) |
|
62 { |
|
63 float contxtCf = mContextAnalyser.GetConfidence(); |
|
64 float distribCf = mDistributionAnalyser.GetConfidence(); |
|
65 |
|
66 return (contxtCf > distribCf ? contxtCf : distribCf); |
|
67 } |
|
68 |