Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // For S-JIS encoding, observe these characteristics:
7 // 1. kana characters (or hankaku?) often have a high frequency of appearance
8 // 2. kana characters often appear in groups
9 // 3. certain combinations of kana are never used in the Japanese language
11 #include "nsSJISProber.h"
12 #include "nsDebug.h"
14 void nsSJISProber::Reset(void)
15 {
16 mCodingSM->Reset();
17 mState = eDetecting;
18 mContextAnalyser.Reset(mIsPreferredLanguage);
19 mDistributionAnalyser.Reset(mIsPreferredLanguage);
20 }
22 nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen)
23 {
24 NS_ASSERTION(aLen, "HandleData called with empty buffer");
25 nsSMState codingState;
27 for (uint32_t i = 0; i < aLen; i++)
28 {
29 codingState = mCodingSM->NextState(aBuf[i]);
30 if (codingState == eItsMe)
31 {
32 mState = eFoundIt;
33 break;
34 }
35 if (codingState == eStart)
36 {
37 uint32_t charLen = mCodingSM->GetCurrentCharLen();
38 if (i == 0)
39 {
40 mLastChar[1] = aBuf[0];
41 mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
42 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
43 }
44 else
45 {
46 mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
47 mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
48 }
49 }
50 }
52 mLastChar[0] = aBuf[aLen-1];
54 if (mState == eDetecting)
55 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
56 mState = eFoundIt;
58 return mState;
59 }
61 float nsSJISProber::GetConfidence(void)
62 {
63 float contxtCf = mContextAnalyser.GetConfidence();
64 float distribCf = mDistributionAnalyser.GetConfidence();
66 return (contxtCf > distribCf ? contxtCf : distribCf);
67 }