Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | // for Japanese encoding, observe these characteristics: |
michael@0 | 7 | // 1, kana characters (or hankaku?) often have a high frequency of appearance |
michael@0 | 8 | // 2, kana characters often exist in groups |
michael@0 | 9 | // 3, certain combinations of kana are never used in the Japanese language |
michael@0 | 10 | |
michael@0 | 11 | #include "nsEUCJPProber.h" |
michael@0 | 12 | #include "nsDebug.h" |
michael@0 | 13 | |
michael@0 | 14 | void nsEUCJPProber::Reset(void) |
michael@0 | 15 | { |
michael@0 | 16 | mCodingSM->Reset(); |
michael@0 | 17 | mState = eDetecting; |
michael@0 | 18 | mContextAnalyser.Reset(mIsPreferredLanguage); |
michael@0 | 19 | mDistributionAnalyser.Reset(mIsPreferredLanguage); |
michael@0 | 20 | } |
michael@0 | 21 | |
michael@0 | 22 | nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen) |
michael@0 | 23 | { |
michael@0 | 24 | NS_ASSERTION(aLen, "HandleData called with empty buffer"); |
michael@0 | 25 | nsSMState codingState; |
michael@0 | 26 | |
michael@0 | 27 | for (uint32_t i = 0; i < aLen; i++) |
michael@0 | 28 | { |
michael@0 | 29 | codingState = mCodingSM->NextState(aBuf[i]); |
michael@0 | 30 | if (codingState == eItsMe) |
michael@0 | 31 | { |
michael@0 | 32 | mState = eFoundIt; |
michael@0 | 33 | break; |
michael@0 | 34 | } |
michael@0 | 35 | if (codingState == eStart) |
michael@0 | 36 | { |
michael@0 | 37 | uint32_t charLen = mCodingSM->GetCurrentCharLen(); |
michael@0 | 38 | |
michael@0 | 39 | if (i == 0) |
michael@0 | 40 | { |
michael@0 | 41 | mLastChar[1] = aBuf[0]; |
michael@0 | 42 | mContextAnalyser.HandleOneChar(mLastChar, charLen); |
michael@0 | 43 | mDistributionAnalyser.HandleOneChar(mLastChar, charLen); |
michael@0 | 44 | } |
michael@0 | 45 | else |
michael@0 | 46 | { |
michael@0 | 47 | mContextAnalyser.HandleOneChar(aBuf+i-1, charLen); |
michael@0 | 48 | mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); |
michael@0 | 49 | } |
michael@0 | 50 | } |
michael@0 | 51 | } |
michael@0 | 52 | |
michael@0 | 53 | mLastChar[0] = aBuf[aLen-1]; |
michael@0 | 54 | |
michael@0 | 55 | if (mState == eDetecting) |
michael@0 | 56 | if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) |
michael@0 | 57 | mState = eFoundIt; |
michael@0 | 58 | |
michael@0 | 59 | return mState; |
michael@0 | 60 | } |
michael@0 | 61 | |
michael@0 | 62 | float nsEUCJPProber::GetConfidence(void) |
michael@0 | 63 | { |
michael@0 | 64 | float contxtCf = mContextAnalyser.GetConfidence(); |
michael@0 | 65 | float distribCf = mDistributionAnalyser.GetConfidence(); |
michael@0 | 66 | |
michael@0 | 67 | return (contxtCf > distribCf ? contxtCf : distribCf); |
michael@0 | 68 | } |
michael@0 | 69 |