michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: // for S-JIS encoding, obeserve characteristic: michael@0: // 1, kana character (or hankaku?) often have hight frequency of appereance michael@0: // 2, kana character often exist in group michael@0: // 3, certain combination of kana is never used in japanese language michael@0: michael@0: #include "nsSJISProber.h" michael@0: #include "nsDebug.h" michael@0: michael@0: void nsSJISProber::Reset(void) michael@0: { michael@0: mCodingSM->Reset(); michael@0: mState = eDetecting; michael@0: mContextAnalyser.Reset(mIsPreferredLanguage); michael@0: mDistributionAnalyser.Reset(mIsPreferredLanguage); michael@0: } michael@0: michael@0: nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen) michael@0: { michael@0: NS_ASSERTION(aLen, "HandleData called with empty buffer"); michael@0: nsSMState codingState; michael@0: michael@0: for (uint32_t i = 0; i < aLen; i++) michael@0: { michael@0: codingState = mCodingSM->NextState(aBuf[i]); michael@0: if (codingState == eItsMe) michael@0: { michael@0: mState = eFoundIt; michael@0: break; michael@0: } michael@0: if (codingState == eStart) michael@0: { michael@0: uint32_t charLen = mCodingSM->GetCurrentCharLen(); michael@0: if (i == 0) michael@0: { michael@0: mLastChar[1] = aBuf[0]; michael@0: mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen); michael@0: mDistributionAnalyser.HandleOneChar(mLastChar, charLen); michael@0: } michael@0: else michael@0: { michael@0: mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen); michael@0: mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); michael@0: } michael@0: } michael@0: } michael@0: michael@0: mLastChar[0] = aBuf[aLen-1]; michael@0: michael@0: if (mState == eDetecting) michael@0: if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) michael@0: mState = eFoundIt; michael@0: michael@0: return mState; michael@0: } michael@0: michael@0: float nsSJISProber::GetConfidence(void) michael@0: { michael@0: float contxtCf = mContextAnalyser.GetConfidence(); michael@0: float distribCf = mDistributionAnalyser.GetConfidence(); michael@0: michael@0: return (contxtCf > distribCf ? contxtCf : distribCf); michael@0: } michael@0: