extensions/universalchardet/src/base/nsEUCJPProber.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 // For Japanese encodings, observe the following characteristics:
michael@0 7 // 1. kana characters (or hankaku?) often have a high frequency of appearance
michael@0 8 // 2. kana characters often appear in groups
michael@0 9 // 3. certain combinations of kana are never used in the Japanese language
michael@0 10
michael@0 11 #include "nsEUCJPProber.h"
michael@0 12 #include "nsDebug.h"
michael@0 13
michael@0 14 void nsEUCJPProber::Reset(void)
michael@0 15 {
michael@0 16 mCodingSM->Reset();
michael@0 17 mState = eDetecting;
michael@0 18 mContextAnalyser.Reset(mIsPreferredLanguage);
michael@0 19 mDistributionAnalyser.Reset(mIsPreferredLanguage);
michael@0 20 }
michael@0 21
michael@0 22 nsProbingState nsEUCJPProber::HandleData(const char* aBuf, uint32_t aLen)
michael@0 23 {
michael@0 24 NS_ASSERTION(aLen, "HandleData called with empty buffer");
michael@0 25 nsSMState codingState;
michael@0 26
michael@0 27 for (uint32_t i = 0; i < aLen; i++)
michael@0 28 {
michael@0 29 codingState = mCodingSM->NextState(aBuf[i]);
michael@0 30 if (codingState == eItsMe)
michael@0 31 {
michael@0 32 mState = eFoundIt;
michael@0 33 break;
michael@0 34 }
michael@0 35 if (codingState == eStart)
michael@0 36 {
michael@0 37 uint32_t charLen = mCodingSM->GetCurrentCharLen();
michael@0 38
michael@0 39 if (i == 0)
michael@0 40 {
michael@0 41 mLastChar[1] = aBuf[0];
michael@0 42 mContextAnalyser.HandleOneChar(mLastChar, charLen);
michael@0 43 mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
michael@0 44 }
michael@0 45 else
michael@0 46 {
michael@0 47 mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
michael@0 48 mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
michael@0 49 }
michael@0 50 }
michael@0 51 }
michael@0 52
michael@0 53 mLastChar[0] = aBuf[aLen-1];
michael@0 54
michael@0 55 if (mState == eDetecting)
michael@0 56 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
michael@0 57 mState = eFoundIt;
michael@0 58
michael@0 59 return mState;
michael@0 60 }
michael@0 61
michael@0 62 float nsEUCJPProber::GetConfidence(void)
michael@0 63 {
michael@0 64 float contxtCf = mContextAnalyser.GetConfidence();
michael@0 65 float distribCf = mDistributionAnalyser.GetConfidence();
michael@0 66
michael@0 67 return (contxtCf > distribCf ? contxtCf : distribCf);
michael@0 68 }
michael@0 69

mercurial