michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include michael@0: #include "nsSBCharSetProber.h" michael@0: michael@0: nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, uint32_t aLen) michael@0: { michael@0: unsigned char order; michael@0: michael@0: for (uint32_t i = 0; i < aLen; i++) michael@0: { michael@0: order = mModel->charToOrderMap[(unsigned char)aBuf[i]]; michael@0: michael@0: if (order < SYMBOL_CAT_ORDER) michael@0: mTotalChar++; michael@0: if (order < SAMPLE_SIZE) michael@0: { michael@0: mFreqChar++; michael@0: michael@0: if (mLastOrder < SAMPLE_SIZE) michael@0: { michael@0: mTotalSeqs++; michael@0: if (!mReversed) michael@0: ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]); michael@0: else // reverse the order of the letters in the lookup michael@0: ++(mSeqCounters[mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]); michael@0: } michael@0: } michael@0: mLastOrder = order; michael@0: } michael@0: michael@0: if (mState == eDetecting) michael@0: if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD) michael@0: { michael@0: float cf = GetConfidence(); michael@0: if (cf > POSITIVE_SHORTCUT_THRESHOLD) michael@0: mState = eFoundIt; michael@0: else if (cf < NEGATIVE_SHORTCUT_THRESHOLD) michael@0: mState = eNotMe; michael@0: } michael@0: michael@0: return mState; michael@0: } michael@0: michael@0: void nsSingleByteCharSetProber::Reset(void) michael@0: { michael@0: mState = eDetecting; michael@0: mLastOrder = 255; michael@0: for (uint32_t i = 0; i < NUMBER_OF_SEQ_CAT; i++) michael@0: mSeqCounters[i] = 0; michael@0: mTotalSeqs = 0; michael@0: mTotalChar = 0; michael@0: mFreqChar = 0; michael@0: } michael@0: michael@0: //#define NEGATIVE_APPROACH 1 michael@0: michael@0: float nsSingleByteCharSetProber::GetConfidence(void) michael@0: { michael@0: #ifdef NEGATIVE_APPROACH michael@0: if (mTotalSeqs > 0) michael@0: if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 ) michael@0: return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar; michael@0: return (float)0.01; michael@0: #else //POSITIVE_APPROACH michael@0: float r; michael@0: michael@0: if (mTotalSeqs > 0) { michael@0: r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio; michael@0: r = r*mFreqChar/mTotalChar; michael@0: if (r >= (float)1.00) michael@0: r = (float)0.99; michael@0: return r; michael@0: } michael@0: return (float)0.01; michael@0: #endif michael@0: } michael@0: michael@0: const char* nsSingleByteCharSetProber::GetCharSetName() michael@0: { michael@0: if (!mNameProber) michael@0: return mModel->charsetName; michael@0: return mNameProber->GetCharSetName(); michael@0: } michael@0: michael@0: #ifdef DEBUG_chardet michael@0: void nsSingleByteCharSetProber::DumpStatus() michael@0: { michael@0: printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName()); michael@0: } michael@0: #endif