michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include michael@0: michael@0: #include "nsMBCSGroupProber.h" michael@0: #include "nsUniversalDetector.h" michael@0: michael@0: #if defined(DEBUG_chardet) || defined(DEBUG_jgmyers) michael@0: const char *ProberName[] = michael@0: { michael@0: "UTF8", michael@0: "SJIS", michael@0: "EUCJP", michael@0: "GB18030", michael@0: "EUCKR", michael@0: "Big5", michael@0: "EUCTW", michael@0: }; michael@0: michael@0: #endif michael@0: michael@0: nsMBCSGroupProber::nsMBCSGroupProber(uint32_t aLanguageFilter) michael@0: { michael@0: for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) michael@0: mProbers[i] = nullptr; michael@0: michael@0: mProbers[0] = new nsUTF8Prober(); michael@0: if (aLanguageFilter & NS_FILTER_JAPANESE) michael@0: { michael@0: mProbers[1] = new nsSJISProber(aLanguageFilter == NS_FILTER_JAPANESE); michael@0: mProbers[2] = new nsEUCJPProber(aLanguageFilter == NS_FILTER_JAPANESE); michael@0: } michael@0: if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED) michael@0: mProbers[3] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED); michael@0: if (aLanguageFilter & NS_FILTER_KOREAN) michael@0: mProbers[4] = new nsEUCKRProber(aLanguageFilter == NS_FILTER_KOREAN); michael@0: if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL) michael@0: { michael@0: mProbers[5] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL); michael@0: mProbers[6] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL); michael@0: } michael@0: Reset(); michael@0: } michael@0: michael@0: nsMBCSGroupProber::~nsMBCSGroupProber() michael@0: { michael@0: for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) michael@0: { michael@0: delete mProbers[i]; michael@0: } michael@0: } michael@0: michael@0: const char* nsMBCSGroupProber::GetCharSetName() michael@0: { michael@0: if (mBestGuess == -1) michael@0: { michael@0: GetConfidence(); michael@0: if (mBestGuess == -1) michael@0: mBestGuess = 0; michael@0: } michael@0: return mProbers[mBestGuess]->GetCharSetName(); michael@0: } michael@0: michael@0: void nsMBCSGroupProber::Reset(void) michael@0: { michael@0: mActiveNum = 0; michael@0: for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) michael@0: { michael@0: if (mProbers[i]) michael@0: { michael@0: mProbers[i]->Reset(); michael@0: mIsActive[i] = true; michael@0: ++mActiveNum; michael@0: } michael@0: else michael@0: mIsActive[i] = false; michael@0: } michael@0: mBestGuess = -1; michael@0: mState = eDetecting; michael@0: mKeepNext = 0; michael@0: } michael@0: michael@0: nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) michael@0: { michael@0: nsProbingState st; michael@0: uint32_t start = 0; michael@0: uint32_t keepNext = mKeepNext; michael@0: michael@0: //do filtering to reduce load to probers michael@0: for (uint32_t pos = 0; pos < aLen; ++pos) michael@0: { michael@0: if (aBuf[pos] & 0x80) michael@0: { michael@0: if (!keepNext) michael@0: start = pos; michael@0: keepNext = 2; michael@0: } michael@0: else if (keepNext) michael@0: { michael@0: if (--keepNext == 0) michael@0: { michael@0: for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: continue; michael@0: st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start); michael@0: if (st == eFoundIt) michael@0: { michael@0: mBestGuess = i; michael@0: mState = eFoundIt; michael@0: return mState; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (keepNext) { michael@0: for (uint32_t i = 0; i < NUM_OF_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: continue; michael@0: st = mProbers[i]->HandleData(aBuf + start, aLen - start); michael@0: if (st == eFoundIt) michael@0: { michael@0: mBestGuess = i; michael@0: mState = eFoundIt; michael@0: return mState; michael@0: } michael@0: } michael@0: } michael@0: mKeepNext = keepNext; michael@0: michael@0: return mState; michael@0: } michael@0: michael@0: float nsMBCSGroupProber::GetConfidence(void) michael@0: { michael@0: uint32_t i; michael@0: float bestConf = 0.0, cf; michael@0: michael@0: switch (mState) michael@0: { michael@0: case eFoundIt: michael@0: return (float)0.99; michael@0: case eNotMe: michael@0: return (float)0.01; michael@0: default: michael@0: for (i = 0; i < NUM_OF_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: continue; michael@0: cf = mProbers[i]->GetConfidence(); michael@0: if (bestConf < cf) michael@0: { michael@0: bestConf = cf; michael@0: mBestGuess = i; michael@0: } michael@0: } michael@0: } michael@0: return bestConf; michael@0: } michael@0: michael@0: #ifdef DEBUG_chardet michael@0: void nsMBCSGroupProber::DumpStatus() michael@0: { michael@0: uint32_t i; michael@0: float cf; michael@0: michael@0: GetConfidence(); michael@0: for (i = 0; i < NUM_OF_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]); michael@0: else michael@0: { michael@0: cf = mProbers[i]->GetConfidence(); michael@0: printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: #ifdef DEBUG_jgmyers michael@0: void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], uint32_t &offset) michael@0: { michael@0: for (uint32_t i = 0; i < NUM_OF_PROBERS; ++i) { michael@0: states[offset].name = ProberName[i]; michael@0: states[offset].isActive = mIsActive[i]; michael@0: states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0; michael@0: ++offset; michael@0: } michael@0: } michael@0: #endif /* DEBUG_jgmyers */