michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include michael@0: #include "prmem.h" michael@0: michael@0: #include "nsSBCharSetProber.h" michael@0: #include "nsSBCSGroupProber.h" michael@0: michael@0: #include "nsHebrewProber.h" michael@0: michael@0: nsSBCSGroupProber::nsSBCSGroupProber() michael@0: { michael@0: mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); michael@0: mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); michael@0: mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); michael@0: mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); michael@0: mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); michael@0: mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); michael@0: mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); michael@0: mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); michael@0: mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); michael@0: mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); michael@0: mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel); michael@0: michael@0: nsHebrewProber *hebprober = new nsHebrewProber(); michael@0: // Notice: Any change in these indexes - 10,11,12 must be reflected michael@0: // in the code below as well. michael@0: mProbers[11] = hebprober; michael@0: mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew michael@0: mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew michael@0: // Tell the Hebrew prober about the logical and visual probers michael@0: if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null michael@0: { michael@0: hebprober->SetModelProbers(mProbers[12], mProbers[13]); michael@0: } michael@0: else // One or more is null. avoid any Hebrew probing, null them all michael@0: { michael@0: for (uint32_t i = 11; i <= 13; ++i) michael@0: { michael@0: delete mProbers[i]; michael@0: mProbers[i] = 0; michael@0: } michael@0: } michael@0: michael@0: // disable latin2 before latin1 is available, otherwise all latin1 michael@0: // will be detected as latin2 because of their similarity. michael@0: //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); michael@0: //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); michael@0: michael@0: Reset(); michael@0: } michael@0: michael@0: nsSBCSGroupProber::~nsSBCSGroupProber() michael@0: { michael@0: for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++) michael@0: { michael@0: delete mProbers[i]; michael@0: } michael@0: } michael@0: michael@0: michael@0: const char* nsSBCSGroupProber::GetCharSetName() michael@0: { michael@0: //if we have no answer yet michael@0: if (mBestGuess == -1) michael@0: { michael@0: GetConfidence(); michael@0: //no charset seems positive michael@0: if (mBestGuess == -1) michael@0: //we will use default. michael@0: mBestGuess = 0; michael@0: } michael@0: return mProbers[mBestGuess]->GetCharSetName(); michael@0: } michael@0: michael@0: void nsSBCSGroupProber::Reset(void) michael@0: { michael@0: mActiveNum = 0; michael@0: for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++) michael@0: { michael@0: if (mProbers[i]) // not null michael@0: { michael@0: mProbers[i]->Reset(); michael@0: mIsActive[i] = true; michael@0: ++mActiveNum; michael@0: } michael@0: else michael@0: mIsActive[i] = false; michael@0: } michael@0: mBestGuess = -1; michael@0: mState = eDetecting; michael@0: } michael@0: michael@0: michael@0: nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) michael@0: { michael@0: nsProbingState st; michael@0: uint32_t i; michael@0: char *newBuf1 = 0; michael@0: uint32_t newLen1 = 0; michael@0: michael@0: //apply filter to original buffer, and we got new buffer back michael@0: //depend on what script it is, we will feed them the new buffer michael@0: //we got after applying proper filter michael@0: //this is done without any consideration to KeepEnglishLetters michael@0: //of each prober since as of now, there are no probers here which michael@0: //recognize languages with English characters. michael@0: if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) michael@0: goto done; michael@0: michael@0: if (newLen1 == 0) michael@0: goto done; // Nothing to see here, move on. michael@0: michael@0: for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: continue; michael@0: st = mProbers[i]->HandleData(newBuf1, newLen1); michael@0: if (st == eFoundIt) michael@0: { michael@0: mBestGuess = i; michael@0: mState = eFoundIt; michael@0: break; michael@0: } michael@0: else if (st == eNotMe) michael@0: { michael@0: mIsActive[i] = false; michael@0: mActiveNum--; michael@0: if (mActiveNum <= 0) michael@0: { michael@0: mState = eNotMe; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: done: michael@0: PR_FREEIF(newBuf1); michael@0: michael@0: return mState; michael@0: } michael@0: michael@0: float nsSBCSGroupProber::GetConfidence(void) michael@0: { michael@0: uint32_t i; michael@0: float bestConf = 0.0, cf; michael@0: michael@0: switch (mState) michael@0: { michael@0: case eFoundIt: michael@0: return (float)0.99; //sure yes michael@0: case eNotMe: michael@0: return (float)0.01; //sure no michael@0: default: michael@0: for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: continue; michael@0: cf = mProbers[i]->GetConfidence(); michael@0: if (bestConf < cf) michael@0: { michael@0: bestConf = cf; michael@0: mBestGuess = i; michael@0: } michael@0: } michael@0: } michael@0: return bestConf; michael@0: } michael@0: michael@0: #ifdef DEBUG_chardet michael@0: void nsSBCSGroupProber::DumpStatus() michael@0: { michael@0: uint32_t i; michael@0: float cf; michael@0: michael@0: cf = GetConfidence(); michael@0: printf(" SBCS Group Prober --------begin status \r\n"); michael@0: for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) michael@0: { michael@0: if (!mIsActive[i]) michael@0: printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName()); michael@0: else michael@0: mProbers[i]->DumpStatus(); michael@0: } michael@0: printf(" SBCS Group found best match [%s] confidence %f.\r\n", michael@0: mProbers[mBestGuess]->GetCharSetName(), cf); michael@0: } michael@0: #endif