extensions/universalchardet/src/base/nsMBCSGroupProber.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include <stdio.h>
michael@0 6
michael@0 7 #include "nsMBCSGroupProber.h"
michael@0 8 #include "nsUniversalDetector.h"
michael@0 9
michael@0 10 #if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
// Display labels for the debug dump routines. Entry i is the label printed
// for the prober stored in slot i of the group.
const char* ProberName[] = {
  "UTF8",     // slot 0
  "SJIS",     // slot 1
  "EUCJP",    // slot 2
  "GB18030",  // slot 3
  "EUCKR",    // slot 4
  "Big5",     // slot 5
  "EUCTW",    // slot 6
};
michael@0 21
michael@0 22 #endif
michael@0 23
michael@0 24 nsMBCSGroupProber::nsMBCSGroupProber(uint32_t aLanguageFilter)
michael@0 25 {
michael@0 26 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
michael@0 27 mProbers[i] = nullptr;
michael@0 28
michael@0 29 mProbers[0] = new nsUTF8Prober();
michael@0 30 if (aLanguageFilter & NS_FILTER_JAPANESE)
michael@0 31 {
michael@0 32 mProbers[1] = new nsSJISProber(aLanguageFilter == NS_FILTER_JAPANESE);
michael@0 33 mProbers[2] = new nsEUCJPProber(aLanguageFilter == NS_FILTER_JAPANESE);
michael@0 34 }
michael@0 35 if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED)
michael@0 36 mProbers[3] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
michael@0 37 if (aLanguageFilter & NS_FILTER_KOREAN)
michael@0 38 mProbers[4] = new nsEUCKRProber(aLanguageFilter == NS_FILTER_KOREAN);
michael@0 39 if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL)
michael@0 40 {
michael@0 41 mProbers[5] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
michael@0 42 mProbers[6] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
michael@0 43 }
michael@0 44 Reset();
michael@0 45 }
michael@0 46
michael@0 47 nsMBCSGroupProber::~nsMBCSGroupProber()
michael@0 48 {
michael@0 49 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
michael@0 50 {
michael@0 51 delete mProbers[i];
michael@0 52 }
michael@0 53 }
michael@0 54
michael@0 55 const char* nsMBCSGroupProber::GetCharSetName()
michael@0 56 {
michael@0 57 if (mBestGuess == -1)
michael@0 58 {
michael@0 59 GetConfidence();
michael@0 60 if (mBestGuess == -1)
michael@0 61 mBestGuess = 0;
michael@0 62 }
michael@0 63 return mProbers[mBestGuess]->GetCharSetName();
michael@0 64 }
michael@0 65
michael@0 66 void nsMBCSGroupProber::Reset(void)
michael@0 67 {
michael@0 68 mActiveNum = 0;
michael@0 69 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
michael@0 70 {
michael@0 71 if (mProbers[i])
michael@0 72 {
michael@0 73 mProbers[i]->Reset();
michael@0 74 mIsActive[i] = true;
michael@0 75 ++mActiveNum;
michael@0 76 }
michael@0 77 else
michael@0 78 mIsActive[i] = false;
michael@0 79 }
michael@0 80 mBestGuess = -1;
michael@0 81 mState = eDetecting;
michael@0 82 mKeepNext = 0;
michael@0 83 }
michael@0 84
michael@0 85 nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen)
michael@0 86 {
michael@0 87 nsProbingState st;
michael@0 88 uint32_t start = 0;
michael@0 89 uint32_t keepNext = mKeepNext;
michael@0 90
michael@0 91 //do filtering to reduce load to probers
michael@0 92 for (uint32_t pos = 0; pos < aLen; ++pos)
michael@0 93 {
michael@0 94 if (aBuf[pos] & 0x80)
michael@0 95 {
michael@0 96 if (!keepNext)
michael@0 97 start = pos;
michael@0 98 keepNext = 2;
michael@0 99 }
michael@0 100 else if (keepNext)
michael@0 101 {
michael@0 102 if (--keepNext == 0)
michael@0 103 {
michael@0 104 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
michael@0 105 {
michael@0 106 if (!mIsActive[i])
michael@0 107 continue;
michael@0 108 st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start);
michael@0 109 if (st == eFoundIt)
michael@0 110 {
michael@0 111 mBestGuess = i;
michael@0 112 mState = eFoundIt;
michael@0 113 return mState;
michael@0 114 }
michael@0 115 }
michael@0 116 }
michael@0 117 }
michael@0 118 }
michael@0 119
michael@0 120 if (keepNext) {
michael@0 121 for (uint32_t i = 0; i < NUM_OF_PROBERS; i++)
michael@0 122 {
michael@0 123 if (!mIsActive[i])
michael@0 124 continue;
michael@0 125 st = mProbers[i]->HandleData(aBuf + start, aLen - start);
michael@0 126 if (st == eFoundIt)
michael@0 127 {
michael@0 128 mBestGuess = i;
michael@0 129 mState = eFoundIt;
michael@0 130 return mState;
michael@0 131 }
michael@0 132 }
michael@0 133 }
michael@0 134 mKeepNext = keepNext;
michael@0 135
michael@0 136 return mState;
michael@0 137 }
michael@0 138
michael@0 139 float nsMBCSGroupProber::GetConfidence(void)
michael@0 140 {
michael@0 141 uint32_t i;
michael@0 142 float bestConf = 0.0, cf;
michael@0 143
michael@0 144 switch (mState)
michael@0 145 {
michael@0 146 case eFoundIt:
michael@0 147 return (float)0.99;
michael@0 148 case eNotMe:
michael@0 149 return (float)0.01;
michael@0 150 default:
michael@0 151 for (i = 0; i < NUM_OF_PROBERS; i++)
michael@0 152 {
michael@0 153 if (!mIsActive[i])
michael@0 154 continue;
michael@0 155 cf = mProbers[i]->GetConfidence();
michael@0 156 if (bestConf < cf)
michael@0 157 {
michael@0 158 bestConf = cf;
michael@0 159 mBestGuess = i;
michael@0 160 }
michael@0 161 }
michael@0 162 }
michael@0 163 return bestConf;
michael@0 164 }
michael@0 165
michael@0 166 #ifdef DEBUG_chardet
michael@0 167 void nsMBCSGroupProber::DumpStatus()
michael@0 168 {
michael@0 169 uint32_t i;
michael@0 170 float cf;
michael@0 171
michael@0 172 GetConfidence();
michael@0 173 for (i = 0; i < NUM_OF_PROBERS; i++)
michael@0 174 {
michael@0 175 if (!mIsActive[i])
michael@0 176 printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
michael@0 177 else
michael@0 178 {
michael@0 179 cf = mProbers[i]->GetConfidence();
michael@0 180 printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
michael@0 181 }
michael@0 182 }
michael@0 183 }
michael@0 184 #endif
michael@0 185
michael@0 186 #ifdef DEBUG_jgmyers
michael@0 187 void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], uint32_t &offset)
michael@0 188 {
michael@0 189 for (uint32_t i = 0; i < NUM_OF_PROBERS; ++i) {
michael@0 190 states[offset].name = ProberName[i];
michael@0 191 states[offset].isActive = mIsActive[i];
michael@0 192 states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0;
michael@0 193 ++offset;
michael@0 194 }
michael@0 195 }
michael@0 196 #endif /* DEBUG_jgmyers */

mercurial