Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include <stdio.h> |
michael@0 | 7 | #include "prmem.h" |
michael@0 | 8 | |
michael@0 | 9 | #include "nsSBCharSetProber.h" |
michael@0 | 10 | #include "nsSBCSGroupProber.h" |
michael@0 | 11 | |
michael@0 | 12 | #include "nsHebrewProber.h" |
michael@0 | 13 | |
michael@0 | 14 | nsSBCSGroupProber::nsSBCSGroupProber() |
michael@0 | 15 | { |
michael@0 | 16 | mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); |
michael@0 | 17 | mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); |
michael@0 | 18 | mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); |
michael@0 | 19 | mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); |
michael@0 | 20 | mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); |
michael@0 | 21 | mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); |
michael@0 | 22 | mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); |
michael@0 | 23 | mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); |
michael@0 | 24 | mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); |
michael@0 | 25 | mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); |
michael@0 | 26 | mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel); |
michael@0 | 27 | |
michael@0 | 28 | nsHebrewProber *hebprober = new nsHebrewProber(); |
michael@0 | 29 | // Notice: Any change in these indexes - 10,11,12 must be reflected |
michael@0 | 30 | // in the code below as well. |
michael@0 | 31 | mProbers[11] = hebprober; |
michael@0 | 32 | mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew |
michael@0 | 33 | mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew |
michael@0 | 34 | // Tell the Hebrew prober about the logical and visual probers |
michael@0 | 35 | if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null |
michael@0 | 36 | { |
michael@0 | 37 | hebprober->SetModelProbers(mProbers[12], mProbers[13]); |
michael@0 | 38 | } |
michael@0 | 39 | else // One or more is null. avoid any Hebrew probing, null them all |
michael@0 | 40 | { |
michael@0 | 41 | for (uint32_t i = 11; i <= 13; ++i) |
michael@0 | 42 | { |
michael@0 | 43 | delete mProbers[i]; |
michael@0 | 44 | mProbers[i] = 0; |
michael@0 | 45 | } |
michael@0 | 46 | } |
michael@0 | 47 | |
michael@0 | 48 | // disable latin2 before latin1 is available, otherwise all latin1 |
michael@0 | 49 | // will be detected as latin2 because of their similarity. |
michael@0 | 50 | //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); |
michael@0 | 51 | //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); |
michael@0 | 52 | |
michael@0 | 53 | Reset(); |
michael@0 | 54 | } |
michael@0 | 55 | |
michael@0 | 56 | nsSBCSGroupProber::~nsSBCSGroupProber() |
michael@0 | 57 | { |
michael@0 | 58 | for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
michael@0 | 59 | { |
michael@0 | 60 | delete mProbers[i]; |
michael@0 | 61 | } |
michael@0 | 62 | } |
michael@0 | 63 | |
michael@0 | 64 | |
michael@0 | 65 | const char* nsSBCSGroupProber::GetCharSetName() |
michael@0 | 66 | { |
michael@0 | 67 | //if we have no answer yet |
michael@0 | 68 | if (mBestGuess == -1) |
michael@0 | 69 | { |
michael@0 | 70 | GetConfidence(); |
michael@0 | 71 | //no charset seems positive |
michael@0 | 72 | if (mBestGuess == -1) |
michael@0 | 73 | //we will use default. |
michael@0 | 74 | mBestGuess = 0; |
michael@0 | 75 | } |
michael@0 | 76 | return mProbers[mBestGuess]->GetCharSetName(); |
michael@0 | 77 | } |
michael@0 | 78 | |
michael@0 | 79 | void nsSBCSGroupProber::Reset(void) |
michael@0 | 80 | { |
michael@0 | 81 | mActiveNum = 0; |
michael@0 | 82 | for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
michael@0 | 83 | { |
michael@0 | 84 | if (mProbers[i]) // not null |
michael@0 | 85 | { |
michael@0 | 86 | mProbers[i]->Reset(); |
michael@0 | 87 | mIsActive[i] = true; |
michael@0 | 88 | ++mActiveNum; |
michael@0 | 89 | } |
michael@0 | 90 | else |
michael@0 | 91 | mIsActive[i] = false; |
michael@0 | 92 | } |
michael@0 | 93 | mBestGuess = -1; |
michael@0 | 94 | mState = eDetecting; |
michael@0 | 95 | } |
michael@0 | 96 | |
michael@0 | 97 | |
michael@0 | 98 | nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, uint32_t aLen) |
michael@0 | 99 | { |
michael@0 | 100 | nsProbingState st; |
michael@0 | 101 | uint32_t i; |
michael@0 | 102 | char *newBuf1 = 0; |
michael@0 | 103 | uint32_t newLen1 = 0; |
michael@0 | 104 | |
michael@0 | 105 | //apply filter to original buffer, and we got new buffer back |
michael@0 | 106 | //depend on what script it is, we will feed them the new buffer |
michael@0 | 107 | //we got after applying proper filter |
michael@0 | 108 | //this is done without any consideration to KeepEnglishLetters |
michael@0 | 109 | //of each prober since as of now, there are no probers here which |
michael@0 | 110 | //recognize languages with English characters. |
michael@0 | 111 | if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) |
michael@0 | 112 | goto done; |
michael@0 | 113 | |
michael@0 | 114 | if (newLen1 == 0) |
michael@0 | 115 | goto done; // Nothing to see here, move on. |
michael@0 | 116 | |
michael@0 | 117 | for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
michael@0 | 118 | { |
michael@0 | 119 | if (!mIsActive[i]) |
michael@0 | 120 | continue; |
michael@0 | 121 | st = mProbers[i]->HandleData(newBuf1, newLen1); |
michael@0 | 122 | if (st == eFoundIt) |
michael@0 | 123 | { |
michael@0 | 124 | mBestGuess = i; |
michael@0 | 125 | mState = eFoundIt; |
michael@0 | 126 | break; |
michael@0 | 127 | } |
michael@0 | 128 | else if (st == eNotMe) |
michael@0 | 129 | { |
michael@0 | 130 | mIsActive[i] = false; |
michael@0 | 131 | mActiveNum--; |
michael@0 | 132 | if (mActiveNum <= 0) |
michael@0 | 133 | { |
michael@0 | 134 | mState = eNotMe; |
michael@0 | 135 | break; |
michael@0 | 136 | } |
michael@0 | 137 | } |
michael@0 | 138 | } |
michael@0 | 139 | |
michael@0 | 140 | done: |
michael@0 | 141 | PR_FREEIF(newBuf1); |
michael@0 | 142 | |
michael@0 | 143 | return mState; |
michael@0 | 144 | } |
michael@0 | 145 | |
michael@0 | 146 | float nsSBCSGroupProber::GetConfidence(void) |
michael@0 | 147 | { |
michael@0 | 148 | uint32_t i; |
michael@0 | 149 | float bestConf = 0.0, cf; |
michael@0 | 150 | |
michael@0 | 151 | switch (mState) |
michael@0 | 152 | { |
michael@0 | 153 | case eFoundIt: |
michael@0 | 154 | return (float)0.99; //sure yes |
michael@0 | 155 | case eNotMe: |
michael@0 | 156 | return (float)0.01; //sure no |
michael@0 | 157 | default: |
michael@0 | 158 | for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) |
michael@0 | 159 | { |
michael@0 | 160 | if (!mIsActive[i]) |
michael@0 | 161 | continue; |
michael@0 | 162 | cf = mProbers[i]->GetConfidence(); |
michael@0 | 163 | if (bestConf < cf) |
michael@0 | 164 | { |
michael@0 | 165 | bestConf = cf; |
michael@0 | 166 | mBestGuess = i; |
michael@0 | 167 | } |
michael@0 | 168 | } |
michael@0 | 169 | } |
michael@0 | 170 | return bestConf; |
michael@0 | 171 | } |
michael@0 | 172 | |
#ifdef DEBUG_chardet
/**
 * Debug-only: prints the status of every prober in the group followed by
 * the current best match and its confidence.
 *
 * Two cases are guarded so that the dump itself cannot crash:
 *  - a slot may hold a null pointer (the constructor nulls the Hebrew
 *    slots when that trio cannot be set up); such slots are skipped;
 *  - mBestGuess may still be -1 after GetConfidence() (state eNotMe, or
 *    no prober reported a positive confidence); "(none)" is printed
 *    instead of indexing the table with -1.
 */
void nsSBCSGroupProber::DumpStatus()
{
  const float cf = GetConfidence();

  printf(" SBCS Group Prober --------begin status \r\n");
  for (uint32_t i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    // Nothing to report for an empty slot, and nothing safe to call on it.
    if (!mProbers[i])
      continue;

    if (!mIsActive[i])
      printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
    else
      mProbers[i]->DumpStatus();
  }

  // Only index the table when a best guess has actually been recorded.
  const char* bestName = "(none)";
  if (mBestGuess != -1)
    bestName = mProbers[mBestGuess]->GetCharSetName();

  printf(" SBCS Group found best match [%s] confidence %f.\r\n",
         bestName, cf);
}
#endif