extensions/universalchardet/src/base/nsUTF8Prober.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsUTF8Prober.h"
michael@0 7
michael@0 8 void nsUTF8Prober::Reset(void)
michael@0 9 {
michael@0 10 mCodingSM->Reset();
michael@0 11 mNumOfMBChar = 0;
michael@0 12 mState = eDetecting;
michael@0 13 }
michael@0 14
michael@0 15 nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen)
michael@0 16 {
michael@0 17 nsSMState codingState;
michael@0 18
michael@0 19 for (uint32_t i = 0; i < aLen; i++)
michael@0 20 {
michael@0 21 codingState = mCodingSM->NextState(aBuf[i]);
michael@0 22 if (codingState == eItsMe)
michael@0 23 {
michael@0 24 mState = eFoundIt;
michael@0 25 break;
michael@0 26 }
michael@0 27 if (codingState == eStart)
michael@0 28 {
michael@0 29 if (mCodingSM->GetCurrentCharLen() >= 2)
michael@0 30 mNumOfMBChar++;
michael@0 31 }
michael@0 32 }
michael@0 33
michael@0 34 if (mState == eDetecting)
michael@0 35 if (GetConfidence() > SHORTCUT_THRESHOLD)
michael@0 36 mState = eFoundIt;
michael@0 37 return mState;
michael@0 38 }
michael@0 39
michael@0 40 #define ONE_CHAR_PROB (float)0.50
michael@0 41
michael@0 42 float nsUTF8Prober::GetConfidence(void)
michael@0 43 {
michael@0 44 float unlike = (float)0.99;
michael@0 45
michael@0 46 if (mNumOfMBChar < 6)
michael@0 47 {
michael@0 48 for (uint32_t i = 0; i < mNumOfMBChar; i++)
michael@0 49 unlike *= ONE_CHAR_PROB;
michael@0 50 return (float)1.0 - unlike;
michael@0 51 }
michael@0 52 else
michael@0 53 return (float)0.99;
michael@0 54 }
michael@0 55

mercurial