1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/extensions/universalchardet/src/base/nsUTF8Prober.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,55 @@ 1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsUTF8Prober.h" 1.10 + 1.11 +void nsUTF8Prober::Reset(void) 1.12 +{ 1.13 + mCodingSM->Reset(); 1.14 + mNumOfMBChar = 0; 1.15 + mState = eDetecting; 1.16 +} 1.17 + 1.18 +nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen) 1.19 +{ 1.20 + nsSMState codingState; 1.21 + 1.22 + for (uint32_t i = 0; i < aLen; i++) 1.23 + { 1.24 + codingState = mCodingSM->NextState(aBuf[i]); 1.25 + if (codingState == eItsMe) 1.26 + { 1.27 + mState = eFoundIt; 1.28 + break; 1.29 + } 1.30 + if (codingState == eStart) 1.31 + { 1.32 + if (mCodingSM->GetCurrentCharLen() >= 2) 1.33 + mNumOfMBChar++; 1.34 + } 1.35 + } 1.36 + 1.37 + if (mState == eDetecting) 1.38 + if (GetConfidence() > SHORTCUT_THRESHOLD) 1.39 + mState = eFoundIt; 1.40 + return mState; 1.41 +} 1.42 + 1.43 +#define ONE_CHAR_PROB (float)0.50 1.44 + 1.45 +float nsUTF8Prober::GetConfidence(void) 1.46 +{ 1.47 + float unlike = (float)0.99; 1.48 + 1.49 + if (mNumOfMBChar < 6) 1.50 + { 1.51 + for (uint32_t i = 0; i < mNumOfMBChar; i++) 1.52 + unlike *= ONE_CHAR_PROB; 1.53 + return (float)1.0 - unlike; 1.54 + } 1.55 + else 1.56 + return (float)0.99; 1.57 +} 1.58 +