extensions/universalchardet/src/base/nsUTF8Prober.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/universalchardet/src/base/nsUTF8Prober.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,55 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsUTF8Prober.h"
    1.10 +
    1.11 +void  nsUTF8Prober::Reset(void)
    1.12 +{
    1.13 +  mCodingSM->Reset(); 
    1.14 +  mNumOfMBChar = 0;
    1.15 +  mState = eDetecting;
    1.16 +}
    1.17 +
    1.18 +nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen)
    1.19 +{
    1.20 +  nsSMState codingState;
    1.21 +
    1.22 +  for (uint32_t i = 0; i < aLen; i++)
    1.23 +  {
    1.24 +    codingState = mCodingSM->NextState(aBuf[i]);
    1.25 +    if (codingState == eItsMe)
    1.26 +    {
    1.27 +      mState = eFoundIt;
    1.28 +      break;
    1.29 +    }
    1.30 +    if (codingState == eStart)
    1.31 +    {
    1.32 +      if (mCodingSM->GetCurrentCharLen() >= 2)
    1.33 +        mNumOfMBChar++;
    1.34 +    }
    1.35 +  }
    1.36 +
    1.37 +  if (mState == eDetecting)
    1.38 +    if (GetConfidence() > SHORTCUT_THRESHOLD)
    1.39 +      mState = eFoundIt;
    1.40 +  return mState;
    1.41 +}
    1.42 +
    1.43 +#define ONE_CHAR_PROB   (float)0.50
    1.44 +
    1.45 +float nsUTF8Prober::GetConfidence(void)
    1.46 +{
    1.47 +  float unlike = (float)0.99;
    1.48 +
    1.49 +  if (mNumOfMBChar < 6)
    1.50 +  {
    1.51 +    for (uint32_t i = 0; i < mNumOfMBChar; i++)
    1.52 +      unlike *= ONE_CHAR_PROB;
    1.53 +    return (float)1.0 - unlike;
    1.54 +  }
    1.55 +  else
    1.56 +    return (float)0.99;
    1.57 +}
    1.58 +

mercurial