extensions/universalchardet/src/base/nsSJISProber.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 // For S-JIS encoding, observe these characteristics:
     7 // 1. kana characters (or hankaku?) often have a high frequency of appearance
     8 // 2. kana characters often appear in groups
     9 // 3. certain combinations of kana are never used in the Japanese language
    11 #include "nsSJISProber.h"
    12 #include "nsDebug.h"
    14 void  nsSJISProber::Reset(void)
    15 {
    16   mCodingSM->Reset(); 
    17   mState = eDetecting;
    18   mContextAnalyser.Reset(mIsPreferredLanguage);
    19   mDistributionAnalyser.Reset(mIsPreferredLanguage);
    20 }
    22 nsProbingState nsSJISProber::HandleData(const char* aBuf, uint32_t aLen)
    23 {
    24   NS_ASSERTION(aLen, "HandleData called with empty buffer");
    25   nsSMState codingState;
    27   for (uint32_t i = 0; i < aLen; i++)
    28   {
    29     codingState = mCodingSM->NextState(aBuf[i]);
    30     if (codingState == eItsMe)
    31     {
    32       mState = eFoundIt;
    33       break;
    34     }
    35     if (codingState == eStart)
    36     {
    37       uint32_t charLen = mCodingSM->GetCurrentCharLen();
    38       if (i == 0)
    39       {
    40         mLastChar[1] = aBuf[0];
    41         mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen);
    42         mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
    43       }
    44       else
    45       {
    46         mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen);
    47         mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
    48       }
    49     }
    50   }
    52   mLastChar[0] = aBuf[aLen-1];
    54   if (mState == eDetecting)
    55     if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
    56       mState = eFoundIt;
    58   return mState;
    59 }
    61 float nsSJISProber::GetConfidence(void)
    62 {
    63   float contxtCf = mContextAnalyser.GetConfidence();
    64   float distribCf = mDistributionAnalyser.GetConfidence();
    66   return (contxtCf > distribCf ? contxtCf : distribCf);
    67 }

mercurial