extensions/universalchardet/src/base/nsSBCharSetProber.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include <stdio.h>
     6 #include "nsSBCharSetProber.h"
     8 nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, uint32_t aLen)
     9 {
    10   unsigned char order;
    12   for (uint32_t i = 0; i < aLen; i++)
    13   {
    14     order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
    16     if (order < SYMBOL_CAT_ORDER)
    17       mTotalChar++;
    18     if (order < SAMPLE_SIZE)
    19     {
    20         mFreqChar++;
    22       if (mLastOrder < SAMPLE_SIZE)
    23       {
    24         mTotalSeqs++;
    25         if (!mReversed)
    26           ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]);
    27         else // reverse the order of the letters in the lookup
    28           ++(mSeqCounters[mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]);
    29       }
    30     }
    31     mLastOrder = order;
    32   }
    34   if (mState == eDetecting)
    35     if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
    36     {
    37       float cf = GetConfidence();
    38       if (cf > POSITIVE_SHORTCUT_THRESHOLD)
    39         mState = eFoundIt;
    40       else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
    41         mState = eNotMe;
    42     }
    44   return mState;
    45 }
    47 void  nsSingleByteCharSetProber::Reset(void)
    48 {
    49   mState = eDetecting;
    50   mLastOrder = 255;
    51   for (uint32_t i = 0; i < NUMBER_OF_SEQ_CAT; i++)
    52     mSeqCounters[i] = 0;
    53   mTotalSeqs = 0;
    54   mTotalChar = 0;
    55   mFreqChar = 0;
    56 }
    58 //#define NEGATIVE_APPROACH 1
    60 float nsSingleByteCharSetProber::GetConfidence(void)
    61 {
    62 #ifdef NEGATIVE_APPROACH
    63   if (mTotalSeqs > 0)
    64     if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
    65       return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
    66   return (float)0.01;
    67 #else  //POSITIVE_APPROACH
    68   float r;
    70   if (mTotalSeqs > 0) {
    71     r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
    72     r = r*mFreqChar/mTotalChar;
    73     if (r >= (float)1.00)
    74       r = (float)0.99;
    75     return r;
    76   }
    77   return (float)0.01;
    78 #endif
    79 }
    81 const char* nsSingleByteCharSetProber::GetCharSetName() 
    82 {
    83   if (!mNameProber)
    84     return mModel->charsetName;
    85   return mNameProber->GetCharSetName();
    86 }
    #ifdef DEBUG_chardet
    // Debug helper (only built when DEBUG_chardet is defined): print the current
    // confidence and charset name to stdout.
    void nsSingleByteCharSetProber::DumpStatus()
    {
      const float confidence = GetConfidence();
      const char* const charsetName = GetCharSetName();
      printf("  SBCS: %1.3f [%s]\r\n", confidence, charsetName);
    }
    #endif

mercurial