extensions/universalchardet/src/base/nsSJISProber.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/extensions/universalchardet/src/base/nsSJISProber.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,47 @@
     1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +// For S-JIS encoding, observe these characteristics:
    1.10 +// 1. kana characters (or hankaku?) often have a high frequency of appearance
    1.11 +// 2. kana characters often exist in groups
    1.12 +// 3. certain combinations of kana are never used in the Japanese language
    1.13 +
    1.14 +#ifndef nsSJISProber_h__
    1.15 +#define nsSJISProber_h__
    1.16 +
    1.17 +#include "nsCharSetProber.h"
    1.18 +#include "nsCodingStateMachine.h"
    1.19 +#include "JpCntx.h"
    1.20 +#include "CharDistribution.h"
    1.21 +
    1.22 +
    1.23 +class nsSJISProber: public nsCharSetProber {  // charset prober that reports "Shift_JIS"
    1.24 +public:
    1.25 +  explicit nsSJISProber(bool aIsPreferredLanguage)  // explicit: no implicit bool -> prober conversion
    1.26 +    :mIsPreferredLanguage(aIsPreferredLanguage)
    1.27 +  {mCodingSM = new nsCodingStateMachine(&SJISSMModel);  // owned here; released by the destructor
    1.28 +    Reset();}
    1.29 +  virtual ~nsSJISProber(void){delete mCodingSM;}
    1.30 +  nsProbingState HandleData(const char* aBuf, uint32_t aLen);  // aBuf: input bytes, aLen: byte count; defined out of line
    1.31 +  const char* GetCharSetName() {return "Shift_JIS";}  // name of the charset this prober detects
    1.32 +  nsProbingState GetState(void) {return mState;}  // current value of mState
    1.33 +  void      Reset(void);  // defined out of line; also called by the constructor
    1.34 +  float     GetConfidence(void);  // defined out of line
    1.35 +
    1.36 +protected:
    1.37 +  nsCodingStateMachine* mCodingSM;  // allocated in the constructor, deleted in the destructor
    1.38 +  nsProbingState mState;  // value returned by GetState()
    1.39 +  // Context and distribution analysers; they are used by the out-of-line methods.
    1.40 +  SJISContextAnalysis mContextAnalyser;
    1.41 +  SJISDistributionAnalysis mDistributionAnalyser;
    1.42 +  // NOTE(review): mLastChar presumably carries bytes across HandleData() calls -- confirm in the .cpp.
    1.43 +  char mLastChar[2];
    1.44 +  bool mIsPreferredLanguage;  // copied from the constructor argument
    1.45 +private: nsSJISProber(const nsSJISProber&); nsSJISProber& operator=(const nsSJISProber&);  // not copyable: a copy would double-delete mCodingSM
    1.46 +};
    1.47 +
    1.48 +
    1.49 +#endif /* nsSJISProber_h__ */
    1.50 +

mercurial