|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
// for S-JIS encoding, observe these characteristics:
// 1, kana characters (or hankaku?) often have a high frequency of appearance
// 2, kana characters often exist in groups
// 3, certain combinations of kana are never used in the Japanese language
|
10 |
|
11 #ifndef nsEUCJPProber_h__ |
|
12 #define nsEUCJPProber_h__ |
|
13 |
|
14 #include "nsCharSetProber.h" |
|
15 #include "nsCodingStateMachine.h" |
|
16 #include "JpCntx.h" |
|
17 #include "CharDistribution.h" |
|
18 |
|
19 class nsEUCJPProber: public nsCharSetProber { |
|
20 public: |
|
21 nsEUCJPProber(bool aIsPreferredLanguage) |
|
22 :mIsPreferredLanguage(aIsPreferredLanguage) |
|
23 {mCodingSM = new nsCodingStateMachine(&EUCJPSMModel); |
|
24 Reset();} |
|
25 virtual ~nsEUCJPProber(void){delete mCodingSM;} |
|
26 nsProbingState HandleData(const char* aBuf, uint32_t aLen); |
|
27 const char* GetCharSetName() {return "EUC-JP";} |
|
28 nsProbingState GetState(void) {return mState;} |
|
29 void Reset(void); |
|
30 float GetConfidence(void); |
|
31 |
|
32 protected: |
|
33 nsCodingStateMachine* mCodingSM; |
|
34 nsProbingState mState; |
|
35 |
|
36 EUCJPContextAnalysis mContextAnalyser; |
|
37 EUCJPDistributionAnalysis mDistributionAnalyser; |
|
38 |
|
39 char mLastChar[2]; |
|
40 bool mIsPreferredLanguage; |
|
41 }; |
|
42 |
|
43 |
|
44 #endif /* nsEUCJPProber_h__ */ |
|
45 |