|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #ifndef __JPCNTX_H__ |
|
7 #define __JPCNTX_H__ |
|
8 |
|
// Number of frequency categories: sizes the mRelSample counter array in
// JapaneseContextAnalysis, which is indexed by entries of jp2CharContext.
9 #define NUM_OF_CATEGORY 6 |
|
10 |
|
11 #include "nscore.h" |
|
12 |
|
// GotEnoughData() reports true once the pair counter mTotalRel exceeds this.
13 #define ENOUGH_REL_THRESHOLD 100 |
|
// HandleOneChar() sets mDone and stops counting once mTotalRel exceeds this.
14 #define MAX_REL_THRESHOLD 1000 |
|
15 |
|
16 //Hiragana frequency category table (defined in another translation unit). |
|
// Indexed as [order of previous character][order of current character]; the
// entry read is used as an index into the mRelSample category counters.
// NOTE(review): entries are presumably all < NUM_OF_CATEGORY - confirm
// against the table definition.
17 extern const uint8_t jp2CharContext[83][83]; |
|
18 |
|
19 class JapaneseContextAnalysis |
|
20 { |
|
21 public: |
|
22 JapaneseContextAnalysis() {Reset(false);} |
|
23 |
|
24 void HandleData(const char* aBuf, uint32_t aLen); |
|
25 |
|
26 void HandleOneChar(const char* aStr, uint32_t aCharLen) |
|
27 { |
|
28 int32_t order; |
|
29 |
|
30 //if we received enough data, stop here |
|
31 if (mTotalRel > MAX_REL_THRESHOLD) mDone = true; |
|
32 if (mDone) return; |
|
33 |
|
34 //Only 2-bytes characters are of our interest |
|
35 order = (aCharLen == 2) ? GetOrder(aStr) : -1; |
|
36 if (order != -1 && mLastCharOrder != -1) |
|
37 { |
|
38 mTotalRel++; |
|
39 //count this sequence to its category counter |
|
40 mRelSample[jp2CharContext[mLastCharOrder][order]]++; |
|
41 } |
|
42 mLastCharOrder = order; |
|
43 } |
|
44 |
|
45 float GetConfidence(void); |
|
46 void Reset(bool aIsPreferredLanguage); |
|
47 bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;} |
|
48 |
|
49 protected: |
|
50 virtual int32_t GetOrder(const char* str, uint32_t *charLen) = 0; |
|
51 virtual int32_t GetOrder(const char* str) = 0; |
|
52 |
|
53 //category counters, each integer counts sequences in its category |
|
54 uint32_t mRelSample[NUM_OF_CATEGORY]; |
|
55 |
|
56 //total sequence received |
|
57 uint32_t mTotalRel; |
|
58 |
|
59 //Number of sequences needed to trigger detection |
|
60 uint32_t mDataThreshold; |
|
61 |
|
62 //The order of previous char |
|
63 int32_t mLastCharOrder; |
|
64 |
|
65 //if last byte in current buffer is not the last byte of a character, we |
|
66 //need to know how many byte to skip in next buffer. |
|
67 uint32_t mNeedToSkipCharNum; |
|
68 |
|
69 //If this flag is set to true, detection is done and conclusion has been made |
|
70 bool mDone; |
|
71 }; |
|
72 |
|
73 |
|
74 class SJISContextAnalysis : public JapaneseContextAnalysis |
|
75 { |
|
76 //SJISContextAnalysis(){}; |
|
77 protected: |
|
78 int32_t GetOrder(const char* str, uint32_t *charLen); |
|
79 |
|
80 int32_t GetOrder(const char* str) |
|
81 { |
|
82 //We only interested in Hiragana, so first byte is '\202' |
|
83 if (*str == '\202' && |
|
84 (unsigned char)*(str+1) >= (unsigned char)0x9f && |
|
85 (unsigned char)*(str+1) <= (unsigned char)0xf1) |
|
86 return (unsigned char)*(str+1) - (unsigned char)0x9f; |
|
87 return -1; |
|
88 } |
|
89 }; |
|
90 |
|
91 class EUCJPContextAnalysis : public JapaneseContextAnalysis |
|
92 { |
|
93 protected: |
|
94 int32_t GetOrder(const char* str, uint32_t *charLen); |
|
95 int32_t GetOrder(const char* str) |
|
96 //We only interested in Hiragana, so first byte is '\244' |
|
97 { |
|
98 if (*str == '\244' && |
|
99 (unsigned char)*(str+1) >= (unsigned char)0xa1 && |
|
100 (unsigned char)*(str+1) <= (unsigned char)0xf3) |
|
101 return (unsigned char)*(str+1) - (unsigned char)0xa1; |
|
102 return -1; |
|
103 } |
|
104 }; |
|
105 |
|
106 #endif /* __JPCNTX_H__ */ |
|
107 |