|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 #include "nscore.h" |
|
6 #include "nsCyrillicProb.h" |
|
7 #include <stdio.h> |
|
8 |
|
9 #include "nsCOMPtr.h" |
|
10 #include "nsISupports.h" |
|
11 #include "nsICharsetDetector.h" |
|
12 #include "nsICharsetDetectionObserver.h" |
|
13 #include "nsIStringCharsetDetector.h" |
|
14 #include "nsCyrillicDetector.h" |
|
15 |
|
16 //---------------------------------------------------------------------- |
|
17 // Interface nsISupports [implementation] |
|
// Provide the nsISupports implementation for the two XPCOM wrapper classes.
// Each invocation names the class followed by the single interface it exposes.
18 NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector) |
|
19 NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector) |
|
20 |
|
21 void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen) |
|
22 { |
|
23 uint8_t cls; |
|
24 const char* b; |
|
25 uint32_t i; |
|
26 if(mDone) |
|
27 return; |
|
28 for(i=0, b=aBuf;i<aLen;i++,b++) |
|
29 { |
|
30 for(unsigned j=0;j<mItems;j++) |
|
31 { |
|
32 if( 0x80 & *b) |
|
33 cls = mCyrillicClass[j][(*b) & 0x7F]; |
|
34 else |
|
35 cls = 0; |
|
36 NS_ASSERTION( cls <= 32 , "illegal character class"); |
|
37 mProb[j] += gCyrillicProb[mLastCls[j]][cls]; |
|
38 mLastCls[j] = cls; |
|
39 } |
|
40 } |
|
41 // We now only based on the first block we receive |
|
42 DataEnd(); |
|
43 } |
|
44 |
|
45 //--------------------------------------------------------------------- |
|
46 #define THRESHOLD_RATIO 1.5f |
|
47 void nsCyrillicDetector::DataEnd() |
|
48 { |
|
49 uint32_t max=0; |
|
50 uint8_t maxIdx=0; |
|
51 uint8_t j; |
|
52 if(mDone) |
|
53 return; |
|
54 for(j=0;j<mItems;j++) { |
|
55 if(mProb[j] > max) |
|
56 { |
|
57 max = mProb[j]; |
|
58 maxIdx= j; |
|
59 } |
|
60 } |
|
61 |
|
62 if( 0 == max ) // if we didn't get any 8 bits data |
|
63 return; |
|
64 |
|
65 #ifdef DEBUG |
|
66 for(j=0;j<mItems;j++) |
|
67 printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]); |
|
68 #endif |
|
69 this->Report(mCharsets[maxIdx]); |
|
70 mDone = true; |
|
71 } |
|
72 |
|
73 //--------------------------------------------------------------------- |
|
74 nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems, |
|
75 const uint8_t ** aCyrillicClass, |
|
76 const char **aCharsets) |
|
77 : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) |
|
78 { |
|
79 mObserver = nullptr; |
|
80 } |
|
81 |
|
82 //--------------------------------------------------------------------- |
|
83 nsCyrXPCOMDetector::~nsCyrXPCOMDetector() |
|
84 { |
|
85 } |
|
86 |
|
87 //--------------------------------------------------------------------- |
|
88 NS_IMETHODIMP nsCyrXPCOMDetector::Init( |
|
89 nsICharsetDetectionObserver* aObserver) |
|
90 { |
|
91 NS_ASSERTION(mObserver == nullptr , "Init twice"); |
|
92 if(nullptr == aObserver) |
|
93 return NS_ERROR_ILLEGAL_VALUE; |
|
94 |
|
95 mObserver = aObserver; |
|
96 return NS_OK; |
|
97 } |
|
98 |
|
99 //---------------------------------------------------------- |
|
100 NS_IMETHODIMP nsCyrXPCOMDetector::DoIt( |
|
101 const char* aBuf, uint32_t aLen, bool* oDontFeedMe) |
|
102 { |
|
103 NS_ASSERTION(mObserver != nullptr , "have not init yet"); |
|
104 |
|
105 if((nullptr == aBuf) || (nullptr == oDontFeedMe)) |
|
106 return NS_ERROR_ILLEGAL_VALUE; |
|
107 |
|
108 this->HandleData(aBuf, aLen); |
|
109 *oDontFeedMe = false; |
|
110 return NS_OK; |
|
111 } |
|
112 |
|
113 //---------------------------------------------------------- |
|
114 NS_IMETHODIMP nsCyrXPCOMDetector::Done() |
|
115 { |
|
116 NS_ASSERTION(mObserver != nullptr , "have not init yet"); |
|
117 this->DataEnd(); |
|
118 return NS_OK; |
|
119 } |
|
120 |
|
121 //---------------------------------------------------------- |
|
122 void nsCyrXPCOMDetector::Report(const char* aCharset) |
|
123 { |
|
124 NS_ASSERTION(mObserver != nullptr , "have not init yet"); |
|
125 mObserver->Notify(aCharset, eBestAnswer); |
|
126 } |
|
127 |
|
128 //--------------------------------------------------------------------- |
|
129 nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems, |
|
130 const uint8_t ** aCyrillicClass, |
|
131 const char **aCharsets) |
|
132 : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) |
|
133 { |
|
134 } |
|
135 |
|
136 //--------------------------------------------------------------------- |
|
137 nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() |
|
138 { |
|
139 } |
|
140 |
|
141 //--------------------------------------------------------------------- |
|
142 void nsCyrXPCOMStringDetector::Report(const char *aCharset) |
|
143 { |
|
144 mResult = aCharset; |
|
145 } |
|
146 |
|
147 //--------------------------------------------------------------------- |
|
148 NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen, |
|
149 const char** oCharset, nsDetectionConfident &oConf) |
|
150 { |
|
151 mResult = nullptr; |
|
152 mDone = false; |
|
153 this->HandleData(aBuf, aLen); |
|
154 this->DataEnd(); |
|
155 *oCharset=mResult; |
|
156 oConf = eBestAnswer; |
|
157 return NS_OK; |
|
158 } |
|
159 |
|
160 |