|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsUTF8Prober.h" |
|
7 |
|
8 void nsUTF8Prober::Reset(void) |
|
9 { |
|
10 mCodingSM->Reset(); |
|
11 mNumOfMBChar = 0; |
|
12 mState = eDetecting; |
|
13 } |
|
14 |
|
15 nsProbingState nsUTF8Prober::HandleData(const char* aBuf, uint32_t aLen) |
|
16 { |
|
17 nsSMState codingState; |
|
18 |
|
19 for (uint32_t i = 0; i < aLen; i++) |
|
20 { |
|
21 codingState = mCodingSM->NextState(aBuf[i]); |
|
22 if (codingState == eItsMe) |
|
23 { |
|
24 mState = eFoundIt; |
|
25 break; |
|
26 } |
|
27 if (codingState == eStart) |
|
28 { |
|
29 if (mCodingSM->GetCurrentCharLen() >= 2) |
|
30 mNumOfMBChar++; |
|
31 } |
|
32 } |
|
33 |
|
34 if (mState == eDetecting) |
|
35 if (GetConfidence() > SHORTCUT_THRESHOLD) |
|
36 mState = eFoundIt; |
|
37 return mState; |
|
38 } |
|
39 |
|
40 #define ONE_CHAR_PROB (float)0.50 |
|
41 |
|
42 float nsUTF8Prober::GetConfidence(void) |
|
43 { |
|
44 float unlike = (float)0.99; |
|
45 |
|
46 if (mNumOfMBChar < 6) |
|
47 { |
|
48 for (uint32_t i = 0; i < mNumOfMBChar; i++) |
|
49 unlike *= ONE_CHAR_PROB; |
|
50 return (float)1.0 - unlike; |
|
51 } |
|
52 else |
|
53 return (float)0.99; |
|
54 } |
|
55 |