Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (C) 2005-2012, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | */ |
michael@0 | 7 | |
michael@0 | 8 | #ifndef __CSR2022_H |
michael@0 | 9 | #define __CSR2022_H |
michael@0 | 10 | |
michael@0 | 11 | #include "unicode/utypes.h" |
michael@0 | 12 | |
michael@0 | 13 | #if !UCONFIG_NO_CONVERSION |
michael@0 | 14 | |
michael@0 | 15 | #include "csrecog.h" |
michael@0 | 16 | |
michael@0 | 17 | U_NAMESPACE_BEGIN |
michael@0 | 18 | |
michael@0 | 19 | class CharsetMatch; |
michael@0 | 20 | |
michael@0 | 21 | /** |
michael@0 | 22 | * class CharsetRecog_2022 part of the ICU charset detection implementation. |
michael@0 | 23 | * This is a superclass for the individual detectors for |
michael@0 | 24 | * each of the detectable members of the ISO 2022 family |
michael@0 | 25 | * of encodings. |
michael@0 | 26 | * |
michael@0 | 27 | * The separate detector classes are declared after this class and derive from it. |
michael@0 | 28 | * |
michael@0 | 29 | * @internal |
michael@0 | 30 | */ |
michael@0 | 31 | class CharsetRecog_2022 : public CharsetRecognizer |
michael@0 | 32 | { |
michael@0 | 33 | |
michael@0 | 34 | public: |
michael@0 | 35 | virtual ~CharsetRecog_2022() = 0; // pure virtual, so this base class cannot be instantiated directly |
michael@0 | 36 | |
michael@0 | 37 | protected: |
michael@0 | 38 | |
michael@0 | 39 | /** |
michael@0 | 40 | * Matching function shared among the 2022 detectors JP, CN and KR. |
michael@0 | 41 | * Counts up the number of legal and unrecognized escape sequences in |
michael@0 | 42 | * the sample of text, and computes a score based on the total number & |
michael@0 | 43 | * the proportion that fit the encoding. |
michael@0 | 44 | * |
michael@0 | 45 | * @param text the byte buffer containing text to analyse |
michael@0 | 46 | * @param textLen the size of the text in bytes. |
michael@0 | 47 | * @param escapeSequences the byte escape sequences to test for, stored as rows of 5 bytes. |
michael@0 | 48 | * @param escapeSequences_length presumably the number of rows in escapeSequences (TODO confirm). |
michael@0 | 49 | * @return match quality, in the range of 0-100. |
michael@0 | 50 | */ |
michael@0 | 51 | int32_t match_2022(const uint8_t *text, |
michael@0 | 52 | int32_t textLen, |
michael@0 | 53 | const uint8_t escapeSequences[][5], |
michael@0 | 54 | int32_t escapeSequences_length) const; |
michael@0 | 55 | |
michael@0 | 56 | }; |
michael@0 | 57 | |
michael@0 | 58 | class CharsetRecog_2022JP :public CharsetRecog_2022 // detector for the JP member of the ISO 2022 family |
michael@0 | 59 | { |
michael@0 | 60 | public: |
michael@0 | 61 | virtual ~CharsetRecog_2022JP(); |
michael@0 | 62 | // presumably the name of the charset this detector reports -- confirm in the implementation |
michael@0 | 63 | const char *getName() const; |
michael@0 | 64 | // examines textIn; how results and the UBool return are used is not visible here -- confirm against csrecog.h |
michael@0 | 65 | UBool match(InputText *textIn, CharsetMatch *results) const; |
michael@0 | 66 | }; |
michael@0 | 67 | |
michael@0 | 68 | class CharsetRecog_2022KR :public CharsetRecog_2022 { // detector for the KR member of the ISO 2022 family |
michael@0 | 69 | public: |
michael@0 | 70 | virtual ~CharsetRecog_2022KR(); |
michael@0 | 71 | // presumably the name of the charset this detector reports -- confirm in the implementation |
michael@0 | 72 | const char *getName() const; |
michael@0 | 73 | // examines textIn; how results and the UBool return are used is not visible here -- confirm against csrecog.h |
michael@0 | 74 | UBool match(InputText *textIn, CharsetMatch *results) const; |
michael@0 | 75 | |
michael@0 | 76 | }; |
michael@0 | 77 | |
michael@0 | 78 | class CharsetRecog_2022CN :public CharsetRecog_2022 // detector for the CN member of the ISO 2022 family |
michael@0 | 79 | { |
michael@0 | 80 | public: |
michael@0 | 81 | virtual ~CharsetRecog_2022CN(); |
michael@0 | 82 | // presumably the name of the charset this detector reports -- confirm in the implementation |
michael@0 | 83 | const char* getName() const; |
michael@0 | 84 | // examines textIn; how results and the UBool return are used is not visible here -- confirm against csrecog.h |
michael@0 | 85 | UBool match(InputText *textIn, CharsetMatch *results) const; |
michael@0 | 86 | }; |
michael@0 | 87 | |
michael@0 | 88 | U_NAMESPACE_END |
michael@0 | 89 | |
michael@0 | 90 | #endif |
michael@0 | 91 | #endif /* __CSR2022_H */ |