intl/icu/source/i18n/csr2022.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2005-2012, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 */
michael@0 7
michael@0 8 #ifndef __CSR2022_H
michael@0 9 #define __CSR2022_H
michael@0 10
michael@0 11 #include "unicode/utypes.h"
michael@0 12
michael@0 13 #if !UCONFIG_NO_CONVERSION
michael@0 14
michael@0 15 #include "csrecog.h"
michael@0 16
michael@0 17 U_NAMESPACE_BEGIN
michael@0 18
michael@0 19 class CharsetMatch;
michael@0 20
michael@0 21 /**
michael@0 22 * class CharsetRecog_2022, part of the ICU charset detection implementation.
michael@0 23 * This is a superclass for the individual detectors for
michael@0 24 * each of the detectable members of the ISO 2022 family
michael@0 25 * of encodings.
michael@0 26 *
michael@0 27 * The separate classes are nested within this class.
michael@0 28 *
michael@0 29 * @internal
michael@0 30 */
michael@0 31 class CharsetRecog_2022 : public CharsetRecognizer
michael@0 32 {
michael@0 33
michael@0 34 public:
michael@0 35 virtual ~CharsetRecog_2022() = 0;
michael@0 36
michael@0 37 protected:
michael@0 38
michael@0 39 /**
michael@0 40 * Matching function shared among the 2022 detectors JP, CN and KR.
michael@0 41 * Counts up the number of legal and unrecognized escape sequences in
michael@0 42 * the sample of text, and computes a score based on the total number &
michael@0 43 * the proportion that fit the encoding.
michael@0 44 *
michael@0 45 *
michael@0 46 * @param text the byte buffer containing text to analyse
michael@0 47 * @param textLen the size of the text in bytes.
michael@0 48 * @param escapeSequences the byte escape sequences to test for.
michael@0 49 * @return match quality, in the range of 0-100.
michael@0 50 */
michael@0 51 int32_t match_2022(const uint8_t *text,
michael@0 52 int32_t textLen,
michael@0 53 const uint8_t escapeSequences[][5],
michael@0 54 int32_t escapeSequences_length) const;
michael@0 55
michael@0 56 };
michael@0 57
michael@0 58 class CharsetRecog_2022JP :public CharsetRecog_2022
michael@0 59 {
michael@0 60 public:
michael@0 61 virtual ~CharsetRecog_2022JP();
michael@0 62
michael@0 63 const char *getName() const;
michael@0 64
michael@0 65 UBool match(InputText *textIn, CharsetMatch *results) const;
michael@0 66 };
michael@0 67
michael@0 68 class CharsetRecog_2022KR :public CharsetRecog_2022 {
michael@0 69 public:
michael@0 70 virtual ~CharsetRecog_2022KR();
michael@0 71
michael@0 72 const char *getName() const;
michael@0 73
michael@0 74 UBool match(InputText *textIn, CharsetMatch *results) const;
michael@0 75
michael@0 76 };
michael@0 77
michael@0 78 class CharsetRecog_2022CN :public CharsetRecog_2022
michael@0 79 {
michael@0 80 public:
michael@0 81 virtual ~CharsetRecog_2022CN();
michael@0 82
michael@0 83 const char* getName() const;
michael@0 84
michael@0 85 UBool match(InputText *textIn, CharsetMatch *results) const;
michael@0 86 };
michael@0 87
michael@0 88 U_NAMESPACE_END
michael@0 89
michael@0 90 #endif
michael@0 91 #endif /* __CSR2022_H */

mercurial