intl/icu/source/i18n/csr2022.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/csr2022.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,91 @@
     1.4 +/*
     1.5 + **********************************************************************
     1.6 + *   Copyright (C) 2005-2012, International Business Machines
     1.7 + *   Corporation and others.  All Rights Reserved.
     1.8 + **********************************************************************
     1.9 + */
    1.10 +
    1.11 +#ifndef __CSR2022_H
    1.12 +#define __CSR2022_H
    1.13 +
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_CONVERSION
    1.17 +
    1.18 +#include "csrecog.h"
    1.19 +
    1.20 +U_NAMESPACE_BEGIN
    1.21 +
    1.22 +class CharsetMatch;
    1.23 +
    1.24 +/**
    1.25 + *  class CharsetRecog_2022  part of the ICU charset detection implementation.
    1.26 + *                           This is a superclass for the individual detectors for
    1.27 + *                           each of the detectable members of the ISO 2022 family
    1.28 + *                           of encodings.
    1.29 + * 
    1.30 + *                           The individual detectors are subclasses declared below.
    1.31 + * 
    1.32 + * @internal
    1.33 + */
    1.34 +class CharsetRecog_2022 : public CharsetRecognizer
    1.35 +{
    1.36 +
    1.37 +public:    
    1.38 +    virtual ~CharsetRecog_2022() = 0;  // pure virtual destructor: this class is abstract
    1.39 +
    1.40 +protected:
    1.41 +
    1.42 +    /**
    1.43 +     * Matching function shared among the 2022 detectors JP, CN and KR.
    1.44 +     * Counts up the number of legal and unrecognized escape sequences in
    1.45 +     * the sample of text, and computes a score based on the total number &
    1.46 +     * the proportion that fit the encoding.
    1.47 +     * 
    1.48 +     * @param text the byte buffer containing text to analyse.
    1.49 +     * @param textLen the size of the text, in bytes.
    1.50 +     * @param escapeSequences the byte escape sequences to test for; each row holds 5 bytes.
    1.51 +     * @param escapeSequences_length presumably the number of rows in escapeSequences (TODO confirm).
    1.52 +     * @return match quality, in the range of 0-100.
    1.53 +     */
    1.54 +    int32_t match_2022(const uint8_t *text,
    1.55 +                       int32_t textLen,
    1.56 +                       const uint8_t escapeSequences[][5],
    1.57 +                       int32_t escapeSequences_length) const;
    1.58 +
    1.59 +};
    1.60 +
    1.61 +class CharsetRecog_2022JP :public CharsetRecog_2022  // "JP" detector of the ISO 2022 family
    1.62 +{
    1.63 +public:
    1.64 +    virtual ~CharsetRecog_2022JP();
    1.65 +    // NOTE(review): presumably returns the charset name this detector reports; defined outside this header.
    1.66 +    const char *getName() const;
    1.67 +    // NOTE(review): presumably tests textIn and records the outcome in results; UBool meaning not visible here - confirm.
    1.68 +    UBool match(InputText *textIn, CharsetMatch *results) const;
    1.69 +};
    1.70 +
    1.71 +class CharsetRecog_2022KR :public CharsetRecog_2022 {  // "KR" detector of the ISO 2022 family
    1.72 +public:
    1.73 +    virtual ~CharsetRecog_2022KR();
    1.74 +    // NOTE(review): presumably returns the charset name this detector reports; defined outside this header.
    1.75 +    const char *getName() const;
    1.76 +    // NOTE(review): presumably tests textIn and records the outcome in results; UBool meaning not visible here - confirm.
    1.77 +    UBool match(InputText *textIn, CharsetMatch *results) const;
    1.78 +
    1.79 +};
    1.80 +
    1.81 +class CharsetRecog_2022CN :public CharsetRecog_2022  // "CN" detector of the ISO 2022 family
    1.82 +{
    1.83 +public:
    1.84 +    virtual ~CharsetRecog_2022CN();
    1.85 +    // NOTE(review): presumably returns the charset name this detector reports; defined outside this header.
    1.86 +    const char* getName() const;
    1.87 +    // NOTE(review): presumably tests textIn and records the outcome in results; UBool meaning not visible here - confirm.
    1.88 +    UBool match(InputText *textIn, CharsetMatch *results) const;
    1.89 +};
    1.90 +
    1.91 +U_NAMESPACE_END
    1.92 +
    1.93 +#endif
    1.94 +#endif /* __CSR2022_H */

mercurial