michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 2005-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #ifndef __CSR2022_H michael@0: #define __CSR2022_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "csrecog.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class CharsetMatch; michael@0: michael@0: /** michael@0: * class CharsetRecog_2022 part of the ICU charset detection imlementation. michael@0: * This is a superclass for the individual detectors for michael@0: * each of the detectable members of the ISO 2022 family michael@0: * of encodings. michael@0: * michael@0: * The separate classes are nested within this class. michael@0: * michael@0: * @internal michael@0: */ michael@0: class CharsetRecog_2022 : public CharsetRecognizer michael@0: { michael@0: michael@0: public: michael@0: virtual ~CharsetRecog_2022() = 0; michael@0: michael@0: protected: michael@0: michael@0: /** michael@0: * Matching function shared among the 2022 detectors JP, CN and KR michael@0: * Counts up the number of legal an unrecognized escape sequences in michael@0: * the sample of text, and computes a score based on the total number & michael@0: * the proportion that fit the encoding. michael@0: * michael@0: * michael@0: * @param text the byte buffer containing text to analyse michael@0: * @param textLen the size of the text in the byte. michael@0: * @param escapeSequences the byte escape sequences to test for. michael@0: * @return match quality, in the range of 0-100. michael@0: */ michael@0: int32_t match_2022(const uint8_t *text, michael@0: int32_t textLen, michael@0: const uint8_t escapeSequences[][5], michael@0: int32_t escapeSequences_length) const; michael@0: michael@0: }; michael@0: michael@0: class CharsetRecog_2022JP :public CharsetRecog_2022 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_2022JP(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: UBool match(InputText *textIn, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_2022KR :public CharsetRecog_2022 { michael@0: public: michael@0: virtual ~CharsetRecog_2022KR(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: UBool match(InputText *textIn, CharsetMatch *results) const; michael@0: michael@0: }; michael@0: michael@0: class CharsetRecog_2022CN :public CharsetRecog_2022 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_2022CN(); michael@0: michael@0: const char* getName() const; michael@0: michael@0: UBool match(InputText *textIn, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif michael@0: #endif /* __CSR2022_H */