michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 2005-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #ifndef __CSRECOG_H michael@0: #define __CSRECOG_H michael@0: michael@0: #include "unicode/uobject.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "inputext.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class CharsetMatch; michael@0: michael@0: class CharsetRecognizer : public UMemory michael@0: { michael@0: public: michael@0: /** michael@0: * Get the IANA name of this charset. michael@0: * Note that some recognizers can recognize more than one charset, but that this API michael@0: * assumes just one name per recognizer. michael@0: * TODO: need to account for multiple names in public API that enumerates over the michael@0: * known detectable charsets. michael@0: * @return the charset name. michael@0: */ michael@0: virtual const char *getName() const = 0; michael@0: michael@0: /** michael@0: * Get the ISO language code for this charset. michael@0: * @return the language code, or null if the language cannot be determined. michael@0: */ michael@0: virtual const char *getLanguage() const; michael@0: michael@0: /* michael@0: * Try the given input text against this Charset, and fill in the results object michael@0: * with the quality of the match plus other information related to the match. michael@0: * michael@0: * Return TRUE if the the input bytes are a potential match, and michael@0: * FALSE if the input data is not compatible with, or illegal in this charset. michael@0: */ michael@0: virtual UBool match(InputText *textIn, CharsetMatch *results) const = 0; michael@0: michael@0: virtual ~CharsetRecognizer(); michael@0: }; michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif michael@0: #endif /* __CSRECOG_H */