|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (C) 2005-2012, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 */ |
|
7 |
|
8 #ifndef __CSRECOG_H |
|
9 #define __CSRECOG_H |
|
10 |
|
11 #include "unicode/uobject.h" |
|
12 |
|
13 #if !UCONFIG_NO_CONVERSION |
|
14 |
|
15 #include "inputext.h" |
|
16 |
|
17 U_NAMESPACE_BEGIN |
|
18 |
|
19 class CharsetMatch; |
|
20 |
|
21 class CharsetRecognizer : public UMemory |
|
22 { |
|
23 public: |
|
24 /** |
|
25 * Get the IANA name of this charset. |
|
26 * Note that some recognizers can recognize more than one charset, but that this API |
|
27 * assumes just one name per recognizer. |
|
28 * TODO: need to account for multiple names in public API that enumerates over the |
|
29 * known detectable charsets. |
|
30 * @return the charset name. |
|
31 */ |
|
32 virtual const char *getName() const = 0; |
|
33 |
|
34 /** |
|
35 * Get the ISO language code for this charset. |
|
36 * @return the language code, or <code>null</code> if the language cannot be determined. |
|
37 */ |
|
38 virtual const char *getLanguage() const; |
|
39 |
|
40 /* |
|
41 * Try the given input text against this Charset, and fill in the results object |
|
42 * with the quality of the match plus other information related to the match. |
|
43 * |
|
44 * Return TRUE if the the input bytes are a potential match, and |
|
45 * FALSE if the input data is not compatible with, or illegal in this charset. |
|
46 */ |
|
47 virtual UBool match(InputText *textIn, CharsetMatch *results) const = 0; |
|
48 |
|
49 virtual ~CharsetRecognizer(); |
|
50 }; |
|
51 |
|
52 U_NAMESPACE_END |
|
53 |
|
54 #endif |
|
55 #endif /* __CSRECOG_H */ |