michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 2005-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #ifndef __CSRSBCS_H michael@0: #define __CSRSBCS_H michael@0: michael@0: #include "unicode/uobject.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "csrecog.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: class NGramParser : public UMemory michael@0: { michael@0: private: michael@0: int32_t ngram; michael@0: const int32_t *ngramList; michael@0: michael@0: int32_t ngramCount; michael@0: int32_t hitCount; michael@0: michael@0: protected: michael@0: int32_t byteIndex; michael@0: const uint8_t *charMap; michael@0: michael@0: void addByte(int32_t b); michael@0: michael@0: public: michael@0: NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap); michael@0: michael@0: private: michael@0: /* michael@0: * Binary search for value in table, which must have exactly 64 entries. michael@0: */ michael@0: int32_t search(const int32_t *table, int32_t value); michael@0: michael@0: void lookup(int32_t thisNgram); michael@0: michael@0: virtual int32_t nextByte(InputText *det); michael@0: virtual void parseCharacters(InputText *det); michael@0: michael@0: public: michael@0: int32_t parse(InputText *det); michael@0: michael@0: }; michael@0: michael@0: class NGramParser_IBM420 : public NGramParser michael@0: { michael@0: private: michael@0: int32_t alef; michael@0: int32_t isLamAlef(int32_t b); michael@0: int32_t nextByte(InputText *det); michael@0: void parseCharacters(InputText *det); michael@0: michael@0: public: michael@0: NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); michael@0: }; michael@0: michael@0: michael@0: class CharsetRecog_sbcs : public CharsetRecognizer michael@0: { michael@0: public: michael@0: CharsetRecog_sbcs(); michael@0: virtual ~CharsetRecog_sbcs(); michael@0: virtual const char *getName() const = 0; michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const = 0; michael@0: virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_1 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_1(); michael@0: const char *getName() const; michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_2 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_2(); michael@0: const char *getName() const; michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_5 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_5(); michael@0: const char *getName() const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_6 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_6(); michael@0: michael@0: const char *getName() const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_7 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_7(); michael@0: michael@0: const char *getName() const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_8 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_8(); michael@0: michael@0: virtual const char *getName() const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_9 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_9(); michael@0: michael@0: const char *getName() const; michael@0: }; michael@0: michael@0: michael@0: michael@0: class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_5_ru(); michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_6_ar(); michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_7_el(); michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_8_I_he(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_8_he (); michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_8859_9_tr (); michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_windows_1256 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_windows_1256(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_windows_1251 : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_windows_1251(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: michael@0: class CharsetRecog_KOI8_R : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_KOI8_R(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: const char *getLanguage() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_IBM424_he : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_IBM424_he(); michael@0: michael@0: const char *getLanguage() const; michael@0: }; michael@0: michael@0: class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { michael@0: public: michael@0: virtual ~CharsetRecog_IBM424_he_rtl(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { michael@0: virtual ~CharsetRecog_IBM424_he_ltr(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs michael@0: { michael@0: public: michael@0: virtual ~CharsetRecog_IBM420_ar(); michael@0: michael@0: const char *getLanguage() const; michael@0: int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; michael@0: michael@0: }; michael@0: michael@0: class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar { michael@0: public: michael@0: virtual ~CharsetRecog_IBM420_ar_rtl(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { michael@0: virtual ~CharsetRecog_IBM420_ar_ltr(); michael@0: michael@0: const char *getName() const; michael@0: michael@0: virtual UBool match(InputText *det, CharsetMatch *results) const; michael@0: }; michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* !UCONFIG_NO_CONVERSION */ michael@0: #endif /* __CSRSBCS_H */