michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef __JPCNTX_H__ michael@0: #define __JPCNTX_H__ michael@0: michael@0: #define NUM_OF_CATEGORY 6 michael@0: michael@0: #include "nscore.h" michael@0: michael@0: #define ENOUGH_REL_THRESHOLD 100 michael@0: #define MAX_REL_THRESHOLD 1000 michael@0: michael@0: //hiragana frequency category table michael@0: extern const uint8_t jp2CharContext[83][83]; michael@0: michael@0: class JapaneseContextAnalysis michael@0: { michael@0: public: michael@0: JapaneseContextAnalysis() {Reset(false);} michael@0: michael@0: void HandleData(const char* aBuf, uint32_t aLen); michael@0: michael@0: void HandleOneChar(const char* aStr, uint32_t aCharLen) michael@0: { michael@0: int32_t order; michael@0: michael@0: //if we received enough data, stop here michael@0: if (mTotalRel > MAX_REL_THRESHOLD) mDone = true; michael@0: if (mDone) return; michael@0: michael@0: //Only 2-bytes characters are of our interest michael@0: order = (aCharLen == 2) ? GetOrder(aStr) : -1; michael@0: if (order != -1 && mLastCharOrder != -1) michael@0: { michael@0: mTotalRel++; michael@0: //count this sequence to its category counter michael@0: mRelSample[jp2CharContext[mLastCharOrder][order]]++; michael@0: } michael@0: mLastCharOrder = order; michael@0: } michael@0: michael@0: float GetConfidence(void); michael@0: void Reset(bool aIsPreferredLanguage); michael@0: bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;} michael@0: michael@0: protected: michael@0: virtual int32_t GetOrder(const char* str, uint32_t *charLen) = 0; michael@0: virtual int32_t GetOrder(const char* str) = 0; michael@0: michael@0: //category counters, each integer counts sequences in its category michael@0: uint32_t mRelSample[NUM_OF_CATEGORY]; michael@0: michael@0: //total sequence received michael@0: uint32_t mTotalRel; michael@0: michael@0: //Number of sequences needed to trigger detection michael@0: uint32_t mDataThreshold; michael@0: michael@0: //The order of previous char michael@0: int32_t mLastCharOrder; michael@0: michael@0: //if last byte in current buffer is not the last byte of a character, we michael@0: //need to know how many byte to skip in next buffer. michael@0: uint32_t mNeedToSkipCharNum; michael@0: michael@0: //If this flag is set to true, detection is done and conclusion has been made michael@0: bool mDone; michael@0: }; michael@0: michael@0: michael@0: class SJISContextAnalysis : public JapaneseContextAnalysis michael@0: { michael@0: //SJISContextAnalysis(){}; michael@0: protected: michael@0: int32_t GetOrder(const char* str, uint32_t *charLen); michael@0: michael@0: int32_t GetOrder(const char* str) michael@0: { michael@0: //We only interested in Hiragana, so first byte is '\202' michael@0: if (*str == '\202' && michael@0: (unsigned char)*(str+1) >= (unsigned char)0x9f && michael@0: (unsigned char)*(str+1) <= (unsigned char)0xf1) michael@0: return (unsigned char)*(str+1) - (unsigned char)0x9f; michael@0: return -1; michael@0: } michael@0: }; michael@0: michael@0: class EUCJPContextAnalysis : public JapaneseContextAnalysis michael@0: { michael@0: protected: michael@0: int32_t GetOrder(const char* str, uint32_t *charLen); michael@0: int32_t GetOrder(const char* str) michael@0: //We only interested in Hiragana, so first byte is '\244' michael@0: { michael@0: if (*str == '\244' && michael@0: (unsigned char)*(str+1) >= (unsigned char)0xa1 && michael@0: (unsigned char)*(str+1) <= (unsigned char)0xf3) michael@0: return (unsigned char)*(str+1) - (unsigned char)0xa1; michael@0: return -1; michael@0: } michael@0: }; michael@0: michael@0: #endif /* __JPCNTX_H__ */ michael@0: