michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsGBKConvUtil.h" michael@0: #include "gbku.h" michael@0: #include "nsDebug.h" michael@0: #define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */ michael@0: //-------------------------------------------------------------------- michael@0: // nsGBKConvUtil michael@0: //-------------------------------------------------------------------- michael@0: michael@0: static const char16_t gGBKToUnicodeTable[MAX_GBK_LENGTH] = { michael@0: #include "cp936map.h" michael@0: }; michael@0: static const uint16_t gUnicodeToGBKTable[0xA000-0x4e00] = { michael@0: #include "cp936invmap.h" michael@0: }; michael@0: michael@0: bool nsGBKConvUtil::UnicodeToGBKChar( michael@0: char16_t aChar, bool aToGL, char* michael@0: aOutByte1, char* aOutByte2) michael@0: { michael@0: bool found=false; michael@0: *aOutByte1 = *aOutByte2 = 0; michael@0: if(UNICHAR_IN_RANGE(0xd800, aChar, 0xdfff)) michael@0: { michael@0: // surrogate is not in here michael@0: return false; michael@0: } michael@0: if(UNICHAR_IN_RANGE(0x4e00, aChar, 0x9FFF)) michael@0: { michael@0: uint16_t item = gUnicodeToGBKTable[aChar - 0x4e00]; michael@0: if(item != 0) michael@0: { michael@0: *aOutByte1 = item >> 8; michael@0: *aOutByte2 = item & 0x00FF; michael@0: found = true; michael@0: } else { michael@0: return false; michael@0: } michael@0: } else { michael@0: // ugly linear search michael@0: for( int32_t i = 0; i < MAX_GBK_LENGTH; i++ ) michael@0: { michael@0: if( aChar == gGBKToUnicodeTable[i]) michael@0: { michael@0: *aOutByte1 = (i / 0x00BF + 0x0081) ; michael@0: *aOutByte2 = (i % 0x00BF + 0x0040) ; michael@0: found = true; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: if(! found) michael@0: return false; michael@0: michael@0: if(aToGL) { michael@0: // to GL, we only return if it is in the range michael@0: if(UINT8_IN_RANGE(0xA1, *aOutByte1, 0xFE) && michael@0: UINT8_IN_RANGE(0xA1, *aOutByte2, 0xFE)) michael@0: { michael@0: // mask them to GL michael@0: *aOutByte1 &= 0x7F; michael@0: *aOutByte2 &= 0x7F; michael@0: } else { michael@0: // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean michael@0: // it is not a GB2312 character, we cannot map to GL michael@0: *aOutByte1 = 0x00; michael@0: *aOutByte2 = 0x00; michael@0: return false; michael@0: } michael@0: } michael@0: return true; michael@0: } michael@0: char16_t nsGBKConvUtil::GBKCharToUnicode(char aByte1, char aByte2) michael@0: { michael@0: NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1, 0xFE), "first byte out of range"); michael@0: NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2, 0xFE), "second byte out of range"); michael@0: michael@0: uint8_t i1 = (uint8_t)aByte1; michael@0: uint8_t i2 = (uint8_t)aByte2; michael@0: uint16_t idx = (i1 - 0x0081) * 0x00bf + i2 - 0x0040 ; michael@0: michael@0: NS_ASSERTION(idx < MAX_GBK_LENGTH, "ARB"); michael@0: // play it safe- add if statement here ot protect ARB michael@0: // probably not necessary michael@0: if(idx < MAX_GBK_LENGTH) michael@0: return gGBKToUnicodeTable[ idx ]; michael@0: else michael@0: return UCS2_NO_MAPPING; michael@0: }