michael@0: /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include "unicpriv.h" michael@0: #define CHK_GR94(b) ( (uint8_t) 0xa0 < (uint8_t) (b) && (uint8_t) (b) < (uint8_t) 0xff ) michael@0: #define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2)) michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: typedef int (*uSubScannerFunc) (unsigned char* in, uint16_t* out); michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: michael@0: typedef int (*uScannerFunc) ( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: michael@0: int uScan( michael@0: uScanClassID scanClass, michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: michael@0: #define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out)) michael@0: michael@0: int uCheckAndScanAlways1Byte( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScanAlways2Byte( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScanAlways2ByteShiftGR( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScanAlways2ByteGR128( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uScanShift( michael@0: uShiftInTable *shift, michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: michael@0: int uCheckAndScan2ByteGRPrefix8F( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScan2ByteGRPrefix8EA2( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScan2ByteGRPrefix8EA3( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScan2ByteGRPrefix8EA4( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScan2ByteGRPrefix8EA5( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScan2ByteGRPrefix8EA6( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScan2ByteGRPrefix8EA7( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCnSAlways8BytesDecomposedHangul( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScanJohabHangul( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: int uCheckAndScanJohabSymbol( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: michael@0: int uCheckAndScan4BytesGB18030( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ); michael@0: michael@0: int uScanAlways2Byte( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ); michael@0: int uScanAlways2ByteShiftGR( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ); michael@0: int uScanAlways1Byte( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ); michael@0: int uScanAlways1BytePrefix8E( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ); michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: const uScannerFunc m_scanner[uNumOfCharsetType] = michael@0: { michael@0: uCheckAndScanAlways1Byte, michael@0: uCheckAndScanAlways2Byte, michael@0: uCheckAndScanAlways2ByteShiftGR, michael@0: uCheckAndScan2ByteGRPrefix8F, michael@0: uCheckAndScan2ByteGRPrefix8EA2, michael@0: uCheckAndScan2ByteGRPrefix8EA3, michael@0: uCheckAndScan2ByteGRPrefix8EA4, michael@0: uCheckAndScan2ByteGRPrefix8EA5, michael@0: uCheckAndScan2ByteGRPrefix8EA6, michael@0: uCheckAndScan2ByteGRPrefix8EA7, michael@0: uCnSAlways8BytesDecomposedHangul, michael@0: uCheckAndScanJohabHangul, michael@0: uCheckAndScanJohabSymbol, michael@0: uCheckAndScan4BytesGB18030, michael@0: uCheckAndScanAlways2ByteGR128 michael@0: }; michael@0: michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: michael@0: const uSubScannerFunc m_subscanner[uNumOfCharType] = michael@0: { michael@0: uScanAlways1Byte, michael@0: uScanAlways2Byte, michael@0: uScanAlways2ByteShiftGR, michael@0: uScanAlways1BytePrefix8E michael@0: }; michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uScan( michael@0: uScanClassID scanClass, michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen); michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uScanAlways1Byte( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ) michael@0: { michael@0: *out = (uint16_t) in[0]; michael@0: return 1; michael@0: } michael@0: michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uScanAlways2Byte( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ) michael@0: { michael@0: *out = (uint16_t) (( in[0] << 8) | (in[1])); michael@0: return 1; michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uScanAlways2ByteShiftGR( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ) michael@0: { michael@0: *out = (uint16_t) ((( in[0] << 8) | (in[1])) & 0x7F7F); michael@0: return 1; michael@0: } michael@0: michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uScanAlways1BytePrefix8E( michael@0: unsigned char* in, michael@0: uint16_t* out michael@0: ) michael@0: { michael@0: *out = (uint16_t) in[1]; michael@0: return 1; michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScanAlways1Byte( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: /* Don't check inlen. The caller should ensure it is larger than 0 */ michael@0: *inscanlen = 1; michael@0: *out = (uint16_t) in[0]; michael@0: michael@0: return 1; michael@0: } michael@0: michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScanAlways2Byte( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: if(inbuflen < 2) michael@0: return 0; michael@0: else michael@0: { michael@0: *inscanlen = 2; michael@0: *out = ((in[0] << 8) | ( in[1])) ; michael@0: return 1; michael@0: } michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScanAlways2ByteShiftGR( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: /* michael@0: * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets michael@0: * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets. michael@0: * Only 2nd byte range needs to be checked because michael@0: * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp michael@0: */ michael@0: if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */ michael@0: return 0; michael@0: else if (! CHK_GR94(in[1])) michael@0: { michael@0: *inscanlen = 2; michael@0: *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ michael@0: return 1; michael@0: } michael@0: else michael@0: { michael@0: *inscanlen = 2; michael@0: *out = (((in[0] << 8) | ( in[1])) & 0x7F7F); michael@0: return 1; michael@0: } michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScanAlways2ByteGR128( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: /* michael@0: * The first byte should be in [0xa1,0xfe] michael@0: * and the second byte in [0x41,0xfe] michael@0: * Used by CP949 -> Unicode converter. michael@0: * Only 2nd byte range needs to be checked because michael@0: * 1st byte is checked before calling this in nsUnicodeDecoderHelper.cpp michael@0: */ michael@0: if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */ michael@0: return 0; michael@0: else if (in[1] < 0x41) /* 2nd byte range check */ michael@0: { michael@0: *inscanlen = 2; michael@0: *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ michael@0: return 1; michael@0: } michael@0: else michael@0: { michael@0: *inscanlen = 2; michael@0: *out = (in[0] << 8) | in[1]; michael@0: return 1; michael@0: } michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uScanShift( michael@0: uShiftInTable *shift, michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: int16_t i; michael@0: const uShiftInCell* cell = &(shift->shiftcell[0]); michael@0: int16_t itemnum = shift->numOfItem; michael@0: for(i=0;i= cell[i].shiftin_Min) && michael@0: ( in[0] <= cell[i].shiftin_Max)) michael@0: { michael@0: if(inbuflen < cell[i].reserveLen) michael@0: return 0; michael@0: else michael@0: { michael@0: *inscanlen = cell[i].reserveLen; michael@0: return (uSubScanner(cell[i].classID,in,out)); michael@0: } michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScan2ByteGRPrefix8F( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: if((inbuflen < 3) ||(in[0] != 0x8F)) michael@0: return 0; michael@0: else if (! CHK_GR94(in[1])) /* 2nd byte range check */ michael@0: { michael@0: *inscanlen = 2; michael@0: *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ michael@0: return 1; michael@0: } michael@0: else if (! CHK_GR94(in[2])) /* 3rd byte range check */ michael@0: { michael@0: *inscanlen = 3; michael@0: *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ michael@0: return 1; michael@0: } michael@0: else michael@0: { michael@0: *inscanlen = 3; michael@0: *out = (((in[1] << 8) | ( in[2])) & 0x7F7F); michael@0: return 1; michael@0: } michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: michael@0: /* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX() michael@0: * where X is 2,3,4,5,6,7 michael@0: */ michael@0: #define CNS_8EAX_4BYTE(PREFIX) \ michael@0: if((inbuflen < 4) || (in[0] != 0x8E)) \ michael@0: return 0; \ michael@0: else if((in[1] != (PREFIX))) \ michael@0: { \ michael@0: *inscanlen = 2; \ michael@0: *out = 0xFF; \ michael@0: return 1; \ michael@0: } \ michael@0: else if(! CHK_GR94(in[2])) \ michael@0: { \ michael@0: *inscanlen = 3; \ michael@0: *out = 0xFF; \ michael@0: return 1; \ michael@0: } \ michael@0: else if(! CHK_GR94(in[3])) \ michael@0: { \ michael@0: *inscanlen = 4; \ michael@0: *out = 0xFF; \ michael@0: return 1; \ michael@0: } \ michael@0: else \ michael@0: { \ michael@0: *inscanlen = 4; \ michael@0: *out = (((in[2] << 8) | ( in[3])) & 0x7F7F); \ michael@0: return 1; \ michael@0: } michael@0: michael@0: int uCheckAndScan2ByteGRPrefix8EA2( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: CNS_8EAX_4BYTE(0xA2) michael@0: } michael@0: michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScan2ByteGRPrefix8EA3( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: CNS_8EAX_4BYTE(0xA3) michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScan2ByteGRPrefix8EA4( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: CNS_8EAX_4BYTE(0xA4) michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScan2ByteGRPrefix8EA5( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: CNS_8EAX_4BYTE(0xA5) michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScan2ByteGRPrefix8EA6( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: CNS_8EAX_4BYTE(0xA6) michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: int uCheckAndScan2ByteGRPrefix8EA7( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: CNS_8EAX_4BYTE(0xA7) michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: #define SBase 0xAC00 michael@0: #define SCount 11172 michael@0: #define LCount 19 michael@0: #define VCount 21 michael@0: #define TCount 28 michael@0: #define NCount (VCount * TCount) michael@0: michael@0: int uCnSAlways8BytesDecomposedHangul( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: michael@0: uint16_t LIndex, VIndex, TIndex; michael@0: /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */ michael@0: if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) || michael@0: (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6])) michael@0: return 0; michael@0: michael@0: /* Compute LIndex */ michael@0: if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant */ michael@0: return 0; michael@0: } michael@0: else { michael@0: static const uint8_t lMap[] = { michael@0: /* A1 A2 A3 A4 A5 A6 A7 */ michael@0: 0, 1,0xff, 2,0xff,0xff, 3, michael@0: /* A8 A9 AA AB AC AD AE AF */ michael@0: 4, 5,0xff,0xff,0xff,0xff,0xff,0xff, michael@0: /* B0 B1 B2 B3 B4 B5 B6 B7 */ michael@0: 0xff, 6, 7, 8,0xff, 9, 10, 11, michael@0: /* B8 B9 BA BB BC BD BE */ michael@0: 12, 13, 14, 15, 16, 17, 18 michael@0: }; michael@0: michael@0: LIndex = lMap[in[3] - 0xa1]; michael@0: if(0xff == (0xff & LIndex)) michael@0: return 0; michael@0: } michael@0: michael@0: /* Compute VIndex */ michael@0: if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel */ michael@0: return 0; michael@0: } michael@0: else { michael@0: VIndex = in[5] - 0xbf; michael@0: } michael@0: michael@0: /* Compute TIndex */ michael@0: if(0xd4 == in[7]) michael@0: { michael@0: TIndex = 0; michael@0: } michael@0: else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant */ michael@0: return 0; michael@0: } michael@0: else { michael@0: static const uint8_t tMap[] = { michael@0: /* A1 A2 A3 A4 A5 A6 A7 */ michael@0: 1, 2, 3, 4, 5, 6, 7, michael@0: /* A8 A9 AA AB AC AD AE AF */ michael@0: 0xff, 8, 9, 10, 11, 12, 13, 14, michael@0: /* B0 B1 B2 B3 B4 B5 B6 B7 */ michael@0: 15, 16, 17,0xff, 18, 19, 20, 21, michael@0: /* B8 B9 BA BB BC BD BE */ michael@0: 22,0xff, 23, 24, 25, 26, 27 michael@0: }; michael@0: TIndex = tMap[in[7] - 0xa1]; michael@0: if(0xff == (0xff & TIndex)) michael@0: return 0; michael@0: } michael@0: michael@0: *inscanlen = 8; michael@0: /* the following line is from Unicode 2.0 page 3-13 item 5 */ michael@0: *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; michael@0: michael@0: return 1; michael@0: } michael@0: /*================================================================================= michael@0: michael@0: =================================================================================*/ michael@0: michael@0: int uCheckAndScanJohabHangul( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: /* since we don't have code to convert Johab to Unicode right now * michael@0: * make this part of code #if 0 to save space until we fully test it */ michael@0: if(inbuflen < 2) michael@0: return 0; michael@0: else { michael@0: /* michael@0: * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183 michael@0: * of "CJKV Information Processing" for details michael@0: */ michael@0: static const uint8_t lMap[32]={ /* totaly 19 */ michael@0: 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */ michael@0: 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */ michael@0: 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */ michael@0: 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */ michael@0: }; michael@0: static const uint8_t vMap[32]={ /* totaly 21 */ michael@0: 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */ michael@0: 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */ michael@0: 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */ michael@0: 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */ michael@0: }; michael@0: static const uint8_t tMap[32]={ /* totaly 29 */ michael@0: 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */ michael@0: 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */ michael@0: 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */ michael@0: 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */ michael@0: }; michael@0: uint16_t ch = (in[0] << 8) | in[1]; michael@0: uint16_t LIndex, VIndex, TIndex; michael@0: if(0 == (0x8000 & ch)) michael@0: return 0; michael@0: LIndex=lMap[(ch>>10)& 0x1F]; michael@0: VIndex=vMap[(ch>>5) & 0x1F]; michael@0: TIndex=tMap[(ch>>0) & 0x1F]; michael@0: if((0xff==(LIndex)) || michael@0: (0xff==(VIndex)) || michael@0: (0xff==(TIndex))) michael@0: return 0; michael@0: /* the following line is from Unicode 2.0 page 3-13 item 5 */ michael@0: *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; michael@0: *inscanlen = 2; michael@0: return 1; michael@0: } michael@0: } michael@0: int uCheckAndScanJohabSymbol( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: if(inbuflen < 2) michael@0: return 0; michael@0: else { michael@0: /* michael@0: * The following code are based on the Perl code lised under michael@0: * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of michael@0: * "CJKV Information Processing" by Ken Lunde michael@0: * michael@0: * sub johab2ks ($) { # Convert Johab to ISO-2022-KR michael@0: * my @johab = unpack("C*", $_[0]); michael@0: * my ($offset, $d8_off) = (0,0); michael@0: * my @out = (); michael@0: * while(($hi, $lo) = splice($johab, 0, 2)) { michael@0: * $offset = 1 if ($hi > 223 and $hi < 250); michael@0: * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42)); michael@0: * push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) - michael@0: * ($lo < 161 ? 1 : 0) + $offset) + $d8_off), michael@0: * $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 )); michael@0: * } michael@0: * return pack ("C*", @out); michael@0: * } michael@0: * additional comments from Ken Lunde michael@0: * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42)); michael@0: * has three possible return values: michael@0: * 0 if $hi is not equal to 216 michael@0: * 94 if $hi is euqal to 216 and if $lo is greater than 160 michael@0: * 42 if $hi is euqal to 216 and if $lo is not greater than 160 michael@0: */ michael@0: unsigned char hi = in[0]; michael@0: unsigned char lo = in[1]; michael@0: uint16_t offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0; michael@0: uint16_t d8_off = 0; michael@0: if(216 == hi) { michael@0: if( lo > 160) michael@0: d8_off = 94; michael@0: else michael@0: d8_off = 42; michael@0: } michael@0: michael@0: *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) - michael@0: (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) | michael@0: (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : michael@0: 128)); michael@0: *inscanlen = 2; michael@0: return 1; michael@0: } michael@0: } michael@0: int uCheckAndScan4BytesGB18030( michael@0: int32_t* state, michael@0: unsigned char *in, michael@0: uint16_t *out, michael@0: uint32_t inbuflen, michael@0: uint32_t* inscanlen michael@0: ) michael@0: { michael@0: uint32_t data; michael@0: if(inbuflen < 4) michael@0: return 0; michael@0: michael@0: if((in[0] < 0x81 ) || (0xfe < in[0])) michael@0: return 0; michael@0: if((in[1] < 0x30 ) || (0x39 < in[1])) michael@0: return 0; michael@0: if((in[2] < 0x81 ) || (0xfe < in[2])) michael@0: return 0; michael@0: if((in[3] < 0x30 ) || (0x39 < in[3])) michael@0: return 0; michael@0: michael@0: data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) + michael@0: (in[2] - 0x81)) * 10 ) + (in[3] - 0x30); michael@0: michael@0: *inscanlen = 4; michael@0: *out = (data < 0x00010000) ? data : 0xFFFD; michael@0: return 1; michael@0: }