intl/uconv/ucvcn/gbku.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4 // =======================================================================
michael@0 5 // Original Author: Yueheng Xu
michael@0 6 // email: yueheng.xu@intel.com
michael@0 7 // phone: (503)264-2248
michael@0 8 // Intel Corporation, Oregon, USA
michael@0 9 // Last Update: September 7, 1999
michael@0 10 // Revision History:
michael@0 11 // 09/07/1999 - initial version.
michael@0 12 // 09/28/1999 - changed leftbyte and rightbyte from char to unsigned char
michael@0 13 // in struct DByte
michael@0 14 // 04/10/1999 - changed leftbyte, rightbyte to uint8_t in struct DByte;
michael@0 15 // added table UnicodeToGBKTable[0x5200]
michael@0 16 //
michael@0 17 // 05/16/2000 - added gUnicodeToGBKTableInitialized flag for optimization
michael@0 18 // ======================================================================================
michael@0 19 // Table GBKToUnicode[] maps the GBK code to its unicode.
michael@0 20 // The mapping data of this GBK table is obtained from
michael@0 21 // ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
michael@0 22 // Frank Tang of Netscape wrote the original perl tool to re-align the
michael@0 23 // mapping data into an 8-item per line format ( i.e. file cp936map.txt ).
michael@0 24 //
michael@0 25 // The valid GBK charset range: left byte is in [0x81, 0xfe], right byte is in
michael@0 26 // [0x40, 0x7e] or [0x80, 0xfe]. But for the convenience of index
michael@0 27 // calculation, the table here has a single consecutive range of
michael@0 28 // [0x40, 0xfe] for the right byte. Those invalid chars whose right byte
michael@0 29 // is 0x7f will be mapped to undefined unicode 0xFFFF.
michael@0 30 //
michael@0 31 //
michael@0 32 // Table UnicodeToGBK[] maps the unicode to GBK code. To reduce memory usage, we
michael@0 33 // only do Unicode to GBK table mapping for unicode between 0x4E00 and 0xA000;
michael@0 34 // for all other code points the converter searches table GBKToUnicode[]. If we
michael@0 35 // want to trade more memory for performance, we can map more of the unicode range
michael@0 36 // through a table instead of searching table GBKToUnicode[].
michael@0 37 #ifndef _GBKU_H__
michael@0 38 #define _GBKU_H__
michael@0 39
michael@0 40
// Code unit 0xFFFD (U+FFFD REPLACEMENT CHARACTER); going by its name, the
// marker used for a character that has no mapping.
#define UCS2_NO_MAPPING ((char16_t) 0xfffd)

// True when a <= b <= c, with all three operands first converted to uint8_t.
// NOTE: b is expanded twice, so do not pass an expression with side effects.
#define UINT8_IN_RANGE(a, b, c) \
  (((uint8_t)(a) <= (uint8_t)(b)) && ((uint8_t)(b) <= (uint8_t)(c)))

// Same inclusive range test as UINT8_IN_RANGE, but the operands are converted
// to char16_t. b is expanded twice here as well.
#define UNICHAR_IN_RANGE(a, b, c) \
  (((char16_t)(a) <= (char16_t)(b)) && ((char16_t)(b) <= (char16_t)(c)))

// Widens a char to char16_t. The intermediate cast to unsigned char keeps
// bytes >= 0x80 from being sign-extended where plain char is signed.
#define CAST_CHAR_TO_UNICHAR(a) ((char16_t)((unsigned char)(a)))

// Narrows a value to char. The argument is parenthesized so that an expression
// argument (e.g. `u >> 8`) is converted as a whole instead of having the cast
// bind only to its first operand.
#define CAST_UNICHAR_TO_CHAR(a) ((char)(a))

// True when no bit in 0xff80 is set, i.e. the value lies in [0x00, 0x7f].
#define IS_ASCII(a) (0 == (0xff80 & (a)))

// True when c equals (char)0x80.
// NOTE: the comparison is against a plain char; where char is signed, an
// argument of an unsigned or wider type holding 0x80 will not compare equal.
#define IS_GBK_EURO(c) ((char)0x80 == (c))

// U+20AC EURO SIGN.
#define UCS2_EURO ((char16_t) 0x20ac)
michael@0 52
michael@0 53 #include "nsGBKConvUtil.h"
michael@0 54
michael@0 55 #endif /* _GBKU_H__ */

mercurial