michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: uscript_props.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2013feb16 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/uscript.h" michael@0: #include "unicode/utf16.h" michael@0: #include "ustr_imp.h" michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: namespace { michael@0: michael@0: // Script metadata (script properties). michael@0: // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt michael@0: michael@0: // 0 = NOT_ENCODED, no sample character, default false script properties. michael@0: // Bits 20.. 0: sample character michael@0: michael@0: // Bits 23..21: usage michael@0: const int32_t UNKNOWN = 1 << 21; michael@0: const int32_t EXCLUSION = 2 << 21; michael@0: const int32_t LIMITED_USE = 3 << 21; michael@0: const int32_t ASPIRATIONAL = 4 << 21; michael@0: const int32_t RECOMMENDED = 5 << 21; michael@0: michael@0: // Bits 31..24: Single-bit flags michael@0: const int32_t RTL = 1 << 24; michael@0: const int32_t LB_LETTERS = 1 << 25; michael@0: const int32_t CASED = 1 << 26; michael@0: michael@0: const int32_t SCRIPT_PROPS[] = { michael@0: // Begin copy-paste output from michael@0: // tools/trunk/unicode/py/parsescriptmetadata.py michael@0: 0x0040 | UNKNOWN, // Zyyy michael@0: 0x0308 | UNKNOWN, // Zinh michael@0: 0x0628 | RECOMMENDED | RTL, // Arab michael@0: 0x0531 | RECOMMENDED | CASED, // Armn michael@0: 0x0995 | RECOMMENDED, // Beng michael@0: 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo michael@0: 0x13C4 | LIMITED_USE, // Cher michael@0: 0x03E2 | EXCLUSION | CASED, // Copt michael@0: 0x042F | RECOMMENDED | CASED, // Cyrl michael@0: 0x10414 | EXCLUSION | CASED, // Dsrt michael@0: 0x0905 | RECOMMENDED, // Deva michael@0: 0x12A0 | RECOMMENDED, // Ethi michael@0: 0x10D3 | RECOMMENDED, // Geor michael@0: 0x10330 | EXCLUSION, // Goth michael@0: 0x03A9 | RECOMMENDED | CASED, // Grek michael@0: 0x0A95 | RECOMMENDED, // Gujr michael@0: 0x0A15 | RECOMMENDED, // Guru michael@0: 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani michael@0: 0xAC00 | RECOMMENDED, // Hang michael@0: 0x05D0 | RECOMMENDED | RTL, // Hebr michael@0: 0x304B | RECOMMENDED | LB_LETTERS, // Hira michael@0: 0x0C95 | RECOMMENDED, // Knda michael@0: 0x30AB | RECOMMENDED | LB_LETTERS, // Kana michael@0: 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr michael@0: 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo michael@0: 0x004C | RECOMMENDED | CASED, // Latn michael@0: 0x0D15 | RECOMMENDED, // Mlym michael@0: 0x1826 | ASPIRATIONAL, // Mong michael@0: 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr michael@0: 0x168F | EXCLUSION, // Ogam michael@0: 0x10300 | EXCLUSION, // Ital michael@0: 0x0B15 | RECOMMENDED, // Orya michael@0: 0x16A0 | EXCLUSION, // Runr michael@0: 0x0D85 | RECOMMENDED, // Sinh michael@0: 0x0710 | LIMITED_USE | RTL, // Syrc michael@0: 0x0B95 | RECOMMENDED, // Taml michael@0: 0x0C15 | RECOMMENDED, // Telu michael@0: 0x078C | RECOMMENDED | RTL, // Thaa michael@0: 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai michael@0: 0x0F40 | RECOMMENDED, // Tibt michael@0: 0x14C0 | ASPIRATIONAL, // Cans michael@0: 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii michael@0: 0x1703 | EXCLUSION, // Tglg michael@0: 0x1723 | EXCLUSION, // Hano michael@0: 0x1743 | EXCLUSION, // Buhd michael@0: 0x1763 | EXCLUSION, // Tagb michael@0: 0x2800 | UNKNOWN, // Brai michael@0: 0x10800 | EXCLUSION | RTL, // Cprt michael@0: 0x1900 | LIMITED_USE, // Limb michael@0: 0x10000 | EXCLUSION, // Linb michael@0: 0x10480 | EXCLUSION, // Osma michael@0: 0x10450 | EXCLUSION, // Shaw michael@0: 0x1950 | LIMITED_USE | LB_LETTERS, // Tale michael@0: 0x10380 | EXCLUSION, // Ugar michael@0: 0, michael@0: 0x1A00 | EXCLUSION, // Bugi michael@0: 0x2C00 | EXCLUSION | CASED, // Glag michael@0: 0x10A00 | EXCLUSION | RTL, // Khar michael@0: 0xA800 | LIMITED_USE, // Sylo michael@0: 0x1980 | LIMITED_USE | LB_LETTERS, // Talu michael@0: 0x2D30 | ASPIRATIONAL, // Tfng michael@0: 0x103A0 | EXCLUSION, // Xpeo michael@0: 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali michael@0: 0x1BC0 | LIMITED_USE, // Batk michael@0: 0, michael@0: 0x11005 | EXCLUSION, // Brah michael@0: 0xAA00 | LIMITED_USE, // Cham michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0x13153 | EXCLUSION, // Egyp michael@0: 0, michael@0: 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans michael@0: 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0xA984 | LIMITED_USE | LB_LETTERS, // Java michael@0: 0xA90A | LIMITED_USE, // Kali michael@0: 0, michael@0: 0, michael@0: 0x1C00 | LIMITED_USE, // Lepc michael@0: 0, michael@0: 0x0840 | LIMITED_USE | RTL, // Mand michael@0: 0, michael@0: 0x10980 | EXCLUSION | RTL, // Mero michael@0: 0x07CA | LIMITED_USE | RTL, // Nkoo michael@0: 0x10C00 | EXCLUSION | RTL, // Orkh michael@0: 0, michael@0: 0xA840 | EXCLUSION, // Phag michael@0: 0x10900 | EXCLUSION | RTL, // Phnx michael@0: 0x16F00 | ASPIRATIONAL, // Plrd michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0xA549 | LIMITED_USE, // Vaii michael@0: 0, michael@0: 0x12000 | EXCLUSION, // Xsux michael@0: 0, michael@0: 0xFDD0 | UNKNOWN, // Zzzz michael@0: 0x102A0 | EXCLUSION, // Cari michael@0: 0x304B | RECOMMENDED | LB_LETTERS, // Jpan michael@0: 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana michael@0: 0x10280 | EXCLUSION, // Lyci michael@0: 0x10920 | EXCLUSION | RTL, // Lydi michael@0: 0x1C5A | LIMITED_USE, // Olck michael@0: 0xA930 | EXCLUSION, // Rjng michael@0: 0xA882 | LIMITED_USE, // Saur michael@0: 0, michael@0: 0x1B83 | LIMITED_USE, // Sund michael@0: 0, michael@0: 0xABC0 | LIMITED_USE, // Mtei michael@0: 0x10840 | EXCLUSION | RTL, // Armi michael@0: 0x10B00 | EXCLUSION | RTL, // Avst michael@0: 0x11103 | LIMITED_USE, // Cakm michael@0: 0xAC00 | RECOMMENDED, // Kore michael@0: 0x11083 | EXCLUSION, // Kthi michael@0: 0, michael@0: 0x10B60 | EXCLUSION | RTL, // Phli michael@0: 0, michael@0: 0, michael@0: 0x10B40 | EXCLUSION | RTL, // Prti michael@0: 0x0800 | EXCLUSION | RTL, // Samr michael@0: 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt michael@0: 0, michael@0: 0, michael@0: 0xA6A0 | LIMITED_USE, // Bamu michael@0: 0xA4D0 | LIMITED_USE, // Lisu michael@0: 0, michael@0: 0x10A60 | EXCLUSION | RTL, // Sarb michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0x109A0 | EXCLUSION | RTL, // Merc michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0x11183 | EXCLUSION, // Shrd michael@0: 0x110D0 | EXCLUSION, // Sora michael@0: 0x11680 | EXCLUSION, // Takr michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: 0, michael@0: // End copy-paste from parsescriptmetadata.py michael@0: }; michael@0: michael@0: int32_t getScriptProps(UScriptCode script) { michael@0: if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) { michael@0: return SCRIPT_PROPS[script]; michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: } // namespace michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { return 0; } michael@0: if(capacity < 0 || (capacity > 0 && dest == NULL)) { michael@0: *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: int32_t sampleChar = getScriptProps(script) & 0x1fffff; michael@0: int32_t length; michael@0: if(sampleChar == 0) { michael@0: length = 0; michael@0: } else { michael@0: length = U16_LENGTH(sampleChar); michael@0: if(length <= capacity) { michael@0: int32_t i = 0; michael@0: U16_APPEND_UNSAFE(dest, i, sampleChar); michael@0: } michael@0: } michael@0: return u_terminateUChars(dest, capacity, length, pErrorCode); michael@0: } michael@0: michael@0: U_COMMON_API icu::UnicodeString U_EXPORT2 michael@0: uscript_getSampleUnicodeString(UScriptCode script) { michael@0: icu::UnicodeString sample; michael@0: int32_t sampleChar = getScriptProps(script) & 0x1fffff; michael@0: if(sampleChar != 0) { michael@0: sample.append(sampleChar); michael@0: } michael@0: return sample; michael@0: } michael@0: michael@0: U_CAPI UScriptUsage U_EXPORT2 michael@0: uscript_getUsage(UScriptCode script) { michael@0: return (UScriptUsage)((getScriptProps(script) >> 21) & 7); michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: uscript_isRightToLeft(UScriptCode script) { michael@0: return (getScriptProps(script) & RTL) != 0; michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: uscript_breaksBetweenLetters(UScriptCode script) { michael@0: return (getScriptProps(script) & LB_LETTERS) != 0; michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: uscript_isCased(UScriptCode script) { michael@0: return (getScriptProps(script) & CASED) != 0; michael@0: }