1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/uscript_props.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,269 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: uscript_props.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2013feb16 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#include "unicode/utypes.h" 1.19 +#include "unicode/unistr.h" 1.20 +#include "unicode/uscript.h" 1.21 +#include "unicode/utf16.h" 1.22 +#include "ustr_imp.h" 1.23 + 1.24 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.25 + 1.26 +namespace { 1.27 + 1.28 +// Script metadata (script properties). 1.29 +// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt 1.30 + 1.31 +// 0 = NOT_ENCODED, no sample character, default false script properties. 1.32 +// Bits 20.. 0: sample character 1.33 + 1.34 +// Bits 23..21: usage 1.35 +const int32_t UNKNOWN = 1 << 21; 1.36 +const int32_t EXCLUSION = 2 << 21; 1.37 +const int32_t LIMITED_USE = 3 << 21; 1.38 +const int32_t ASPIRATIONAL = 4 << 21; 1.39 +const int32_t RECOMMENDED = 5 << 21; 1.40 + 1.41 +// Bits 31..24: Single-bit flags 1.42 +const int32_t RTL = 1 << 24; 1.43 +const int32_t LB_LETTERS = 1 << 25; 1.44 +const int32_t CASED = 1 << 26; 1.45 + 1.46 +const int32_t SCRIPT_PROPS[] = { 1.47 + // Begin copy-paste output from 1.48 + // tools/trunk/unicode/py/parsescriptmetadata.py 1.49 + 0x0040 | UNKNOWN, // Zyyy 1.50 + 0x0308 | UNKNOWN, // Zinh 1.51 + 0x0628 | RECOMMENDED | RTL, // Arab 1.52 + 0x0531 | RECOMMENDED | CASED, // Armn 1.53 + 0x0995 | RECOMMENDED, // Beng 1.54 + 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo 1.55 + 0x13C4 | LIMITED_USE, // Cher 1.56 + 0x03E2 | EXCLUSION | CASED, // Copt 1.57 + 0x042F | RECOMMENDED | CASED, // Cyrl 1.58 + 0x10414 | EXCLUSION | CASED, // Dsrt 1.59 + 0x0905 | RECOMMENDED, // Deva 1.60 + 0x12A0 | RECOMMENDED, // Ethi 1.61 + 0x10D3 | RECOMMENDED, // Geor 1.62 + 0x10330 | EXCLUSION, // Goth 1.63 + 0x03A9 | RECOMMENDED | CASED, // Grek 1.64 + 0x0A95 | RECOMMENDED, // Gujr 1.65 + 0x0A15 | RECOMMENDED, // Guru 1.66 + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 1.67 + 0xAC00 | RECOMMENDED, // Hang 1.68 + 0x05D0 | RECOMMENDED | RTL, // Hebr 1.69 + 0x304B | RECOMMENDED | LB_LETTERS, // Hira 1.70 + 0x0C95 | RECOMMENDED, // Knda 1.71 + 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 1.72 + 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 1.73 + 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 1.74 + 0x004C | RECOMMENDED | CASED, // Latn 1.75 + 0x0D15 | RECOMMENDED, // Mlym 1.76 + 0x1826 | ASPIRATIONAL, // Mong 1.77 + 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 1.78 + 0x168F | EXCLUSION, // Ogam 1.79 + 0x10300 | EXCLUSION, // Ital 1.80 + 0x0B15 | RECOMMENDED, // Orya 1.81 + 0x16A0 | EXCLUSION, // Runr 1.82 + 0x0D85 | RECOMMENDED, // Sinh 1.83 + 0x0710 | LIMITED_USE | RTL, // Syrc 1.84 + 0x0B95 | RECOMMENDED, // Taml 1.85 + 0x0C15 | RECOMMENDED, // Telu 1.86 + 0x078C | RECOMMENDED | RTL, // Thaa 1.87 + 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 1.88 + 0x0F40 | RECOMMENDED, // Tibt 1.89 + 0x14C0 | ASPIRATIONAL, // Cans 1.90 + 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii 1.91 + 0x1703 | EXCLUSION, // Tglg 1.92 + 0x1723 | EXCLUSION, // Hano 1.93 + 0x1743 | EXCLUSION, // Buhd 1.94 + 0x1763 | EXCLUSION, // Tagb 1.95 + 0x2800 | UNKNOWN, // Brai 1.96 + 0x10800 | EXCLUSION | RTL, // Cprt 1.97 + 0x1900 | LIMITED_USE, // Limb 1.98 + 0x10000 | EXCLUSION, // Linb 1.99 + 0x10480 | EXCLUSION, // Osma 1.100 + 0x10450 | EXCLUSION, // Shaw 1.101 + 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 1.102 + 0x10380 | EXCLUSION, // Ugar 1.103 + 0, 1.104 + 0x1A00 | EXCLUSION, // Bugi 1.105 + 0x2C00 | EXCLUSION | CASED, // Glag 1.106 + 0x10A00 | EXCLUSION | RTL, // Khar 1.107 + 0xA800 | LIMITED_USE, // Sylo 1.108 + 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 1.109 + 0x2D30 | ASPIRATIONAL, // Tfng 1.110 + 0x103A0 | EXCLUSION, // Xpeo 1.111 + 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali 1.112 + 0x1BC0 | LIMITED_USE, // Batk 1.113 + 0, 1.114 + 0x11005 | EXCLUSION, // Brah 1.115 + 0xAA00 | LIMITED_USE, // Cham 1.116 + 0, 1.117 + 0, 1.118 + 0, 1.119 + 0, 1.120 + 0x13153 | EXCLUSION, // Egyp 1.121 + 0, 1.122 + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 1.123 + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 1.124 + 0, 1.125 + 0, 1.126 + 0, 1.127 + 0xA984 | LIMITED_USE | LB_LETTERS, // Java 1.128 + 0xA90A | LIMITED_USE, // Kali 1.129 + 0, 1.130 + 0, 1.131 + 0x1C00 | LIMITED_USE, // Lepc 1.132 + 0, 1.133 + 0x0840 | LIMITED_USE | RTL, // Mand 1.134 + 0, 1.135 + 0x10980 | EXCLUSION | RTL, // Mero 1.136 + 0x07CA | LIMITED_USE | RTL, // Nkoo 1.137 + 0x10C00 | EXCLUSION | RTL, // Orkh 1.138 + 0, 1.139 + 0xA840 | EXCLUSION, // Phag 1.140 + 0x10900 | EXCLUSION | RTL, // Phnx 1.141 + 0x16F00 | ASPIRATIONAL, // Plrd 1.142 + 0, 1.143 + 0, 1.144 + 0, 1.145 + 0, 1.146 + 0, 1.147 + 0, 1.148 + 0xA549 | LIMITED_USE, // Vaii 1.149 + 0, 1.150 + 0x12000 | EXCLUSION, // Xsux 1.151 + 0, 1.152 + 0xFDD0 | UNKNOWN, // Zzzz 1.153 + 0x102A0 | EXCLUSION, // Cari 1.154 + 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 1.155 + 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 1.156 + 0x10280 | EXCLUSION, // Lyci 1.157 + 0x10920 | EXCLUSION | RTL, // Lydi 1.158 + 0x1C5A | LIMITED_USE, // Olck 1.159 + 0xA930 | EXCLUSION, // Rjng 1.160 + 0xA882 | LIMITED_USE, // Saur 1.161 + 0, 1.162 + 0x1B83 | LIMITED_USE, // Sund 1.163 + 0, 1.164 + 0xABC0 | LIMITED_USE, // Mtei 1.165 + 0x10840 | EXCLUSION | RTL, // Armi 1.166 + 0x10B00 | EXCLUSION | RTL, // Avst 1.167 + 0x11103 | LIMITED_USE, // Cakm 1.168 + 0xAC00 | RECOMMENDED, // Kore 1.169 + 0x11083 | EXCLUSION, // Kthi 1.170 + 0, 1.171 + 0x10B60 | EXCLUSION | RTL, // Phli 1.172 + 0, 1.173 + 0, 1.174 + 0x10B40 | EXCLUSION | RTL, // Prti 1.175 + 0x0800 | EXCLUSION | RTL, // Samr 1.176 + 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 1.177 + 0, 1.178 + 0, 1.179 + 0xA6A0 | LIMITED_USE, // Bamu 1.180 + 0xA4D0 | LIMITED_USE, // Lisu 1.181 + 0, 1.182 + 0x10A60 | EXCLUSION | RTL, // Sarb 1.183 + 0, 1.184 + 0, 1.185 + 0, 1.186 + 0, 1.187 + 0, 1.188 + 0, 1.189 + 0, 1.190 + 0x109A0 | EXCLUSION | RTL, // Merc 1.191 + 0, 1.192 + 0, 1.193 + 0, 1.194 + 0, 1.195 + 0, 1.196 + 0, 1.197 + 0, 1.198 + 0, 1.199 + 0, 1.200 + 0x11183 | EXCLUSION, // Shrd 1.201 + 0x110D0 | EXCLUSION, // Sora 1.202 + 0x11680 | EXCLUSION, // Takr 1.203 + 0, 1.204 + 0, 1.205 + 0, 1.206 + 0, 1.207 + 0, 1.208 + 0, 1.209 + 0, 1.210 + // End copy-paste from parsescriptmetadata.py 1.211 +}; 1.212 + 1.213 +int32_t getScriptProps(UScriptCode script) { 1.214 + if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) { 1.215 + return SCRIPT_PROPS[script]; 1.216 + } else { 1.217 + return 0; 1.218 + } 1.219 +} 1.220 + 1.221 +} // namespace 1.222 + 1.223 +U_CAPI int32_t U_EXPORT2 1.224 +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { 1.225 + if(U_FAILURE(*pErrorCode)) { return 0; } 1.226 + if(capacity < 0 || (capacity > 0 && dest == NULL)) { 1.227 + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.228 + return 0; 1.229 + } 1.230 + int32_t sampleChar = getScriptProps(script) & 0x1fffff; 1.231 + int32_t length; 1.232 + if(sampleChar == 0) { 1.233 + length = 0; 1.234 + } else { 1.235 + length = U16_LENGTH(sampleChar); 1.236 + if(length <= capacity) { 1.237 + int32_t i = 0; 1.238 + U16_APPEND_UNSAFE(dest, i, sampleChar); 1.239 + } 1.240 + } 1.241 + return u_terminateUChars(dest, capacity, length, pErrorCode); 1.242 +} 1.243 + 1.244 +U_COMMON_API icu::UnicodeString U_EXPORT2 1.245 +uscript_getSampleUnicodeString(UScriptCode script) { 1.246 + icu::UnicodeString sample; 1.247 + int32_t sampleChar = getScriptProps(script) & 0x1fffff; 1.248 + if(sampleChar != 0) { 1.249 + sample.append(sampleChar); 1.250 + } 1.251 + return sample; 1.252 +} 1.253 + 1.254 +U_CAPI UScriptUsage U_EXPORT2 1.255 +uscript_getUsage(UScriptCode script) { 1.256 + return (UScriptUsage)((getScriptProps(script) >> 21) & 7); 1.257 +} 1.258 + 1.259 +U_CAPI UBool U_EXPORT2 1.260 +uscript_isRightToLeft(UScriptCode script) { 1.261 + return (getScriptProps(script) & RTL) != 0; 1.262 +} 1.263 + 1.264 +U_CAPI UBool U_EXPORT2 1.265 +uscript_breaksBetweenLetters(UScriptCode script) { 1.266 + return (getScriptProps(script) & LB_LETTERS) != 0; 1.267 +} 1.268 + 1.269 +U_CAPI UBool U_EXPORT2 1.270 +uscript_isCased(UScriptCode script) { 1.271 + return (getScriptProps(script) & CASED) != 0; 1.272 +}