intl/icu/source/common/uscript_props.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 * file name: uscript_props.cpp
michael@0 7 * encoding: US-ASCII
michael@0 8 * tab size: 8 (not used)
michael@0 9 * indentation:4
michael@0 10 *
michael@0 11 * created on: 2013feb16
michael@0 12 * created by: Markus W. Scherer
michael@0 13 */
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "unicode/unistr.h"
michael@0 17 #include "unicode/uscript.h"
michael@0 18 #include "unicode/utf16.h"
michael@0 19 #include "ustr_imp.h"
michael@0 20
// Number of elements in a fixed-size array, as an int32_t.
// Only meaningful for true arrays: given a pointer, sizeof(array) is the pointer
// size and the quotient is not an element count.
// The whole expansion is parenthesized so that it always acts as a single operand,
// whatever operators surround the macro call.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
michael@0 22
michael@0 23 namespace {
michael@0 24
michael@0 25 // Script metadata (script properties).
michael@0 26 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
michael@0 27
michael@0 28 // 0 = NOT_ENCODED, no sample character, default false script properties.
michael@0 29 // Bits 20.. 0: sample character
michael@0 30
// Bits 23..21: usage
// A 3-bit field; uscript_getUsage() recovers its value with (props >> 21) & 7.
const int32_t UNKNOWN      = 0x00200000;  // field value 1
const int32_t EXCLUSION    = 0x00400000;  // field value 2
const int32_t LIMITED_USE  = 0x00600000;  // field value 3
const int32_t ASPIRATIONAL = 0x00800000;  // field value 4
const int32_t RECOMMENDED  = 0x00A00000;  // field value 5

// Bits 31..24: Single-bit flags
const int32_t RTL        = 0x01000000;  // bit 24, tested by uscript_isRightToLeft()
const int32_t LB_LETTERS = 0x02000000;  // bit 25, tested by uscript_breaksBetweenLetters()
const int32_t CASED      = 0x04000000;  // bit 26, tested by uscript_isCased()
michael@0 42
// Packed script properties table, read by getScriptProps() as SCRIPT_PROPS[script],
// i.e. the array index is the numeric UScriptCode value.
// Each non-zero entry ORs together:
//   - the sample character code point (bits 20..0),
//   - one usage constant (UNKNOWN .. RECOMMENDED, bits 23..21),
//   - zero or more of the single-bit flags RTL, LB_LETTERS, CASED.
// A bare 0 entry carries no sample character, usage 0 and no flags.
// The body is pasted output of the generator script named below, so entries
// should be regenerated rather than edited by hand.
// NOTE(review): entry order presumably has to track the UScriptCode enum in
// unicode/uscript.h - confirm when either one changes.
michael@0 43 const int32_t SCRIPT_PROPS[] = {
michael@0 44 // Begin copy-paste output from
michael@0 45 // tools/trunk/unicode/py/parsescriptmetadata.py
michael@0 46 0x0040 | UNKNOWN, // Zyyy
michael@0 47 0x0308 | UNKNOWN, // Zinh
michael@0 48 0x0628 | RECOMMENDED | RTL, // Arab
michael@0 49 0x0531 | RECOMMENDED | CASED, // Armn
michael@0 50 0x0995 | RECOMMENDED, // Beng
michael@0 51 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo
michael@0 52 0x13C4 | LIMITED_USE, // Cher
michael@0 53 0x03E2 | EXCLUSION | CASED, // Copt
michael@0 54 0x042F | RECOMMENDED | CASED, // Cyrl
michael@0 55 0x10414 | EXCLUSION | CASED, // Dsrt
michael@0 56 0x0905 | RECOMMENDED, // Deva
michael@0 57 0x12A0 | RECOMMENDED, // Ethi
michael@0 58 0x10D3 | RECOMMENDED, // Geor
michael@0 59 0x10330 | EXCLUSION, // Goth
michael@0 60 0x03A9 | RECOMMENDED | CASED, // Grek
michael@0 61 0x0A95 | RECOMMENDED, // Gujr
michael@0 62 0x0A15 | RECOMMENDED, // Guru
michael@0 63 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani
michael@0 64 0xAC00 | RECOMMENDED, // Hang
michael@0 65 0x05D0 | RECOMMENDED | RTL, // Hebr
michael@0 66 0x304B | RECOMMENDED | LB_LETTERS, // Hira
michael@0 67 0x0C95 | RECOMMENDED, // Knda
michael@0 68 0x30AB | RECOMMENDED | LB_LETTERS, // Kana
michael@0 69 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr
michael@0 70 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo
michael@0 71 0x004C | RECOMMENDED | CASED, // Latn
michael@0 72 0x0D15 | RECOMMENDED, // Mlym
michael@0 73 0x1826 | ASPIRATIONAL, // Mong
michael@0 74 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr
michael@0 75 0x168F | EXCLUSION, // Ogam
michael@0 76 0x10300 | EXCLUSION, // Ital
michael@0 77 0x0B15 | RECOMMENDED, // Orya
michael@0 78 0x16A0 | EXCLUSION, // Runr
michael@0 79 0x0D85 | RECOMMENDED, // Sinh
michael@0 80 0x0710 | LIMITED_USE | RTL, // Syrc
michael@0 81 0x0B95 | RECOMMENDED, // Taml
michael@0 82 0x0C15 | RECOMMENDED, // Telu
michael@0 83 0x078C | RECOMMENDED | RTL, // Thaa
michael@0 84 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai
michael@0 85 0x0F40 | RECOMMENDED, // Tibt
michael@0 86 0x14C0 | ASPIRATIONAL, // Cans
michael@0 87 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii
michael@0 88 0x1703 | EXCLUSION, // Tglg
michael@0 89 0x1723 | EXCLUSION, // Hano
michael@0 90 0x1743 | EXCLUSION, // Buhd
michael@0 91 0x1763 | EXCLUSION, // Tagb
michael@0 92 0x2800 | UNKNOWN, // Brai
michael@0 93 0x10800 | EXCLUSION | RTL, // Cprt
michael@0 94 0x1900 | LIMITED_USE, // Limb
michael@0 95 0x10000 | EXCLUSION, // Linb
michael@0 96 0x10480 | EXCLUSION, // Osma
michael@0 97 0x10450 | EXCLUSION, // Shaw
michael@0 98 0x1950 | LIMITED_USE | LB_LETTERS, // Tale
michael@0 99 0x10380 | EXCLUSION, // Ugar
michael@0 100 0,
michael@0 101 0x1A00 | EXCLUSION, // Bugi
michael@0 102 0x2C00 | EXCLUSION | CASED, // Glag
michael@0 103 0x10A00 | EXCLUSION | RTL, // Khar
michael@0 104 0xA800 | LIMITED_USE, // Sylo
michael@0 105 0x1980 | LIMITED_USE | LB_LETTERS, // Talu
michael@0 106 0x2D30 | ASPIRATIONAL, // Tfng
michael@0 107 0x103A0 | EXCLUSION, // Xpeo
michael@0 108 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali
michael@0 109 0x1BC0 | LIMITED_USE, // Batk
michael@0 110 0,
michael@0 111 0x11005 | EXCLUSION, // Brah
michael@0 112 0xAA00 | LIMITED_USE, // Cham
michael@0 113 0,
michael@0 114 0,
michael@0 115 0,
michael@0 116 0,
michael@0 117 0x13153 | EXCLUSION, // Egyp
michael@0 118 0,
michael@0 119 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans
michael@0 120 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant
michael@0 121 0,
michael@0 122 0,
michael@0 123 0,
michael@0 124 0xA984 | LIMITED_USE | LB_LETTERS, // Java
michael@0 125 0xA90A | LIMITED_USE, // Kali
michael@0 126 0,
michael@0 127 0,
michael@0 128 0x1C00 | LIMITED_USE, // Lepc
michael@0 129 0,
michael@0 130 0x0840 | LIMITED_USE | RTL, // Mand
michael@0 131 0,
michael@0 132 0x10980 | EXCLUSION | RTL, // Mero
michael@0 133 0x07CA | LIMITED_USE | RTL, // Nkoo
michael@0 134 0x10C00 | EXCLUSION | RTL, // Orkh
michael@0 135 0,
michael@0 136 0xA840 | EXCLUSION, // Phag
michael@0 137 0x10900 | EXCLUSION | RTL, // Phnx
michael@0 138 0x16F00 | ASPIRATIONAL, // Plrd
michael@0 139 0,
michael@0 140 0,
michael@0 141 0,
michael@0 142 0,
michael@0 143 0,
michael@0 144 0,
michael@0 145 0xA549 | LIMITED_USE, // Vaii
michael@0 146 0,
michael@0 147 0x12000 | EXCLUSION, // Xsux
michael@0 148 0,
michael@0 149 0xFDD0 | UNKNOWN, // Zzzz
michael@0 150 0x102A0 | EXCLUSION, // Cari
michael@0 151 0x304B | RECOMMENDED | LB_LETTERS, // Jpan
michael@0 152 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana
michael@0 153 0x10280 | EXCLUSION, // Lyci
michael@0 154 0x10920 | EXCLUSION | RTL, // Lydi
michael@0 155 0x1C5A | LIMITED_USE, // Olck
michael@0 156 0xA930 | EXCLUSION, // Rjng
michael@0 157 0xA882 | LIMITED_USE, // Saur
michael@0 158 0,
michael@0 159 0x1B83 | LIMITED_USE, // Sund
michael@0 160 0,
michael@0 161 0xABC0 | LIMITED_USE, // Mtei
michael@0 162 0x10840 | EXCLUSION | RTL, // Armi
michael@0 163 0x10B00 | EXCLUSION | RTL, // Avst
michael@0 164 0x11103 | LIMITED_USE, // Cakm
michael@0 165 0xAC00 | RECOMMENDED, // Kore
michael@0 166 0x11083 | EXCLUSION, // Kthi
michael@0 167 0,
michael@0 168 0x10B60 | EXCLUSION | RTL, // Phli
michael@0 169 0,
michael@0 170 0,
michael@0 171 0x10B40 | EXCLUSION | RTL, // Prti
michael@0 172 0x0800 | EXCLUSION | RTL, // Samr
michael@0 173 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt
michael@0 174 0,
michael@0 175 0,
michael@0 176 0xA6A0 | LIMITED_USE, // Bamu
michael@0 177 0xA4D0 | LIMITED_USE, // Lisu
michael@0 178 0,
michael@0 179 0x10A60 | EXCLUSION | RTL, // Sarb
michael@0 180 0,
michael@0 181 0,
michael@0 182 0,
michael@0 183 0,
michael@0 184 0,
michael@0 185 0,
michael@0 186 0,
michael@0 187 0x109A0 | EXCLUSION | RTL, // Merc
michael@0 188 0,
michael@0 189 0,
michael@0 190 0,
michael@0 191 0,
michael@0 192 0,
michael@0 193 0,
michael@0 194 0,
michael@0 195 0,
michael@0 196 0,
michael@0 197 0x11183 | EXCLUSION, // Shrd
michael@0 198 0x110D0 | EXCLUSION, // Sora
michael@0 199 0x11680 | EXCLUSION, // Takr
michael@0 200 0,
michael@0 201 0,
michael@0 202 0,
michael@0 203 0,
michael@0 204 0,
michael@0 205 0,
michael@0 206 0,
michael@0 207 // End copy-paste from parsescriptmetadata.py
michael@0 208 };
michael@0 209
michael@0 210 int32_t getScriptProps(UScriptCode script) {
michael@0 211 if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
michael@0 212 return SCRIPT_PROPS[script];
michael@0 213 } else {
michael@0 214 return 0;
michael@0 215 }
michael@0 216 }
michael@0 217
michael@0 218 } // namespace
michael@0 219
michael@0 220 U_CAPI int32_t U_EXPORT2
michael@0 221 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
michael@0 222 if(U_FAILURE(*pErrorCode)) { return 0; }
michael@0 223 if(capacity < 0 || (capacity > 0 && dest == NULL)) {
michael@0 224 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 225 return 0;
michael@0 226 }
michael@0 227 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
michael@0 228 int32_t length;
michael@0 229 if(sampleChar == 0) {
michael@0 230 length = 0;
michael@0 231 } else {
michael@0 232 length = U16_LENGTH(sampleChar);
michael@0 233 if(length <= capacity) {
michael@0 234 int32_t i = 0;
michael@0 235 U16_APPEND_UNSAFE(dest, i, sampleChar);
michael@0 236 }
michael@0 237 }
michael@0 238 return u_terminateUChars(dest, capacity, length, pErrorCode);
michael@0 239 }
michael@0 240
michael@0 241 U_COMMON_API icu::UnicodeString U_EXPORT2
michael@0 242 uscript_getSampleUnicodeString(UScriptCode script) {
michael@0 243 icu::UnicodeString sample;
michael@0 244 int32_t sampleChar = getScriptProps(script) & 0x1fffff;
michael@0 245 if(sampleChar != 0) {
michael@0 246 sample.append(sampleChar);
michael@0 247 }
michael@0 248 return sample;
michael@0 249 }
michael@0 250
michael@0 251 U_CAPI UScriptUsage U_EXPORT2
michael@0 252 uscript_getUsage(UScriptCode script) {
michael@0 253 return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
michael@0 254 }
michael@0 255
michael@0 256 U_CAPI UBool U_EXPORT2
michael@0 257 uscript_isRightToLeft(UScriptCode script) {
michael@0 258 return (getScriptProps(script) & RTL) != 0;
michael@0 259 }
michael@0 260
michael@0 261 U_CAPI UBool U_EXPORT2
michael@0 262 uscript_breaksBetweenLetters(UScriptCode script) {
michael@0 263 return (getScriptProps(script) & LB_LETTERS) != 0;
michael@0 264 }
michael@0 265
michael@0 266 U_CAPI UBool U_EXPORT2
michael@0 267 uscript_isCased(UScriptCode script) {
michael@0 268 return (getScriptProps(script) & CASED) != 0;
michael@0 269 }

mercurial