Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * Copyright (C) 2013, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ******************************************************************************* |
michael@0 | 6 | * file name: uscript_props.cpp |
michael@0 | 7 | * encoding: US-ASCII |
michael@0 | 8 | * tab size: 8 (not used) |
michael@0 | 9 | * indentation:4 |
michael@0 | 10 | * |
michael@0 | 11 | * created on: 2013feb16 |
michael@0 | 12 | * created by: Markus W. Scherer |
michael@0 | 13 | */ |
michael@0 | 14 | |
michael@0 | 15 | #include "unicode/utypes.h" |
michael@0 | 16 | #include "unicode/unistr.h" |
michael@0 | 17 | #include "unicode/uscript.h" |
michael@0 | 18 | #include "unicode/utf16.h" |
michael@0 | 19 | #include "ustr_imp.h" |
michael@0 | 20 | |
// Number of elements in a true array (not a pointer), converted to int32_t.
// The whole replacement list is parenthesized so the expansion always acts as
// a single operand, whatever operators surround the macro call.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
michael@0 | 22 | |
michael@0 | 23 | namespace { |
michael@0 | 24 | |
michael@0 | 25 | // Script metadata (script properties). |
michael@0 | 26 | // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt |
michael@0 | 27 | |
michael@0 | 28 | // 0 = NOT_ENCODED, no sample character, default false script properties. |
michael@0 | 29 | // Bits 20.. 0: sample character |
michael@0 | 30 | |
// Usage class: a small integer stored in bits 23..21 of a property word.
const int32_t UNKNOWN      = 0x00200000;  // usage value 1
const int32_t EXCLUSION    = 0x00400000;  // usage value 2
const int32_t LIMITED_USE  = 0x00600000;  // usage value 3
const int32_t ASPIRATIONAL = 0x00800000;  // usage value 4
const int32_t RECOMMENDED  = 0x00A00000;  // usage value 5

// Boolean properties: one bit each, taken from bits 31..24.
const int32_t RTL        = 0x01000000;  // bit 24
const int32_t LB_LETTERS = 0x02000000;  // bit 25
const int32_t CASED      = 0x04000000;  // bit 26
michael@0 | 42 | |
michael@0 | 43 | const int32_t SCRIPT_PROPS[] = { |
michael@0 | 44 | // Begin copy-paste output from |
michael@0 | 45 | // tools/trunk/unicode/py/parsescriptmetadata.py |
michael@0 | 46 | 0x0040 | UNKNOWN, // Zyyy |
michael@0 | 47 | 0x0308 | UNKNOWN, // Zinh |
michael@0 | 48 | 0x0628 | RECOMMENDED | RTL, // Arab |
michael@0 | 49 | 0x0531 | RECOMMENDED | CASED, // Armn |
michael@0 | 50 | 0x0995 | RECOMMENDED, // Beng |
michael@0 | 51 | 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo |
michael@0 | 52 | 0x13C4 | LIMITED_USE, // Cher |
michael@0 | 53 | 0x03E2 | EXCLUSION | CASED, // Copt |
michael@0 | 54 | 0x042F | RECOMMENDED | CASED, // Cyrl |
michael@0 | 55 | 0x10414 | EXCLUSION | CASED, // Dsrt |
michael@0 | 56 | 0x0905 | RECOMMENDED, // Deva |
michael@0 | 57 | 0x12A0 | RECOMMENDED, // Ethi |
michael@0 | 58 | 0x10D3 | RECOMMENDED, // Geor |
michael@0 | 59 | 0x10330 | EXCLUSION, // Goth |
michael@0 | 60 | 0x03A9 | RECOMMENDED | CASED, // Grek |
michael@0 | 61 | 0x0A95 | RECOMMENDED, // Gujr |
michael@0 | 62 | 0x0A15 | RECOMMENDED, // Guru |
michael@0 | 63 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani |
michael@0 | 64 | 0xAC00 | RECOMMENDED, // Hang |
michael@0 | 65 | 0x05D0 | RECOMMENDED | RTL, // Hebr |
michael@0 | 66 | 0x304B | RECOMMENDED | LB_LETTERS, // Hira |
michael@0 | 67 | 0x0C95 | RECOMMENDED, // Knda |
michael@0 | 68 | 0x30AB | RECOMMENDED | LB_LETTERS, // Kana |
michael@0 | 69 | 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr |
michael@0 | 70 | 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo |
michael@0 | 71 | 0x004C | RECOMMENDED | CASED, // Latn |
michael@0 | 72 | 0x0D15 | RECOMMENDED, // Mlym |
michael@0 | 73 | 0x1826 | ASPIRATIONAL, // Mong |
michael@0 | 74 | 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr |
michael@0 | 75 | 0x168F | EXCLUSION, // Ogam |
michael@0 | 76 | 0x10300 | EXCLUSION, // Ital |
michael@0 | 77 | 0x0B15 | RECOMMENDED, // Orya |
michael@0 | 78 | 0x16A0 | EXCLUSION, // Runr |
michael@0 | 79 | 0x0D85 | RECOMMENDED, // Sinh |
michael@0 | 80 | 0x0710 | LIMITED_USE | RTL, // Syrc |
michael@0 | 81 | 0x0B95 | RECOMMENDED, // Taml |
michael@0 | 82 | 0x0C15 | RECOMMENDED, // Telu |
michael@0 | 83 | 0x078C | RECOMMENDED | RTL, // Thaa |
michael@0 | 84 | 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai |
michael@0 | 85 | 0x0F40 | RECOMMENDED, // Tibt |
michael@0 | 86 | 0x14C0 | ASPIRATIONAL, // Cans |
michael@0 | 87 | 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii |
michael@0 | 88 | 0x1703 | EXCLUSION, // Tglg |
michael@0 | 89 | 0x1723 | EXCLUSION, // Hano |
michael@0 | 90 | 0x1743 | EXCLUSION, // Buhd |
michael@0 | 91 | 0x1763 | EXCLUSION, // Tagb |
michael@0 | 92 | 0x2800 | UNKNOWN, // Brai |
michael@0 | 93 | 0x10800 | EXCLUSION | RTL, // Cprt |
michael@0 | 94 | 0x1900 | LIMITED_USE, // Limb |
michael@0 | 95 | 0x10000 | EXCLUSION, // Linb |
michael@0 | 96 | 0x10480 | EXCLUSION, // Osma |
michael@0 | 97 | 0x10450 | EXCLUSION, // Shaw |
michael@0 | 98 | 0x1950 | LIMITED_USE | LB_LETTERS, // Tale |
michael@0 | 99 | 0x10380 | EXCLUSION, // Ugar |
michael@0 | 100 | 0, |
michael@0 | 101 | 0x1A00 | EXCLUSION, // Bugi |
michael@0 | 102 | 0x2C00 | EXCLUSION | CASED, // Glag |
michael@0 | 103 | 0x10A00 | EXCLUSION | RTL, // Khar |
michael@0 | 104 | 0xA800 | LIMITED_USE, // Sylo |
michael@0 | 105 | 0x1980 | LIMITED_USE | LB_LETTERS, // Talu |
michael@0 | 106 | 0x2D30 | ASPIRATIONAL, // Tfng |
michael@0 | 107 | 0x103A0 | EXCLUSION, // Xpeo |
michael@0 | 108 | 0x1B05 | LIMITED_USE | LB_LETTERS, // Bali |
michael@0 | 109 | 0x1BC0 | LIMITED_USE, // Batk |
michael@0 | 110 | 0, |
michael@0 | 111 | 0x11005 | EXCLUSION, // Brah |
michael@0 | 112 | 0xAA00 | LIMITED_USE, // Cham |
michael@0 | 113 | 0, |
michael@0 | 114 | 0, |
michael@0 | 115 | 0, |
michael@0 | 116 | 0, |
michael@0 | 117 | 0x13153 | EXCLUSION, // Egyp |
michael@0 | 118 | 0, |
michael@0 | 119 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans |
michael@0 | 120 | 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant |
michael@0 | 121 | 0, |
michael@0 | 122 | 0, |
michael@0 | 123 | 0, |
michael@0 | 124 | 0xA984 | LIMITED_USE | LB_LETTERS, // Java |
michael@0 | 125 | 0xA90A | LIMITED_USE, // Kali |
michael@0 | 126 | 0, |
michael@0 | 127 | 0, |
michael@0 | 128 | 0x1C00 | LIMITED_USE, // Lepc |
michael@0 | 129 | 0, |
michael@0 | 130 | 0x0840 | LIMITED_USE | RTL, // Mand |
michael@0 | 131 | 0, |
michael@0 | 132 | 0x10980 | EXCLUSION | RTL, // Mero |
michael@0 | 133 | 0x07CA | LIMITED_USE | RTL, // Nkoo |
michael@0 | 134 | 0x10C00 | EXCLUSION | RTL, // Orkh |
michael@0 | 135 | 0, |
michael@0 | 136 | 0xA840 | EXCLUSION, // Phag |
michael@0 | 137 | 0x10900 | EXCLUSION | RTL, // Phnx |
michael@0 | 138 | 0x16F00 | ASPIRATIONAL, // Plrd |
michael@0 | 139 | 0, |
michael@0 | 140 | 0, |
michael@0 | 141 | 0, |
michael@0 | 142 | 0, |
michael@0 | 143 | 0, |
michael@0 | 144 | 0, |
michael@0 | 145 | 0xA549 | LIMITED_USE, // Vaii |
michael@0 | 146 | 0, |
michael@0 | 147 | 0x12000 | EXCLUSION, // Xsux |
michael@0 | 148 | 0, |
michael@0 | 149 | 0xFDD0 | UNKNOWN, // Zzzz |
michael@0 | 150 | 0x102A0 | EXCLUSION, // Cari |
michael@0 | 151 | 0x304B | RECOMMENDED | LB_LETTERS, // Jpan |
michael@0 | 152 | 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana |
michael@0 | 153 | 0x10280 | EXCLUSION, // Lyci |
michael@0 | 154 | 0x10920 | EXCLUSION | RTL, // Lydi |
michael@0 | 155 | 0x1C5A | LIMITED_USE, // Olck |
michael@0 | 156 | 0xA930 | EXCLUSION, // Rjng |
michael@0 | 157 | 0xA882 | LIMITED_USE, // Saur |
michael@0 | 158 | 0, |
michael@0 | 159 | 0x1B83 | LIMITED_USE, // Sund |
michael@0 | 160 | 0, |
michael@0 | 161 | 0xABC0 | LIMITED_USE, // Mtei |
michael@0 | 162 | 0x10840 | EXCLUSION | RTL, // Armi |
michael@0 | 163 | 0x10B00 | EXCLUSION | RTL, // Avst |
michael@0 | 164 | 0x11103 | LIMITED_USE, // Cakm |
michael@0 | 165 | 0xAC00 | RECOMMENDED, // Kore |
michael@0 | 166 | 0x11083 | EXCLUSION, // Kthi |
michael@0 | 167 | 0, |
michael@0 | 168 | 0x10B60 | EXCLUSION | RTL, // Phli |
michael@0 | 169 | 0, |
michael@0 | 170 | 0, |
michael@0 | 171 | 0x10B40 | EXCLUSION | RTL, // Prti |
michael@0 | 172 | 0x0800 | EXCLUSION | RTL, // Samr |
michael@0 | 173 | 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt |
michael@0 | 174 | 0, |
michael@0 | 175 | 0, |
michael@0 | 176 | 0xA6A0 | LIMITED_USE, // Bamu |
michael@0 | 177 | 0xA4D0 | LIMITED_USE, // Lisu |
michael@0 | 178 | 0, |
michael@0 | 179 | 0x10A60 | EXCLUSION | RTL, // Sarb |
michael@0 | 180 | 0, |
michael@0 | 181 | 0, |
michael@0 | 182 | 0, |
michael@0 | 183 | 0, |
michael@0 | 184 | 0, |
michael@0 | 185 | 0, |
michael@0 | 186 | 0, |
michael@0 | 187 | 0x109A0 | EXCLUSION | RTL, // Merc |
michael@0 | 188 | 0, |
michael@0 | 189 | 0, |
michael@0 | 190 | 0, |
michael@0 | 191 | 0, |
michael@0 | 192 | 0, |
michael@0 | 193 | 0, |
michael@0 | 194 | 0, |
michael@0 | 195 | 0, |
michael@0 | 196 | 0, |
michael@0 | 197 | 0x11183 | EXCLUSION, // Shrd |
michael@0 | 198 | 0x110D0 | EXCLUSION, // Sora |
michael@0 | 199 | 0x11680 | EXCLUSION, // Takr |
michael@0 | 200 | 0, |
michael@0 | 201 | 0, |
michael@0 | 202 | 0, |
michael@0 | 203 | 0, |
michael@0 | 204 | 0, |
michael@0 | 205 | 0, |
michael@0 | 206 | 0, |
michael@0 | 207 | // End copy-paste from parsescriptmetadata.py |
michael@0 | 208 | }; |
michael@0 | 209 | |
michael@0 | 210 | int32_t getScriptProps(UScriptCode script) { |
michael@0 | 211 | if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) { |
michael@0 | 212 | return SCRIPT_PROPS[script]; |
michael@0 | 213 | } else { |
michael@0 | 214 | return 0; |
michael@0 | 215 | } |
michael@0 | 216 | } |
michael@0 | 217 | |
michael@0 | 218 | } // namespace |
michael@0 | 219 | |
michael@0 | 220 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 221 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { |
michael@0 | 222 | if(U_FAILURE(*pErrorCode)) { return 0; } |
michael@0 | 223 | if(capacity < 0 || (capacity > 0 && dest == NULL)) { |
michael@0 | 224 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 225 | return 0; |
michael@0 | 226 | } |
michael@0 | 227 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
michael@0 | 228 | int32_t length; |
michael@0 | 229 | if(sampleChar == 0) { |
michael@0 | 230 | length = 0; |
michael@0 | 231 | } else { |
michael@0 | 232 | length = U16_LENGTH(sampleChar); |
michael@0 | 233 | if(length <= capacity) { |
michael@0 | 234 | int32_t i = 0; |
michael@0 | 235 | U16_APPEND_UNSAFE(dest, i, sampleChar); |
michael@0 | 236 | } |
michael@0 | 237 | } |
michael@0 | 238 | return u_terminateUChars(dest, capacity, length, pErrorCode); |
michael@0 | 239 | } |
michael@0 | 240 | |
michael@0 | 241 | U_COMMON_API icu::UnicodeString U_EXPORT2 |
michael@0 | 242 | uscript_getSampleUnicodeString(UScriptCode script) { |
michael@0 | 243 | icu::UnicodeString sample; |
michael@0 | 244 | int32_t sampleChar = getScriptProps(script) & 0x1fffff; |
michael@0 | 245 | if(sampleChar != 0) { |
michael@0 | 246 | sample.append(sampleChar); |
michael@0 | 247 | } |
michael@0 | 248 | return sample; |
michael@0 | 249 | } |
michael@0 | 250 | |
michael@0 | 251 | U_CAPI UScriptUsage U_EXPORT2 |
michael@0 | 252 | uscript_getUsage(UScriptCode script) { |
michael@0 | 253 | return (UScriptUsage)((getScriptProps(script) >> 21) & 7); |
michael@0 | 254 | } |
michael@0 | 255 | |
michael@0 | 256 | U_CAPI UBool U_EXPORT2 |
michael@0 | 257 | uscript_isRightToLeft(UScriptCode script) { |
michael@0 | 258 | return (getScriptProps(script) & RTL) != 0; |
michael@0 | 259 | } |
michael@0 | 260 | |
michael@0 | 261 | U_CAPI UBool U_EXPORT2 |
michael@0 | 262 | uscript_breaksBetweenLetters(UScriptCode script) { |
michael@0 | 263 | return (getScriptProps(script) & LB_LETTERS) != 0; |
michael@0 | 264 | } |
michael@0 | 265 | |
michael@0 | 266 | U_CAPI UBool U_EXPORT2 |
michael@0 | 267 | uscript_isCased(UScriptCode script) { |
michael@0 | 268 | return (getScriptProps(script) & CASED) != 0; |
michael@0 | 269 | } |