intl/icu/source/common/uscript_props.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/uscript_props.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,269 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  uscript_props.cpp
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +*   created on: 2013feb16
    1.15 +*   created by: Markus W. Scherer
    1.16 +*/
    1.17 +
    1.18 +#include "unicode/utypes.h"
    1.19 +#include "unicode/unistr.h"
    1.20 +#include "unicode/uscript.h"
    1.21 +#include "unicode/utf16.h"
    1.22 +#include "ustr_imp.h"
    1.23 +
    1.24 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.25 +
    1.26 +namespace {
    1.27 +
    1.28 +// Script metadata (script properties).
    1.29 +// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
    1.30 +
    1.31 +// 0 = NOT_ENCODED, no sample character, default false script properties.
    1.32 +// Bits 20.. 0: sample character
    1.33 +
    1.34 +// Bits 23..21: usage
    1.35 +const int32_t UNKNOWN = 1 << 21;
    1.36 +const int32_t EXCLUSION = 2 << 21;
    1.37 +const int32_t LIMITED_USE = 3 << 21;
    1.38 +const int32_t ASPIRATIONAL = 4 << 21;
    1.39 +const int32_t RECOMMENDED = 5 << 21;
    1.40 +
    1.41 +// Bits 31..24: Single-bit flags
    1.42 +const int32_t RTL = 1 << 24;
    1.43 +const int32_t LB_LETTERS = 1 << 25;
    1.44 +const int32_t CASED = 1 << 26;
    1.45 +
    1.46 +const int32_t SCRIPT_PROPS[] = {
    1.47 +    // Begin copy-paste output from
    1.48 +    // tools/trunk/unicode/py/parsescriptmetadata.py
    1.49 +    0x0040 | UNKNOWN,  // Zyyy
    1.50 +    0x0308 | UNKNOWN,  // Zinh
    1.51 +    0x0628 | RECOMMENDED | RTL,  // Arab
    1.52 +    0x0531 | RECOMMENDED | CASED,  // Armn
    1.53 +    0x0995 | RECOMMENDED,  // Beng
    1.54 +    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
    1.55 +    0x13C4 | LIMITED_USE,  // Cher
    1.56 +    0x03E2 | EXCLUSION | CASED,  // Copt
    1.57 +    0x042F | RECOMMENDED | CASED,  // Cyrl
    1.58 +    0x10414 | EXCLUSION | CASED,  // Dsrt
    1.59 +    0x0905 | RECOMMENDED,  // Deva
    1.60 +    0x12A0 | RECOMMENDED,  // Ethi
    1.61 +    0x10D3 | RECOMMENDED,  // Geor
    1.62 +    0x10330 | EXCLUSION,  // Goth
    1.63 +    0x03A9 | RECOMMENDED | CASED,  // Grek
    1.64 +    0x0A95 | RECOMMENDED,  // Gujr
    1.65 +    0x0A15 | RECOMMENDED,  // Guru
    1.66 +    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
    1.67 +    0xAC00 | RECOMMENDED,  // Hang
    1.68 +    0x05D0 | RECOMMENDED | RTL,  // Hebr
    1.69 +    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
    1.70 +    0x0C95 | RECOMMENDED,  // Knda
    1.71 +    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
    1.72 +    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
    1.73 +    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
    1.74 +    0x004C | RECOMMENDED | CASED,  // Latn
    1.75 +    0x0D15 | RECOMMENDED,  // Mlym
    1.76 +    0x1826 | ASPIRATIONAL,  // Mong
    1.77 +    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
    1.78 +    0x168F | EXCLUSION,  // Ogam
    1.79 +    0x10300 | EXCLUSION,  // Ital
    1.80 +    0x0B15 | RECOMMENDED,  // Orya
    1.81 +    0x16A0 | EXCLUSION,  // Runr
    1.82 +    0x0D85 | RECOMMENDED,  // Sinh
    1.83 +    0x0710 | LIMITED_USE | RTL,  // Syrc
    1.84 +    0x0B95 | RECOMMENDED,  // Taml
    1.85 +    0x0C15 | RECOMMENDED,  // Telu
    1.86 +    0x078C | RECOMMENDED | RTL,  // Thaa
    1.87 +    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
    1.88 +    0x0F40 | RECOMMENDED,  // Tibt
    1.89 +    0x14C0 | ASPIRATIONAL,  // Cans
    1.90 +    0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
    1.91 +    0x1703 | EXCLUSION,  // Tglg
    1.92 +    0x1723 | EXCLUSION,  // Hano
    1.93 +    0x1743 | EXCLUSION,  // Buhd
    1.94 +    0x1763 | EXCLUSION,  // Tagb
    1.95 +    0x2800 | UNKNOWN,  // Brai
    1.96 +    0x10800 | EXCLUSION | RTL,  // Cprt
    1.97 +    0x1900 | LIMITED_USE,  // Limb
    1.98 +    0x10000 | EXCLUSION,  // Linb
    1.99 +    0x10480 | EXCLUSION,  // Osma
   1.100 +    0x10450 | EXCLUSION,  // Shaw
   1.101 +    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
   1.102 +    0x10380 | EXCLUSION,  // Ugar
   1.103 +    0,
   1.104 +    0x1A00 | EXCLUSION,  // Bugi
   1.105 +    0x2C00 | EXCLUSION | CASED,  // Glag
   1.106 +    0x10A00 | EXCLUSION | RTL,  // Khar
   1.107 +    0xA800 | LIMITED_USE,  // Sylo
   1.108 +    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
   1.109 +    0x2D30 | ASPIRATIONAL,  // Tfng
   1.110 +    0x103A0 | EXCLUSION,  // Xpeo
   1.111 +    0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
   1.112 +    0x1BC0 | LIMITED_USE,  // Batk
   1.113 +    0,
   1.114 +    0x11005 | EXCLUSION,  // Brah
   1.115 +    0xAA00 | LIMITED_USE,  // Cham
   1.116 +    0,
   1.117 +    0,
   1.118 +    0,
   1.119 +    0,
   1.120 +    0x13153 | EXCLUSION,  // Egyp
   1.121 +    0,
   1.122 +    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
   1.123 +    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
   1.124 +    0,
   1.125 +    0,
   1.126 +    0,
   1.127 +    0xA984 | LIMITED_USE | LB_LETTERS,  // Java
   1.128 +    0xA90A | LIMITED_USE,  // Kali
   1.129 +    0,
   1.130 +    0,
   1.131 +    0x1C00 | LIMITED_USE,  // Lepc
   1.132 +    0,
   1.133 +    0x0840 | LIMITED_USE | RTL,  // Mand
   1.134 +    0,
   1.135 +    0x10980 | EXCLUSION | RTL,  // Mero
   1.136 +    0x07CA | LIMITED_USE | RTL,  // Nkoo
   1.137 +    0x10C00 | EXCLUSION | RTL,  // Orkh
   1.138 +    0,
   1.139 +    0xA840 | EXCLUSION,  // Phag
   1.140 +    0x10900 | EXCLUSION | RTL,  // Phnx
   1.141 +    0x16F00 | ASPIRATIONAL,  // Plrd
   1.142 +    0,
   1.143 +    0,
   1.144 +    0,
   1.145 +    0,
   1.146 +    0,
   1.147 +    0,
   1.148 +    0xA549 | LIMITED_USE,  // Vaii
   1.149 +    0,
   1.150 +    0x12000 | EXCLUSION,  // Xsux
   1.151 +    0,
   1.152 +    0xFDD0 | UNKNOWN,  // Zzzz
   1.153 +    0x102A0 | EXCLUSION,  // Cari
   1.154 +    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
   1.155 +    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
   1.156 +    0x10280 | EXCLUSION,  // Lyci
   1.157 +    0x10920 | EXCLUSION | RTL,  // Lydi
   1.158 +    0x1C5A | LIMITED_USE,  // Olck
   1.159 +    0xA930 | EXCLUSION,  // Rjng
   1.160 +    0xA882 | LIMITED_USE,  // Saur
   1.161 +    0,
   1.162 +    0x1B83 | LIMITED_USE,  // Sund
   1.163 +    0,
   1.164 +    0xABC0 | LIMITED_USE,  // Mtei
   1.165 +    0x10840 | EXCLUSION | RTL,  // Armi
   1.166 +    0x10B00 | EXCLUSION | RTL,  // Avst
   1.167 +    0x11103 | LIMITED_USE,  // Cakm
   1.168 +    0xAC00 | RECOMMENDED,  // Kore
   1.169 +    0x11083 | EXCLUSION,  // Kthi
   1.170 +    0,
   1.171 +    0x10B60 | EXCLUSION | RTL,  // Phli
   1.172 +    0,
   1.173 +    0,
   1.174 +    0x10B40 | EXCLUSION | RTL,  // Prti
   1.175 +    0x0800 | EXCLUSION | RTL,  // Samr
   1.176 +    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
   1.177 +    0,
   1.178 +    0,
   1.179 +    0xA6A0 | LIMITED_USE,  // Bamu
   1.180 +    0xA4D0 | LIMITED_USE,  // Lisu
   1.181 +    0,
   1.182 +    0x10A60 | EXCLUSION | RTL,  // Sarb
   1.183 +    0,
   1.184 +    0,
   1.185 +    0,
   1.186 +    0,
   1.187 +    0,
   1.188 +    0,
   1.189 +    0,
   1.190 +    0x109A0 | EXCLUSION | RTL,  // Merc
   1.191 +    0,
   1.192 +    0,
   1.193 +    0,
   1.194 +    0,
   1.195 +    0,
   1.196 +    0,
   1.197 +    0,
   1.198 +    0,
   1.199 +    0,
   1.200 +    0x11183 | EXCLUSION,  // Shrd
   1.201 +    0x110D0 | EXCLUSION,  // Sora
   1.202 +    0x11680 | EXCLUSION,  // Takr
   1.203 +    0,
   1.204 +    0,
   1.205 +    0,
   1.206 +    0,
   1.207 +    0,
   1.208 +    0,
   1.209 +    0,
   1.210 +    // End copy-paste from parsescriptmetadata.py
   1.211 +};
   1.212 +
   1.213 +int32_t getScriptProps(UScriptCode script) {
   1.214 +    if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
   1.215 +        return SCRIPT_PROPS[script];
   1.216 +    } else {
   1.217 +        return 0;
   1.218 +    }
   1.219 +}
   1.220 +
   1.221 +}  // namespace
   1.222 +
   1.223 +U_CAPI int32_t U_EXPORT2
   1.224 +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
   1.225 +    if(U_FAILURE(*pErrorCode)) { return 0; }
   1.226 +    if(capacity < 0 || (capacity > 0 && dest == NULL)) {
   1.227 +        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1.228 +        return 0;
   1.229 +    }
   1.230 +    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
   1.231 +    int32_t length;
   1.232 +    if(sampleChar == 0) {
   1.233 +        length = 0;
   1.234 +    } else {
   1.235 +        length = U16_LENGTH(sampleChar);
   1.236 +        if(length <= capacity) {
   1.237 +            int32_t i = 0;
   1.238 +            U16_APPEND_UNSAFE(dest, i, sampleChar);
   1.239 +        }
   1.240 +    }
   1.241 +    return u_terminateUChars(dest, capacity, length, pErrorCode);
   1.242 +}
   1.243 +
   1.244 +U_COMMON_API icu::UnicodeString U_EXPORT2
   1.245 +uscript_getSampleUnicodeString(UScriptCode script) {
   1.246 +    icu::UnicodeString sample;
   1.247 +    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
   1.248 +    if(sampleChar != 0) {
   1.249 +        sample.append(sampleChar);
   1.250 +    }
   1.251 +    return sample;
   1.252 +}
   1.253 +
   1.254 +U_CAPI UScriptUsage U_EXPORT2
   1.255 +uscript_getUsage(UScriptCode script) {
   1.256 +    return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
   1.257 +}
   1.258 +
   1.259 +U_CAPI UBool U_EXPORT2
   1.260 +uscript_isRightToLeft(UScriptCode script) {
   1.261 +    return (getScriptProps(script) & RTL) != 0;
   1.262 +}
   1.263 +
   1.264 +U_CAPI UBool U_EXPORT2
   1.265 +uscript_breaksBetweenLetters(UScriptCode script) {
   1.266 +    return (getScriptProps(script) & LB_LETTERS) != 0;
   1.267 +}
   1.268 +
   1.269 +U_CAPI UBool U_EXPORT2
   1.270 +uscript_isCased(UScriptCode script) {
   1.271 +    return (getScriptProps(script) & CASED) != 0;
   1.272 +}

mercurial