intl/icu/source/common/uscript_props.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2013, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  uscript_props.cpp
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2013feb16
    12 *   created by: Markus W. Scherer
    13 */
    15 #include "unicode/utypes.h"
    16 #include "unicode/unistr.h"
    17 #include "unicode/uscript.h"
    18 #include "unicode/utf16.h"
    19 #include "ustr_imp.h"
    // Number of elements of a true array (not a pointer), as an int32_t.
    // The whole expansion is parenthesized so it remains a single operand
    // wherever it is used; without the outer parentheses an expression such as
    // `sizeof LENGTHOF(a)` parses as `sizeof(int32_t)` followed by a stray
    // parenthesized expression and does not compile.
    #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
    23 namespace {
    25 // Script metadata (script properties).
    26 // See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
    28 // 0 = NOT_ENCODED, no sample character, default false script properties.
    29 // Bits 20.. 0: sample character
    // Usage level: a 3-bit field stored in bits 23..21 of a properties word.
    // uscript_getUsage() recovers it with (props >> 21) & 7.
    const int32_t USAGE_SHIFT = 21;
    const int32_t UNKNOWN      = 1 << USAGE_SHIFT;
    const int32_t EXCLUSION    = 2 << USAGE_SHIFT;
    const int32_t LIMITED_USE  = 3 << USAGE_SHIFT;
    const int32_t ASPIRATIONAL = 4 << USAGE_SHIFT;
    const int32_t RECOMMENDED  = 5 << USAGE_SHIFT;

    // Boolean properties: one bit each, allocated upward from bit 24
    // (bits 31..24 are reserved for such single-bit flags).
    const int32_t FLAGS_SHIFT = 24;
    const int32_t RTL        = 1 << (FLAGS_SHIFT + 0);  // tested by uscript_isRightToLeft()
    const int32_t LB_LETTERS = 1 << (FLAGS_SHIFT + 1);  // tested by uscript_breaksBetweenLetters()
    const int32_t CASED      = 1 << (FLAGS_SHIFT + 2);  // tested by uscript_isCased()
    43 const int32_t SCRIPT_PROPS[] = {  // One packed properties word per script; getScriptProps() indexes this array by UScriptCode value.
    44     // Begin copy-paste output from
    45     // tools/trunk/unicode/py/parsescriptmetadata.py
    46     0x0040 | UNKNOWN,  // Zyyy  (entry layout: sample character in bits 20..0 | usage in bits 23..21 | optional single-bit flags)
    47     0x0308 | UNKNOWN,  // Zinh
    48     0x0628 | RECOMMENDED | RTL,  // Arab
    49     0x0531 | RECOMMENDED | CASED,  // Armn
    50     0x0995 | RECOMMENDED,  // Beng
    51     0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
    52     0x13C4 | LIMITED_USE,  // Cher
    53     0x03E2 | EXCLUSION | CASED,  // Copt
    54     0x042F | RECOMMENDED | CASED,  // Cyrl
    55     0x10414 | EXCLUSION | CASED,  // Dsrt
    56     0x0905 | RECOMMENDED,  // Deva
    57     0x12A0 | RECOMMENDED,  // Ethi
    58     0x10D3 | RECOMMENDED,  // Geor
    59     0x10330 | EXCLUSION,  // Goth
    60     0x03A9 | RECOMMENDED | CASED,  // Grek
    61     0x0A95 | RECOMMENDED,  // Gujr
    62     0x0A15 | RECOMMENDED,  // Guru
    63     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
    64     0xAC00 | RECOMMENDED,  // Hang
    65     0x05D0 | RECOMMENDED | RTL,  // Hebr
    66     0x304B | RECOMMENDED | LB_LETTERS,  // Hira
    67     0x0C95 | RECOMMENDED,  // Knda
    68     0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
    69     0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
    70     0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
    71     0x004C | RECOMMENDED | CASED,  // Latn
    72     0x0D15 | RECOMMENDED,  // Mlym
    73     0x1826 | ASPIRATIONAL,  // Mong
    74     0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
    75     0x168F | EXCLUSION,  // Ogam
    76     0x10300 | EXCLUSION,  // Ital
    77     0x0B15 | RECOMMENDED,  // Orya
    78     0x16A0 | EXCLUSION,  // Runr
    79     0x0D85 | RECOMMENDED,  // Sinh
    80     0x0710 | LIMITED_USE | RTL,  // Syrc
    81     0x0B95 | RECOMMENDED,  // Taml
    82     0x0C15 | RECOMMENDED,  // Telu
    83     0x078C | RECOMMENDED | RTL,  // Thaa
    84     0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
    85     0x0F40 | RECOMMENDED,  // Tibt
    86     0x14C0 | ASPIRATIONAL,  // Cans
    87     0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
    88     0x1703 | EXCLUSION,  // Tglg
    89     0x1723 | EXCLUSION,  // Hano
    90     0x1743 | EXCLUSION,  // Buhd
    91     0x1763 | EXCLUSION,  // Tagb
    92     0x2800 | UNKNOWN,  // Brai
    93     0x10800 | EXCLUSION | RTL,  // Cprt
    94     0x1900 | LIMITED_USE,  // Limb
    95     0x10000 | EXCLUSION,  // Linb
    96     0x10480 | EXCLUSION,  // Osma
    97     0x10450 | EXCLUSION,  // Shaw
    98     0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
    99     0x10380 | EXCLUSION,  // Ugar
   100     0,  // 0 = NOT_ENCODED: no sample character, default (false) script properties; same for every bare 0 below
   101     0x1A00 | EXCLUSION,  // Bugi
   102     0x2C00 | EXCLUSION | CASED,  // Glag
   103     0x10A00 | EXCLUSION | RTL,  // Khar
   104     0xA800 | LIMITED_USE,  // Sylo
   105     0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
   106     0x2D30 | ASPIRATIONAL,  // Tfng
   107     0x103A0 | EXCLUSION,  // Xpeo
   108     0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
   109     0x1BC0 | LIMITED_USE,  // Batk
   110     0,
   111     0x11005 | EXCLUSION,  // Brah
   112     0xAA00 | LIMITED_USE,  // Cham
   113     0,
   114     0,
   115     0,
   116     0,
   117     0x13153 | EXCLUSION,  // Egyp
   118     0,
   119     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
   120     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
   121     0,
   122     0,
   123     0,
   124     0xA984 | LIMITED_USE | LB_LETTERS,  // Java
   125     0xA90A | LIMITED_USE,  // Kali
   126     0,
   127     0,
   128     0x1C00 | LIMITED_USE,  // Lepc
   129     0,
   130     0x0840 | LIMITED_USE | RTL,  // Mand
   131     0,
   132     0x10980 | EXCLUSION | RTL,  // Mero
   133     0x07CA | LIMITED_USE | RTL,  // Nkoo
   134     0x10C00 | EXCLUSION | RTL,  // Orkh
   135     0,
   136     0xA840 | EXCLUSION,  // Phag
   137     0x10900 | EXCLUSION | RTL,  // Phnx
   138     0x16F00 | ASPIRATIONAL,  // Plrd
   139     0,
   140     0,
   141     0,
   142     0,
   143     0,
   144     0,
   145     0xA549 | LIMITED_USE,  // Vaii
   146     0,
   147     0x12000 | EXCLUSION,  // Xsux
   148     0,
   149     0xFDD0 | UNKNOWN,  // Zzzz
   150     0x102A0 | EXCLUSION,  // Cari
   151     0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
   152     0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
   153     0x10280 | EXCLUSION,  // Lyci
   154     0x10920 | EXCLUSION | RTL,  // Lydi
   155     0x1C5A | LIMITED_USE,  // Olck
   156     0xA930 | EXCLUSION,  // Rjng
   157     0xA882 | LIMITED_USE,  // Saur
   158     0,
   159     0x1B83 | LIMITED_USE,  // Sund
   160     0,
   161     0xABC0 | LIMITED_USE,  // Mtei
   162     0x10840 | EXCLUSION | RTL,  // Armi
   163     0x10B00 | EXCLUSION | RTL,  // Avst
   164     0x11103 | LIMITED_USE,  // Cakm
   165     0xAC00 | RECOMMENDED,  // Kore
   166     0x11083 | EXCLUSION,  // Kthi
   167     0,
   168     0x10B60 | EXCLUSION | RTL,  // Phli
   169     0,
   170     0,
   171     0x10B40 | EXCLUSION | RTL,  // Prti
   172     0x0800 | EXCLUSION | RTL,  // Samr
   173     0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
   174     0,
   175     0,
   176     0xA6A0 | LIMITED_USE,  // Bamu
   177     0xA4D0 | LIMITED_USE,  // Lisu
   178     0,
   179     0x10A60 | EXCLUSION | RTL,  // Sarb
   180     0,
   181     0,
   182     0,
   183     0,
   184     0,
   185     0,
   186     0,
   187     0x109A0 | EXCLUSION | RTL,  // Merc
   188     0,
   189     0,
   190     0,
   191     0,
   192     0,
   193     0,
   194     0,
   195     0,
   196     0,
   197     0x11183 | EXCLUSION,  // Shrd
   198     0x110D0 | EXCLUSION,  // Sora
   199     0x11680 | EXCLUSION,  // Takr
   200     0,
   201     0,
   202     0,
   203     0,
   204     0,
   205     0,
   206     0,
   207     // End copy-paste from parsescriptmetadata.py
   208 };
   210 int32_t getScriptProps(UScriptCode script) {
   211     if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
   212         return SCRIPT_PROPS[script];
   213     } else {
   214         return 0;
   215     }
   216 }
   218 }  // namespace
   220 U_CAPI int32_t U_EXPORT2
   221 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
   222     if(U_FAILURE(*pErrorCode)) { return 0; }
   223     if(capacity < 0 || (capacity > 0 && dest == NULL)) {
   224         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
   225         return 0;
   226     }
   227     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
   228     int32_t length;
   229     if(sampleChar == 0) {
   230         length = 0;
   231     } else {
   232         length = U16_LENGTH(sampleChar);
   233         if(length <= capacity) {
   234             int32_t i = 0;
   235             U16_APPEND_UNSAFE(dest, i, sampleChar);
   236         }
   237     }
   238     return u_terminateUChars(dest, capacity, length, pErrorCode);
   239 }
   241 U_COMMON_API icu::UnicodeString U_EXPORT2
   242 uscript_getSampleUnicodeString(UScriptCode script) {
   243     icu::UnicodeString sample;
   244     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
   245     if(sampleChar != 0) {
   246         sample.append(sampleChar);
   247     }
   248     return sample;
   249 }
   251 U_CAPI UScriptUsage U_EXPORT2
   252 uscript_getUsage(UScriptCode script) {
   253     return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
   254 }
   256 U_CAPI UBool U_EXPORT2
   257 uscript_isRightToLeft(UScriptCode script) {
   258     return (getScriptProps(script) & RTL) != 0;
   259 }
   261 U_CAPI UBool U_EXPORT2
   262 uscript_breaksBetweenLetters(UScriptCode script) {
   263     return (getScriptProps(script) & LB_LETTERS) != 0;
   264 }
   266 U_CAPI UBool U_EXPORT2
   267 uscript_isCased(UScriptCode script) {
   268     return (getScriptProps(script) & CASED) != 0;
   269 }

mercurial