intl/icu/source/common/unicode/uscript.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 1997-2013, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 *
michael@0 7 * File USCRIPT.H
michael@0 8 *
michael@0 9 * Modification History:
michael@0 10 *
michael@0 11 * Date Name Description
michael@0 12 * 07/06/2001 Ram Creation.
michael@0 13 ******************************************************************************
michael@0 14 */
michael@0 15
michael@0 16 #ifndef USCRIPT_H
michael@0 17 #define USCRIPT_H
michael@0 18 #include "unicode/utypes.h"
michael@0 19
michael@0 20 /**
michael@0 21 * \file
michael@0 22 * \brief C API: Unicode Script Information
michael@0 23 */
michael@0 24
michael@0 25 /**
michael@0 26 * Constants for ISO 15924 script codes.
michael@0 27 *
michael@0 28 * Many of these script codes - those from Unicode's ScriptNames.txt -
michael@0 29 * are character property values for Unicode's Script property.
michael@0 30 * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
michael@0 31 *
michael@0 32 * Starting with ICU 3.6, constants for most ISO 15924 script codes
michael@0 33 * are included (currently excluding private-use codes Qaaa..Qabx).
michael@0 34 * For scripts for which there are codes in ISO 15924 but which are not
michael@0 35 * used in the Unicode Character Database (UCD), there are no Unicode characters
michael@0 36 * associated with those scripts.
michael@0 37 *
michael@0 38 * For example, there are no characters that have a UCD script code of
michael@0 39 * Hans or Hant. All Han ideographs have the Hani script code.
michael@0 40 * The Hans and Hant script codes are used with CLDR data.
michael@0 41 *
michael@0 42 * ISO 15924 script codes are included for use with CLDR and similar.
michael@0 43 *
michael@0 44 * @stable ICU 2.2
michael@0 45 */
michael@0 46 typedef enum UScriptCode {
michael@0 47 /*
michael@0 48 * Note: UScriptCode constants and their ISO script code comments
michael@0 49 * are parsed by preparseucd.py.
michael@0 50 * It matches lines like
michael@0 51 * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
michael@0 52 */
michael@0 53
michael@0 54 /** @stable ICU 2.2 */
michael@0 55 USCRIPT_INVALID_CODE = -1,
michael@0 56 /** @stable ICU 2.2 */
michael@0 57 USCRIPT_COMMON = 0, /* Zyyy */
michael@0 58 /** @stable ICU 2.2 */
michael@0 59 USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
michael@0 60 /** @stable ICU 2.2 */
michael@0 61 USCRIPT_ARABIC = 2, /* Arab */
michael@0 62 /** @stable ICU 2.2 */
michael@0 63 USCRIPT_ARMENIAN = 3, /* Armn */
michael@0 64 /** @stable ICU 2.2 */
michael@0 65 USCRIPT_BENGALI = 4, /* Beng */
michael@0 66 /** @stable ICU 2.2 */
michael@0 67 USCRIPT_BOPOMOFO = 5, /* Bopo */
michael@0 68 /** @stable ICU 2.2 */
michael@0 69 USCRIPT_CHEROKEE = 6, /* Cher */
michael@0 70 /** @stable ICU 2.2 */
michael@0 71 USCRIPT_COPTIC = 7, /* Copt */
michael@0 72 /** @stable ICU 2.2 */
michael@0 73 USCRIPT_CYRILLIC = 8, /* Cyrl */
michael@0 74 /** @stable ICU 2.2 */
michael@0 75 USCRIPT_DESERET = 9, /* Dsrt */
michael@0 76 /** @stable ICU 2.2 */
michael@0 77 USCRIPT_DEVANAGARI = 10, /* Deva */
michael@0 78 /** @stable ICU 2.2 */
michael@0 79 USCRIPT_ETHIOPIC = 11, /* Ethi */
michael@0 80 /** @stable ICU 2.2 */
michael@0 81 USCRIPT_GEORGIAN = 12, /* Geor */
michael@0 82 /** @stable ICU 2.2 */
michael@0 83 USCRIPT_GOTHIC = 13, /* Goth */
michael@0 84 /** @stable ICU 2.2 */
michael@0 85 USCRIPT_GREEK = 14, /* Grek */
michael@0 86 /** @stable ICU 2.2 */
michael@0 87 USCRIPT_GUJARATI = 15, /* Gujr */
michael@0 88 /** @stable ICU 2.2 */
michael@0 89 USCRIPT_GURMUKHI = 16, /* Guru */
michael@0 90 /** @stable ICU 2.2 */
michael@0 91 USCRIPT_HAN = 17, /* Hani */
michael@0 92 /** @stable ICU 2.2 */
michael@0 93 USCRIPT_HANGUL = 18, /* Hang */
michael@0 94 /** @stable ICU 2.2 */
michael@0 95 USCRIPT_HEBREW = 19, /* Hebr */
michael@0 96 /** @stable ICU 2.2 */
michael@0 97 USCRIPT_HIRAGANA = 20, /* Hira */
michael@0 98 /** @stable ICU 2.2 */
michael@0 99 USCRIPT_KANNADA = 21, /* Knda */
michael@0 100 /** @stable ICU 2.2 */
michael@0 101 USCRIPT_KATAKANA = 22, /* Kana */
michael@0 102 /** @stable ICU 2.2 */
michael@0 103 USCRIPT_KHMER = 23, /* Khmr */
michael@0 104 /** @stable ICU 2.2 */
michael@0 105 USCRIPT_LAO = 24, /* Laoo */
michael@0 106 /** @stable ICU 2.2 */
michael@0 107 USCRIPT_LATIN = 25, /* Latn */
michael@0 108 /** @stable ICU 2.2 */
michael@0 109 USCRIPT_MALAYALAM = 26, /* Mlym */
michael@0 110 /** @stable ICU 2.2 */
michael@0 111 USCRIPT_MONGOLIAN = 27, /* Mong */
michael@0 112 /** @stable ICU 2.2 */
michael@0 113 USCRIPT_MYANMAR = 28, /* Mymr */
michael@0 114 /** @stable ICU 2.2 */
michael@0 115 USCRIPT_OGHAM = 29, /* Ogam */
michael@0 116 /** @stable ICU 2.2 */
michael@0 117 USCRIPT_OLD_ITALIC = 30, /* Ital */
michael@0 118 /** @stable ICU 2.2 */
michael@0 119 USCRIPT_ORIYA = 31, /* Orya */
michael@0 120 /** @stable ICU 2.2 */
michael@0 121 USCRIPT_RUNIC = 32, /* Runr */
michael@0 122 /** @stable ICU 2.2 */
michael@0 123 USCRIPT_SINHALA = 33, /* Sinh */
michael@0 124 /** @stable ICU 2.2 */
michael@0 125 USCRIPT_SYRIAC = 34, /* Syrc */
michael@0 126 /** @stable ICU 2.2 */
michael@0 127 USCRIPT_TAMIL = 35, /* Taml */
michael@0 128 /** @stable ICU 2.2 */
michael@0 129 USCRIPT_TELUGU = 36, /* Telu */
michael@0 130 /** @stable ICU 2.2 */
michael@0 131 USCRIPT_THAANA = 37, /* Thaa */
michael@0 132 /** @stable ICU 2.2 */
michael@0 133 USCRIPT_THAI = 38, /* Thai */
michael@0 134 /** @stable ICU 2.2 */
michael@0 135 USCRIPT_TIBETAN = 39, /* Tibt */
michael@0 136 /** Canadian_Aboriginal script. @stable ICU 2.6 */
michael@0 137 USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
michael@0 138 /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
michael@0 139 USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
michael@0 140 /** @stable ICU 2.2 */
michael@0 141 USCRIPT_YI = 41, /* Yiii */
michael@0 142 /* New scripts in Unicode 3.2 */
michael@0 143 /** @stable ICU 2.2 */
michael@0 144 USCRIPT_TAGALOG = 42, /* Tglg */
michael@0 145 /** @stable ICU 2.2 */
michael@0 146 USCRIPT_HANUNOO = 43, /* Hano */
michael@0 147 /** @stable ICU 2.2 */
michael@0 148 USCRIPT_BUHID = 44, /* Buhd */
michael@0 149 /** @stable ICU 2.2 */
michael@0 150 USCRIPT_TAGBANWA = 45, /* Tagb */
michael@0 151
michael@0 152 /* New scripts in Unicode 4 */
michael@0 153 /** @stable ICU 2.6 */
michael@0 154 USCRIPT_BRAILLE = 46, /* Brai */
michael@0 155 /** @stable ICU 2.6 */
michael@0 156 USCRIPT_CYPRIOT = 47, /* Cprt */
michael@0 157 /** @stable ICU 2.6 */
michael@0 158 USCRIPT_LIMBU = 48, /* Limb */
michael@0 159 /** @stable ICU 2.6 */
michael@0 160 USCRIPT_LINEAR_B = 49, /* Linb */
michael@0 161 /** @stable ICU 2.6 */
michael@0 162 USCRIPT_OSMANYA = 50, /* Osma */
michael@0 163 /** @stable ICU 2.6 */
michael@0 164 USCRIPT_SHAVIAN = 51, /* Shaw */
michael@0 165 /** @stable ICU 2.6 */
michael@0 166 USCRIPT_TAI_LE = 52, /* Tale */
michael@0 167 /** @stable ICU 2.6 */
michael@0 168 USCRIPT_UGARITIC = 53, /* Ugar */
michael@0 169
michael@0 170 /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
michael@0 171 USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
michael@0 172
michael@0 173 /* New scripts in Unicode 4.1 */
michael@0 174 /** @stable ICU 3.4 */
michael@0 175 USCRIPT_BUGINESE = 55, /* Bugi */
michael@0 176 /** @stable ICU 3.4 */
michael@0 177 USCRIPT_GLAGOLITIC = 56, /* Glag */
michael@0 178 /** @stable ICU 3.4 */
michael@0 179 USCRIPT_KHAROSHTHI = 57, /* Khar */
michael@0 180 /** @stable ICU 3.4 */
michael@0 181 USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
michael@0 182 /** @stable ICU 3.4 */
michael@0 183 USCRIPT_NEW_TAI_LUE = 59, /* Talu */
michael@0 184 /** @stable ICU 3.4 */
michael@0 185 USCRIPT_TIFINAGH = 60, /* Tfng */
michael@0 186 /** @stable ICU 3.4 */
michael@0 187 USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
michael@0 188
michael@0 189 /* New script codes from ISO 15924 */
michael@0 190 /** @stable ICU 3.6 */
michael@0 191 USCRIPT_BALINESE = 62, /* Bali */
michael@0 192 /** @stable ICU 3.6 */
michael@0 193 USCRIPT_BATAK = 63, /* Batk */
michael@0 194 /** @stable ICU 3.6 */
michael@0 195 USCRIPT_BLISSYMBOLS = 64, /* Blis */
michael@0 196 /** @stable ICU 3.6 */
michael@0 197 USCRIPT_BRAHMI = 65, /* Brah */
michael@0 198 /** @stable ICU 3.6 */
michael@0 199 USCRIPT_CHAM = 66, /* Cham */
michael@0 200 /** @stable ICU 3.6 */
michael@0 201 USCRIPT_CIRTH = 67, /* Cirt */
michael@0 202 /** @stable ICU 3.6 */
michael@0 203 USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
michael@0 204 /** @stable ICU 3.6 */
michael@0 205 USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
michael@0 206 /** @stable ICU 3.6 */
michael@0 207 USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
michael@0 208 /** @stable ICU 3.6 */
michael@0 209 USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
michael@0 210 /** @stable ICU 3.6 */
michael@0 211 USCRIPT_KHUTSURI = 72, /* Geok */
michael@0 212 /** @stable ICU 3.6 */
michael@0 213 USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
michael@0 214 /** @stable ICU 3.6 */
michael@0 215 USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
michael@0 216 /** @stable ICU 3.6 */
michael@0 217 USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
michael@0 218 /** @stable ICU 3.6 */
michael@0 219 USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
michael@0 220 /** @stable ICU 3.6 */
michael@0 221 USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
michael@0 222 /** @stable ICU 3.6 */
michael@0 223 USCRIPT_JAVANESE = 78, /* Java */
michael@0 224 /** @stable ICU 3.6 */
michael@0 225 USCRIPT_KAYAH_LI = 79, /* Kali */
michael@0 226 /** @stable ICU 3.6 */
michael@0 227 USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
michael@0 228 /** @stable ICU 3.6 */
michael@0 229 USCRIPT_LATIN_GAELIC = 81, /* Latg */
michael@0 230 /** @stable ICU 3.6 */
michael@0 231 USCRIPT_LEPCHA = 82, /* Lepc */
michael@0 232 /** @stable ICU 3.6 */
michael@0 233 USCRIPT_LINEAR_A = 83, /* Lina */
michael@0 234 /** @stable ICU 4.6 */
michael@0 235 USCRIPT_MANDAIC = 84, /* Mand */
michael@0 236 /** @stable ICU 3.6 */
michael@0 237 USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
michael@0 238 /** @stable ICU 3.6 */
michael@0 239 USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
michael@0 240 /** @stable ICU 4.6 */
michael@0 241 USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
michael@0 242 /** @stable ICU 3.6 */
michael@0 243 USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
michael@0 244 /** @stable ICU 3.6 */
michael@0 245 USCRIPT_NKO = 87, /* Nkoo */
michael@0 246 /** @stable ICU 3.6 */
michael@0 247 USCRIPT_ORKHON = 88, /* Orkh */
michael@0 248 /** @stable ICU 3.6 */
michael@0 249 USCRIPT_OLD_PERMIC = 89, /* Perm */
michael@0 250 /** @stable ICU 3.6 */
michael@0 251 USCRIPT_PHAGS_PA = 90, /* Phag */
michael@0 252 /** @stable ICU 3.6 */
michael@0 253 USCRIPT_PHOENICIAN = 91, /* Phnx */
michael@0 254 /** @stable ICU 52 */
michael@0 255 USCRIPT_MIAO = 92, /* Plrd */
michael@0 256 /** @stable ICU 3.6 */
michael@0 257 USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
michael@0 258 /** @stable ICU 3.6 */
michael@0 259 USCRIPT_RONGORONGO = 93, /* Roro */
michael@0 260 /** @stable ICU 3.6 */
michael@0 261 USCRIPT_SARATI = 94, /* Sara */
michael@0 262 /** @stable ICU 3.6 */
michael@0 263 USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
michael@0 264 /** @stable ICU 3.6 */
michael@0 265 USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
michael@0 266 /** @stable ICU 3.6 */
michael@0 267 USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
michael@0 268 /** @stable ICU 3.6 */
michael@0 269 USCRIPT_TENGWAR = 98, /* Teng */
michael@0 270 /** @stable ICU 3.6 */
michael@0 271 USCRIPT_VAI = 99, /* Vaii */
michael@0 272 /** @stable ICU 3.6 */
michael@0 273 USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
michael@0 274 /** @stable ICU 3.6 */
michael@0 275 USCRIPT_CUNEIFORM = 101,/* Xsux */
michael@0 276 /** @stable ICU 3.6 */
michael@0 277 USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
michael@0 278 /** @stable ICU 3.6 */
michael@0 279 USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
michael@0 280
michael@0 281 /* New script codes from ISO 15924 */
michael@0 282 /** @stable ICU 3.8 */
michael@0 283 USCRIPT_CARIAN = 104,/* Cari */
michael@0 284 /** @stable ICU 3.8 */
michael@0 285 USCRIPT_JAPANESE = 105,/* Jpan */
michael@0 286 /** @stable ICU 3.8 */
michael@0 287 USCRIPT_LANNA = 106,/* Lana */
michael@0 288 /** @stable ICU 3.8 */
michael@0 289 USCRIPT_LYCIAN = 107,/* Lyci */
michael@0 290 /** @stable ICU 3.8 */
michael@0 291 USCRIPT_LYDIAN = 108,/* Lydi */
michael@0 292 /** @stable ICU 3.8 */
michael@0 293 USCRIPT_OL_CHIKI = 109,/* Olck */
michael@0 294 /** @stable ICU 3.8 */
michael@0 295 USCRIPT_REJANG = 110,/* Rjng */
michael@0 296 /** @stable ICU 3.8 */
michael@0 297 USCRIPT_SAURASHTRA = 111,/* Saur */
michael@0 298 /** @stable ICU 3.8 */
michael@0 299 USCRIPT_SIGN_WRITING = 112,/* Sgnw */
michael@0 300 /** @stable ICU 3.8 */
michael@0 301 USCRIPT_SUNDANESE = 113,/* Sund */
michael@0 302 /** @stable ICU 3.8 */
michael@0 303 USCRIPT_MOON = 114,/* Moon */
michael@0 304 /** @stable ICU 3.8 */
michael@0 305 USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
michael@0 306
michael@0 307 /* New script codes from ISO 15924 */
michael@0 308 /** @stable ICU 4.0 */
michael@0 309 USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
michael@0 310 /** @stable ICU 4.0 */
michael@0 311 USCRIPT_AVESTAN = 117,/* Avst */
michael@0 312 /** @stable ICU 4.0 */
michael@0 313 USCRIPT_CHAKMA = 118,/* Cakm */
michael@0 314 /** @stable ICU 4.0 */
michael@0 315 USCRIPT_KOREAN = 119,/* Kore */
michael@0 316 /** @stable ICU 4.0 */
michael@0 317 USCRIPT_KAITHI = 120,/* Kthi */
michael@0 318 /** @stable ICU 4.0 */
michael@0 319 USCRIPT_MANICHAEAN = 121,/* Mani */
michael@0 320 /** @stable ICU 4.0 */
michael@0 321 USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
michael@0 322 /** @stable ICU 4.0 */
michael@0 323 USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
michael@0 324 /** @stable ICU 4.0 */
michael@0 325 USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
michael@0 326 /** @stable ICU 4.0 */
michael@0 327 USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
michael@0 328 /** @stable ICU 4.0 */
michael@0 329 USCRIPT_SAMARITAN = 126,/* Samr */
michael@0 330 /** @stable ICU 4.0 */
michael@0 331 USCRIPT_TAI_VIET = 127,/* Tavt */
michael@0 332 /** @stable ICU 4.0 */
michael@0 333 USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
michael@0 334 /** @stable ICU 4.0 */
michael@0 335 USCRIPT_SYMBOLS = 129,/* Zsym */
michael@0 336
michael@0 337 /* New script codes from ISO 15924 */
michael@0 338 /** @stable ICU 4.4 */
michael@0 339 USCRIPT_BAMUM = 130,/* Bamu */
michael@0 340 /** @stable ICU 4.4 */
michael@0 341 USCRIPT_LISU = 131,/* Lisu */
michael@0 342 /** @stable ICU 4.4 */
michael@0 343 USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
michael@0 344 /** @stable ICU 4.4 */
michael@0 345 USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
michael@0 346
michael@0 347 /* New script codes from ISO 15924 */
michael@0 348 /** @stable ICU 4.6 */
michael@0 349 USCRIPT_BASSA_VAH = 134,/* Bass */
michael@0 350 /** @stable ICU 4.6 */
michael@0 351 USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */
michael@0 352 /** @stable ICU 4.6 */
michael@0 353 USCRIPT_ELBASAN = 136,/* Elba */
michael@0 354 /** @stable ICU 4.6 */
michael@0 355 USCRIPT_GRANTHA = 137,/* Gran */
michael@0 356 /** @stable ICU 4.6 */
michael@0 357 USCRIPT_KPELLE = 138,/* Kpel */
michael@0 358 /** @stable ICU 4.6 */
michael@0 359 USCRIPT_LOMA = 139,/* Loma */
michael@0 360 /** @stable ICU 4.6 */
michael@0 361 USCRIPT_MENDE = 140,/* Mend */
michael@0 362 /** @stable ICU 4.6 */
michael@0 363 USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
michael@0 364 /** @stable ICU 4.6 */
michael@0 365 USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
michael@0 366 /** @stable ICU 4.6 */
michael@0 367 USCRIPT_NABATAEAN = 143,/* Nbat */
michael@0 368 /** @stable ICU 4.6 */
michael@0 369 USCRIPT_PALMYRENE = 144,/* Palm */
michael@0 370 /** @stable ICU 4.6 */
michael@0 371 USCRIPT_SINDHI = 145,/* Sind */
michael@0 372 /** @stable ICU 4.6 */
michael@0 373 USCRIPT_WARANG_CITI = 146,/* Wara */
michael@0 374
michael@0 375 /** @stable ICU 4.8 */
michael@0 376 USCRIPT_AFAKA = 147,/* Afak */
michael@0 377 /** @stable ICU 4.8 */
michael@0 378 USCRIPT_JURCHEN = 148,/* Jurc */
michael@0 379 /** @stable ICU 4.8 */
michael@0 380 USCRIPT_MRO = 149,/* Mroo */
michael@0 381 /** @stable ICU 4.8 */
michael@0 382 USCRIPT_NUSHU = 150,/* Nshu */
michael@0 383 /** @stable ICU 4.8 */
michael@0 384 USCRIPT_SHARADA = 151,/* Shrd */
michael@0 385 /** @stable ICU 4.8 */
michael@0 386 USCRIPT_SORA_SOMPENG = 152,/* Sora */
michael@0 387 /** @stable ICU 4.8 */
michael@0 388 USCRIPT_TAKRI = 153,/* Takr */
michael@0 389 /** @stable ICU 4.8 */
michael@0 390 USCRIPT_TANGUT = 154,/* Tang */
michael@0 391 /** @stable ICU 4.8 */
michael@0 392 USCRIPT_WOLEAI = 155,/* Wole */
michael@0 393
michael@0 394 /** @stable ICU 49 */
michael@0 395 USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
michael@0 396 /** @stable ICU 49 */
michael@0 397 USCRIPT_KHOJKI = 157,/* Khoj */
michael@0 398 /** @stable ICU 49 */
michael@0 399 USCRIPT_TIRHUTA = 158,/* Tirh */
michael@0 400
michael@0 401 /** @stable ICU 52 */
michael@0 402 USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
michael@0 403 /** @stable ICU 52 */
michael@0 404 USCRIPT_MAHAJANI = 160,/* Mahj */
michael@0 405
michael@0 406 /* Private use codes from Qaaa - Qabx are not supported */
michael@0 407
michael@0 408 /** @stable ICU 2.2 */
michael@0 409 USCRIPT_CODE_LIMIT = 161
michael@0 410 } UScriptCode;
michael@0 411
michael@0 412 /**
michael@0 413 * Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
michael@0 414 * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
michael@0 415 * Fills in USCRIPT_LATIN given "en" OR "en_US".
michael@0 416 * If the required capacity is greater than the capacity of the destination buffer, then the error code
michael@0 417 * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
michael@0 418 *
michael@0 419 * <p>Note: To search by short or long script alias only, use
michael@0 420 * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
michael@0 421 * a fast lookup with no access of the locale data.
michael@0 422 * @param nameOrAbbrOrLocale name of the script, as given in
michael@0 423 * PropertyValueAliases.txt, or ISO 15924 code or locale
michael@0 424 * @param fillIn the UScriptCode buffer to fill in the script code
michael@0 425 * @param capacity the capacity (size) of the UScriptCode buffer passed in.
michael@0 426 * @param err the error status code.
michael@0 427 * @return The number of script codes filled in the buffer passed in
michael@0 428 * @stable ICU 2.4
michael@0 429 */
michael@0 430 U_STABLE int32_t U_EXPORT2
michael@0 431 uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
michael@0 432
michael@0 433 /**
michael@0 434 * Gets a script name associated with the given script code.
michael@0 435 * Returns "Malayalam" given USCRIPT_MALAYALAM
michael@0 436 * @param scriptCode UScriptCode enum
michael@0 437 * @return script long name as given in
michael@0 438 * PropertyValueAliases.txt, or NULL if scriptCode is invalid
michael@0 439 * @stable ICU 2.4
michael@0 440 */
michael@0 441 U_STABLE const char* U_EXPORT2
michael@0 442 uscript_getName(UScriptCode scriptCode);
michael@0 443
michael@0 444 /**
michael@0 445 * Gets the abbreviated (short) script name associated with the given script code.
michael@0 446 * Returns "Mlym" given USCRIPT_MALAYALAM
michael@0 447 * @param scriptCode UScriptCode enum
michael@0 448 * @return script abbreviated name as given in
michael@0 449 * PropertyValueAliases.txt, or NULL if scriptCode is invalid
michael@0 450 * @stable ICU 2.4
michael@0 451 */
michael@0 452 U_STABLE const char* U_EXPORT2
michael@0 453 uscript_getShortName(UScriptCode scriptCode);
michael@0 454
michael@0 455 /**
michael@0 456 * Gets the script code associated with the given codepoint.
michael@0 457 * Returns USCRIPT_MALAYALAM given 0x0D02
michael@0 458 * @param codepoint UChar32 codepoint
michael@0 459 * @param err the error status code.
michael@0 460 * @return The UScriptCode, or 0 if codepoint is invalid
michael@0 461 * @stable ICU 2.4
michael@0 462 */
michael@0 463 U_STABLE UScriptCode U_EXPORT2
michael@0 464 uscript_getScript(UChar32 codepoint, UErrorCode *err);
michael@0 465
michael@0 466 /**
michael@0 467 * Do the Script_Extensions of code point c contain script sc?
michael@0 468 * If c does not have explicit Script_Extensions, then this tests whether
michael@0 469 * c has the Script property value sc.
michael@0 470 *
michael@0 471 * Some characters are commonly used in multiple scripts.
michael@0 472 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
michael@0 473 *
michael@0 474 * The Script_Extensions property is provisional. It may be modified or removed
michael@0 475 * in future versions of the Unicode Standard, and thus in ICU.
michael@0 476 * @param c code point
michael@0 477 * @param sc script code
michael@0 478 * @return TRUE if sc is in Script_Extensions(c)
michael@0 479 * @stable ICU 49
michael@0 480 */
michael@0 481 U_STABLE UBool U_EXPORT2
michael@0 482 uscript_hasScript(UChar32 c, UScriptCode sc);
michael@0 483
michael@0 484 /**
michael@0 485 * Writes code point c's Script_Extensions as a list of UScriptCode values
michael@0 486 * to the output scripts array and returns the number of script codes.
michael@0 487 * - If c does have Script_Extensions, then the Script property value
michael@0 488 * (normally Common or Inherited) is not included.
michael@0 489 * - If c does not have Script_Extensions, then the one Script code is written to the output array.
michael@0 490 * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
michael@0 491 * In other words, if the return value is 1,
michael@0 492 * then the output array contains exactly c's single Script code.
michael@0 493 * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
michael@0 494 *
michael@0 495 * Some characters are commonly used in multiple scripts.
michael@0 496 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
michael@0 497 *
michael@0 498 * If there are more than capacity script codes to be written, then
michael@0 499 * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
michael@0 500 * (Usual ICU buffer handling behavior.)
michael@0 501 *
michael@0 502 * The Script_Extensions property is provisional. It may be modified or removed
michael@0 503 * in future versions of the Unicode Standard, and thus in ICU.
michael@0 504 * @param c code point
michael@0 505 * @param scripts output script code array
michael@0 506 * @param capacity capacity of the scripts array
michael@0 507 * @param errorCode Standard ICU error code. Its input value must
michael@0 508 * pass the U_SUCCESS() test, or else the function returns
michael@0 509 * immediately. Check for U_FAILURE() on output or use with
michael@0 510 * function chaining. (See User Guide for details.)
michael@0 511 * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
michael@0 512 * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
michael@0 513 * @stable ICU 49
michael@0 514 */
michael@0 515 U_STABLE int32_t U_EXPORT2
michael@0 516 uscript_getScriptExtensions(UChar32 c,
michael@0 517 UScriptCode *scripts, int32_t capacity,
michael@0 518 UErrorCode *errorCode);
michael@0 519
michael@0 520 #ifndef U_HIDE_DRAFT_API
michael@0 521
michael@0 522 /**
michael@0 523 * Script usage constants.
michael@0 524 * See UAX #31 Unicode Identifier and Pattern Syntax.
michael@0 525 * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
michael@0 526 *
michael@0 527 * @draft ICU 51
michael@0 528 */
michael@0 529 typedef enum UScriptUsage {
michael@0 530 /** Not encoded in Unicode. @draft ICU 51 */
michael@0 531 USCRIPT_USAGE_NOT_ENCODED,
michael@0 532 /** Unknown script usage. @draft ICU 51 */
michael@0 533 USCRIPT_USAGE_UNKNOWN,
michael@0 534 /** Candidate for Exclusion from Identifiers. @draft ICU 51 */
michael@0 535 USCRIPT_USAGE_EXCLUDED,
michael@0 536 /** Limited Use script. @draft ICU 51 */
michael@0 537 USCRIPT_USAGE_LIMITED_USE,
michael@0 538 /** Aspirational Use script. @draft ICU 51 */
michael@0 539 USCRIPT_USAGE_ASPIRATIONAL,
michael@0 540 /** Recommended script. @draft ICU 51 */
michael@0 541 USCRIPT_USAGE_RECOMMENDED
michael@0 542 } UScriptUsage;
michael@0 543
michael@0 544 /**
michael@0 545 * Writes the script sample character string.
michael@0 546 * This string normally consists of one code point but might be longer.
michael@0 547 * The string is empty if the script is not encoded.
michael@0 548 *
michael@0 549 * @param script script code
michael@0 550 * @param dest output string array
michael@0 551 * @param capacity number of UChars in the dest array
michael@0 552 * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
michael@0 553 * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
michael@0 554 * @draft ICU 51
michael@0 555 */
michael@0 556 U_DRAFT int32_t U_EXPORT2
michael@0 557 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
michael@0 558
michael@0 559 #if U_SHOW_CPLUSPLUS_API
michael@0 560
michael@0 561 U_NAMESPACE_BEGIN
michael@0 562 class UnicodeString;
michael@0 563 U_NAMESPACE_END
michael@0 564
michael@0 565 /**
michael@0 566 * Returns the script sample character string.
michael@0 567 * This string normally consists of one code point but might be longer.
michael@0 568 * The string is empty if the script is not encoded.
michael@0 569 *
michael@0 570 * @param script script code
michael@0 571 * @return the sample character string
michael@0 572 * @draft ICU 51
michael@0 573 */
michael@0 574 U_COMMON_API icu::UnicodeString U_EXPORT2
michael@0 575 uscript_getSampleUnicodeString(UScriptCode script);
michael@0 576
michael@0 577 #endif
michael@0 578
michael@0 579 /**
michael@0 580 * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
michael@0 581 * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
michael@0 582 *
michael@0 583 * @param script script code
michael@0 584 * @return script usage
michael@0 585 * @see UScriptUsage
michael@0 586 * @draft ICU 51
michael@0 587 */
michael@0 588 U_DRAFT UScriptUsage U_EXPORT2
michael@0 589 uscript_getUsage(UScriptCode script);
michael@0 590
michael@0 591 /**
michael@0 592 * Returns TRUE if the script is written right-to-left.
michael@0 593 * For example, Arab and Hebr.
michael@0 594 *
michael@0 595 * @param script script code
michael@0 596 * @return TRUE if the script is right-to-left
michael@0 597 * @draft ICU 51
michael@0 598 */
michael@0 599 U_DRAFT UBool U_EXPORT2
michael@0 600 uscript_isRightToLeft(UScriptCode script);
michael@0 601
michael@0 602 /**
michael@0 603 * Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
michael@0 604 * Such a script typically requires dictionary-based line breaking.
michael@0 605 * For example, Hani and Thai.
michael@0 606 *
michael@0 607 * @param script script code
michael@0 608 * @return TRUE if the script allows line breaks between letters
michael@0 609 * @draft ICU 51
michael@0 610 */
michael@0 611 U_DRAFT UBool U_EXPORT2
michael@0 612 uscript_breaksBetweenLetters(UScriptCode script);
michael@0 613
michael@0 614 /**
michael@0 615 * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
michael@0 616 * For example, Latn and Cyrl.
michael@0 617 *
michael@0 618 * @param script script code
michael@0 619 * @return TRUE if the script is cased
michael@0 620 * @draft ICU 51
michael@0 621 */
michael@0 622 U_DRAFT UBool U_EXPORT2
michael@0 623 uscript_isCased(UScriptCode script);
michael@0 624
michael@0 625 #endif /* U_HIDE_DRAFT_API */
michael@0 626
michael@0 627 #endif

mercurial