Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (C) 1997-2013, International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | * |
michael@0 | 7 | * File USCRIPT.H |
michael@0 | 8 | * |
michael@0 | 9 | * Modification History: |
michael@0 | 10 | * |
michael@0 | 11 | * Date Name Description |
michael@0 | 12 | * 07/06/2001 Ram Creation. |
michael@0 | 13 | ****************************************************************************** |
michael@0 | 14 | */ |
michael@0 | 15 | |
michael@0 | 16 | #ifndef USCRIPT_H |
michael@0 | 17 | #define USCRIPT_H |
michael@0 | 18 | #include "unicode/utypes.h" |
michael@0 | 19 | |
michael@0 | 20 | /** |
michael@0 | 21 | * \file |
michael@0 | 22 | * \brief C API: Unicode Script Information |
michael@0 | 23 | */ |
michael@0 | 24 | |
michael@0 | 25 | /** |
michael@0 | 26 | * Constants for ISO 15924 script codes. |
michael@0 | 27 | * |
michael@0 | 28 | * Many of these script codes - those from Unicode's ScriptNames.txt - |
michael@0 | 29 | * are character property values for Unicode's Script property. |
michael@0 | 30 | * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/). |
michael@0 | 31 | * |
michael@0 | 32 | * Starting with ICU 3.6, constants for most ISO 15924 script codes |
michael@0 | 33 | * are included (currently excluding private-use codes Qaaa..Qabx). |
michael@0 | 34 | * For scripts for which there are codes in ISO 15924 but which are not |
michael@0 | 35 | * used in the Unicode Character Database (UCD), there are no Unicode characters |
michael@0 | 36 | * associated with those scripts. |
michael@0 | 37 | * |
michael@0 | 38 | * For example, there are no characters that have a UCD script code of |
michael@0 | 39 | * Hans or Hant. All Han ideographs have the Hani script code. |
michael@0 | 40 | * The Hans and Hant script codes are used with CLDR data. |
michael@0 | 41 | * |
michael@0 | 42 | * ISO 15924 script codes are included for use with CLDR and similar. |
michael@0 | 43 | * |
michael@0 | 44 | * @stable ICU 2.2 |
michael@0 | 45 | */ |
michael@0 | 46 | typedef enum UScriptCode { |
michael@0 | 47 | /* |
michael@0 | 48 | * Note: UScriptCode constants and their ISO script code comments |
michael@0 | 49 | * are parsed by preparseucd.py. |
michael@0 | 50 | * It matches lines like |
michael@0 | 51 | * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * / |
michael@0 | 52 | */ |
michael@0 | 53 | |
michael@0 | 54 | /** @stable ICU 2.2 */ |
michael@0 | 55 | USCRIPT_INVALID_CODE = -1, |
michael@0 | 56 | /** @stable ICU 2.2 */ |
michael@0 | 57 | USCRIPT_COMMON = 0, /* Zyyy */ |
michael@0 | 58 | /** @stable ICU 2.2 */ |
michael@0 | 59 | USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ |
michael@0 | 60 | /** @stable ICU 2.2 */ |
michael@0 | 61 | USCRIPT_ARABIC = 2, /* Arab */ |
michael@0 | 62 | /** @stable ICU 2.2 */ |
michael@0 | 63 | USCRIPT_ARMENIAN = 3, /* Armn */ |
michael@0 | 64 | /** @stable ICU 2.2 */ |
michael@0 | 65 | USCRIPT_BENGALI = 4, /* Beng */ |
michael@0 | 66 | /** @stable ICU 2.2 */ |
michael@0 | 67 | USCRIPT_BOPOMOFO = 5, /* Bopo */ |
michael@0 | 68 | /** @stable ICU 2.2 */ |
michael@0 | 69 | USCRIPT_CHEROKEE = 6, /* Cher */ |
michael@0 | 70 | /** @stable ICU 2.2 */ |
michael@0 | 71 | USCRIPT_COPTIC = 7, /* Copt */ |
michael@0 | 72 | /** @stable ICU 2.2 */ |
michael@0 | 73 | USCRIPT_CYRILLIC = 8, /* Cyrl */ |
michael@0 | 74 | /** @stable ICU 2.2 */ |
michael@0 | 75 | USCRIPT_DESERET = 9, /* Dsrt */ |
michael@0 | 76 | /** @stable ICU 2.2 */ |
michael@0 | 77 | USCRIPT_DEVANAGARI = 10, /* Deva */ |
michael@0 | 78 | /** @stable ICU 2.2 */ |
michael@0 | 79 | USCRIPT_ETHIOPIC = 11, /* Ethi */ |
michael@0 | 80 | /** @stable ICU 2.2 */ |
michael@0 | 81 | USCRIPT_GEORGIAN = 12, /* Geor */ |
michael@0 | 82 | /** @stable ICU 2.2 */ |
michael@0 | 83 | USCRIPT_GOTHIC = 13, /* Goth */ |
michael@0 | 84 | /** @stable ICU 2.2 */ |
michael@0 | 85 | USCRIPT_GREEK = 14, /* Grek */ |
michael@0 | 86 | /** @stable ICU 2.2 */ |
michael@0 | 87 | USCRIPT_GUJARATI = 15, /* Gujr */ |
michael@0 | 88 | /** @stable ICU 2.2 */ |
michael@0 | 89 | USCRIPT_GURMUKHI = 16, /* Guru */ |
michael@0 | 90 | /** @stable ICU 2.2 */ |
michael@0 | 91 | USCRIPT_HAN = 17, /* Hani */ |
michael@0 | 92 | /** @stable ICU 2.2 */ |
michael@0 | 93 | USCRIPT_HANGUL = 18, /* Hang */ |
michael@0 | 94 | /** @stable ICU 2.2 */ |
michael@0 | 95 | USCRIPT_HEBREW = 19, /* Hebr */ |
michael@0 | 96 | /** @stable ICU 2.2 */ |
michael@0 | 97 | USCRIPT_HIRAGANA = 20, /* Hira */ |
michael@0 | 98 | /** @stable ICU 2.2 */ |
michael@0 | 99 | USCRIPT_KANNADA = 21, /* Knda */ |
michael@0 | 100 | /** @stable ICU 2.2 */ |
michael@0 | 101 | USCRIPT_KATAKANA = 22, /* Kana */ |
michael@0 | 102 | /** @stable ICU 2.2 */ |
michael@0 | 103 | USCRIPT_KHMER = 23, /* Khmr */ |
michael@0 | 104 | /** @stable ICU 2.2 */ |
michael@0 | 105 | USCRIPT_LAO = 24, /* Laoo */ |
michael@0 | 106 | /** @stable ICU 2.2 */ |
michael@0 | 107 | USCRIPT_LATIN = 25, /* Latn */ |
michael@0 | 108 | /** @stable ICU 2.2 */ |
michael@0 | 109 | USCRIPT_MALAYALAM = 26, /* Mlym */ |
michael@0 | 110 | /** @stable ICU 2.2 */ |
michael@0 | 111 | USCRIPT_MONGOLIAN = 27, /* Mong */ |
michael@0 | 112 | /** @stable ICU 2.2 */ |
michael@0 | 113 | USCRIPT_MYANMAR = 28, /* Mymr */ |
michael@0 | 114 | /** @stable ICU 2.2 */ |
michael@0 | 115 | USCRIPT_OGHAM = 29, /* Ogam */ |
michael@0 | 116 | /** @stable ICU 2.2 */ |
michael@0 | 117 | USCRIPT_OLD_ITALIC = 30, /* Ital */ |
michael@0 | 118 | /** @stable ICU 2.2 */ |
michael@0 | 119 | USCRIPT_ORIYA = 31, /* Orya */ |
michael@0 | 120 | /** @stable ICU 2.2 */ |
michael@0 | 121 | USCRIPT_RUNIC = 32, /* Runr */ |
michael@0 | 122 | /** @stable ICU 2.2 */ |
michael@0 | 123 | USCRIPT_SINHALA = 33, /* Sinh */ |
michael@0 | 124 | /** @stable ICU 2.2 */ |
michael@0 | 125 | USCRIPT_SYRIAC = 34, /* Syrc */ |
michael@0 | 126 | /** @stable ICU 2.2 */ |
michael@0 | 127 | USCRIPT_TAMIL = 35, /* Taml */ |
michael@0 | 128 | /** @stable ICU 2.2 */ |
michael@0 | 129 | USCRIPT_TELUGU = 36, /* Telu */ |
michael@0 | 130 | /** @stable ICU 2.2 */ |
michael@0 | 131 | USCRIPT_THAANA = 37, /* Thaa */ |
michael@0 | 132 | /** @stable ICU 2.2 */ |
michael@0 | 133 | USCRIPT_THAI = 38, /* Thai */ |
michael@0 | 134 | /** @stable ICU 2.2 */ |
michael@0 | 135 | USCRIPT_TIBETAN = 39, /* Tibt */ |
michael@0 | 136 | /** Canadian_Aboriginal script. @stable ICU 2.6 */ |
michael@0 | 137 | USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ |
michael@0 | 138 | /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ |
michael@0 | 139 | USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, |
michael@0 | 140 | /** @stable ICU 2.2 */ |
michael@0 | 141 | USCRIPT_YI = 41, /* Yiii */ |
michael@0 | 142 | /* New scripts in Unicode 3.2 */ |
michael@0 | 143 | /** @stable ICU 2.2 */ |
michael@0 | 144 | USCRIPT_TAGALOG = 42, /* Tglg */ |
michael@0 | 145 | /** @stable ICU 2.2 */ |
michael@0 | 146 | USCRIPT_HANUNOO = 43, /* Hano */ |
michael@0 | 147 | /** @stable ICU 2.2 */ |
michael@0 | 148 | USCRIPT_BUHID = 44, /* Buhd */ |
michael@0 | 149 | /** @stable ICU 2.2 */ |
michael@0 | 150 | USCRIPT_TAGBANWA = 45, /* Tagb */ |
michael@0 | 151 | |
michael@0 | 152 | /* New scripts in Unicode 4 */ |
michael@0 | 153 | /** @stable ICU 2.6 */ |
michael@0 | 154 | USCRIPT_BRAILLE = 46, /* Brai */ |
michael@0 | 155 | /** @stable ICU 2.6 */ |
michael@0 | 156 | USCRIPT_CYPRIOT = 47, /* Cprt */ |
michael@0 | 157 | /** @stable ICU 2.6 */ |
michael@0 | 158 | USCRIPT_LIMBU = 48, /* Limb */ |
michael@0 | 159 | /** @stable ICU 2.6 */ |
michael@0 | 160 | USCRIPT_LINEAR_B = 49, /* Linb */ |
michael@0 | 161 | /** @stable ICU 2.6 */ |
michael@0 | 162 | USCRIPT_OSMANYA = 50, /* Osma */ |
michael@0 | 163 | /** @stable ICU 2.6 */ |
michael@0 | 164 | USCRIPT_SHAVIAN = 51, /* Shaw */ |
michael@0 | 165 | /** @stable ICU 2.6 */ |
michael@0 | 166 | USCRIPT_TAI_LE = 52, /* Tale */ |
michael@0 | 167 | /** @stable ICU 2.6 */ |
michael@0 | 168 | USCRIPT_UGARITIC = 53, /* Ugar */ |
michael@0 | 169 | |
michael@0 | 170 | /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ |
michael@0 | 171 | USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ |
michael@0 | 172 | |
michael@0 | 173 | /* New scripts in Unicode 4.1 */ |
michael@0 | 174 | /** @stable ICU 3.4 */ |
michael@0 | 175 | USCRIPT_BUGINESE = 55, /* Bugi */ |
michael@0 | 176 | /** @stable ICU 3.4 */ |
michael@0 | 177 | USCRIPT_GLAGOLITIC = 56, /* Glag */ |
michael@0 | 178 | /** @stable ICU 3.4 */ |
michael@0 | 179 | USCRIPT_KHAROSHTHI = 57, /* Khar */ |
michael@0 | 180 | /** @stable ICU 3.4 */ |
michael@0 | 181 | USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ |
michael@0 | 182 | /** @stable ICU 3.4 */ |
michael@0 | 183 | USCRIPT_NEW_TAI_LUE = 59, /* Talu */ |
michael@0 | 184 | /** @stable ICU 3.4 */ |
michael@0 | 185 | USCRIPT_TIFINAGH = 60, /* Tfng */ |
michael@0 | 186 | /** @stable ICU 3.4 */ |
michael@0 | 187 | USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ |
michael@0 | 188 | |
michael@0 | 189 | /* New script codes from ISO 15924 */ |
michael@0 | 190 | /** @stable ICU 3.6 */ |
michael@0 | 191 | USCRIPT_BALINESE = 62, /* Bali */ |
michael@0 | 192 | /** @stable ICU 3.6 */ |
michael@0 | 193 | USCRIPT_BATAK = 63, /* Batk */ |
michael@0 | 194 | /** @stable ICU 3.6 */ |
michael@0 | 195 | USCRIPT_BLISSYMBOLS = 64, /* Blis */ |
michael@0 | 196 | /** @stable ICU 3.6 */ |
michael@0 | 197 | USCRIPT_BRAHMI = 65, /* Brah */ |
michael@0 | 198 | /** @stable ICU 3.6 */ |
michael@0 | 199 | USCRIPT_CHAM = 66, /* Cham */ |
michael@0 | 200 | /** @stable ICU 3.6 */ |
michael@0 | 201 | USCRIPT_CIRTH = 67, /* Cirt */ |
michael@0 | 202 | /** @stable ICU 3.6 */ |
michael@0 | 203 | USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ |
michael@0 | 204 | /** @stable ICU 3.6 */ |
michael@0 | 205 | USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ |
michael@0 | 206 | /** @stable ICU 3.6 */ |
michael@0 | 207 | USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ |
michael@0 | 208 | /** @stable ICU 3.6 */ |
michael@0 | 209 | USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ |
michael@0 | 210 | /** @stable ICU 3.6 */ |
michael@0 | 211 | USCRIPT_KHUTSURI = 72, /* Geok */ |
michael@0 | 212 | /** @stable ICU 3.6 */ |
michael@0 | 213 | USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ |
michael@0 | 214 | /** @stable ICU 3.6 */ |
michael@0 | 215 | USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ |
michael@0 | 216 | /** @stable ICU 3.6 */ |
michael@0 | 217 | USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ |
michael@0 | 218 | /** @stable ICU 3.6 */ |
michael@0 | 219 | USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ |
michael@0 | 220 | /** @stable ICU 3.6 */ |
michael@0 | 221 | USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ |
michael@0 | 222 | /** @stable ICU 3.6 */ |
michael@0 | 223 | USCRIPT_JAVANESE = 78, /* Java */ |
michael@0 | 224 | /** @stable ICU 3.6 */ |
michael@0 | 225 | USCRIPT_KAYAH_LI = 79, /* Kali */ |
michael@0 | 226 | /** @stable ICU 3.6 */ |
michael@0 | 227 | USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ |
michael@0 | 228 | /** @stable ICU 3.6 */ |
michael@0 | 229 | USCRIPT_LATIN_GAELIC = 81, /* Latg */ |
michael@0 | 230 | /** @stable ICU 3.6 */ |
michael@0 | 231 | USCRIPT_LEPCHA = 82, /* Lepc */ |
michael@0 | 232 | /** @stable ICU 3.6 */ |
michael@0 | 233 | USCRIPT_LINEAR_A = 83, /* Lina */ |
michael@0 | 234 | /** @stable ICU 4.6 */ |
michael@0 | 235 | USCRIPT_MANDAIC = 84, /* Mand */ |
michael@0 | 236 | /** Mandaic script (alias). @stable ICU 3.6 */ |
michael@0 | 237 | USCRIPT_MANDAEAN = USCRIPT_MANDAIC, |
michael@0 | 238 | /** @stable ICU 3.6 */ |
michael@0 | 239 | USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ |
michael@0 | 240 | /** @stable ICU 4.6 */ |
michael@0 | 241 | USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ |
michael@0 | 242 | /** Meroitic_Hieroglyphs script (alias). @stable ICU 3.6 */ |
michael@0 | 243 | USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, |
michael@0 | 244 | /** @stable ICU 3.6 */ |
michael@0 | 245 | USCRIPT_NKO = 87, /* Nkoo */ |
michael@0 | 246 | /** @stable ICU 3.6 */ |
michael@0 | 247 | USCRIPT_ORKHON = 88, /* Orkh */ |
michael@0 | 248 | /** @stable ICU 3.6 */ |
michael@0 | 249 | USCRIPT_OLD_PERMIC = 89, /* Perm */ |
michael@0 | 250 | /** @stable ICU 3.6 */ |
michael@0 | 251 | USCRIPT_PHAGS_PA = 90, /* Phag */ |
michael@0 | 252 | /** @stable ICU 3.6 */ |
michael@0 | 253 | USCRIPT_PHOENICIAN = 91, /* Phnx */ |
michael@0 | 254 | /** @stable ICU 52 */ |
michael@0 | 255 | USCRIPT_MIAO = 92, /* Plrd */ |
michael@0 | 256 | /** Miao script (alias). @stable ICU 3.6 */ |
michael@0 | 257 | USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, |
michael@0 | 258 | /** @stable ICU 3.6 */ |
michael@0 | 259 | USCRIPT_RONGORONGO = 93, /* Roro */ |
michael@0 | 260 | /** @stable ICU 3.6 */ |
michael@0 | 261 | USCRIPT_SARATI = 94, /* Sara */ |
michael@0 | 262 | /** @stable ICU 3.6 */ |
michael@0 | 263 | USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ |
michael@0 | 264 | /** @stable ICU 3.6 */ |
michael@0 | 265 | USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ |
michael@0 | 266 | /** @stable ICU 3.6 */ |
michael@0 | 267 | USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ |
michael@0 | 268 | /** @stable ICU 3.6 */ |
michael@0 | 269 | USCRIPT_TENGWAR = 98, /* Teng */ |
michael@0 | 270 | /** @stable ICU 3.6 */ |
michael@0 | 271 | USCRIPT_VAI = 99, /* Vaii */ |
michael@0 | 272 | /** @stable ICU 3.6 */ |
michael@0 | 273 | USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ |
michael@0 | 274 | /** @stable ICU 3.6 */ |
michael@0 | 275 | USCRIPT_CUNEIFORM = 101,/* Xsux */ |
michael@0 | 276 | /** @stable ICU 3.6 */ |
michael@0 | 277 | USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ |
michael@0 | 278 | /** @stable ICU 3.6 */ |
michael@0 | 279 | USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ |
michael@0 | 280 | |
michael@0 | 281 | /* New script codes from ISO 15924 */ |
michael@0 | 282 | /** @stable ICU 3.8 */ |
michael@0 | 283 | USCRIPT_CARIAN = 104,/* Cari */ |
michael@0 | 284 | /** @stable ICU 3.8 */ |
michael@0 | 285 | USCRIPT_JAPANESE = 105,/* Jpan */ |
michael@0 | 286 | /** @stable ICU 3.8 */ |
michael@0 | 287 | USCRIPT_LANNA = 106,/* Lana */ |
michael@0 | 288 | /** @stable ICU 3.8 */ |
michael@0 | 289 | USCRIPT_LYCIAN = 107,/* Lyci */ |
michael@0 | 290 | /** @stable ICU 3.8 */ |
michael@0 | 291 | USCRIPT_LYDIAN = 108,/* Lydi */ |
michael@0 | 292 | /** @stable ICU 3.8 */ |
michael@0 | 293 | USCRIPT_OL_CHIKI = 109,/* Olck */ |
michael@0 | 294 | /** @stable ICU 3.8 */ |
michael@0 | 295 | USCRIPT_REJANG = 110,/* Rjng */ |
michael@0 | 296 | /** @stable ICU 3.8 */ |
michael@0 | 297 | USCRIPT_SAURASHTRA = 111,/* Saur */ |
michael@0 | 298 | /** @stable ICU 3.8 */ |
michael@0 | 299 | USCRIPT_SIGN_WRITING = 112,/* Sgnw */ |
michael@0 | 300 | /** @stable ICU 3.8 */ |
michael@0 | 301 | USCRIPT_SUNDANESE = 113,/* Sund */ |
michael@0 | 302 | /** @stable ICU 3.8 */ |
michael@0 | 303 | USCRIPT_MOON = 114,/* Moon */ |
michael@0 | 304 | /** @stable ICU 3.8 */ |
michael@0 | 305 | USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ |
michael@0 | 306 | |
michael@0 | 307 | /* New script codes from ISO 15924 */ |
michael@0 | 308 | /** @stable ICU 4.0 */ |
michael@0 | 309 | USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ |
michael@0 | 310 | /** @stable ICU 4.0 */ |
michael@0 | 311 | USCRIPT_AVESTAN = 117,/* Avst */ |
michael@0 | 312 | /** @stable ICU 4.0 */ |
michael@0 | 313 | USCRIPT_CHAKMA = 118,/* Cakm */ |
michael@0 | 314 | /** @stable ICU 4.0 */ |
michael@0 | 315 | USCRIPT_KOREAN = 119,/* Kore */ |
michael@0 | 316 | /** @stable ICU 4.0 */ |
michael@0 | 317 | USCRIPT_KAITHI = 120,/* Kthi */ |
michael@0 | 318 | /** @stable ICU 4.0 */ |
michael@0 | 319 | USCRIPT_MANICHAEAN = 121,/* Mani */ |
michael@0 | 320 | /** @stable ICU 4.0 */ |
michael@0 | 321 | USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ |
michael@0 | 322 | /** @stable ICU 4.0 */ |
michael@0 | 323 | USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ |
michael@0 | 324 | /** @stable ICU 4.0 */ |
michael@0 | 325 | USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ |
michael@0 | 326 | /** @stable ICU 4.0 */ |
michael@0 | 327 | USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ |
michael@0 | 328 | /** @stable ICU 4.0 */ |
michael@0 | 329 | USCRIPT_SAMARITAN = 126,/* Samr */ |
michael@0 | 330 | /** @stable ICU 4.0 */ |
michael@0 | 331 | USCRIPT_TAI_VIET = 127,/* Tavt */ |
michael@0 | 332 | /** @stable ICU 4.0 */ |
michael@0 | 333 | USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ |
michael@0 | 334 | /** @stable ICU 4.0 */ |
michael@0 | 335 | USCRIPT_SYMBOLS = 129,/* Zsym */ |
michael@0 | 336 | |
michael@0 | 337 | /* New script codes from ISO 15924 */ |
michael@0 | 338 | /** @stable ICU 4.4 */ |
michael@0 | 339 | USCRIPT_BAMUM = 130,/* Bamu */ |
michael@0 | 340 | /** @stable ICU 4.4 */ |
michael@0 | 341 | USCRIPT_LISU = 131,/* Lisu */ |
michael@0 | 342 | /** @stable ICU 4.4 */ |
michael@0 | 343 | USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ |
michael@0 | 344 | /** @stable ICU 4.4 */ |
michael@0 | 345 | USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ |
michael@0 | 346 | |
michael@0 | 347 | /* New script codes from ISO 15924 */ |
michael@0 | 348 | /** @stable ICU 4.6 */ |
michael@0 | 349 | USCRIPT_BASSA_VAH = 134,/* Bass */ |
michael@0 | 350 | /** Duployan shorthand; note that this constant's name is spelled "SHORTAND". @stable ICU 4.6 */ |
michael@0 | 351 | USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */ |
michael@0 | 352 | /** @stable ICU 4.6 */ |
michael@0 | 353 | USCRIPT_ELBASAN = 136,/* Elba */ |
michael@0 | 354 | /** @stable ICU 4.6 */ |
michael@0 | 355 | USCRIPT_GRANTHA = 137,/* Gran */ |
michael@0 | 356 | /** @stable ICU 4.6 */ |
michael@0 | 357 | USCRIPT_KPELLE = 138,/* Kpel */ |
michael@0 | 358 | /** @stable ICU 4.6 */ |
michael@0 | 359 | USCRIPT_LOMA = 139,/* Loma */ |
michael@0 | 360 | /** @stable ICU 4.6 */ |
michael@0 | 361 | USCRIPT_MENDE = 140,/* Mend */ |
michael@0 | 362 | /** @stable ICU 4.6 */ |
michael@0 | 363 | USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ |
michael@0 | 364 | /** @stable ICU 4.6 */ |
michael@0 | 365 | USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ |
michael@0 | 366 | /** @stable ICU 4.6 */ |
michael@0 | 367 | USCRIPT_NABATAEAN = 143,/* Nbat */ |
michael@0 | 368 | /** @stable ICU 4.6 */ |
michael@0 | 369 | USCRIPT_PALMYRENE = 144,/* Palm */ |
michael@0 | 370 | /** @stable ICU 4.6 */ |
michael@0 | 371 | USCRIPT_SINDHI = 145,/* Sind */ |
michael@0 | 372 | /** @stable ICU 4.6 */ |
michael@0 | 373 | USCRIPT_WARANG_CITI = 146,/* Wara */ |
michael@0 | 374 | |
michael@0 | 375 | /** @stable ICU 4.8 */ |
michael@0 | 376 | USCRIPT_AFAKA = 147,/* Afak */ |
michael@0 | 377 | /** @stable ICU 4.8 */ |
michael@0 | 378 | USCRIPT_JURCHEN = 148,/* Jurc */ |
michael@0 | 379 | /** @stable ICU 4.8 */ |
michael@0 | 380 | USCRIPT_MRO = 149,/* Mroo */ |
michael@0 | 381 | /** @stable ICU 4.8 */ |
michael@0 | 382 | USCRIPT_NUSHU = 150,/* Nshu */ |
michael@0 | 383 | /** @stable ICU 4.8 */ |
michael@0 | 384 | USCRIPT_SHARADA = 151,/* Shrd */ |
michael@0 | 385 | /** @stable ICU 4.8 */ |
michael@0 | 386 | USCRIPT_SORA_SOMPENG = 152,/* Sora */ |
michael@0 | 387 | /** @stable ICU 4.8 */ |
michael@0 | 388 | USCRIPT_TAKRI = 153,/* Takr */ |
michael@0 | 389 | /** @stable ICU 4.8 */ |
michael@0 | 390 | USCRIPT_TANGUT = 154,/* Tang */ |
michael@0 | 391 | /** @stable ICU 4.8 */ |
michael@0 | 392 | USCRIPT_WOLEAI = 155,/* Wole */ |
michael@0 | 393 | |
michael@0 | 394 | /** @stable ICU 49 */ |
michael@0 | 395 | USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ |
michael@0 | 396 | /** @stable ICU 49 */ |
michael@0 | 397 | USCRIPT_KHOJKI = 157,/* Khoj */ |
michael@0 | 398 | /** @stable ICU 49 */ |
michael@0 | 399 | USCRIPT_TIRHUTA = 158,/* Tirh */ |
michael@0 | 400 | |
michael@0 | 401 | /** @stable ICU 52 */ |
michael@0 | 402 | USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ |
michael@0 | 403 | /** @stable ICU 52 */ |
michael@0 | 404 | USCRIPT_MAHAJANI = 160,/* Mahj */ |
michael@0 | 405 | |
michael@0 | 406 | /* Private use codes from Qaaa - Qabx are not supported */ |
michael@0 | 407 | |
michael@0 | 408 | /** One more than the highest script code value defined in this enum. @stable ICU 2.2 */ |
michael@0 | 409 | USCRIPT_CODE_LIMIT = 161 |
michael@0 | 410 | } UScriptCode; |
michael@0 | 411 | |
michael@0 | 412 | /** |
michael@0 | 413 | * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. |
michael@0 | 414 | * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". |
michael@0 | 415 | * Fills in USCRIPT_LATIN given "en" OR "en_US". |
michael@0 | 416 | * If the required capacity is greater than the capacity of the destination buffer, then the error code |
michael@0 | 417 | * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned. |
michael@0 | 418 | * |
michael@0 | 419 | * <p>Note: To search by short or long script alias only, use |
michael@0 | 420 | * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does |
michael@0 | 421 | * a fast lookup without accessing the locale data. |
michael@0 | 422 | * @param nameOrAbbrOrLocale name of the script, as given in |
michael@0 | 423 | * PropertyValueAliases.txt, or ISO 15924 code or locale |
michael@0 | 424 | * @param fillIn the UScriptCode buffer to fill in the script code |
michael@0 | 425 | * @param capacity the capacity (size) of the UScriptCode buffer passed in. |
michael@0 | 426 | * @param err the error status code. |
michael@0 | 427 | * @return The number of script codes filled in the buffer passed in |
michael@0 | 428 | * @stable ICU 2.4 |
michael@0 | 429 | */ |
michael@0 | 430 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 431 | uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); |
michael@0 | 432 | |
michael@0 | 433 | /** |
michael@0 | 434 | * Gets the long script name associated with the given script code. |
michael@0 | 435 | * Returns "Malayalam" given USCRIPT_MALAYALAM. |
michael@0 | 436 | * @param scriptCode UScriptCode enum |
michael@0 | 437 | * @return script long name as given in |
michael@0 | 438 | * PropertyValueAliases.txt, or NULL if scriptCode is invalid |
michael@0 | 439 | * @stable ICU 2.4 |
michael@0 | 440 | */ |
michael@0 | 441 | U_STABLE const char* U_EXPORT2 |
michael@0 | 442 | uscript_getName(UScriptCode scriptCode); |
michael@0 | 443 | |
michael@0 | 444 | /** |
michael@0 | 445 | * Gets the short (abbreviated) script name associated with the given script code. |
michael@0 | 446 | * Returns "Mlym" given USCRIPT_MALAYALAM. |
michael@0 | 447 | * @param scriptCode UScriptCode enum |
michael@0 | 448 | * @return script abbreviated name as given in |
michael@0 | 449 | * PropertyValueAliases.txt, or NULL if scriptCode is invalid |
michael@0 | 450 | * @stable ICU 2.4 |
michael@0 | 451 | */ |
michael@0 | 452 | U_STABLE const char* U_EXPORT2 |
michael@0 | 453 | uscript_getShortName(UScriptCode scriptCode); |
michael@0 | 454 | |
michael@0 | 455 | /** |
michael@0 | 456 | * Gets the script code associated with the given codepoint. |
michael@0 | 457 | * For example, returns USCRIPT_MALAYALAM given 0x0D02. |
michael@0 | 458 | * @param codepoint UChar32 codepoint |
michael@0 | 459 | * @param err the error status code. |
michael@0 | 460 | * @return The UScriptCode, or 0 if codepoint is invalid (NOTE(review): 0 is also the value of USCRIPT_COMMON - confirm against the implementation) |
michael@0 | 461 | * @stable ICU 2.4 |
michael@0 | 462 | */ |
michael@0 | 463 | U_STABLE UScriptCode U_EXPORT2 |
michael@0 | 464 | uscript_getScript(UChar32 codepoint, UErrorCode *err); |
michael@0 | 465 | |
michael@0 | 466 | /** |
michael@0 | 467 | * Tests whether the Script_Extensions of code point c contain script sc. |
michael@0 | 468 | * If c does not have explicit Script_Extensions, then this tests whether |
michael@0 | 469 | * c has the Script property value sc. |
michael@0 | 470 | * |
michael@0 | 471 | * Some characters are commonly used in multiple scripts. |
michael@0 | 472 | * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. |
michael@0 | 473 | * |
michael@0 | 474 | * The Script_Extensions property is provisional. It may be modified or removed |
michael@0 | 475 | * in future versions of the Unicode Standard, and thus in ICU. |
michael@0 | 476 | * @param c code point |
michael@0 | 477 | * @param sc script code |
michael@0 | 478 | * @return TRUE if sc is in Script_Extensions(c) |
michael@0 | 479 | * @stable ICU 49 |
michael@0 | 480 | */ |
michael@0 | 481 | U_STABLE UBool U_EXPORT2 |
michael@0 | 482 | uscript_hasScript(UChar32 c, UScriptCode sc); |
michael@0 | 483 | |
michael@0 | 484 | /** |
michael@0 | 485 | * Writes code point c's Script_Extensions as a list of UScriptCode values |
michael@0 | 486 | * to the output scripts array and returns the number of script codes. |
michael@0 | 487 | * - If c does have Script_Extensions, then the Script property value |
michael@0 | 488 | * (normally Common or Inherited) is not included. |
michael@0 | 489 | * - If c does not have Script_Extensions, then the one Script code is written to the output array. |
michael@0 | 490 | * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. |
michael@0 | 491 | * In other words, if the return value is 1, |
michael@0 | 492 | * then the output array contains exactly c's single Script code. |
michael@0 | 493 | * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. |
michael@0 | 494 | * |
michael@0 | 495 | * Some characters are commonly used in multiple scripts. |
michael@0 | 496 | * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. |
michael@0 | 497 | * |
michael@0 | 498 | * If there are more than capacity script codes to be written, then |
michael@0 | 499 | * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. |
michael@0 | 500 | * (Usual ICU buffer handling behavior.) |
michael@0 | 501 | * |
michael@0 | 502 | * The Script_Extensions property is provisional. It may be modified or removed |
michael@0 | 503 | * in future versions of the Unicode Standard, and thus in ICU. |
michael@0 | 504 | * @param c code point |
michael@0 | 505 | * @param scripts output script code array |
michael@0 | 506 | * @param capacity capacity of the scripts array, counted in script codes |
michael@0 | 507 | * @param errorCode Standard ICU error code. Its input value must |
michael@0 | 508 | * pass the U_SUCCESS() test, or else the function returns |
michael@0 | 509 | * immediately. Check for U_FAILURE() on output or use with |
michael@0 | 510 | * function chaining. (See User Guide for details.) |
michael@0 | 511 | * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, |
michael@0 | 512 | * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity |
michael@0 | 513 | * @stable ICU 49 |
michael@0 | 514 | */ |
michael@0 | 515 | U_STABLE int32_t U_EXPORT2 |
michael@0 | 516 | uscript_getScriptExtensions(UChar32 c, |
michael@0 | 517 | UScriptCode *scripts, int32_t capacity, |
michael@0 | 518 | UErrorCode *errorCode); |
michael@0 | 519 | |
michael@0 | 520 | #ifndef U_HIDE_DRAFT_API |
michael@0 | 521 | |
michael@0 | 522 | /** |
michael@0 | 523 | * Script usage constants, as returned by uscript_getUsage(). |
michael@0 | 524 | * See UAX #31 Unicode Identifier and Pattern Syntax. |
michael@0 | 525 | * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers |
michael@0 | 526 | * |
michael@0 | 527 | * @draft ICU 51 |
michael@0 | 528 | */ |
michael@0 | 529 | typedef enum UScriptUsage { |
michael@0 | 530 | /** Not encoded in Unicode. @draft ICU 51 */ |
michael@0 | 531 | USCRIPT_USAGE_NOT_ENCODED, |
michael@0 | 532 | /** Unknown script usage. @draft ICU 51 */ |
michael@0 | 533 | USCRIPT_USAGE_UNKNOWN, |
michael@0 | 534 | /** Candidate for Exclusion from Identifiers. @draft ICU 51 */ |
michael@0 | 535 | USCRIPT_USAGE_EXCLUDED, |
michael@0 | 536 | /** Limited Use script. @draft ICU 51 */ |
michael@0 | 537 | USCRIPT_USAGE_LIMITED_USE, |
michael@0 | 538 | /** Aspirational Use script. @draft ICU 51 */ |
michael@0 | 539 | USCRIPT_USAGE_ASPIRATIONAL, |
michael@0 | 540 | /** Recommended script. @draft ICU 51 */ |
michael@0 | 541 | USCRIPT_USAGE_RECOMMENDED |
michael@0 | 542 | } UScriptUsage; |
michael@0 | 543 | |
michael@0 | 544 | /** |
michael@0 | 545 | * Writes the script sample character string to dest. |
michael@0 | 546 | * This string normally consists of one code point but might be longer. |
michael@0 | 547 | * The string is empty if the script is not encoded. |
michael@0 | 548 | * |
michael@0 | 549 | * @param script script code |
michael@0 | 550 | * @param dest output string array |
michael@0 | 551 | * @param capacity number of UChars in the dest array |
michael@0 | 552 | * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input |
michael@0 | 553 | * @return the string length, even if U_BUFFER_OVERFLOW_ERROR is set |
michael@0 | 554 | * @draft ICU 51 |
michael@0 | 555 | */ |
michael@0 | 556 | U_DRAFT int32_t U_EXPORT2 |
michael@0 | 557 | uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); |
michael@0 | 558 | |
michael@0 | 559 | #if U_SHOW_CPLUSPLUS_API |
michael@0 | 560 | |
michael@0 | 561 | U_NAMESPACE_BEGIN |
michael@0 | 562 | class UnicodeString; |
michael@0 | 563 | U_NAMESPACE_END |
michael@0 | 564 | |
michael@0 | 565 | /** |
michael@0 | 566 | * Returns the script sample character string as a UnicodeString (C++ API only). |
michael@0 | 567 | * This string normally consists of one code point but might be longer. |
michael@0 | 568 | * The string is empty if the script is not encoded. |
michael@0 | 569 | * |
michael@0 | 570 | * @param script script code |
michael@0 | 571 | * @return the sample character string |
michael@0 | 572 | * @draft ICU 51 |
michael@0 | 573 | */ |
michael@0 | 574 | U_COMMON_API icu::UnicodeString U_EXPORT2 |
michael@0 | 575 | uscript_getSampleUnicodeString(UScriptCode script); |
michael@0 | 576 | |
michael@0 | 577 | #endif /* U_SHOW_CPLUSPLUS_API */ |
michael@0 | 578 | |
michael@0 | 579 | /** |
michael@0 | 580 | * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. |
michael@0 | 581 | * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. |
michael@0 | 582 | * |
michael@0 | 583 | * @param script script code |
michael@0 | 584 | * @return the script's UScriptUsage value |
michael@0 | 585 | * @see UScriptUsage |
michael@0 | 586 | * @draft ICU 51 |
michael@0 | 587 | */ |
michael@0 | 588 | U_DRAFT UScriptUsage U_EXPORT2 |
michael@0 | 589 | uscript_getUsage(UScriptCode script); |
michael@0 | 590 | |
michael@0 | 591 | /** |
michael@0 | 592 | * Returns TRUE if the script is written right-to-left. |
michael@0 | 593 | * For example, Arab (USCRIPT_ARABIC) and Hebr (USCRIPT_HEBREW). |
michael@0 | 594 | * |
michael@0 | 595 | * @param script script code |
michael@0 | 596 | * @return TRUE if the script is right-to-left |
michael@0 | 597 | * @draft ICU 51 |
michael@0 | 598 | */ |
michael@0 | 599 | U_DRAFT UBool U_EXPORT2 |
michael@0 | 600 | uscript_isRightToLeft(UScriptCode script); |
michael@0 | 601 | |
michael@0 | 602 | /** |
michael@0 | 603 | * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). |
michael@0 | 604 | * Such a script typically requires dictionary-based line breaking. |
michael@0 | 605 | * For example, Hani (USCRIPT_HAN) and Thai (USCRIPT_THAI). |
michael@0 | 606 | * |
michael@0 | 607 | * @param script script code |
michael@0 | 608 | * @return TRUE if the script allows line breaks between letters |
michael@0 | 609 | * @draft ICU 51 |
michael@0 | 610 | */ |
michael@0 | 611 | U_DRAFT UBool U_EXPORT2 |
michael@0 | 612 | uscript_breaksBetweenLetters(UScriptCode script); |
michael@0 | 613 | |
michael@0 | 614 | /** |
michael@0 | 615 | * Returns TRUE if, in modern (or most recent) usage of the script, case distinctions are customary. |
michael@0 | 616 | * For example, Latn (USCRIPT_LATIN) and Cyrl (USCRIPT_CYRILLIC). |
michael@0 | 617 | * |
michael@0 | 618 | * @param script script code |
michael@0 | 619 | * @return TRUE if the script is cased |
michael@0 | 620 | * @draft ICU 51 |
michael@0 | 621 | */ |
michael@0 | 622 | U_DRAFT UBool U_EXPORT2 |
michael@0 | 623 | uscript_isCased(UScriptCode script); |
michael@0 | 624 | |
michael@0 | 625 | #endif /* U_HIDE_DRAFT_API */ |
michael@0 | 626 | |
michael@0 | 627 | #endif |