michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1997-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * michael@0: * File USCRIPT.H michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 07/06/2001 Ram Creation. michael@0: ****************************************************************************** michael@0: */ michael@0: michael@0: #ifndef USCRIPT_H michael@0: #define USCRIPT_H michael@0: #include "unicode/utypes.h" michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C API: Unicode Script Information michael@0: */ michael@0: michael@0: /** michael@0: * Constants for ISO 15924 script codes. michael@0: * michael@0: * Many of these script codes - those from Unicode's ScriptNames.txt - michael@0: * are character property values for Unicode's Script property. michael@0: * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/). michael@0: * michael@0: * Starting with ICU 3.6, constants for most ISO 15924 script codes michael@0: * are included (currently excluding private-use codes Qaaa..Qabx). michael@0: * For scripts for which there are codes in ISO 15924 but which are not michael@0: * used in the Unicode Character Database (UCD), there are no Unicode characters michael@0: * associated with those scripts. michael@0: * michael@0: * For example, there are no characters that have a UCD script code of michael@0: * Hans or Hant. All Han ideographs have the Hani script code. michael@0: * The Hans and Hant script codes are used with CLDR data. michael@0: * michael@0: * ISO 15924 script codes are included for use with CLDR and similar. michael@0: * michael@0: * @stable ICU 2.2 michael@0: */ michael@0: typedef enum UScriptCode { michael@0: /* michael@0: * Note: UScriptCode constants and their ISO script code comments michael@0: * are parsed by preparseucd.py. michael@0: * It matches lines like michael@0: * USCRIPT_ = , / * * / michael@0: */ michael@0: michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_INVALID_CODE = -1, michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_COMMON = 0, /* Zyyy */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_ARABIC = 2, /* Arab */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_ARMENIAN = 3, /* Armn */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_BENGALI = 4, /* Beng */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_BOPOMOFO = 5, /* Bopo */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_CHEROKEE = 6, /* Cher */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_COPTIC = 7, /* Copt */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_CYRILLIC = 8, /* Cyrl */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_DESERET = 9, /* Dsrt */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_DEVANAGARI = 10, /* Deva */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_ETHIOPIC = 11, /* Ethi */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_GEORGIAN = 12, /* Geor */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_GOTHIC = 13, /* Goth */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_GREEK = 14, /* Grek */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_GUJARATI = 15, /* Gujr */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_GURMUKHI = 16, /* Guru */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_HAN = 17, /* Hani */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_HANGUL = 18, /* Hang */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_HEBREW = 19, /* Hebr */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_HIRAGANA = 20, /* Hira */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_KANNADA = 21, /* Knda */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_KATAKANA = 22, /* Kana */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_KHMER = 23, /* Khmr */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_LAO = 24, /* Laoo */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_LATIN = 25, /* Latn */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_MALAYALAM = 26, /* Mlym */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_MONGOLIAN = 27, /* Mong */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_MYANMAR = 28, /* Mymr */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_OGHAM = 29, /* Ogam */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_OLD_ITALIC = 30, /* Ital */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_ORIYA = 31, /* Orya */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_RUNIC = 32, /* Runr */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_SINHALA = 33, /* Sinh */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_SYRIAC = 34, /* Syrc */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_TAMIL = 35, /* Taml */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_TELUGU = 36, /* Telu */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_THAANA = 37, /* Thaa */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_THAI = 38, /* Thai */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_TIBETAN = 39, /* Tibt */ michael@0: /** Canadian_Aboriginal script. @stable ICU 2.6 */ michael@0: USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ michael@0: /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ michael@0: USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_YI = 41, /* Yiii */ michael@0: /* New scripts in Unicode 3.2 */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_TAGALOG = 42, /* Tglg */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_HANUNOO = 43, /* Hano */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_BUHID = 44, /* Buhd */ michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_TAGBANWA = 45, /* Tagb */ michael@0: michael@0: /* New scripts in Unicode 4 */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_BRAILLE = 46, /* Brai */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_CYPRIOT = 47, /* Cprt */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_LIMBU = 48, /* Limb */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_LINEAR_B = 49, /* Linb */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_OSMANYA = 50, /* Osma */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_SHAVIAN = 51, /* Shaw */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_TAI_LE = 52, /* Tale */ michael@0: /** @stable ICU 2.6 */ michael@0: USCRIPT_UGARITIC = 53, /* Ugar */ michael@0: michael@0: /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ michael@0: USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ michael@0: michael@0: /* New scripts in Unicode 4.1 */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_BUGINESE = 55, /* Bugi */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_GLAGOLITIC = 56, /* Glag */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_KHAROSHTHI = 57, /* Khar */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_NEW_TAI_LUE = 59, /* Talu */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_TIFINAGH = 60, /* Tfng */ michael@0: /** @stable ICU 3.4 */ michael@0: USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ michael@0: michael@0: /* New script codes from ISO 15924 */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_BALINESE = 62, /* Bali */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_BATAK = 63, /* Batk */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_BLISSYMBOLS = 64, /* Blis */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_BRAHMI = 65, /* Brah */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_CHAM = 66, /* Cham */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_CIRTH = 67, /* Cirt */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_KHUTSURI = 72, /* Geok */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_JAVANESE = 78, /* Java */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_KAYAH_LI = 79, /* Kali */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_LATIN_GAELIC = 81, /* Latg */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_LEPCHA = 82, /* Lepc */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_LINEAR_A = 83, /* Lina */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_MANDAIC = 84, /* Mand */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_MANDAEAN = USCRIPT_MANDAIC, michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_NKO = 87, /* Nkoo */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_ORKHON = 88, /* Orkh */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_OLD_PERMIC = 89, /* Perm */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_PHAGS_PA = 90, /* Phag */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_PHOENICIAN = 91, /* Phnx */ michael@0: /** @stable ICU 52 */ michael@0: USCRIPT_MIAO = 92, /* Plrd */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_RONGORONGO = 93, /* Roro */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_SARATI = 94, /* Sara */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_TENGWAR = 98, /* Teng */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_VAI = 99, /* Vaii */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_CUNEIFORM = 101,/* Xsux */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ michael@0: /** @stable ICU 3.6 */ michael@0: USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ michael@0: michael@0: /* New script codes from ISO 15924 */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_CARIAN = 104,/* Cari */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_JAPANESE = 105,/* Jpan */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_LANNA = 106,/* Lana */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_LYCIAN = 107,/* Lyci */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_LYDIAN = 108,/* Lydi */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_OL_CHIKI = 109,/* Olck */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_REJANG = 110,/* Rjng */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_SAURASHTRA = 111,/* Saur */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_SIGN_WRITING = 112,/* Sgnw */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_SUNDANESE = 113,/* Sund */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_MOON = 114,/* Moon */ michael@0: /** @stable ICU 3.8 */ michael@0: USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ michael@0: michael@0: /* New script codes from ISO 15924 */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_AVESTAN = 117,/* Avst */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_CHAKMA = 118,/* Cakm */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_KOREAN = 119,/* Kore */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_KAITHI = 120,/* Kthi */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_MANICHAEAN = 121,/* Mani */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_SAMARITAN = 126,/* Samr */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_TAI_VIET = 127,/* Tavt */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ michael@0: /** @stable ICU 4.0 */ michael@0: USCRIPT_SYMBOLS = 129,/* Zsym */ michael@0: michael@0: /* New script codes from ISO 15924 */ michael@0: /** @stable ICU 4.4 */ michael@0: USCRIPT_BAMUM = 130,/* Bamu */ michael@0: /** @stable ICU 4.4 */ michael@0: USCRIPT_LISU = 131,/* Lisu */ michael@0: /** @stable ICU 4.4 */ michael@0: USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ michael@0: /** @stable ICU 4.4 */ michael@0: USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ michael@0: michael@0: /* New script codes from ISO 15924 */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_BASSA_VAH = 134,/* Bass */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_ELBASAN = 136,/* Elba */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_GRANTHA = 137,/* Gran */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_KPELLE = 138,/* Kpel */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_LOMA = 139,/* Loma */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_MENDE = 140,/* Mend */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_NABATAEAN = 143,/* Nbat */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_PALMYRENE = 144,/* Palm */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_SINDHI = 145,/* Sind */ michael@0: /** @stable ICU 4.6 */ michael@0: USCRIPT_WARANG_CITI = 146,/* Wara */ michael@0: michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_AFAKA = 147,/* Afak */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_JURCHEN = 148,/* Jurc */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_MRO = 149,/* Mroo */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_NUSHU = 150,/* Nshu */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_SHARADA = 151,/* Shrd */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_SORA_SOMPENG = 152,/* Sora */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_TAKRI = 153,/* Takr */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_TANGUT = 154,/* Tang */ michael@0: /** @stable ICU 4.8 */ michael@0: USCRIPT_WOLEAI = 155,/* Wole */ michael@0: michael@0: /** @stable ICU 49 */ michael@0: USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ michael@0: /** @stable ICU 49 */ michael@0: USCRIPT_KHOJKI = 157,/* Khoj */ michael@0: /** @stable ICU 49 */ michael@0: USCRIPT_TIRHUTA = 158,/* Tirh */ michael@0: michael@0: /** @stable ICU 52 */ michael@0: USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ michael@0: /** @stable ICU 52 */ michael@0: USCRIPT_MAHAJANI = 160,/* Mahj */ michael@0: michael@0: /* Private use codes from Qaaa - Qabx are not supported */ michael@0: michael@0: /** @stable ICU 2.2 */ michael@0: USCRIPT_CODE_LIMIT = 161 michael@0: } UScriptCode; michael@0: michael@0: /** michael@0: * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. michael@0: * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym". michael@0: * Fills in USCRIPT_LATIN given "en" OR "en_US" michael@0: * If required capacity is greater than capacity of the destination buffer then the error code michael@0: * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned michael@0: * michael@0: *

Note: To search by short or long script alias only, use michael@0: * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does michael@0: * a fast lookup with no access of the locale data. michael@0: * @param nameOrAbbrOrLocale name of the script, as given in michael@0: * PropertyValueAliases.txt, or ISO 15924 code or locale michael@0: * @param fillIn the UScriptCode buffer to fill in the script code michael@0: * @param capacity the capacity (size) fo UScriptCode buffer passed in. michael@0: * @param err the error status code. michael@0: * @return The number of script codes filled in the buffer passed in michael@0: * @stable ICU 2.4 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); michael@0: michael@0: /** michael@0: * Gets a script name associated with the given script code. michael@0: * Returns "Malayam" given USCRIPT_MALAYALAM michael@0: * @param scriptCode UScriptCode enum michael@0: * @return script long name as given in michael@0: * PropertyValueAliases.txt, or NULL if scriptCode is invalid michael@0: * @stable ICU 2.4 michael@0: */ michael@0: U_STABLE const char* U_EXPORT2 michael@0: uscript_getName(UScriptCode scriptCode); michael@0: michael@0: /** michael@0: * Gets a script name associated with the given script code. michael@0: * Returns "Mlym" given USCRIPT_MALAYALAM michael@0: * @param scriptCode UScriptCode enum michael@0: * @return script abbreviated name as given in michael@0: * PropertyValueAliases.txt, or NULL if scriptCode is invalid michael@0: * @stable ICU 2.4 michael@0: */ michael@0: U_STABLE const char* U_EXPORT2 michael@0: uscript_getShortName(UScriptCode scriptCode); michael@0: michael@0: /** michael@0: * Gets the script code associated with the given codepoint. michael@0: * Returns USCRIPT_MALAYALAM given 0x0D02 michael@0: * @param codepoint UChar32 codepoint michael@0: * @param err the error status code. michael@0: * @return The UScriptCode, or 0 if codepoint is invalid michael@0: * @stable ICU 2.4 michael@0: */ michael@0: U_STABLE UScriptCode U_EXPORT2 michael@0: uscript_getScript(UChar32 codepoint, UErrorCode *err); michael@0: michael@0: /** michael@0: * Do the Script_Extensions of code point c contain script sc? michael@0: * If c does not have explicit Script_Extensions, then this tests whether michael@0: * c has the Script property value sc. michael@0: * michael@0: * Some characters are commonly used in multiple scripts. michael@0: * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. michael@0: * michael@0: * The Script_Extensions property is provisional. It may be modified or removed michael@0: * in future versions of the Unicode Standard, and thus in ICU. michael@0: * @param c code point michael@0: * @param sc script code michael@0: * @return TRUE if sc is in Script_Extensions(c) michael@0: * @stable ICU 49 michael@0: */ michael@0: U_STABLE UBool U_EXPORT2 michael@0: uscript_hasScript(UChar32 c, UScriptCode sc); michael@0: michael@0: /** michael@0: * Writes code point c's Script_Extensions as a list of UScriptCode values michael@0: * to the output scripts array and returns the number of script codes. michael@0: * - If c does have Script_Extensions, then the Script property value michael@0: * (normally Common or Inherited) is not included. michael@0: * - If c does not have Script_Extensions, then the one Script code is written to the output array. michael@0: * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. michael@0: * In other words, if the return value is 1, michael@0: * then the output array contains exactly c's single Script code. michael@0: * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. michael@0: * michael@0: * Some characters are commonly used in multiple scripts. michael@0: * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. michael@0: * michael@0: * If there are more than capacity script codes to be written, then michael@0: * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. michael@0: * (Usual ICU buffer handling behavior.) michael@0: * michael@0: * The Script_Extensions property is provisional. It may be modified or removed michael@0: * in future versions of the Unicode Standard, and thus in ICU. michael@0: * @param c code point michael@0: * @param scripts output script code array michael@0: * @param capacity capacity of the scripts array michael@0: * @param errorCode Standard ICU error code. Its input value must michael@0: * pass the U_SUCCESS() test, or else the function returns michael@0: * immediately. Check for U_FAILURE() on output or use with michael@0: * function chaining. (See User Guide for details.) michael@0: * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, michael@0: * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity michael@0: * @stable ICU 49 michael@0: */ michael@0: U_STABLE int32_t U_EXPORT2 michael@0: uscript_getScriptExtensions(UChar32 c, michael@0: UScriptCode *scripts, int32_t capacity, michael@0: UErrorCode *errorCode); michael@0: michael@0: #ifndef U_HIDE_DRAFT_API michael@0: michael@0: /** michael@0: * Script usage constants. michael@0: * See UAX #31 Unicode Identifier and Pattern Syntax. michael@0: * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers michael@0: * michael@0: * @draft ICU 51 michael@0: */ michael@0: typedef enum UScriptUsage { michael@0: /** Not encoded in Unicode. @draft ICU 51 */ michael@0: USCRIPT_USAGE_NOT_ENCODED, michael@0: /** Unknown script usage. @draft ICU 51 */ michael@0: USCRIPT_USAGE_UNKNOWN, michael@0: /** Candidate for Exclusion from Identifiers. @draft ICU 51 */ michael@0: USCRIPT_USAGE_EXCLUDED, michael@0: /** Limited Use script. @draft ICU 51 */ michael@0: USCRIPT_USAGE_LIMITED_USE, michael@0: /** Aspirational Use script. @draft ICU 51 */ michael@0: USCRIPT_USAGE_ASPIRATIONAL, michael@0: /** Recommended script. @draft ICU 51 */ michael@0: USCRIPT_USAGE_RECOMMENDED michael@0: } UScriptUsage; michael@0: michael@0: /** michael@0: * Writes the script sample character string. michael@0: * This string normally consists of one code point but might be longer. michael@0: * The string is empty if the script is not encoded. michael@0: * michael@0: * @param script script code michael@0: * @param dest output string array michael@0: * @param capacity number of UChars in the dest array michael@0: * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input michael@0: * @return the string length, even if U_BUFFER_OVERFLOW_ERROR michael@0: * @draft ICU 51 michael@0: */ michael@0: U_DRAFT int32_t U_EXPORT2 michael@0: uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); michael@0: michael@0: #if U_SHOW_CPLUSPLUS_API michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: class UnicodeString; michael@0: U_NAMESPACE_END michael@0: michael@0: /** michael@0: * Returns the script sample character string. michael@0: * This string normally consists of one code point but might be longer. michael@0: * The string is empty if the script is not encoded. michael@0: * michael@0: * @param script script code michael@0: * @return the sample character string michael@0: * @draft ICU 51 michael@0: */ michael@0: U_COMMON_API icu::UnicodeString U_EXPORT2 michael@0: uscript_getSampleUnicodeString(UScriptCode script); michael@0: michael@0: #endif michael@0: michael@0: /** michael@0: * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. michael@0: * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. michael@0: * michael@0: * @param script script code michael@0: * @return script usage michael@0: * @see UScriptUsage michael@0: * @draft ICU 51 michael@0: */ michael@0: U_DRAFT UScriptUsage U_EXPORT2 michael@0: uscript_getUsage(UScriptCode script); michael@0: michael@0: /** michael@0: * Returns TRUE if the script is written right-to-left. michael@0: * For example, Arab and Hebr. michael@0: * michael@0: * @param script script code michael@0: * @return TRUE if the script is right-to-left michael@0: * @draft ICU 51 michael@0: */ michael@0: U_DRAFT UBool U_EXPORT2 michael@0: uscript_isRightToLeft(UScriptCode script); michael@0: michael@0: /** michael@0: * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). michael@0: * Such a script typically requires dictionary-based line breaking. michael@0: * For example, Hani and Thai. michael@0: * michael@0: * @param script script code michael@0: * @return TRUE if the script allows line breaks between letters michael@0: * @draft ICU 51 michael@0: */ michael@0: U_DRAFT UBool U_EXPORT2 michael@0: uscript_breaksBetweenLetters(UScriptCode script); michael@0: michael@0: /** michael@0: * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary. michael@0: * For example, Latn and Cyrl. michael@0: * michael@0: * @param script script code michael@0: * @return TRUE if the script is cased michael@0: * @draft ICU 51 michael@0: */ michael@0: U_DRAFT UBool U_EXPORT2 michael@0: uscript_isCased(UScriptCode script); michael@0: michael@0: #endif /* U_HIDE_DRAFT_API */ michael@0: michael@0: #endif