intl/icu/source/common/propname.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (c) 2002-2011, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Author: Alan Liu
michael@0 7 * Created: October 30 2002
michael@0 8 * Since: ICU 2.4
michael@0 9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
michael@0 10 **********************************************************************
michael@0 11 */
michael@0 12 #ifndef PROPNAME_H
michael@0 13 #define PROPNAME_H
michael@0 14
michael@0 15 #include "unicode/utypes.h"
michael@0 16 #include "unicode/bytestrie.h"
michael@0 17 #include "unicode/uchar.h"
michael@0 18 #include "udataswp.h"
michael@0 19 #include "uprops.h"
michael@0 20
michael@0 21 /*
michael@0 22 * This header defines the in-memory layout of the property names data
michael@0 23 * structure representing the UCD data files PropertyAliases.txt and
michael@0 24 * PropertyValueAliases.txt. It is used by:
michael@0 25 * propname.cpp - reads data
michael@0 26 * genpname - creates data
michael@0 27 */
michael@0 28
michael@0 29 /* low-level char * property name comparison -------------------------------- */
michael@0 30
michael@0 31 U_CDECL_BEGIN
michael@0 32
michael@0 33 /**
michael@0 34 * \def uprv_comparePropertyNames
michael@0 35 * Unicode property names and property value names are compared "loosely".
michael@0 36 *
michael@0 37 * UCD.html 4.0.1 says:
michael@0 38 * For all property names, property value names, and for property values for
michael@0 39 * Enumerated, Binary, or Catalog properties, use the following
michael@0 40 * loose matching rule:
michael@0 41 *
michael@0 42 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
michael@0 43 *
michael@0 44 * This function does just that, for (char *) name strings.
michael@0 45 * It is almost identical to ucnv_compareNames() but also ignores
michael@0 46 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
michael@0 47 *
michael@0 48 * @internal
michael@0 49 */
michael@0 50
/*
 * Loose property-name comparison, ASCII-family variant.
 * NOTE(review): the return-value convention is not visible in this header
 * (presumably 0 for a loose match and a strcmp()-like sign otherwise) --
 * confirm against the implementation.
 */
michael@0 51 U_CAPI int32_t U_EXPORT2
michael@0 52 uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
michael@0 53
/*
 * Loose property-name comparison, EBCDIC-family variant.
 * Same signature as the ASCII variant; both are declared regardless of the
 * platform charset family.
 */
michael@0 54 U_CAPI int32_t U_EXPORT2
michael@0 55 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
michael@0 56
/*
 * uprv_comparePropertyNames is an alias for whichever variant matches the
 * build's U_CHARSET_FAMILY. Any other charset family is a compile-time error.
 */
michael@0 57 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
michael@0 58 # define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
michael@0 59 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
michael@0 60 # define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
michael@0 61 #else
michael@0 62 # error U_CHARSET_FAMILY is not valid
michael@0 63 #endif
michael@0 64
michael@0 65 U_CDECL_END
michael@0 66
michael@0 67 /* UDataMemory structure and signatures ------------------------------------- */
michael@0 68
/* Name and type of the data item; together they name the pnames.icu file. */
michael@0 69 #define PNAME_DATA_NAME "pnames"
michael@0 70 #define PNAME_DATA_TYPE "icu"
michael@0 71
michael@0 72 /* Fields in UDataInfo: */
michael@0 73
michael@0 74 /* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
/* The four bytes are the ASCII codes for "pnam", as noted per line. */
michael@0 75 #define PNAME_SIG_0 ((uint8_t)0x70) /* p */
michael@0 76 #define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
michael@0 77 #define PNAME_SIG_2 ((uint8_t)0x61) /* a */
michael@0 78 #define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
michael@0 79
michael@0 80 U_NAMESPACE_BEGIN
michael@0 81
// PropNameData: static-only interface to the property names data
// (indexes[], valueMaps[], bytesTries[], nameGroups[]). The layout of those
// arrays is specified in the "pnames.icu formatVersion 2" comment that
// follows this class. Only declarations appear here; the member functions
// and the data arrays are defined elsewhere.
michael@0 82 class PropNameData {
michael@0 83 public:
// Positions of the entries in indexes[]. The enumerators are implicitly
// numbered from 0, so their order is part of the data format.
michael@0 84 enum {
michael@0 85 // Byte offsets from the start of the data, after the generic header.
michael@0 86 IX_VALUE_MAPS_OFFSET,
michael@0 87 IX_BYTE_TRIES_OFFSET,
michael@0 88 IX_NAME_GROUPS_OFFSET,
michael@0 89 IX_RESERVED3_OFFSET,
michael@0 90 IX_RESERVED4_OFFSET,
michael@0 91 IX_TOTAL_SIZE,
michael@0 92
michael@0 93 // Other values.
michael@0 94 IX_MAX_NAME_LENGTH,
michael@0 95 IX_RESERVED7,
// Number of entries (8); matches "int32_t indexes[8]" in the format notes.
michael@0 96 IX_COUNT
michael@0 97 };
michael@0 98
// Enum -> name lookups for a property, or for one value of a property.
// Name groups store names in UCharNameChoice order, so nameChoice presumably
// indexes into the group.
// NOTE(review): the result for an unknown property/value or an out-of-range
// nameChoice is not visible here -- confirm against the implementation.
michael@0 99 static const char *getPropertyName(int32_t property, int32_t nameChoice);
michael@0 100 static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice);
michael@0 101
// Name/alias -> enum lookups for a property, or for a value of the given
// property.
// NOTE(review): the value returned for an unrecognized alias is not visible
// here -- confirm against the implementation.
michael@0 102 static int32_t getPropertyEnum(const char *alias);
michael@0 103 static int32_t getPropertyValueEnum(int32_t property, const char *alias);
michael@0 104
michael@0 105 private:
// Helpers for navigating valueMaps[] and nameGroups[].
// NOTE(review): presumably findProperty() yields a position in valueMaps[]
// and findPropertyValueNameGroup() yields a nameGroupOffset -- verify.
michael@0 106 static int32_t findProperty(int32_t property);
michael@0 107 static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);
// nameGroup points at one name group: a count byte followed by that many
// NUL-terminated strings (see the nameGroups[] format notes).
michael@0 108 static const char *getName(const char *nameGroup, int32_t nameIndex);
// NOTE(review): presumably applies the loose-matching rule while walking the
// trie -- confirm.
michael@0 109 static UBool containsName(BytesTrie &trie, const char *name);
michael@0 110
// Shared name -> enum lookup; bytesTrieOffset selects a trie in bytesTries[].
michael@0 111 static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias);
michael@0 112
// The data arrays, declared with unspecified bounds; their definitions are
// not part of this header.
michael@0 113 static const int32_t indexes[];
michael@0 114 static const int32_t valueMaps[];
michael@0 115 static const uint8_t bytesTries[];
michael@0 116 static const char nameGroups[];
michael@0 117 };
michael@0 118
michael@0 119 /*
michael@0 120 * pnames.icu formatVersion 2
michael@0 121 *
michael@0 122 * formatVersion 2 is new in ICU 4.8.
michael@0 123 * In ICU 4.8, the pnames.icu data file is used only in ICU4J.
michael@0 124 * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h.
michael@0 125 *
michael@0 126 * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01)
michael@0 127 * or earlier versions of this header file (source/common/propname.h).
michael@0 128 *
michael@0 129 * The pnames.icu begins with the standard ICU DataHeader/UDataInfo.
michael@0 130 * After that:
michael@0 131 *
michael@0 132 * int32_t indexes[8];
michael@0 133 *
michael@0 134 * (See the PropNameData::IX_... constants.)
michael@0 135 *
michael@0 136 * The first 6 indexes are byte offsets from the beginning of the data
michael@0 137 * (beginning of indexes[]) to following structures.
michael@0 138 * The length of each structure is the difference between its offset
michael@0 139 * and the next one.
michael@0 140 * All offsets are filled in: Where there is no data between two offsets,
michael@0 141 * those two offsets are the same.
michael@0 142 * The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the
michael@0 143 * total number of bytes in the file. (Not counting the standard headers.)
michael@0 144 *
michael@0 145 * The seventh index (indexes[PropNameData::IX_MAX_NAME_LENGTH]) has the
michael@0 146 * maximum length of any Unicode property (or property value) alias.
michael@0 147 * (Without normalization, that is, including underscores etc.)
michael@0 148 *
michael@0 149 * int32_t valueMaps[];
michael@0 150 *
michael@0 151 * The valueMaps[] begins with a map from UProperty enums to properties,
michael@0 152 * followed by the per-property value maps from property values to names,
michael@0 153 * for those properties that have named values.
michael@0 154 * (Binary & enumerated, plus General_Category_Mask.)
michael@0 155 *
michael@0 156 * valueMaps[0] contains the number of UProperty enum ranges.
michael@0 157 * For each range:
michael@0 158 * int32_t start, limit -- first and last+1 UProperty enum of a dense range
michael@0 159 * Followed by (limit-start) pairs of
michael@0 160 * int32_t nameGroupOffset;
michael@0 161 * Offset into nameGroups[] for the property's names/aliases.
michael@0 162 * int32_t valueMapIndex;
michael@0 163 * Offset of the property's value map in the valueMaps[] array.
michael@0 164 * If the valueMapIndex is 0, then the property does not have named values.
michael@0 165 *
michael@0 166 * For each property's value map:
michael@0 167 * int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
michael@0 168 * int32_t numRanges;
michael@0 169 * If numRanges is in the range 1..15, then that many ranges of values follow.
michael@0 170 * Per range:
michael@0 171 * int32_t start, limit -- first and last+1 property value of a range
michael@0 172 * Followed by (limit-start) entries of
michael@0 173 * int32_t nameGroupOffset;
michael@0 174 * Offset into nameGroups[] for the property value's names/aliases.
michael@0 175 * If the nameGroupOffset is 0, then this is not a named value for this property.
michael@0 176 * (That is, the ranges need not be dense.)
michael@0 177 * If numRanges is >=0x10, then (numRanges-0x10) sorted values
michael@0 178 * and then (numRanges-0x10) corresponding nameGroupOffsets follow.
michael@0 179 * Values are sorted as signed integers.
michael@0 180 * In this case, the set of values is dense; no nameGroupOffset will be 0.
michael@0 181 *
michael@0 182 * For both properties and property values, ranges are sorted by their start/limit values.
michael@0 183 *
michael@0 184 * uint8_t bytesTries[];
michael@0 185 *
michael@0 186 * This is a sequence of BytesTrie structures, byte-serialized tries for
michael@0 187 * mapping from names/aliases to values.
michael@0 188 * The first one maps from property names/aliases to UProperty enum constants.
michael@0 189 * The following ones are indexed by property value map bytesTrieOffsets
michael@0 190 * for mapping each property's names/aliases to their property values.
michael@0 191 *
michael@0 192 * char nameGroups[];
michael@0 193 *
michael@0 194 * This is a sequence of property name groups.
michael@0 195 * Each group is a list of names/aliases (invariant-character strings) for
michael@0 196 * one property or property value, in the order of UCharNameChoice.
michael@0 197 * The first byte of each group is the number of names in the group.
michael@0 198 * It is followed by that many NUL-terminated strings.
michael@0 199 * The first string is for the short name; if there is no short name,
michael@0 200 * then the first string is empty.
michael@0 201 * The second string is the long name. Further strings are additional aliases.
michael@0 202 *
michael@0 203 * The first name group is for a property rather than a property value,
michael@0 204 * so that a nameGroupOffset of 0 can be used to indicate "no value"
michael@0 205 * in a property's sparse value ranges.
michael@0 206 */
michael@0 207
michael@0 208 U_NAMESPACE_END
michael@0 209
michael@0 210 #endif

mercurial