intl/icu/source/common/propname.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/propname.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,210 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +* Copyright (c) 2002-2011, International Business Machines
     1.7 +* Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +* Author: Alan Liu
    1.10 +* Created: October 30 2002
    1.11 +* Since: ICU 2.4
    1.12 +* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
    1.13 +**********************************************************************
    1.14 +*/
    1.15 +#ifndef PROPNAME_H
    1.16 +#define PROPNAME_H
    1.17 +
    1.18 +#include "unicode/utypes.h"
    1.19 +#include "unicode/bytestrie.h"
    1.20 +#include "unicode/uchar.h"
    1.21 +#include "udataswp.h"
    1.22 +#include "uprops.h"
    1.23 +
    1.24 +/*
    1.25 + * This header defines the in-memory layout of the property names data
    1.26 + * structure representing the UCD data files PropertyAliases.txt and
    1.27 + * PropertyValueAliases.txt.  It is used by:
    1.28 + *   propname.cpp - reads data
    1.29 + *   genpname     - creates data
    1.30 + */
    1.31 +
    1.32 +/* low-level char * property name comparison -------------------------------- */
    1.33 +
    1.34 +U_CDECL_BEGIN
    1.35 +
    1.36 +/**
    1.37 + * \var uprv_comparePropertyNames
    1.38 + * Unicode property names and property value names are compared "loosely".
    1.39 + *
    1.40 + * UCD.html 4.0.1 says:
    1.41 + *   For all property names, property value names, and for property values for
    1.42 + *   Enumerated, Binary, or Catalog properties, use the following
    1.43 + *   loose matching rule:
    1.44 + *
    1.45 + *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
    1.46 + *
    1.47 + * This function does just that, for (char *) name strings.
    1.48 + * It is almost identical to ucnv_compareNames() but also ignores
    1.49 + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
    1.50 + *
    1.51 + * @internal
    1.52 + */
    1.53 +
    1.54 +U_CAPI int32_t U_EXPORT2  /* loose (LM3) comparison of two char * names; variant selected on ASCII-family platforms */
    1.55 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
    1.56 +
    1.57 +U_CAPI int32_t U_EXPORT2  /* loose (LM3) comparison of two char * names; variant selected on EBCDIC-family platforms */
    1.58 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
    1.59 +
    1.60 +#if U_CHARSET_FAMILY==U_ASCII_FAMILY  /* map the generic name uprv_comparePropertyNames to the platform's charset variant */
    1.61 +#   define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
    1.62 +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    1.63 +#   define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
    1.64 +#else
    1.65 +#   error U_CHARSET_FAMILY is not valid
    1.66 +#endif  /* U_CHARSET_FAMILY */
    1.67 +
    1.68 +U_CDECL_END
    1.69 +
    1.70 +/* UDataMemory structure and signatures ------------------------------------- */
    1.71 +
    1.72 +#define PNAME_DATA_NAME "pnames"  /* name part of the data item; with PNAME_DATA_TYPE this names "pnames.icu" */
    1.73 +#define PNAME_DATA_TYPE "icu"  /* type (suffix) part of the data item name */
    1.74 +
    1.75 +/* Fields in UDataInfo: */
    1.76 +
    1.77 +/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
    1.78 +#define PNAME_SIG_0 ((uint8_t)0x70) /* p */  /* the four signature bytes together spell "pnam" */
    1.79 +#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
    1.80 +#define PNAME_SIG_2 ((uint8_t)0x61) /* a */
    1.81 +#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
    1.82 +
    1.83 +U_NAMESPACE_BEGIN
    1.84 +
    1.85 +class PropNameData {  // All-static interface to the property names data; the data layout is described in the "pnames.icu formatVersion 2" comment in this header.
    1.86 +public:
    1.87 +    enum {  // Positions of the entries in indexes[].
    1.88 +        // Byte offsets from the start of the data, after the generic header.
    1.89 +        IX_VALUE_MAPS_OFFSET,  // = 0; offset of valueMaps[]
    1.90 +        IX_BYTE_TRIES_OFFSET,  // = 1; offset of bytesTries[]
    1.91 +        IX_NAME_GROUPS_OFFSET,  // = 2; offset of nameGroups[]
    1.92 +        IX_RESERVED3_OFFSET,
    1.93 +        IX_RESERVED4_OFFSET,
    1.94 +        IX_TOTAL_SIZE,  // = 5; total number of bytes, not counting the standard headers
    1.95 +
    1.96 +        // Other values.
    1.97 +        IX_MAX_NAME_LENGTH,  // = 6; maximum length of any property or property value alias, without normalization
    1.98 +        IX_RESERVED7,
    1.99 +        IX_COUNT  // = 8; number of entries in indexes[]
   1.100 +    };
   1.101 +
   1.102 +    static const char *getPropertyName(int32_t property, int32_t nameChoice);  // NOTE(review): presumably nameChoice selects a name within the property's name group (short name first, then long name, then aliases) -- confirm in propname.cpp
   1.103 +    static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice);  // NOTE(review): presumably the same lookup for one value of the given property -- confirm in propname.cpp
   1.104 +
   1.105 +    static int32_t getPropertyEnum(const char *alias);  // NOTE(review): presumably maps a property name/alias to its UProperty enum; failure value not visible here -- confirm
   1.106 +    static int32_t getPropertyValueEnum(int32_t property, const char *alias);  // NOTE(review): presumably maps a value name/alias of the given property to the value; failure value not visible here -- confirm
   1.107 +
   1.108 +private:
   1.109 +    static int32_t findProperty(int32_t property);  // NOTE(review): presumably locates the property's entry in valueMaps[] -- confirm in propname.cpp
   1.110 +    static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);  // NOTE(review): presumably returns a nameGroupOffset for the value from the value map at valueMapIndex -- confirm
   1.111 +    static const char *getName(const char *nameGroup, int32_t nameIndex);  // NOTE(review): presumably picks the nameIndex-th NUL-terminated string of a name group -- confirm
   1.112 +    static UBool containsName(BytesTrie &trie, const char *name);  // takes the trie by non-const reference; NOTE(review): whether the trie state is modified is not visible here
   1.113 +
   1.114 +    static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias);  // NOTE(review): bytesTrieOffset presumably is an offset into bytesTries[] -- confirm
   1.115 +
   1.116 +    static const int32_t indexes[];  // declared here, defined elsewhere; entries are addressed by the IX_ constants
   1.117 +    static const int32_t valueMaps[];  // map from UProperty enums to properties, followed by the per-property value maps
   1.118 +    static const uint8_t bytesTries[];  // sequence of byte-serialized BytesTrie structures mapping names/aliases to values
   1.119 +    static const char nameGroups[];  // sequence of property name groups (count byte followed by NUL-terminated strings)
   1.120 +};
   1.121 +
   1.122 +/*
   1.123 + * pnames.icu formatVersion 2
   1.124 + *
   1.125 + * formatVersion 2 is new in ICU 4.8.
   1.126 + * In ICU 4.8, the pnames.icu data file is used only in ICU4J.
   1.127 + * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h.
   1.128 + *
   1.129 + * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01)
   1.130 + * or earlier versions of this header file (source/common/propname.h).
   1.131 + *
   1.132 + * The pnames.icu begins with the standard ICU DataHeader/UDataInfo.
   1.133 + * After that:
   1.134 + *
   1.135 + * int32_t indexes[8];
   1.136 + *
   1.137 + *      (See the PropNameData::IX_... constants.)
   1.138 + *
   1.139 + *      The first 6 indexes are byte offsets from the beginning of the data
   1.140 + *      (beginning of indexes[]) to following structures.
   1.141 + *      The length of each structure is the difference between its offset
   1.142 + *      and the next one.
   1.143 + *      All offsets are filled in: Where there is no data between two offsets,
   1.144 + *      those two offsets are the same.
   1.145 + *      The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the
   1.146 + *      total number of bytes in the file. (Not counting the standard headers.)
   1.147 + *
   1.148 + *      The seventh index (indexes[PropNameData::IX_MAX_NAME_LENGTH], i.e. indexes[6]) has the
   1.149 + *      maximum length of any Unicode property (or property value) alias.
   1.150 + *      (Without normalization, that is, including underscores etc.)
   1.151 + *
   1.152 + * int32_t valueMaps[];
   1.153 + *
   1.154 + *      The valueMaps[] begins with a map from UProperty enums to properties,
   1.155 + *      followed by the per-property value maps from property values to names,
   1.156 + *      for those properties that have named values.
   1.157 + *      (Binary & enumerated, plus General_Category_Mask.)
   1.158 + *
   1.159 + *      valueMaps[0] contains the number of UProperty enum ranges.
   1.160 + *      For each range:
   1.161 + *        int32_t start, limit -- first and last+1 UProperty enum of a dense range
   1.162 + *        Followed by (limit-start) pairs of
   1.163 + *          int32_t nameGroupOffset;
   1.164 + *            Offset into nameGroups[] for the property's names/aliases.
   1.165 + *          int32_t valueMapIndex;
   1.166 + *            Offset of the property's value map in the valueMaps[] array.
   1.167 + *            If the valueMapIndex is 0, then the property does not have named values.
   1.168 + *
   1.169 + *      For each property's value map:
   1.170 + *      int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
   1.171 + *      int32_t numRanges;
   1.172 + *        If numRanges is in the range 1..15, then that many ranges of values follow.
   1.173 + *        Per range:
   1.174 + *          int32_t start, limit -- first and last+1 property value of a range
   1.175 + *          Followed by (limit-start) entries of
   1.176 + *            int32_t nameGroupOffset;
   1.177 + *              Offset into nameGroups[] for the property value's names/aliases.
   1.178 + *              If the nameGroupOffset is 0, then this is not a named value for this property.
   1.179 + *              (That is, the ranges need not be dense.)
   1.180 + *        If numRanges is >=0x10, then (numRanges-0x10) sorted values
   1.181 + *        and then (numRanges-0x10) corresponding nameGroupOffsets follow.
   1.182 + *        Values are sorted as signed integers.
   1.183 + *        In this case, the set of values is dense; no nameGroupOffset will be 0.
   1.184 + *
   1.185 + *      For both properties and property values, ranges are sorted by their start/limit values.
   1.186 + *
   1.187 + * uint8_t bytesTries[];
   1.188 + *
   1.189 + *      This is a sequence of BytesTrie structures, byte-serialized tries for
   1.190 + *      mapping from names/aliases to values.
   1.191 + *      The first one maps from property names/aliases to UProperty enum constants.
   1.192 + *      The following ones are indexed by property value map bytesTrieOffsets
   1.193 + *      for mapping each property's names/aliases to their property values.
   1.194 + *
   1.195 + * char nameGroups[];
   1.196 + *
   1.197 + *      This is a sequence of property name groups.
   1.198 + *      Each group is a list of names/aliases (invariant-character strings) for
   1.199 + *      one property or property value, in the order of UCharNameChoice.
   1.200 + *      The first byte of each group is the number of names in the group.
   1.201 + *      It is followed by that many NUL-terminated strings.
   1.202 + *      The first string is for the short name; if there is no short name,
   1.203 + *      then the first string is empty.
   1.204 + *      The second string is the long name. Further strings are additional aliases.
   1.205 + *
   1.206 + *      The first name group is for a property rather than a property value,
   1.207 + *      so that a nameGroupOffset of 0 can be used to indicate "no value"
   1.208 + *      in a property's sparse value ranges.
   1.209 + */
   1.210 +
   1.211 +U_NAMESPACE_END
   1.212 +
   1.213 +#endif

mercurial