intl/icu/source/i18n/ucol_tok.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2001-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: ucol_tok.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created 02/22/2001
michael@0 14 * created by: Vladimir Weinstein
michael@0 15 *
michael@0 16 * This module reads a tailoring rule string and produces a list of
michael@0 17 * tokens that will be turned into collation elements
michael@0 18 *
michael@0 19 */
michael@0 20
michael@0 21 #ifndef UCOL_TOKENS_H
michael@0 22 #define UCOL_TOKENS_H
michael@0 23
michael@0 24 #include "unicode/utypes.h"
michael@0 25 #include "unicode/uset.h"
michael@0 26
michael@0 27 #if !UCONFIG_NO_COLLATION
michael@0 28
michael@0 29 #include "ucol_imp.h"
michael@0 30 #include "uhash.h"
michael@0 31 #include "unicode/parseerr.h"
michael@0 32
michael@0 33 #define UCOL_TOK_UNSET 0xFFFFFFFF /* all-ones sentinel; NOTE(review): presumably means "no value assigned yet" - confirm at the use sites */
michael@0 34 #define UCOL_TOK_RESET 0xDEADBEEF /* distinctive sentinel; NOTE(review): presumably tags a reset token - confirm at the use sites */
michael@0 35
michael@0 36 #define UCOL_TOK_POLARITY_NEGATIVE 0 /* NOTE(review): presumably the values stored in UColToken.polarity - confirm */
michael@0 37 #define UCOL_TOK_POLARITY_POSITIVE 1
michael@0 38
michael@0 39 #define UCOL_TOK_TOP 0x04 /* single bit (bit 2) */
michael@0 40 #define UCOL_TOK_VARIABLE_TOP 0x08 /* single bit (bit 3) */
michael@0 41 #define UCOL_TOK_BEFORE 0x03 /* covers bits 0 and 1, unlike its single-bit neighbours; NOTE(review): looks like a two-bit mask rather than a flag - confirm */
michael@0 42 #define UCOL_TOK_SUCCESS 0x10 /* single bit (bit 4) */
michael@0 43
michael@0 44 /* this is space for the extra strings that need to be unquoted */
michael@0 45 /* during the parsing of the rules */
michael@0 46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
michael@0 47 typedef struct UColToken UColToken; /* forward declaration: UColTokListHeader stores UColToken pointers before struct UColToken is defined */
michael@0 48
michael@0 49 typedef struct { /* Header for one list of UColToken entries; each token points back here through UColToken.listHeader. */
michael@0 50 UColToken* first; /* presumably the first token of the list - confirm */
michael@0 51 UColToken* last; /* presumably the last token of the list - confirm */
michael@0 52 UColToken* reset; /* NOTE(review): presumably the reset token this list is anchored to - confirm */
michael@0 53 UBool indirect;
michael@0 54 uint32_t baseCE; /* the base/next/previous values each come as a CE plus a "ContCE" companion */
michael@0 55 uint32_t baseContCE;
michael@0 56 uint32_t nextCE;
michael@0 57 uint32_t nextContCE;
michael@0 58 uint32_t previousCE;
michael@0 59 uint32_t previousContCE;
michael@0 60 int32_t pos[UCOL_STRENGTH_LIMIT]; /* NOTE(review): sized by UCOL_STRENGTH_LIMIT while the arrays below use UCOL_CE_STRENGTH_LIMIT - confirm this is intended */
michael@0 61 uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* three slots per CE strength level; the slot layout is not visible in this header */
michael@0 62 uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; /* same size as gapsLo */
michael@0 63 uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; /* one entry per CE strength level; presumably a token count - confirm */
michael@0 64 UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; /* one entry per CE strength level; presumably the first token at that strength - confirm */
michael@0 65 UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; /* one entry per CE strength level; presumably the last token at that strength - confirm */
michael@0 66 } UColTokListHeader;
michael@0 67
michael@0 68 struct UColToken { /* One token produced from a tailoring rule; linked to its neighbours via previous/next and to its list via listHeader. */
michael@0 69 UChar debugSource; /* single UChar each; NOTE(review): presumably debugging aids only - confirm */
michael@0 70 UChar debugExpansion;
michael@0 71 UChar debugPrefix;
michael@0 72 uint32_t CEs[128]; /* fixed capacity of 128 entries */
michael@0 73 uint32_t noOfCEs; /* presumably the number of entries of CEs in use - confirm */
michael@0 74 uint32_t expCEs[128]; /* fixed capacity of 128 entries */
michael@0 75 uint32_t noOfExpCEs; /* presumably the number of entries of expCEs in use - confirm */
michael@0 76 uint32_t source; /* NOTE(review): source/expansion/prefix are plain integers here; their encoding is not visible in this header */
michael@0 77 uint32_t expansion;
michael@0 78 uint32_t prefix;
michael@0 79 uint32_t strength;
michael@0 80 uint32_t toInsert;
michael@0 81 uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */ /* NOTE(review): the field is unsigned and UCOL_TOK_POLARITY_NEGATIVE is 0, so the "-1" looks stale - confirm */
michael@0 82 UColTokListHeader *listHeader; /* the list header this token is attached to */
michael@0 83 UColToken* previous;
michael@0 84 UColToken* next;
michael@0 85 UChar **rulesToParseHdl; /* pointer to a UChar pointer; NOTE(review): presumably refers to the parser's rules buffer - confirm */
michael@0 86 uint16_t flags; /* NOTE(review): presumably a combination of the UCOL_TOK_* bit values - confirm */
michael@0 87 };
michael@0 88
michael@0 89 /*
michael@0 90 * This is a token that has been parsed
michael@0 91 * but not yet processed. Used to reduce
michael@0 92 * the number of arguments in the parser
michael@0 93 */
michael@0 94 typedef struct {
michael@0 95 uint32_t strength;
michael@0 96 uint32_t charsOffset; /* offset/length pair for the token's characters; NOTE(review): units and base are not visible here - presumably UChars from the start of the rules */
michael@0 97 uint32_t charsLen;
michael@0 98 uint32_t extensionOffset; /* offset/length pair for the extension part */
michael@0 99 uint32_t extensionLen;
michael@0 100 uint32_t prefixOffset; /* offset/length pair for the prefix part */
michael@0 101 uint32_t prefixLen;
michael@0 102 uint16_t flags; /* same width as UColToken.flags */
michael@0 103 uint16_t indirectIndex;
michael@0 104 } UColParsedToken;
michael@0 105
michael@0 106
michael@0 107 typedef struct { /* Working state passed to every ucol_tok_* function declared in this header. */
michael@0 108 UColParsedToken parsedToken; /* embedded by value: the token that has been parsed but not yet processed */
michael@0 109 UChar *source;
michael@0 110 UChar *end;
michael@0 111 const UChar *current;
michael@0 112 UChar *sourceCurrent;
michael@0 113 UChar *extraCurrent; /* NOTE(review): extraCurrent/extraEnd presumably delimit the UCOL_TOK_EXTRA_RULE_SPACE_SIZE area - confirm */
michael@0 114 UChar *extraEnd;
michael@0 115 const InverseUCATableHeader *invUCA;
michael@0 116 const UCollator *UCA;
michael@0 117 UHashtable *tailored;
michael@0 118 UColOptionSet *opts;
michael@0 119 uint32_t resultLen;
michael@0 120 uint32_t listCapacity;
michael@0 121 UColTokListHeader *lh; /* NOTE(review): presumably an array of list headers sized by listCapacity - confirm */
michael@0 122 UColToken *varTop;
michael@0 123 USet *copySet;
michael@0 124 USet *removeSet;
michael@0 125 UBool buildCCTabFlag; /* Tailoring rule requires building combining class table. */
michael@0 126
michael@0 127 UChar32 previousCp; /* Previous code point. */
michael@0 128 /* For processing starred lists. */
michael@0 129 UBool isStarred; /* Are we processing a starred token? */
michael@0 130 UBool savedIsStarred;
michael@0 131 uint32_t currentStarredCharIndex; /* Index of the current character in the starred expression. */
michael@0 132 uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */
michael@0 133
michael@0 134 /* For processing ranges. */
michael@0 135 UBool inRange; /* Are we in a range? */
michael@0 136 UChar32 currentRangeCp; /* Current code point in the range. */
michael@0 137 UChar32 lastRangeCp; /* The last code point in the range. */
michael@0 138
michael@0 139 /* reorder codes for collation reordering */
michael@0 140 int32_t* reorderCodes;
michael@0 141 int32_t reorderCodesLength; /* presumably the number of entries in reorderCodes - confirm */
michael@0 142
michael@0 143 } UColTokenParser;
michael@0 144
michael@0 145 typedef struct { /* One sub-option entry: a name (pointer plus length) paired with a UColAttributeValue. */
michael@0 146 const UChar *subName;
michael@0 147 int32_t subLen; /* presumably the length of subName in UChars - confirm */
michael@0 148 UColAttributeValue attrVal;
michael@0 149 } ucolTokSuboption;
michael@0 150
michael@0 151 typedef struct { /* One option entry: a name (pointer plus length), its ucolTokSuboption table and a UColAttribute. */
michael@0 152 const UChar *optionName;
michael@0 153 int32_t optionLen; /* presumably the length of optionName in UChars - confirm */
michael@0 154 const ucolTokSuboption *subopts;
michael@0 155 int32_t subSize; /* presumably the number of entries in subopts - confirm */
michael@0 156 UColAttribute attr;
michael@0 157 } ucolTokOption;
michael@0 158
michael@0 159 #define ucol_tok_isSpecialChar(ch) /* nonzero when ch is in 0x20-0x2F, 0x3A-0x3F, 0x5B-0x60, 0x7D-0x7E or equals 0x7B; 0x7C is not matched. ch is evaluated several times, so pass an expression without side effects. */ \
michael@0 160 (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
michael@0 161 (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
michael@0 162 (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
michael@0 163 (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
michael@0 164 (ch) == 0x007B))
michael@0 165
michael@0 166
michael@0 167 U_CFUNC
michael@0 168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
michael@0 169 UParseError *parseError,
michael@0 170 UErrorCode *status); /* NOTE(review): the meaning of the uint32_t result is not stated in this header - presumably a count of token lists; confirm in the implementation */
michael@0 171
michael@0 172 U_CFUNC
michael@0 173 void ucol_tok_initTokenList(UColTokenParser *src,
michael@0 174 const UChar *rules,
michael@0 175 const uint32_t rulesLength,
michael@0 176 const UCollator *UCA,
michael@0 177 GetCollationRulesFunction importFunc, /* callback; NOTE(review): ucol_tok_getRulesFromBundle, declared in this header, looks like a candidate - confirm */
michael@0 178 void* context, /* NOTE(review): presumably handed unchanged to importFunc - confirm */
michael@0 179 UErrorCode *status);
michael@0 180
michael@0 181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); /* NOTE(review): presumably releases what ucol_tok_initTokenList set up in src - confirm */
michael@0 182
michael@0 183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
michael@0 184 UBool startOfRules,
michael@0 185 UParseError *parseError,
michael@0 186 UErrorCode *status); /* NOTE(review): what the returned pointer designates, and its value at end of input, is not stated here - confirm in the implementation */
michael@0 187
michael@0 188
michael@0 189 U_CAPI const UChar * U_EXPORT2
michael@0 190 ucol_tok_getNextArgument(const UChar *start, const UChar *end,
michael@0 191 UColAttribute *attrib, UColAttributeValue *value, /* non-const pointers: presumably out-parameters for the recognised attribute and value - confirm */
michael@0 192 UErrorCode *status);
michael@0 193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
michael@0 194 uint32_t CE, uint32_t contCE,
michael@0 195 uint32_t *nextCE, uint32_t *nextContCE, /* non-const pointers: presumably receive the result pair - confirm */
michael@0 196 uint32_t strength);
michael@0 197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, /* NOTE(review): declared U_CFUNC ... U_EXPORT2 while the otherwise parallel ucol_inv_getNextCE uses U_CAPI - confirm the difference is intended */
michael@0 198 uint32_t CE, uint32_t contCE,
michael@0 199 uint32_t *prevCE, uint32_t *prevContCE, /* non-const pointers: presumably receive the result pair - confirm */
michael@0 200 uint32_t strength);
michael@0 201
michael@0 202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( /* NOTE(review): the only prototype in this header without U_CFUNC or U_CAPI - confirm the linkage is intended */
michael@0 203 void* context,
michael@0 204 const char* locale,
michael@0 205 const char* type,
michael@0 206 int32_t* pLength, /* non-const pointer: presumably receives the length of the returned rules - confirm */
michael@0 207 UErrorCode* status);
michael@0 208
michael@0 209 #endif /* #if !UCONFIG_NO_COLLATION */
michael@0 210
michael@0 211 #endif

mercurial