1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/ucol_tok.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,211 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2001-2011, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: ucol_tok.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created 02/22/2001 1.17 +* created by: Vladimir Weinstein 1.18 +* 1.19 +* This module reads a tailoring rule string and produces a list of 1.20 +* tokens that will be turned into collation elements 1.21 +* 1.22 +*/ 1.23 + 1.24 +#ifndef UCOL_TOKENS_H 1.25 +#define UCOL_TOKENS_H 1.26 + 1.27 +#include "unicode/utypes.h" 1.28 +#include "unicode/uset.h" 1.29 + 1.30 +#if !UCONFIG_NO_COLLATION 1.31 + 1.32 +#include "ucol_imp.h" 1.33 +#include "uhash.h" 1.34 +#include "unicode/parseerr.h" 1.35 + 1.36 +#define UCOL_TOK_UNSET 0xFFFFFFFF 1.37 +#define UCOL_TOK_RESET 0xDEADBEEF 1.38 + 1.39 +#define UCOL_TOK_POLARITY_NEGATIVE 0 1.40 +#define UCOL_TOK_POLARITY_POSITIVE 1 1.41 + 1.42 +#define UCOL_TOK_TOP 0x04 1.43 +#define UCOL_TOK_VARIABLE_TOP 0x08 1.44 +#define UCOL_TOK_BEFORE 0x03 1.45 +#define UCOL_TOK_SUCCESS 0x10 1.46 + 1.47 +/* this is space for the extra strings that need to be unquoted */ 1.48 +/* during the parsing of the rules */ 1.49 +#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 1.50 +typedef struct UColToken UColToken; 1.51 + 1.52 +typedef struct { 1.53 + UColToken* first; 1.54 + UColToken* last; 1.55 + UColToken* reset; 1.56 + UBool indirect; 1.57 + uint32_t baseCE; 1.58 + uint32_t baseContCE; 1.59 + uint32_t nextCE; 1.60 + uint32_t nextContCE; 1.61 + uint32_t previousCE; 1.62 + uint32_t previousContCE; 1.63 + int32_t pos[UCOL_STRENGTH_LIMIT]; 1.64 + uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; 1.65 
+ uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; 1.66 + uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; 1.67 + UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; 1.68 + UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; 1.69 +} UColTokListHeader; 1.70 + 1.71 +struct UColToken { 1.72 + UChar debugSource; 1.73 + UChar debugExpansion; 1.74 + UChar debugPrefix; 1.75 + uint32_t CEs[128]; 1.76 + uint32_t noOfCEs; 1.77 + uint32_t expCEs[128]; 1.78 + uint32_t noOfExpCEs; 1.79 + uint32_t source; 1.80 + uint32_t expansion; 1.81 + uint32_t prefix; 1.82 + uint32_t strength; 1.83 + uint32_t toInsert; 1.84 + uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> -- NOTE(review): this field is unsigned and UCOL_TOK_POLARITY_NEGATIVE is defined as 0, so the "-1" looks stale; confirm against the parser */ 1.85 + UColTokListHeader *listHeader; 1.86 + UColToken* previous; 1.87 + UColToken* next; 1.88 + UChar **rulesToParseHdl; 1.89 + uint16_t flags; 1.90 +}; 1.91 + 1.92 +/* 1.93 + * This is a token that has been parsed 1.94 + * but not yet processed. Used to reduce 1.95 + * the number of arguments in the parser 1.96 + */ 1.97 +typedef struct { 1.98 + uint32_t strength; 1.99 + uint32_t charsOffset; 1.100 + uint32_t charsLen; 1.101 + uint32_t extensionOffset; 1.102 + uint32_t extensionLen; 1.103 + uint32_t prefixOffset; 1.104 + uint32_t prefixLen; 1.105 + uint16_t flags; 1.106 + uint16_t indirectIndex; 1.107 +} UColParsedToken; 1.108 + 1.109 + 1.110 +typedef struct { 1.111 + UColParsedToken parsedToken; 1.112 + UChar *source; 1.113 + UChar *end; 1.114 + const UChar *current; 1.115 + UChar *sourceCurrent; 1.116 + UChar *extraCurrent; 1.117 + UChar *extraEnd; 1.118 + const InverseUCATableHeader *invUCA; 1.119 + const UCollator *UCA; 1.120 + UHashtable *tailored; 1.121 + UColOptionSet *opts; 1.122 + uint32_t resultLen; 1.123 + uint32_t listCapacity; 1.124 + UColTokListHeader *lh; 1.125 + UColToken *varTop; 1.126 + USet *copySet; 1.127 + USet *removeSet; 1.128 + UBool buildCCTabFlag; /* Tailoring rule requires building combining class table. */ 1.129 + 1.130 + UChar32 previousCp; /* Previous code point. 
*/ 1.131 + /* For processing starred lists. */ 1.132 + UBool isStarred; /* Are we processing a starred token? */ 1.133 + UBool savedIsStarred; 1.134 + uint32_t currentStarredCharIndex; /* Index of the current character in the starred expression. */ 1.135 + uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */ 1.136 + 1.137 + /* For processing ranges. */ 1.138 + UBool inRange; /* Are we in a range? */ 1.139 + UChar32 currentRangeCp; /* Current code point in the range. */ 1.140 + UChar32 lastRangeCp; /* The last code point in the range. */ 1.141 + 1.142 + /* reorder codes for collation reordering */ 1.143 + int32_t* reorderCodes; 1.144 + int32_t reorderCodesLength; 1.145 + 1.146 +} UColTokenParser; 1.147 + 1.148 +typedef struct { 1.149 + const UChar *subName; 1.150 + int32_t subLen; 1.151 + UColAttributeValue attrVal; 1.152 +} ucolTokSuboption; 1.153 + 1.154 +typedef struct { 1.155 + const UChar *optionName; 1.156 + int32_t optionLen; 1.157 + const ucolTokSuboption *subopts; 1.158 + int32_t subSize; 1.159 + UColAttribute attr; 1.160 +} ucolTokOption; 1.161 +/* Nonzero when ch is in 0x0020-0x002F, 0x003A-0x003F, 0x005B-0x0060, 0x007D-0x007E, or equals 0x007B (0x007C is not included). The argument ch is evaluated several times, so do not pass an expression with side effects. */ 1.162 +#define ucol_tok_isSpecialChar(ch) \ 1.163 + (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ 1.164 + (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ 1.165 + (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ 1.166 + (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ 1.167 + (ch) == 0x007B)) 1.168 + 1.169 + 1.170 +U_CFUNC 1.171 +uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, 1.172 + UParseError *parseError, 1.173 + UErrorCode *status); 1.174 + 1.175 +U_CFUNC 1.176 +void ucol_tok_initTokenList(UColTokenParser *src, 1.177 + const UChar *rules, 1.178 + const uint32_t rulesLength, 1.179 + const UCollator *UCA, 1.180 + GetCollationRulesFunction importFunc, 1.181 + void* context, 1.182 + UErrorCode *status); 1.183 + 1.184 +U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); 1.185 + 1.186 +U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, 1.187 + 
UBool startOfRules, 1.188 + UParseError *parseError, 1.189 + UErrorCode *status); 1.190 + 1.191 + 1.192 +U_CAPI const UChar * U_EXPORT2 1.193 +ucol_tok_getNextArgument(const UChar *start, const UChar *end, 1.194 + UColAttribute *attrib, UColAttributeValue *value, 1.195 + UErrorCode *status); 1.196 +U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, 1.197 + uint32_t CE, uint32_t contCE, 1.198 + uint32_t *nextCE, uint32_t *nextContCE, 1.199 + uint32_t strength); 1.200 +U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, 1.201 + uint32_t CE, uint32_t contCE, 1.202 + uint32_t *prevCE, uint32_t *prevContCE, 1.203 + uint32_t strength); 1.204 + 1.205 +const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( 1.206 + void* context, 1.207 + const char* locale, 1.208 + const char* type, 1.209 + int32_t* pLength, 1.210 + UErrorCode* status); 1.211 + 1.212 +#endif /* #if !UCONFIG_NO_COLLATION */ 1.213 + 1.214 +#endif