diff -r 000000000000 -r 6474c204b198 intl/icu/source/i18n/ucol_tok.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/intl/icu/source/i18n/ucol_tok.h Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,211 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucol_tok.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created 02/22/2001
+* created by: Vladimir Weinstein
+*
+* This module reads a tailoring rule string and produces a list of
+* tokens that will be turned into collation elements
+*
+*/
+
+#ifndef UCOL_TOKENS_H
+#define UCOL_TOKENS_H
+
+#include "unicode/utypes.h"
+#include "unicode/uset.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "ucol_imp.h"
+#include "uhash.h"
+#include "unicode/parseerr.h"
+
+#define UCOL_TOK_UNSET 0xFFFFFFFF
+#define UCOL_TOK_RESET 0xDEADBEEF
+/* Polarity constants; presumably the values stored in UColToken.polarity -- TODO confirm. */
+#define UCOL_TOK_POLARITY_NEGATIVE 0
+#define UCOL_TOK_POLARITY_POSITIVE 1
+/* NOTE(review): presumably bit values for the uint16_t `flags` fields declared below (UCOL_TOK_BEFORE = 0x03 covers two bits) -- confirm against the implementation. */
+#define UCOL_TOK_TOP 0x04
+#define UCOL_TOK_VARIABLE_TOP 0x08
+#define UCOL_TOK_BEFORE 0x03
+#define UCOL_TOK_SUCCESS 0x10
+
+/* This is space for the extra strings that need to be unquoted */
+/* during the parsing of the rules. */
+#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
+typedef struct UColToken UColToken; /* forward declaration: UColTokListHeader and UColToken point at each other */
+/* Header record that UColToken.listHeader points to: first/last/reset token pointers, base/next/previous CE pairs, and arrays dimensioned by the strength limits. */
+typedef struct {
+    UColToken* first;
+    UColToken* last;
+    UColToken* reset;
+    UBool indirect;
+    uint32_t baseCE;
+    uint32_t baseContCE;
+    uint32_t nextCE;
+    uint32_t nextContCE;
+    uint32_t previousCE;
+    uint32_t previousContCE;
+    int32_t pos[UCOL_STRENGTH_LIMIT];
+    uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* three slots per CE strength level; slot layout is not visible in this header */
+    uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; /* same dimensions as gapsLo */
+    uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
+    UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
+    UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
+} UColTokListHeader;
+/* One token node: carries previous/next links and a back-pointer to its list header. NOTE(review): source/expansion/prefix are uint32_t, not pointers -- presumably offsets/lengths into the rule text reached through rulesToParseHdl; confirm. */
+struct UColToken {
+    UChar debugSource;
+    UChar debugExpansion;
+    UChar debugPrefix;
+    uint32_t CEs[128];
+    uint32_t noOfCEs; /* presumably the number of CEs[] entries in use -- confirm */
+    uint32_t expCEs[128];
+    uint32_t noOfExpCEs; /* presumably the number of expCEs[] entries in use -- confirm */
+    uint32_t source;
+    uint32_t expansion;
+    uint32_t prefix;
+    uint32_t strength;
+    uint32_t toInsert;
+    uint32_t polarity; /* Original note: 1 for <, <<, <<<, , ; and -1 for >, >>, >>>. NOTE(review): UCOL_TOK_POLARITY_NEGATIVE above is 0, not -1, and this field is unsigned -- confirm which value is actually stored. */
+    UColTokListHeader *listHeader;
+    UColToken* previous;
+    UColToken* next;
+    UChar **rulesToParseHdl;
+    uint16_t flags;
+};
+
+/*
+ * This is a token that has been parsed
+ * but not yet processed. Used to reduce
+ * the number of arguments in the parser.
+ */
+typedef struct {
+    uint32_t strength;
+    uint32_t charsOffset;
+    uint32_t charsLen;
+    uint32_t extensionOffset;
+    uint32_t extensionLen;
+    uint32_t prefixOffset;
+    uint32_t prefixLen;
+    uint16_t flags;
+    uint16_t indirectIndex;
+} UColParsedToken;
+
+/* Parser state passed to the ucol_tok_* and ucol_inv_* functions below: embeds a UColParsedToken and holds rule-buffer pointers, collator/option pointers, and the starred-list, range and reorder-code bookkeeping. */
+typedef struct {
+    UColParsedToken parsedToken;
+    UChar *source;
+    UChar *end;
+    const UChar *current;
+    UChar *sourceCurrent;
+    UChar *extraCurrent;
+    UChar *extraEnd;
+    const InverseUCATableHeader *invUCA;
+    const UCollator *UCA;
+    UHashtable *tailored;
+    UColOptionSet *opts;
+    uint32_t resultLen;
+    uint32_t listCapacity;
+    UColTokListHeader *lh;
+    UColToken *varTop;
+    USet *copySet;
+    USet *removeSet;
+    UBool buildCCTabFlag; /* Tailoring rule requires building combining class table. */
+
+    UChar32 previousCp; /* Previous code point. */
+    /* For processing starred lists. */
+    UBool isStarred; /* Are we processing a starred token? */
+    UBool savedIsStarred;
+    uint32_t currentStarredCharIndex; /* Index of the current character in the starred expression. */
+    uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */
+
+    /* For processing ranges. */
+    UBool inRange; /* Are we in a range? */
+    UChar32 currentRangeCp; /* Current code point in the range. */
+    UChar32 lastRangeCp; /* The last code point in the range. */
+
+    /* Reorder codes for collation reordering. */
+    int32_t* reorderCodes;
+    int32_t reorderCodesLength; /* presumably the element count of reorderCodes -- confirm */
+
+} UColTokenParser;
+/* Sub-option table entry: a UChar name pointer with an int32_t (presumably its length -- confirm) and the UColAttributeValue it is paired with. */
+typedef struct {
+    const UChar *subName;
+    int32_t subLen;
+    UColAttributeValue attrVal;
+} ucolTokSuboption;
+/* Option table entry: a UChar name pointer with an int32_t (presumably its length -- confirm), a pointer to ucolTokSuboption entries with subSize (presumably their count -- confirm), and a UColAttribute. */
+typedef struct {
+    const UChar *optionName;
+    int32_t optionLen;
+    const ucolTokSuboption *subopts;
+    int32_t subSize;
+    UColAttribute attr;
+} ucolTokOption;
+/* Nonzero when ch is in 0x0020-0x002F, 0x003A-0x003F, 0x005B-0x0060, 0x007B, or 0x007D-0x007E (0x0040 and 0x007C are not matched). The argument is evaluated several times, so do not pass an expression with side effects. */
+#define ucol_tok_isSpecialChar(ch) \
+    (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
+    (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
+    (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
+    (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
+    (ch) == 0x007B))
+
+/* Token-list entry points. Each takes the UColTokenParser state; failures are presumably reported through the UErrorCode argument -- confirm against the implementation. */
+U_CFUNC
+uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
+                                    UParseError *parseError,
+                                    UErrorCode *status);
+
+U_CFUNC
+void ucol_tok_initTokenList(UColTokenParser *src,
+                            const UChar *rules,
+                            const uint32_t rulesLength,
+                            const UCollator *UCA,
+                            GetCollationRulesFunction importFunc,
+                            void* context,
+                            UErrorCode *status);
+
+U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
+
+U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
+                                                      UBool startOfRules,
+                                                      UParseError *parseError,
+                                                      UErrorCode *status);
+
+
+U_CAPI const UChar * U_EXPORT2
+ucol_tok_getNextArgument(const UChar *start, const UChar *end,
+                         UColAttribute *attrib, UColAttributeValue *value,
+                         UErrorCode *status);
+U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
+                                            uint32_t CE, uint32_t contCE,
+                                            uint32_t *nextCE, uint32_t *nextContCE,
+                                            uint32_t strength);
+U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
+                                             uint32_t CE, uint32_t contCE,
+                                             uint32_t *prevCE, uint32_t *prevContCE,
+                                             uint32_t strength); /* NOTE(review): declared U_CFUNC ... U_EXPORT2, whereas the sibling ucol_inv_getNextCE uses U_CAPI ... U_EXPORT2 -- confirm the difference is intended. */
+/* NOTE(review): declared without U_CFUNC/U_CAPI, unlike every other prototype in this header. Presumably meant to be passed as the GetCollationRulesFunction importFunc of ucol_tok_initTokenList -- confirm. */
+const UChar* U_CALLCONV ucol_tok_getRulesFromBundle(
+                  void* context,
+                  const char* locale,
+                  const char* type,
+                  int32_t* pLength,
+                  UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif /* UCOL_TOKENS_H */