michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2001-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: ucol_tok.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created 02/22/2001 michael@0: * created by: Vladimir Weinstein michael@0: * michael@0: * This module reads a tailoring rule string and produces a list of michael@0: * tokens that will be turned into collation elements michael@0: * michael@0: */ michael@0: michael@0: #ifndef UCOL_TOKENS_H michael@0: #define UCOL_TOKENS_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uset.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "ucol_imp.h" michael@0: #include "uhash.h" michael@0: #include "unicode/parseerr.h" michael@0: michael@0: #define UCOL_TOK_UNSET 0xFFFFFFFF michael@0: #define UCOL_TOK_RESET 0xDEADBEEF michael@0: michael@0: #define UCOL_TOK_POLARITY_NEGATIVE 0 michael@0: #define UCOL_TOK_POLARITY_POSITIVE 1 michael@0: michael@0: #define UCOL_TOK_TOP 0x04 michael@0: #define UCOL_TOK_VARIABLE_TOP 0x08 michael@0: #define UCOL_TOK_BEFORE 0x03 michael@0: #define UCOL_TOK_SUCCESS 0x10 michael@0: michael@0: /* this is space for the extra strings that need to be unquoted */ michael@0: /* during the parsing of the rules */ michael@0: #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 michael@0: typedef struct UColToken UColToken; michael@0: michael@0: typedef struct { michael@0: UColToken* first; michael@0: UColToken* last; michael@0: UColToken* reset; michael@0: UBool indirect; michael@0: uint32_t baseCE; michael@0: uint32_t baseContCE; michael@0: uint32_t nextCE; michael@0: uint32_t nextContCE; michael@0: uint32_t previousCE; michael@0: uint32_t previousContCE; michael@0: int32_t pos[UCOL_STRENGTH_LIMIT]; michael@0: uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; michael@0: uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; michael@0: uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; michael@0: UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; michael@0: UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; michael@0: } UColTokListHeader; michael@0: michael@0: struct UColToken { michael@0: UChar debugSource; michael@0: UChar debugExpansion; michael@0: UChar debugPrefix; michael@0: uint32_t CEs[128]; michael@0: uint32_t noOfCEs; michael@0: uint32_t expCEs[128]; michael@0: uint32_t noOfExpCEs; michael@0: uint32_t source; michael@0: uint32_t expansion; michael@0: uint32_t prefix; michael@0: uint32_t strength; michael@0: uint32_t toInsert; michael@0: uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */ michael@0: UColTokListHeader *listHeader; michael@0: UColToken* previous; michael@0: UColToken* next; michael@0: UChar **rulesToParseHdl; michael@0: uint16_t flags; michael@0: }; michael@0: michael@0: /* michael@0: * This is a token that has been parsed michael@0: * but not yet processed. Used to reduce michael@0: * the number of arguments in the parser michael@0: */ michael@0: typedef struct { michael@0: uint32_t strength; michael@0: uint32_t charsOffset; michael@0: uint32_t charsLen; michael@0: uint32_t extensionOffset; michael@0: uint32_t extensionLen; michael@0: uint32_t prefixOffset; michael@0: uint32_t prefixLen; michael@0: uint16_t flags; michael@0: uint16_t indirectIndex; michael@0: } UColParsedToken; michael@0: michael@0: michael@0: typedef struct { michael@0: UColParsedToken parsedToken; michael@0: UChar *source; michael@0: UChar *end; michael@0: const UChar *current; michael@0: UChar *sourceCurrent; michael@0: UChar *extraCurrent; michael@0: UChar *extraEnd; michael@0: const InverseUCATableHeader *invUCA; michael@0: const UCollator *UCA; michael@0: UHashtable *tailored; michael@0: UColOptionSet *opts; michael@0: uint32_t resultLen; michael@0: uint32_t listCapacity; michael@0: UColTokListHeader *lh; michael@0: UColToken *varTop; michael@0: USet *copySet; michael@0: USet *removeSet; michael@0: UBool buildCCTabFlag; /* Tailoring rule requirs building combining class table. */ michael@0: michael@0: UChar32 previousCp; /* Previous code point. */ michael@0: /* For processing starred lists. */ michael@0: UBool isStarred; /* Are we processing a starred token? */ michael@0: UBool savedIsStarred; michael@0: uint32_t currentStarredCharIndex; /* Index of the current charrecter in the starred expression. */ michael@0: uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */ michael@0: michael@0: /* For processing ranges. */ michael@0: UBool inRange; /* Are we in a range? */ michael@0: UChar32 currentRangeCp; /* Current code point in the range. */ michael@0: UChar32 lastRangeCp; /* The last code point in the range. */ michael@0: michael@0: /* reorder codes for collation reordering */ michael@0: int32_t* reorderCodes; michael@0: int32_t reorderCodesLength; michael@0: michael@0: } UColTokenParser; michael@0: michael@0: typedef struct { michael@0: const UChar *subName; michael@0: int32_t subLen; michael@0: UColAttributeValue attrVal; michael@0: } ucolTokSuboption; michael@0: michael@0: typedef struct { michael@0: const UChar *optionName; michael@0: int32_t optionLen; michael@0: const ucolTokSuboption *subopts; michael@0: int32_t subSize; michael@0: UColAttribute attr; michael@0: } ucolTokOption; michael@0: michael@0: #define ucol_tok_isSpecialChar(ch) \ michael@0: (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ michael@0: (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ michael@0: (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ michael@0: (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ michael@0: (ch) == 0x007B)) michael@0: michael@0: michael@0: U_CFUNC michael@0: uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, michael@0: UParseError *parseError, michael@0: UErrorCode *status); michael@0: michael@0: U_CFUNC michael@0: void ucol_tok_initTokenList(UColTokenParser *src, michael@0: const UChar *rules, michael@0: const uint32_t rulesLength, michael@0: const UCollator *UCA, michael@0: GetCollationRulesFunction importFunc, michael@0: void* context, michael@0: UErrorCode *status); michael@0: michael@0: U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); michael@0: michael@0: U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, michael@0: UBool startOfRules, michael@0: UParseError *parseError, michael@0: UErrorCode *status); michael@0: michael@0: michael@0: U_CAPI const UChar * U_EXPORT2 michael@0: ucol_tok_getNextArgument(const UChar *start, const UChar *end, michael@0: UColAttribute *attrib, UColAttributeValue *value, michael@0: UErrorCode *status); michael@0: U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, michael@0: uint32_t CE, uint32_t contCE, michael@0: uint32_t *nextCE, uint32_t *nextContCE, michael@0: uint32_t strength); michael@0: U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, michael@0: uint32_t CE, uint32_t contCE, michael@0: uint32_t *prevCE, uint32_t *prevContCE, michael@0: uint32_t strength); michael@0: michael@0: const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( michael@0: void* context, michael@0: const char* locale, michael@0: const char* type, michael@0: int32_t* pLength, michael@0: UErrorCode* status); michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */ michael@0: michael@0: #endif