intl/icu/source/i18n/ucol_tok.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/ucol_tok.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,211 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2001-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  ucol_tok.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created 02/22/2001
    1.17 +*   created by: Vladimir Weinstein
    1.18 +*
    1.19 +* This module reads a tailoring rule string and produces a list of 
    1.20 +* tokens that will be turned into collation elements
    1.21 +* 
    1.22 +*/
    1.23 +
    1.24 +#ifndef UCOL_TOKENS_H
    1.25 +#define UCOL_TOKENS_H
    1.26 +
    1.27 +#include "unicode/utypes.h"
    1.28 +#include "unicode/uset.h"
    1.29 +
    1.30 +#if !UCONFIG_NO_COLLATION
    1.31 +
    1.32 +#include "ucol_imp.h"
    1.33 +#include "uhash.h"
    1.34 +#include "unicode/parseerr.h"
    1.35 +
    1.36 +#define UCOL_TOK_UNSET 0xFFFFFFFF /* all-ones 32-bit sentinel; NOTE(review): presumably "no value assigned yet" - confirm at use sites */
    1.37 +#define UCOL_TOK_RESET 0xDEADBEEF /* distinctive 32-bit marker; NOTE(review): presumably tags a reset token - confirm at use sites */
    1.38 +
    1.39 +#define UCOL_TOK_POLARITY_NEGATIVE 0 /* NOTE(review): presumably one of the two values stored in UColToken.polarity - confirm */
    1.40 +#define UCOL_TOK_POLARITY_POSITIVE 1
    1.41 +
    1.42 +#define UCOL_TOK_TOP 0x04
    1.43 +#define UCOL_TOK_VARIABLE_TOP 0x08
    1.44 +#define UCOL_TOK_BEFORE 0x03 /* covers the two low bits (0x01|0x02), unlike the single-bit values around it */
    1.45 +#define UCOL_TOK_SUCCESS 0x10
    1.46 +
    1.47 +/* This is the space reserved for the extra strings that need to be unquoted */
    1.48 +/* during the parsing of the rules. */
    1.49 +#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
    1.50 +typedef struct UColToken UColToken; /* forward declaration: the list header below holds UColToken pointers before struct UColToken is defined */
    1.51 +
    1.52 +typedef struct  { /* Bookkeeping for one token list. NOTE(review): field roles below are inferred from names - confirm in the implementation */
    1.53 +  UColToken* first;
    1.54 +  UColToken* last;
    1.55 +  UColToken* reset;
    1.56 +  UBool indirect;
    1.57 +  uint32_t baseCE; /* CE values come in (xCE, xContCE) pairs; NOTE(review): "Cont" presumably means continuation CE - confirm */
    1.58 +  uint32_t baseContCE;
    1.59 +  uint32_t nextCE;
    1.60 +  uint32_t nextContCE;
    1.61 +  uint32_t previousCE;
    1.62 +  uint32_t previousContCE;
    1.63 +  int32_t pos[UCOL_STRENGTH_LIMIT]; /* sized by UCOL_STRENGTH_LIMIT, unlike the arrays below which use UCOL_CE_STRENGTH_LIMIT */
    1.64 +  uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* 3 * UCOL_CE_STRENGTH_LIMIT entries; NOTE(review): presumably three words per CE strength - confirm */
    1.65 +  uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; /* same size as gapsLo */
    1.66 +  uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; /* one entry per CE strength */
    1.67 +  UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; /* one token pointer per CE strength; NOTE(review): presumably the first token of that strength - confirm */
    1.68 +  UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; /* one token pointer per CE strength; NOTE(review): presumably the last token of that strength - confirm */
    1.69 +} UColTokListHeader;
    1.70 +
    1.71 +struct UColToken { /* One token of a tailoring rule, linked to its neighbours and to its list header */
    1.72 +  UChar debugSource; /* single UChar each; NOTE(review): the debug* fields presumably exist only to aid debugging - confirm */
    1.73 +  UChar debugExpansion;
    1.74 +  UChar debugPrefix;
    1.75 +  uint32_t CEs[128]; /* fixed capacity of 128 entries; noOfCEs is the companion count */
    1.76 +  uint32_t noOfCEs;
    1.77 +  uint32_t expCEs[128]; /* fixed capacity of 128 entries; noOfExpCEs is the companion count */
    1.78 +  uint32_t noOfExpCEs;
    1.79 +  uint32_t source; /* 32-bit values, not pointers; NOTE(review): source/expansion/prefix presumably locate text inside *rulesToParseHdl - confirm the encoding */
    1.80 +  uint32_t expansion;
    1.81 +  uint32_t prefix;
    1.82 +  uint32_t strength;
    1.83 +  uint32_t toInsert;
    1.84 +  uint32_t polarity; /* 1 for <, <<, <<<, ',' and ';'. The original note says -1 for >, >>, >>>, but this field is unsigned and UCOL_TOK_POLARITY_NEGATIVE is 0 - NOTE(review): confirm which value is stored */
    1.85 +  UColTokListHeader *listHeader;
    1.86 +  UColToken* previous;
    1.87 +  UColToken* next;
    1.88 +  UChar **rulesToParseHdl; /* pointer to a UChar pointer (a handle); NOTE(review): presumably lets the rule buffer move without invalidating tokens - confirm */
    1.89 +  uint16_t flags; /* NOTE(review): presumably a combination of the UCOL_TOK_* flag values - confirm */
    1.90 +};
    1.91 +
    1.92 +/* 
    1.93 + * This is a token that has been parsed
    1.94 + * but not yet processed. It is used to reduce
    1.95 + * the number of arguments in the parser.
    1.96 + */
    1.97 +typedef struct {
    1.98 +  uint32_t strength;
    1.99 +  uint32_t charsOffset; /* the three text parts are stored as (offset, length) pairs; NOTE(review): offsets are presumably relative to the parser's rule buffer - confirm */
   1.100 +  uint32_t charsLen;
   1.101 +  uint32_t extensionOffset;
   1.102 +  uint32_t extensionLen;
   1.103 +  uint32_t prefixOffset;
   1.104 +  uint32_t prefixLen;
   1.105 +  uint16_t flags; /* NOTE(review): presumably a combination of the UCOL_TOK_* flag values - confirm */
   1.106 +  uint16_t indirectIndex;
   1.107 +} UColParsedToken;
   1.108 +
   1.109 +
   1.110 +typedef struct { /* Parser state: the most recently parsed token, cursors into the rule text, and the resulting token lists */
   1.111 +  UColParsedToken parsedToken; /* embedded by value, not a pointer */
   1.112 +  UChar *source;
   1.113 +  UChar *end;
   1.114 +  const UChar *current; /* the only const-qualified cursor among the UChar pointers in this struct */
   1.115 +  UChar *sourceCurrent;
   1.116 +  UChar *extraCurrent; /* NOTE(review): extraCurrent/extraEnd presumably delimit the UCOL_TOK_EXTRA_RULE_SPACE_SIZE scratch area - confirm */
   1.117 +  UChar *extraEnd;
   1.118 +  const InverseUCATableHeader *invUCA;
   1.119 +  const UCollator *UCA;
   1.120 +  UHashtable *tailored;
   1.121 +  UColOptionSet *opts;
   1.122 +  uint32_t resultLen; /* NOTE(review): resultLen/listCapacity presumably are the used and allocated sizes of lh - confirm */
   1.123 +  uint32_t listCapacity;
   1.124 +  UColTokListHeader *lh;
   1.125 +  UColToken *varTop;
   1.126 +  USet *copySet;
   1.127 +  USet *removeSet;
   1.128 +  UBool buildCCTabFlag;  /* Tailoring rule requires building combining class table. */
   1.129 +
   1.130 +  UChar32 previousCp;               /* Previous code point. */
   1.131 +  /* For processing starred lists. */
   1.132 +  UBool isStarred;                   /* Are we processing a starred token? */
   1.133 +  UBool savedIsStarred;
   1.134 +  uint32_t currentStarredCharIndex;  /* Index of the current character in the starred expression. */
   1.135 +  uint32_t lastStarredCharIndex;    /* Index to the last character in the starred expression. */
   1.136 +
   1.137 +  /* For processing ranges. */
   1.138 +  UBool inRange;                     /* Are we in a range? */
   1.139 +  UChar32 currentRangeCp;           /* Current code point in the range. */
   1.140 +  UChar32 lastRangeCp;              /* The last code point in the range. */
   1.141 +  
   1.142 +  /* Reorder codes for collation reordering. */
   1.143 +  int32_t* reorderCodes;
   1.144 +  int32_t reorderCodesLength; /* companion length for reorderCodes */
   1.145 +
   1.146 +} UColTokenParser;
   1.147 +
   1.148 +typedef struct { /* A suboption name (pointer plus length) together with a UColAttributeValue */
   1.149 +  const UChar *subName;
   1.150 +  int32_t subLen; /* NOTE(review): presumably the length of subName in UChars - confirm */
   1.151 +  UColAttributeValue attrVal;
   1.152 +} ucolTokSuboption;
   1.153 +
   1.154 +typedef struct { /* An option name (pointer plus length), its table of suboptions, and a UColAttribute */
   1.155 +   const UChar *optionName;
   1.156 +   int32_t optionLen; /* NOTE(review): presumably the length of optionName in UChars - confirm */
   1.157 +   const ucolTokSuboption *subopts;
   1.158 +   int32_t subSize; /* NOTE(review): presumably the number of entries in subopts - confirm */
   1.159 +   UColAttribute attr;
   1.160 +} ucolTokOption;
   1.161 +
   1.162 +#define ucol_tok_isSpecialChar(ch)              \
   1.163 +    (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
   1.164 +      (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
   1.165 +      (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
   1.166 +      (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
   1.167 +      (ch) == 0x007B)) /* Non-zero when ch is in 0x20-0x2F (space..'/'), 0x3A-0x3F (':'..'?'), 0x5B-0x60 ('['..'`'), 0x7D-0x7E ('}','~') or equals 0x7B ('{'); 0x40 ('@') and 0x7C ('|') are not matched. ch is evaluated several times, so pass an expression without side effects. */
   1.168 +
   1.169 +
   1.170 +U_CFUNC 
   1.171 +uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
   1.172 +                                    UParseError *parseError, 
   1.173 +                                    UErrorCode *status); /* Operates on the parser state in src. NOTE(review): presumably reports failures via *status with details in *parseError, and returns a list count (cf. UColTokenParser.resultLen) - confirm in the implementation */
   1.174 +
   1.175 +U_CFUNC
   1.176 +void ucol_tok_initTokenList(UColTokenParser *src,
   1.177 +                            const UChar *rules,
   1.178 +                            const uint32_t rulesLength,
   1.179 +                            const UCollator *UCA,
   1.180 +                            GetCollationRulesFunction importFunc,
   1.181 +                            void* context,
   1.182 +                            UErrorCode *status); /* Takes the rule text as pointer plus length. NOTE(review): presumably initializes *src and must be paired with ucol_tok_closeTokenList; importFunc/context presumably form a callback and its opaque argument - confirm */
   1.183 +
   1.184 +U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); /* NOTE(review): presumably releases what ucol_tok_initTokenList set up in src - confirm */
   1.185 +
   1.186 +U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, 
   1.187 +                        UBool startOfRules,
   1.188 +                        UParseError *parseError,
   1.189 +                        UErrorCode *status); /* NOTE(review): presumably fills src->parsedToken and returns a position in the rule text (NULL when finished); startOfRules presumably marks the first call - confirm */
   1.190 +
   1.191 +
   1.192 +U_CAPI const UChar * U_EXPORT2
   1.193 +ucol_tok_getNextArgument(const UChar *start, const UChar *end, 
   1.194 +                               UColAttribute *attrib, UColAttributeValue *value, 
   1.195 +                               UErrorCode *status); /* Takes a read-only UChar span (start, end). NOTE(review): attrib and value are presumably out-parameters, and the returned pointer presumably is where scanning stopped - confirm */
   1.196 +U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
   1.197 +                                            uint32_t CE, uint32_t contCE,
   1.198 +                                            uint32_t *nextCE, uint32_t *nextContCE,
   1.199 +                                            uint32_t strength); /* Takes a (CE, contCE) pair by value and two uint32_t pointers. NOTE(review): presumably writes the following pair at the given strength and returns an index into the inverse UCA table - confirm */
   1.200 +U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
   1.201 +                                            uint32_t CE, uint32_t contCE,
   1.202 +                                            uint32_t *prevCE, uint32_t *prevContCE,
   1.203 +                                            uint32_t strength); /* Same parameter shape as ucol_inv_getNextCE. NOTE(review): declared U_CFUNC where the sibling uses U_CAPI - confirm this matches the definition; presumably writes the preceding pair - confirm */
   1.204 +
   1.205 +const UChar* U_CALLCONV ucol_tok_getRulesFromBundle(
   1.206 +    void* context,
   1.207 +    const char* locale,
   1.208 +    const char* type,
   1.209 +    int32_t* pLength,
   1.210 +    UErrorCode* status); /* The only prototype here without U_CFUNC/U_CAPI. NOTE(review): presumably matches GetCollationRulesFunction (see importFunc in ucol_tok_initTokenList) and stores the rule length in *pLength - confirm */
   1.211 +
   1.212 +#endif /* #if !UCONFIG_NO_COLLATION */
   1.213 +
   1.214 +#endif

mercurial