intl/icu/source/i18n/ucol_tok.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2001-2011, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  ucol_tok.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created 02/22/2001
    14 *   created by: Vladimir Weinstein
    15 *
    16 * This module reads a tailoring rule string and produces a list of 
    17 * tokens that will be turned into collation elements
    18 * 
    19 */
    21 #ifndef UCOL_TOKENS_H
    22 #define UCOL_TOKENS_H
    24 #include "unicode/utypes.h"
    25 #include "unicode/uset.h"
    27 #if !UCONFIG_NO_COLLATION
    29 #include "ucol_imp.h"
    30 #include "uhash.h"
    31 #include "unicode/parseerr.h"
    33 #define UCOL_TOK_UNSET 0xFFFFFFFF /* sentinel; presumably "no value assigned yet" -- TODO confirm where it is stored */
    34 #define UCOL_TOK_RESET 0xDEADBEEF /* sentinel; presumably marks a reset entry -- TODO confirm against the parser */
       /* Polarity values; presumably the values held in UColToken.polarity -- TODO confirm. */
    36 #define UCOL_TOK_POLARITY_NEGATIVE 0
    37 #define UCOL_TOK_POLARITY_POSITIVE 1
       /*
        * Small bit values; presumably OR-ed into the uint16_t flags fields of
        * UColToken and UColParsedToken -- TODO confirm. Note that UCOL_TOK_BEFORE
        * is 0x03, i.e. it covers the two lowest bits rather than a single bit.
        */
    39 #define UCOL_TOK_TOP 0x04
    40 #define UCOL_TOK_VARIABLE_TOP 0x08
    41 #define UCOL_TOK_BEFORE 0x03
    42 #define UCOL_TOK_SUCCESS 0x10
    44 /* Size of the extra space reserved for strings that need to be unquoted */
    45 /* during the parsing of the rules. NOTE(review): the unit (UChars vs. bytes) is not visible in this header. */
    46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
    47 typedef struct UColToken UColToken; /* forward declaration: the list header below holds UColToken pointers */
       /*
        * Header of one token list. Every UColToken carries a listHeader pointer of
        * this type plus previous/next links, so tokens are presumably chained into a
        * doubly linked list owned by one header -- TODO confirm.
        */
    49 typedef struct  {
    50   UColToken* first;          /* presumably the first token of the list -- TODO confirm */
    51   UColToken* last;           /* presumably the last token of the list -- TODO confirm */
    52   UColToken* reset;          /* presumably the token this list was reset to -- TODO confirm */
    53   UBool indirect;            /* meaning not visible in this header */
       /* CE / continuation-CE pairs; the naming mirrors the (CE, contCE) parameters of ucol_inv_getNextCE and ucol_inv_getPrevCE. */
    54   uint32_t baseCE;
    55   uint32_t baseContCE;
    56   uint32_t nextCE;
    57   uint32_t nextContCE;
    58   uint32_t previousCE;
    59   uint32_t previousContCE;
    60   int32_t pos[UCOL_STRENGTH_LIMIT];              /* one slot per strength value */
    61   uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];     /* three words per CE strength; presumably the low end of each gap -- TODO confirm */
    62   uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];     /* three words per CE strength; presumably the high end of each gap -- TODO confirm */
    63   uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];       /* one counter per CE strength; presumably the number of tokens of that strength */
    64   UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];  /* one pointer per CE strength; presumably the first token of that strength */
    65   UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];  /* one pointer per CE strength; presumably the last token of that strength */
    66 } UColTokListHeader;
    68 struct UColToken {
       /*
        * One token produced from the tailoring rules. Tokens link to their
        * neighbours through previous/next and to their list through listHeader.
        */
    69   UChar debugSource;     /* debug* fields hold a single UChar each; presumably debugging aids only -- TODO confirm */
    70   UChar debugExpansion;
    71   UChar debugPrefix;
    72   uint32_t CEs[128];     /* fixed-capacity CE buffer */
    73   uint32_t noOfCEs;      /* presumably the number of valid entries in CEs */
    74   uint32_t expCEs[128];  /* fixed-capacity buffer, presumably for the expansion's CEs */
    75   uint32_t noOfExpCEs;   /* presumably the number of valid entries in expCEs */
       /*
        * NOTE(review): source/expansion/prefix are integers, not pointers; they
        * presumably encode a position in the rule text (compare the *Offset/*Len
        * pairs in UColParsedToken) -- confirm the encoding in the implementation.
        */
    76   uint32_t source;
    77   uint32_t expansion;
    78   uint32_t prefix;
    79   uint32_t strength;
    80   uint32_t toInsert;
    81   uint32_t polarity; /* 1 for <, <<, <<<, "," and ";"; historically documented as -1 for >, >>, >>>.
                             * NOTE(review): the field is unsigned and this header defines
                             * UCOL_TOK_POLARITY_NEGATIVE as 0, so the negative case is presumably
                             * stored as 0 rather than -1 -- confirm against the parser. */
    82   UColTokListHeader *listHeader;  /* the list header associated with this token */
    83   UColToken* previous;
    84   UColToken* next;
    85   UChar **rulesToParseHdl;  /* pointer to a pointer to rule text; presumably so tokens survive a reallocation of the rule buffer -- TODO confirm */
    86   uint16_t flags;           /* presumably a combination of the UCOL_TOK_* bit values -- TODO confirm */
    87 };
    89 /* 
    90  * A token that has been parsed but not yet processed.
    91  * Bundling these values in one struct reduces
    92  * the number of arguments passed around in the parser.
    93  */
    94 typedef struct {
    95   uint32_t strength;
       /* Three offset/length pairs (chars, extension, prefix); presumably offsets into the rule text -- TODO confirm the base and the unit. */
    96   uint32_t charsOffset;
    97   uint32_t charsLen;
    98   uint32_t extensionOffset;
    99   uint32_t extensionLen;
   100   uint32_t prefixOffset;
   101   uint32_t prefixLen;
   102   uint16_t flags;          /* presumably a combination of the UCOL_TOK_* bit values -- TODO confirm */
   103   uint16_t indirectIndex;  /* meaning not visible in this header */
   104 } UColParsedToken;
   107 typedef struct {
       /*
        * Parser state passed as `src` to the ucol_tok_* functions declared in this
        * header. It embeds the most recently parsed token (parsedToken).
        */
   108   UColParsedToken parsedToken;
       /* Pointers into UChar buffers; extraCurrent/extraEnd presumably delimit the extra space sized by UCOL_TOK_EXTRA_RULE_SPACE_SIZE -- TODO confirm. */
   109   UChar *source;
   110   UChar *end;
   111   const UChar *current;
   112   UChar *sourceCurrent;
   113   UChar *extraCurrent;
   114   UChar *extraEnd;
   115   const InverseUCATableHeader *invUCA;
   116   const UCollator *UCA;
   117   UHashtable *tailored;
   118   UColOptionSet *opts;
   119   uint32_t resultLen;      /* presumably the number of entries of lh in use -- TODO confirm */
   120   uint32_t listCapacity;   /* presumably the allocated number of entries of lh -- TODO confirm */
   121   UColTokListHeader *lh;   /* token list header(s) */
   122   UColToken *varTop;
   123   USet *copySet;
   124   USet *removeSet;
   125   UBool buildCCTabFlag;  /* Tailoring rule requires building combining class table. */
   127   UChar32 previousCp;               /* Previous code point. */
   128   /* For processing starred lists. */
   129   UBool isStarred;                   /* Are we processing a starred token? */
   130   UBool savedIsStarred;              /* presumably a saved copy of isStarred -- TODO confirm when it is restored */
   131   uint32_t currentStarredCharIndex;  /* Index of the current character in the starred expression. */
   132   uint32_t lastStarredCharIndex;    /* Index to the last character in the starred expression. */
   134   /* For processing ranges. */
   135   UBool inRange;                     /* Are we in a range? */
   136   UChar32 currentRangeCp;           /* Current code point in the range. */
   137   UChar32 lastRangeCp;              /* The last code point in the range. */
   139   /* Reorder codes for collation reordering: array pointer plus its element count. */
   140   int32_t* reorderCodes;
   141   int32_t reorderCodesLength;
   143 } UColTokenParser;
   145 typedef struct {
       /* One sub-option: a name (pointer plus length) paired with the attribute value it stands for. */
   146   const UChar *subName;        /* sub-option name text */
   147   int32_t subLen;              /* length belonging to subName; presumably counted in UChars */
   148   UColAttributeValue attrVal;  /* attribute value associated with this name */
   149 } ucolTokSuboption;
   151 typedef struct {
       /* One option: a name (pointer plus length), its table of sub-options, and the collation attribute it is tied to. */
   152    const UChar *optionName;           /* option name text */
   153    int32_t optionLen;                 /* length belonging to optionName; presumably counted in UChars */
   154    const ucolTokSuboption *subopts;   /* sub-option table */
   155    int32_t subSize;                   /* presumably the number of entries in subopts -- TODO confirm */
   156    UColAttribute attr;                /* collation attribute associated with this option */
   157 } ucolTokOption;
   159 #define ucol_tok_isSpecialChar(ch)              \
   160     (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || /* U+0020..U+002F: space through '/' */ \
   161       (((ch) <= 0x003F) && ((ch) >= 0x003A)) || /* U+003A..U+003F: ':' through '?'   */ \
   162       (((ch) <= 0x0060) && ((ch) >= 0x005B)) || /* U+005B..U+0060: '[' through '`'   */ \
   163       (((ch) <= 0x007E) && ((ch) >= 0x007D)) || /* U+007D..U+007E: '}' and '~'       */ \
   164       (ch) == 0x007B))                          /* U+007B: '{'                       */
       /*
        * ucol_tok_isSpecialChar(ch) evaluates to 1 when ch lies in one of the ASCII
        * punctuation ranges annotated above, and to 0 otherwise. Not matched:
        * digits, letters, '@' (U+0040), '|' (U+007C), and everything from U+007F up.
        * The argument is expanded up to nine times, so do not pass an expression
        * with side effects.
        */
   167 U_CFUNC 
       /*
        * Declaration only; the definition is not in this header. Going by the name,
        * it assembles the token list(s) from the parser state in src.
        * parseError and status presumably receive parse-error details and the ICU
        * error code. NOTE(review): the meaning of the uint32_t result is not
        * visible here -- presumably a count (cf. UColTokenParser.resultLen); confirm.
        */
   168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
   169                                     UParseError *parseError, 
   170                                     UErrorCode *status);
   172 U_CFUNC
       /*
        * Declaration only. Going by the name, it prepares the parser state in src
        * for the given rule text before any tokens are parsed.
        */
   173 void ucol_tok_initTokenList(UColTokenParser *src,
   174                             const UChar *rules,              /* rule text; not modified through this pointer */
   175                             const uint32_t rulesLength,      /* length belonging to rules; presumably in UChars */
   176                             const UCollator *UCA,
   177                             GetCollationRulesFunction importFunc, /* callback type declared elsewhere */
   178                             void* context,                   /* presumably handed to importFunc unchanged -- TODO confirm */
   179                             UErrorCode *status);
   181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
       /* Declaration only. Presumably the counterpart of ucol_tok_initTokenList that releases what src holds -- TODO confirm exactly which members are freed. */
   183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, 
       /*
        * Declaration only. Going by the name, it parses the next token from src
        * (the parser state embeds a UColParsedToken for the result).
        * NOTE(review): the meaning of the returned const UChar* -- and what is
        * returned at the end of the rules -- is not visible in this header; confirm.
        */
   184                         UBool startOfRules,
   185                         UParseError *parseError,
   186                         UErrorCode *status);
   189 U_CAPI const UChar * U_EXPORT2
       /*
        * Declaration only. start/end delimit the text to examine; attrib and value
        * are pointer parameters through which a UColAttribute / UColAttributeValue
        * pair is presumably reported. NOTE(review): the meaning of the returned
        * pointer is not visible here -- presumably a position within [start, end); confirm.
        */
   190 ucol_tok_getNextArgument(const UChar *start, const UChar *end, 
   191                                UColAttribute *attrib, UColAttributeValue *value, 
   192                                UErrorCode *status);
   193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
       /*
        * Declaration only. Takes a CE and its continuation CE plus a strength and
        * writes through nextCE/nextContCE; src is not modified through this pointer.
        * NOTE(review): the meaning of the int32_t result is not visible here --
        * presumably an index into the inverse table referenced by src->invUCA; confirm.
        */
   194                                             uint32_t CE, uint32_t contCE,
   195                                             uint32_t *nextCE, uint32_t *nextContCE,
   196                                             uint32_t strength);
   197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
       /*
        * Declaration only. Same parameter shape as ucol_inv_getNextCE, writing
        * through prevCE/prevContCE instead. NOTE(review): this declaration combines
        * U_CFUNC with U_EXPORT2, whereas ucol_inv_getNextCE uses U_CAPI; whether
        * the difference in export is intentional cannot be told from this header.
        */
   198                                             uint32_t CE, uint32_t contCE,
   199                                             uint32_t *prevCE, uint32_t *prevContCE,
   200                                             uint32_t strength);
   202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle(
       /*
        * Declaration only. Going by the name and parameters, it looks up rule text
        * for a locale/type pair and reports its length through pLength.
        * It presumably matches GetCollationRulesFunction so that it can be passed as
        * importFunc to ucol_tok_initTokenList -- TODO confirm.
        * NOTE(review): unlike the other declarations in this header it carries no
        * U_CFUNC/U_CAPI linkage macro; ownership of the returned text is not visible here.
        */
   203     void* context,
   204     const char* locale,
   205     const char* type,
   206     int32_t* pLength,
   207     UErrorCode* status);
   209 #endif /* #if !UCONFIG_NO_COLLATION */
   211 #endif

mercurial