intl/icu/source/common/ubidiimp.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic when either private browsing mode
is active or the privacy.thirdparty.isolate preference is set, and
implement it in GetCookieStringCommon and FindCookie, where it counts.
One reservation remains: how to convince FindCookie users to test the
condition and pass a nullptr when double key logic is disabled.

     1 /*
     2 ******************************************************************************
     3 *
     4 *   Copyright (C) 1999-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 ******************************************************************************
     8 *   file name:  ubidiimp.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 1999aug06
    14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
    15 */
    17 #ifndef UBIDIIMP_H
    18 #define UBIDIIMP_H
    20 /* set import/export definitions */
    21 #ifdef U_COMMON_IMPLEMENTATION
    23 #include "unicode/utypes.h"
    24 #include "unicode/uchar.h"
    25 #include "ubidi_props.h"
    27 /* miscellaneous definitions ---------------------------------------------- */
    29 typedef uint8_t DirProp;    /* a directional property value from the enum below; may carry the IGNORE_CC bit */
    30 typedef uint32_t Flags;     /* bit set of DIRPROP_FLAG() values, see UBiDi.flags */
    32 /*  Short aliases for the UCharDirection constants (see uchar.h), so that the
    33     code can use the same names for the BiDi types as the description of the
    34     BiDi algorithm. ENL and ENR have no U_ constant here; they simply continue
    35     the numbering after PDI, and dirPropCount is one past ENR. */
    36 enum {
    37     L=  U_LEFT_TO_RIGHT,                /*  0 */
    38     R=  U_RIGHT_TO_LEFT,                /*  1 */
    39     EN= U_EUROPEAN_NUMBER,              /*  2 */
    40     ES= U_EUROPEAN_NUMBER_SEPARATOR,    /*  3 */
    41     ET= U_EUROPEAN_NUMBER_TERMINATOR,   /*  4 */
    42     AN= U_ARABIC_NUMBER,                /*  5 */
    43     CS= U_COMMON_NUMBER_SEPARATOR,      /*  6 */
    44     B=  U_BLOCK_SEPARATOR,              /*  7 */
    45     S=  U_SEGMENT_SEPARATOR,            /*  8 */
    46     WS= U_WHITE_SPACE_NEUTRAL,          /*  9 */
    47     ON= U_OTHER_NEUTRAL,                /* 10 */
    48     LRE=U_LEFT_TO_RIGHT_EMBEDDING,      /* 11 */
    49     LRO=U_LEFT_TO_RIGHT_OVERRIDE,       /* 12 */
    50     AL= U_RIGHT_TO_LEFT_ARABIC,         /* 13 */
    51     RLE=U_RIGHT_TO_LEFT_EMBEDDING,      /* 14 */
    52     RLO=U_RIGHT_TO_LEFT_OVERRIDE,       /* 15 */
    53     PDF=U_POP_DIRECTIONAL_FORMAT,       /* 16 */
    54     NSM=U_DIR_NON_SPACING_MARK,         /* 17 */
    55     BN= U_BOUNDARY_NEUTRAL,             /* 18 */
    56     FSI=U_FIRST_STRONG_ISOLATE,         /* 19 */
    57     LRI=U_LEFT_TO_RIGHT_ISOLATE,        /* 20 */
    58     RLI=U_RIGHT_TO_LEFT_ISOLATE,        /* 21 */
    59     PDI=U_POP_DIRECTIONAL_ISOLATE,      /* 22 */
    60     ENL,                                /* 23 */
    61     ENR,                                /* 24 */
    62     dirPropCount
    63 };
    65 /*
    66  * Sometimes, bit values are more appropriate
    67  * to deal with directionality properties:
    68  * DIRPROP_FLAG(dir) is the single bit 1UL<<dir for property value dir.
    69  * Abbreviations in the mask names below refer to names used in the BiDi algorithm.
    70  */
    71 #define DIRPROP_FLAG(dir) (1UL<<(dir))
    73 /* special flag for multiple runs from explicit embedding codes */
    74 #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
    76 /* are there any characters that are LTR or RTL? */
    77 #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
    78 #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
    79 #define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))   /* R or AL */
    80 #define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))   /* L, R, AL, EN or AN */
    82 /* explicit embedding codes */
    83 #define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
    85 /* explicit isolate codes */
    86 #define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
    88 #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)   /* BN plus the explicit embedding codes */
    90 /* paragraph and segment separators */
    91 #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
    93 /* all types that are counted as White Space or Neutral in some steps */
    94 #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
    96 /* types that are neutrals or could become neutrals in (Wn) */
    97 #define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
    99 /*
   100  * These types may be changed to "e",
   101  * the embedding type (L or R) of the run,
   102  * in the BiDi algorithm (N2)
   103  */
   104 #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
   106 /* the dirProp's L and R are defined to 0 and 1 values in UCharDirection, so the low bit of a level is its direction: even -> L, odd -> R */
   107 #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
   109 #define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)   /* true for 0xfe and above; presumably the UBIDI_DEFAULT_xxx pseudo-levels -- confirm in ubidi.h */
   111 /*
   112  * The following bit is ORed to the property of directional control
   113  * characters which are ignored: unmatched PDF or PDI; LRx, RLx or FSI
   114  * which would exceed the maximum explicit bidi level.
   115  */
   116 #define IGNORE_CC   0x40
   118 #define PURE_DIRPROP(prop)  ((prop)&~IGNORE_CC)   /* the property value with the IGNORE_CC bit cleared */
   120 /*
   121  * The following bit is used for the directional isolate status.
   122  * Stack entries corresponding to isolate sequences are greater than ISOLATE.
   123  */
   124 #define ISOLATE  0x0100
   126 U_CFUNC UBiDiLevel   /* out-of-line lookup that GET_PARALEVEL falls back to */
   127 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
   129 #define GET_PARALEVEL(ubidi, index) /* paragraph level at a text index; evaluates both arguments more than once */ \
   130             ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? /* defaultParaLevel is 0, or index lies before paras[0].limit */ \
   131                          (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
   133 /* number of paras entries allocated initially without malloc (size of UBiDi.simpleParas) */
   134 #define SIMPLE_PARAS_SIZE   10
   135 /* number of isolate entries allocated initially without malloc (size of UBiDi.simpleIsolates) */
   136 #define SIMPLE_ISOLATES_SIZE 5
   137 /* number of opening entries for paired brackets allocated initially without malloc (size of BracketData.simpleOpenings) */
   138 #define SIMPLE_OPENINGS_SIZE 20
   140 #define CR  0x000D   /* carriage return */
   141 #define LF  0x000A   /* line feed */
   143 /* Run structure for reordering --------------------------------------------- */
   144 enum {              /* distinct bits, so they can be combined; presumably the values stored in Run.insertRemove and Point.flag -- confirm */
   145     LRM_BEFORE=1,
   146     LRM_AFTER=2,
   147     RLM_BEFORE=4,
   148     RLM_AFTER=8
   149 };
   151 typedef struct Para {   /* element type of UBiDi.paras and UBiDi.simpleParas */
   152     int32_t limit;      /* GET_PARALEVEL compares a text index against paras[0].limit; presumably the index just past the paragraph -- confirm */
   153     int32_t level;      /* presumably the level of this paragraph -- confirm */
   154 } Para;
   156 enum {                                  /* flags for Opening.flags */
   157     FOUND_L=DIRPROP_FLAG(L),            /* same bit as DIRPROP_FLAG(L) */
   158     FOUND_R=DIRPROP_FLAG(R)             /* same bit as DIRPROP_FLAG(R); Opening.flags describes it as "R/AL found" */
   159 };
   161 typedef struct Opening {                /* element type of BracketData.simpleOpenings / openings and of UBiDi.openingsMemory */
   162     int32_t position;                   /* position of opening bracket */
   163     int32_t match;                      /* matching char or -position of closing bracket */
   164     int32_t contextPos;                 /* position of last strong char found before opening */
   165     uint16_t flags;                     /* bits for L or R/AL found within the pair (FOUND_L, FOUND_R) */
   166     UBiDiDirection contextDir;          /* L or R according to last strong char before opening */
   167     uint8_t filler;                     /* to complete a nice multiple of 4 chars */
   168 } Opening;
   170 typedef struct IsoRun {                 /* element type of BracketData.isoRuns */
   171     int32_t  lastStrongPos;             /* position of last strong char found in this run */
   172     int32_t  contextPos;                /* position of last char defining context */
   173     uint16_t start;                     /* index of first opening entry for this run */
   174     uint16_t limit;                     /* index after last opening entry for this run */
   175     UBiDiLevel level;                   /* level of this run */
   176     DirProp lastStrong;                 /* bidi class of last strong char found in this run */
   177     UBiDiDirection contextDir;          /* L or R to use as context for following openings */
   178     uint8_t filler;                     /* to complete a nice multiple of 4 chars */
   179 } IsoRun;
   181 typedef struct BracketData {            /* working state for paired brackets: opening entries plus a stack of isolate runs */
   182     UBiDi   *pBiDi;                     /* the UBiDi object this data belongs to */
   183     /* array of opening entries which should be enough in most cases; no malloc() */
   184     Opening simpleOpenings[SIMPLE_OPENINGS_SIZE];
   185     Opening *openings;                  /* pointer to current array of entries */
   186     int32_t openingsSize;               /* number of allocated entries */
   187     int32_t isoRunLast;                 /* index of last used entry */
   188     /* array of nested isolated sequence entries; can never exceed UBIDI_MAX_EXPLICIT_LEVEL
   189        + 1 for index 0, + 1 for before the first isolated sequence */
   190     IsoRun  isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
   191     UBool isNumbersSpecial;             /* reordering mode for NUMBERS_SPECIAL */
   192 } BracketData;
   194 typedef struct Isolate {    /* element type of UBiDi.isolates and UBiDi.simpleIsolates */
   195     int32_t start1;         /* NOTE(review): the meaning of these three fields is not documented in this header -- confirm against the code that fills them */
   196     int16_t stateImp;
   197     int16_t state;
   198 } Isolate;
   200 typedef struct Run {        /* element type of UBiDi.runs and UBiDi.simpleRuns */
   201     int32_t logicalStart,   /* first character of the run; b31 indicates even/odd level (see INDEX_ODD_BIT) */
   202             visualLimit,    /* last visual position of the run +1 */
   203             insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
   204                                if <0, count of bidi controls within run            */
   205 } Run;
   207 /* in a Run, logicalStart will get this bit set if the run level is odd */
   208 #define INDEX_ODD_BIT (1UL<<31)
   210 #define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
   211 #define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((int32_t)(level)<<31))
   212 #define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)
   214 #define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
   215 #define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
   216 #define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
   217 #define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
   219 U_CFUNC UBool       /* NOTE(review): implemented elsewhere; presumably sets up pBiDi->runs and runCount (documented as -1 until runs are set up) -- confirm */
   220 ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
   222 /** BiDi control code points; unassigned enumerators continue from the previous value */
   223 enum {
   224     ZWNJ_CHAR=0x200c,
   225     ZWJ_CHAR,           /* 0x200d */
   226     LRM_CHAR,           /* 0x200e */
   227     RLM_CHAR,           /* 0x200f */
   228     LRE_CHAR=0x202a,
   229     RLE_CHAR,           /* 0x202b */
   230     PDF_CHAR,           /* 0x202c */
   231     LRO_CHAR,           /* 0x202d */
   232     RLO_CHAR,           /* 0x202e */
   233     LRI_CHAR=0x2066,
   234     RLI_CHAR,           /* 0x2067 */
   235     FSI_CHAR,           /* 0x2068 */
   236     PDI_CHAR            /* 0x2069 */
   237 };
   239 #define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)   /* true for 0x200c..0x200f, 0x202a..0x202e and 0x2066..0x2069; c may be evaluated up to three times */
   241 /* InsertPoints structure for noting where to put BiDi marks ---------------- */
   243 typedef struct Point {      /* element type of InsertPoints.points */
   244     int32_t pos;            /* position in text */
   245     int32_t flag;           /* flag for LRM/RLM, before/after */
   246 } Point;
   248 typedef struct InsertPoints {   /* growable list of Point entries; type of UBiDi.insertPoints */
   249     int32_t capacity;       /* number of points allocated */
   250     int32_t size;           /* number of points used */
   251     int32_t confirmed;      /* number of points confirmed */
   252     UErrorCode errorCode;   /* for eventual memory shortage */
   253     Point *points;          /* pointer to array of points */
   254 } InsertPoints;
   257 /* UBiDi structure ----------------------------------------------------------- */
   259 struct UBiDi {      /* state of one paragraph or line object; which of the two is told by pParaBiDi */
   260     /* pointer to parent paragraph object (pointer to self if this object is
   261      * a paragraph object); set to NULL in a newly opened object; set to a
   262      * real value after a successful execution of ubidi_setPara or ubidi_setLine
   263      */
   264     const UBiDi * pParaBiDi;
   266     const UBiDiProps *bdp;
   268     /* alias pointer to the current text */
   269     const UChar *text;
   271     /* length of the current text */
   272     int32_t originalLength;
   274     /* if the UBIDI_OPTION_STREAMING option is set, this is the length
   275      * of text actually processed by ubidi_setPara, which may be shorter than
   276      * the original length.
   277      * Otherwise, it is identical to the original length.
   278      */
   279     int32_t length;
   281     /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
   282      * marks are allowed to be inserted in one of the reordering mode, the
   283      * length of the result string may be different from the processed length.
   284      */
   285     int32_t resultLength;
   287     /* memory sizes in bytes, one per xxxMemory pointer below; passed by address to ubidi_getMemory() */
   288     int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
   290     /* allocated memory */
   291     DirProp *dirPropsMemory;
   292     UBiDiLevel *levelsMemory;
   293     Opening *openingsMemory;
   294     Para *parasMemory;
   295     Run *runsMemory;
   296     Isolate *isolatesMemory;
   298     /* indicators for whether memory may be allocated after ubidi_open();
               mayAllocateText is used by getDirPropsMemory()/getLevelsMemory(), mayAllocateRuns by getRunsMemory() */
   299     UBool mayAllocateText, mayAllocateRuns;
   301     /* arrays with one value per text-character */
   302     DirProp *dirProps;
   303     UBiDiLevel *levels;
   305     /* are we performing an approximation of the "inverse BiDi" algorithm? */
   306     UBool isInverse;
   308     /* are we using the basic algorithm or its variation? */
   309     UBiDiReorderingMode reorderingMode;
   311     /* UBIDI_REORDER_xxx values must be ordered so that all the regular
   312      * logical to visual modes come first, and all inverse BiDi modes
   313      * come last.
   314      */
   315     #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL    UBIDI_REORDER_NUMBERS_SPECIAL   /* a preprocessor definition: it adds no member to the struct */
   317     /* bitmask for reordering options */
   318     uint32_t reorderingOptions;
   320     /* must block separators receive level 0? */
   321     UBool orderParagraphsLTR;
   323     /* the paragraph level */
   324     UBiDiLevel paraLevel;
   325     /* original paraLevel when contextual */
   326     /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
   327     UBiDiLevel defaultParaLevel;
   329     /* context data */
   330     const UChar *prologue;
   331     int32_t proLength;
   332     const UChar *epilogue;
   333     int32_t epiLength;
   335     /* the following is set in ubidi_setPara, used in processPropertySeq */
   336     const struct ImpTabPair * pImpTabPair;  /* pointer to levels state table pair */
   338     /* the overall paragraph or line directionality - see UBiDiDirection */
   339     UBiDiDirection direction;
   341     /* flags is a bit set for which directional properties are in the text */
   342     Flags flags;
   344     /* lastArabicPos is index to the last AL in the text, -1 if none */
   345     int32_t lastArabicPos;
   347     /* characters after trailingWSStart are WS and are */
   348     /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
   349     int32_t trailingWSStart;
   351     /* fields for paragraph handling */
   352     int32_t paraCount;                  /* set in getDirProps() */
   353     /* filled in getDirProps() */
   354     Para *paras;
   356     /* for relatively short text, we only need a tiny array of paras (no malloc()) */
   357     Para simpleParas[SIMPLE_PARAS_SIZE];
   359     /* fields for line reordering */
   360     int32_t runCount;     /* ==-1: runs not set up yet */
   361     Run *runs;
   363     /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
   364     Run simpleRuns[1];
   366     /* maximum or current nesting depth of isolate sequences */
   367     /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
   368        nesting encountered.
   369        Within resolveImplicitLevels(), this is the index of the current isolates
   370        stack entry. */
   371     int32_t isolateCount;
   372     Isolate *isolates;
   374     /* for simple text, have a small stack (no malloc()) */
   375     Isolate simpleIsolates[SIMPLE_ISOLATES_SIZE];
   377     /* for inverse Bidi with insertion of directional marks */
   378     InsertPoints insertPoints;
   380     /* for option UBIDI_OPTION_REMOVE_CONTROLS */
   381     int32_t controlCount;
   383     /* for Bidi class callback */
   384     UBiDiClassCallback *fnClassCallback;    /* action pointer */
   385     const void *coClassCallback;            /* context pointer */
   386 };
   388 #define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))   /* non-NULL and its own parent, i.e. a paragraph object */
   389 #define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))   /* a paragraph object, or an object whose non-NULL parent is itself a paragraph object */
   391 typedef union {     /* any one of the UBiDi xxxMemory pointers; the get...Memory macros cast the address of such a field to this type for ubidi_getMemory() */
   392     DirProp *dirPropsMemory;
   393     UBiDiLevel *levelsMemory;
   394     Opening *openingsMemory;
   395     Para *parasMemory;
   396     Run *runsMemory;
   397     Isolate *isolatesMemory;
   398 } BidiMemoryForAllocation;
   400 /* Macros for initial checks at function entry. Each expands to a bare if statement (no do/while wrapper), so use it as a statement of its own and not directly before an else. The errcode argument is assigned to, so pass an lvalue. */
   401 #define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   /* return retvalue if the pointer is NULL or *pErrcode already is a failure */ \
   402         if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue
   403 #define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   /* set U_INVALID_STATE_ERROR and return unless IS_VALID_PARA(bidi) */ \
   404         if(!IS_VALID_PARA(bidi)) {  \
   405             errcode=U_INVALID_STATE_ERROR;  \
   406             return retvalue;                \
   407         }
   408 #define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   /* set U_INVALID_STATE_ERROR and return unless IS_VALID_PARA_OR_LINE(bidi) */ \
   409         if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
   410             errcode=U_INVALID_STATE_ERROR;  \
   411             return retvalue;                \
   412         }
   413 #define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   /* accepted range is start <= arg < limit */ \
   414         if((arg)<(start) || (arg)>=(limit)) {       \
   415             (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
   416             return retvalue;                        \
   417         }
   419 #define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   /* same checks as above, for functions returning void */ \
   420         if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return
   421 #define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
   422         if(!IS_VALID_PARA(bidi)) {  \
   423             errcode=U_INVALID_STATE_ERROR;  \
   424             return;                \
   425         }
   426 #define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
   427         if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
   428             errcode=U_INVALID_STATE_ERROR;  \
   429             return;                \
   430         }
   431 #define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
   432         if((arg)<(start) || (arg)>=(limit)) {       \
   433             (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
   434             return;                        \
   435         }
   437 /* helper function to (re)allocate memory if allowed; the macros below pass the address of one xxxMemory pointer, the address of its size field, the permission flag and the size needed */
   438 U_CFUNC UBool
   439 ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
   441 /* helper macros for each allocated array in UBiDi; dirProps and levels pass (length) as is, runs multiplies it by sizeof(Run) */
   442 #define getDirPropsMemory(pBiDi, length) \
   443         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
   444                         (pBiDi)->mayAllocateText, (length))
   446 #define getLevelsMemory(pBiDi, length) \
   447         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
   448                         (pBiDi)->mayAllocateText, (length))
   450 #define getRunsMemory(pBiDi, length) \
   451         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
   452                         (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
   454 /* additional macros used by ubidi_open() - always allow allocation (mayAllocate is passed as TRUE) */
   455 #define getInitialDirPropsMemory(pBiDi, length) \
   456         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
   457                         TRUE, (length))
   459 #define getInitialLevelsMemory(pBiDi, length) \
   460         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
   461                         TRUE, (length))
   463 #define getInitialOpeningsMemory(pBiDi, length) \
   464         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
   465                         TRUE, (length)*sizeof(Opening))
   467 #define getInitialParasMemory(pBiDi, length) \
   468         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
   469                         TRUE, (length)*sizeof(Para))
   471 #define getInitialRunsMemory(pBiDi, length) \
   472         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
   473                         TRUE, (length)*sizeof(Run))
   475 #define getInitialIsolatesMemory(pBiDi, length) \
   476         ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
   477                         TRUE, (length)*sizeof(Isolate))
   479 #endif
   481 #endif

mercurial