intl/icu/source/common/ubidiwrt.c

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie users to test the
condition and pass a nullptr when double-key logic is disabled.

     1 /*
     2 ******************************************************************************
     3 *
     4 *   Copyright (C) 2000-2011, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 ******************************************************************************
     8 *   file name:  ubidiwrt.c
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 1999aug06
    14 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
    15 *
    16 * This file contains implementations for BiDi functions that use
    17 * the core algorithm and core API to write reordered text.
    18 */
    20 /* set import/export definitions */
    21 #ifndef U_COMMON_IMPLEMENTATION
    22 #   define U_COMMON_IMPLEMENTATION
    23 #endif
    25 #include "unicode/utypes.h"
    26 #include "unicode/ustring.h"
    27 #include "unicode/uchar.h"
    28 #include "unicode/ubidi.h"
    29 #include "unicode/utf16.h"
    30 #include "cmemory.h"
    31 #include "ustr_imp.h"
    32 #include "ubidiimp.h"
    34 /*
    35  * The function implementations in this file are designed
    36  * for UTF-16 and UTF-32, not for UTF-8.
    37  *
    38  * Assumptions that are not true for UTF-8:
    39  * - Any code point always needs the same number of code units
    40  *   ("minimum-length-problem" of UTF-8)
    41  * - The BiDi control characters need only one code unit each
    42  *
    43  * Further assumptions for all UTFs:
    44  * - u_charMirror(c) needs the same number of code units as c
    45  */
    46 #if UTF_SIZE==8
    47 # error reimplement ubidi_writeReordered() for UTF-8, see comment above
    48 #endif
    50 #define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
    52 /*
    53  * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
    54  * semantically write RTL runs in reverse and later reverse them again.
    55  * Instead, we actually write them in forward order to begin with.
    56  * However, if the RTL run was to be mirrored, we need to mirror here now
    57  * since the implicit second reversal must not do it.
    58  * It looks strange to do mirroring in LTR output, but it is only because
    59  * we are writing RTL output in reverse.
    60  */
    61 static int32_t
    62 doWriteForward(const UChar *src, int32_t srcLength,
    63                UChar *dest, int32_t destSize,
    64                uint16_t options,
    65                UErrorCode *pErrorCode) {
    66     /* optimize for several combinations of options */
    67     switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
    68     case 0: {
    69         /* simply copy the LTR run to the destination */
    70         int32_t length=srcLength;
    71         if(destSize<length) {
    72             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    73             return srcLength;
    74         }
    75         do {
    76             *dest++=*src++;
    77         } while(--length>0);
    78         return srcLength;
    79     }
    80     case UBIDI_DO_MIRRORING: {
    81         /* do mirroring */
    82         int32_t i=0, j=0;
    83         UChar32 c;
    85         if(destSize<srcLength) {
    86             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    87             return srcLength;
    88         }
    89         do {
    90             U16_NEXT(src, i, srcLength, c);
    91             c=u_charMirror(c);
    92             U16_APPEND_UNSAFE(dest, j, c);
    93         } while(i<srcLength);
    94         return srcLength;
    95     }
    96     case UBIDI_REMOVE_BIDI_CONTROLS: {
    97         /* copy the LTR run and remove any BiDi control characters */
    98         int32_t remaining=destSize;
    99         UChar c;
   100         do {
   101             c=*src++;
   102             if(!IS_BIDI_CONTROL_CHAR(c)) {
   103                 if(--remaining<0) {
   104                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   106                     /* preflight the length */
   107                     while(--srcLength>0) {
   108                         c=*src++;
   109                         if(!IS_BIDI_CONTROL_CHAR(c)) {
   110                             --remaining;
   111                         }
   112                     }
   113                     return destSize-remaining;
   114                 }
   115                 *dest++=c;
   116             }
   117         } while(--srcLength>0);
   118         return destSize-remaining;
   119     }
   120     default: {
   121         /* remove BiDi control characters and do mirroring */
   122         int32_t remaining=destSize;
   123         int32_t i, j=0;
   124         UChar32 c;
   125         do {
   126             i=0;
   127             U16_NEXT(src, i, srcLength, c);
   128             src+=i;
   129             srcLength-=i;
   130             if(!IS_BIDI_CONTROL_CHAR(c)) {
   131                 remaining-=i;
   132                 if(remaining<0) {
   133                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   135                     /* preflight the length */
   136                     while(srcLength>0) {
   137                         c=*src++;
   138                         if(!IS_BIDI_CONTROL_CHAR(c)) {
   139                             --remaining;
   140                         }
   141                         --srcLength;
   142                     }
   143                     return destSize-remaining;
   144                 }
   145                 c=u_charMirror(c);
   146                 U16_APPEND_UNSAFE(dest, j, c);
   147             }
   148         } while(srcLength>0);
   149         return j;
   150     }
   151     } /* end of switch */
   152 }
   154 static int32_t
   155 doWriteReverse(const UChar *src, int32_t srcLength,
   156                UChar *dest, int32_t destSize,
   157                uint16_t options,
   158                UErrorCode *pErrorCode) {
   159     /*
   160      * RTL run -
   161      *
   162      * RTL runs need to be copied to the destination in reverse order
   163      * of code points, not code units, to keep Unicode characters intact.
   164      *
   165      * The general strategy for this is to read the source text
   166      * in backward order, collect all code units for a code point
   167      * (and optionally following combining characters, see below),
   168      * and copy all these code units in ascending order
   169      * to the destination for this run.
   170      *
   171      * Several options request whether combining characters
   172      * should be kept after their base characters,
   173      * whether BiDi control characters should be removed, and
   174      * whether characters should be replaced by their mirror-image
   175      * equivalent Unicode characters.
   176      */
   177     int32_t i, j;
   178     UChar32 c;
   180     /* optimize for several combinations of options */
   181     switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
   182     case 0:
   183         /*
   184          * With none of the "complicated" options set, the destination
   185          * run will have the same length as the source run,
   186          * and there is no mirroring and no keeping combining characters
   187          * with their base characters.
   188          */
   189         if(destSize<srcLength) {
   190             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   191             return srcLength;
   192         }
   193         destSize=srcLength;
   195         /* preserve character integrity */
   196         do {
   197             /* i is always after the last code unit known to need to be kept in this segment */
   198             i=srcLength;
   200             /* collect code units for one base character */
   201             U16_BACK_1(src, 0, srcLength);
   203             /* copy this base character */
   204             j=srcLength;
   205             do {
   206                 *dest++=src[j++];
   207             } while(j<i);
   208         } while(srcLength>0);
   209         break;
   210     case UBIDI_KEEP_BASE_COMBINING:
   211         /*
   212          * Here, too, the destination
   213          * run will have the same length as the source run,
   214          * and there is no mirroring.
   215          * We do need to keep combining characters with their base characters.
   216          */
   217         if(destSize<srcLength) {
   218             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   219             return srcLength;
   220         }
   221         destSize=srcLength;
   223         /* preserve character integrity */
   224         do {
   225             /* i is always after the last code unit known to need to be kept in this segment */
   226             i=srcLength;
   228             /* collect code units and modifier letters for one base character */
   229             do {
   230                 U16_PREV(src, 0, srcLength, c);
   231             } while(srcLength>0 && IS_COMBINING(u_charType(c)));
   233             /* copy this "user character" */
   234             j=srcLength;
   235             do {
   236                 *dest++=src[j++];
   237             } while(j<i);
   238         } while(srcLength>0);
   239         break;
   240     default:
   241         /*
   242          * With several "complicated" options set, this is the most
   243          * general and the slowest copying of an RTL run.
   244          * We will do mirroring, remove BiDi controls, and
   245          * keep combining characters with their base characters
   246          * as requested.
   247          */
   248         if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
   249             i=srcLength;
   250         } else {
   251             /* we need to find out the destination length of the run,
   252                which will not include the BiDi control characters */
   253             int32_t length=srcLength;
   254             UChar ch;
   256             i=0;
   257             do {
   258                 ch=*src++;
   259                 if(!IS_BIDI_CONTROL_CHAR(ch)) {
   260                     ++i;
   261                 }
   262             } while(--length>0);
   263             src-=srcLength;
   264         }
   266         if(destSize<i) {
   267             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   268             return i;
   269         }
   270         destSize=i;
   272         /* preserve character integrity */
   273         do {
   274             /* i is always after the last code unit known to need to be kept in this segment */
   275             i=srcLength;
   277             /* collect code units for one base character */
   278             U16_PREV(src, 0, srcLength, c);
   279             if(options&UBIDI_KEEP_BASE_COMBINING) {
   280                 /* collect modifier letters for this base character */
   281                 while(srcLength>0 && IS_COMBINING(u_charType(c))) {
   282                     U16_PREV(src, 0, srcLength, c);
   283                 }
   284             }
   286             if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
   287                 /* do not copy this BiDi control character */
   288                 continue;
   289             }
   291             /* copy this "user character" */
   292             j=srcLength;
   293             if(options&UBIDI_DO_MIRRORING) {
   294                 /* mirror only the base character */
   295                 int32_t k=0;
   296                 c=u_charMirror(c);
   297                 U16_APPEND_UNSAFE(dest, k, c);
   298                 dest+=k;
   299                 j+=k;
   300             }
   301             while(j<i) {
   302                 *dest++=src[j++];
   303             }
   304         } while(srcLength>0);
   305         break;
   306     } /* end of switch */
   308     return destSize;
   309 }
   311 U_CAPI int32_t U_EXPORT2
   312 ubidi_writeReverse(const UChar *src, int32_t srcLength,
   313                    UChar *dest, int32_t destSize,
   314                    uint16_t options,
   315                    UErrorCode *pErrorCode) {
   316     int32_t destLength;
   318     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   319         return 0;
   320     }
   322     /* more error checking */
   323     if( src==NULL || srcLength<-1 ||
   324         destSize<0 || (destSize>0 && dest==NULL))
   325     {
   326         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   327         return 0;
   328     }
   330     /* do input and output overlap? */
   331     if( dest!=NULL &&
   332         ((src>=dest && src<dest+destSize) ||
   333          (dest>=src && dest<src+srcLength)))
   334     {
   335         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   336         return 0;
   337     }
   339     if(srcLength==-1) {
   340         srcLength=u_strlen(src);
   341     }
   342     if(srcLength>0) {
   343         destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
   344     } else {
   345         /* nothing to do */
   346         destLength=0;
   347     }
   349     return u_terminateUChars(dest, destSize, destLength, pErrorCode);
   350 }
   352 U_CAPI int32_t U_EXPORT2
   353 ubidi_writeReordered(UBiDi *pBiDi,
   354                      UChar *dest, int32_t destSize,
   355                      uint16_t options,
   356                      UErrorCode *pErrorCode) {
   357     const UChar *text;
   358     UChar *saveDest;
   359     int32_t length, destCapacity;
   360     int32_t run, runCount, logicalStart, runLength;
   362     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   363         return 0;
   364     }
   366     /* more error checking */
   367     if( pBiDi==NULL ||
   368         (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 ||
   369         destSize<0 || (destSize>0 && dest==NULL))
   370     {
   371         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   372         return 0;
   373     }
   375     /* do input and output overlap? */
   376     if( dest!=NULL &&
   377         ((text>=dest && text<dest+destSize) ||
   378          (dest>=text && dest<text+pBiDi->originalLength)))
   379     {
   380         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   381         return 0;
   382     }
   384     if(length==0) {
   385         /* nothing to do */
   386         return u_terminateUChars(dest, destSize, 0, pErrorCode);
   387     }
   389     runCount=ubidi_countRuns(pBiDi, pErrorCode);
   390     if(U_FAILURE(*pErrorCode)) {
   391         return 0;
   392     }
   394     /* destSize shrinks, later destination length=destCapacity-destSize */
   395     saveDest=dest;
   396     destCapacity=destSize;
   398     /*
   399      * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
   400      * reordering mode (checked below) is appropriate.
   401      */
   402     if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   403         options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
   404         options&=~UBIDI_REMOVE_BIDI_CONTROLS;
   405     }
   406     /*
   407      * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
   408      * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
   409      */
   410     if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
   411         options|=UBIDI_REMOVE_BIDI_CONTROLS;
   412         options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
   413     }
   414     /*
   415      * If we do not perform the "inverse BiDi" algorithm, then we
   416      * don't need to insert any LRMs, and don't need to test for it.
   417      */
   418     if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
   419        (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT)  &&
   420        (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
   421        (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
   422         options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
   423     }
   424     /*
   425      * Iterate through all visual runs and copy the run text segments to
   426      * the destination, according to the options.
   427      *
   428      * The tests for where to insert LRMs ignore the fact that there may be
   429      * BN codes or non-BMP code points at the beginning and end of a run;
   430      * they may insert LRMs unnecessarily but the tests are faster this way
   431      * (this would have to be improved for UTF-8).
   432      *
   433      * Note that the only errors that are set by doWriteXY() are buffer overflow
   434      * errors. Ignore them until the end, and continue for preflighting.
   435      */
   436     if(!(options&UBIDI_OUTPUT_REVERSE)) {
   437         /* forward output */
   438         if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
   439             /* do not insert BiDi controls */
   440             for(run=0; run<runCount; ++run) {
   441                 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
   442                     runLength=doWriteForward(text+logicalStart, runLength,
   443                                              dest, destSize,
   444                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
   445                 } else {
   446                     runLength=doWriteReverse(text+logicalStart, runLength,
   447                                              dest, destSize,
   448                                              options, pErrorCode);
   449                 }
   450                 if(dest!=NULL) {
   451                   dest+=runLength;
   452                 }
   453                 destSize-=runLength;
   454             }
   455         } else {
   456             /* insert BiDi controls for "inverse BiDi" */
   457             const DirProp *dirProps=pBiDi->dirProps;
   458             const UChar *src;
   459             UChar uc;
   460             UBiDiDirection dir;
   461             int32_t markFlag;
   463             for(run=0; run<runCount; ++run) {
   464                 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
   465                 src=text+logicalStart;
   466                 /* check if something relevant in insertPoints */
   467                 markFlag=pBiDi->runs[run].insertRemove;
   468                 if(markFlag<0) {        /* BiDi controls count */
   469                     markFlag=0;
   470                 }
   472                 if(UBIDI_LTR==dir) {
   473                     if((pBiDi->isInverse) &&
   474                        (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
   475                         markFlag |= LRM_BEFORE;
   476                     }
   477                     if (markFlag & LRM_BEFORE) {
   478                         uc=LRM_CHAR;
   479                     }
   480                     else if (markFlag & RLM_BEFORE) {
   481                         uc=RLM_CHAR;
   482                     }
   483                     else  uc=0;
   484                     if(uc) {
   485                         if(destSize>0) {
   486                             *dest++=uc;
   487                         }
   488                         --destSize;
   489                     }
   491                     runLength=doWriteForward(src, runLength,
   492                                              dest, destSize,
   493                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
   494                     if(dest!=NULL) {
   495                       dest+=runLength;
   496                     }
   497                     destSize-=runLength;
   499                     if((pBiDi->isInverse) &&
   500                        (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
   501                         markFlag |= LRM_AFTER;
   502                     }
   503                     if (markFlag & LRM_AFTER) {
   504                         uc=LRM_CHAR;
   505                     }
   506                     else if (markFlag & RLM_AFTER) {
   507                         uc=RLM_CHAR;
   508                     }
   509                     else  uc=0;
   510                     if(uc) {
   511                         if(destSize>0) {
   512                             *dest++=uc;
   513                         }
   514                         --destSize;
   515                     }
   516                 } else {                /* RTL run */
   517                     if((pBiDi->isInverse) &&
   518                        (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
   519                         markFlag |= RLM_BEFORE;
   520                     }
   521                     if (markFlag & LRM_BEFORE) {
   522                         uc=LRM_CHAR;
   523                     }
   524                     else if (markFlag & RLM_BEFORE) {
   525                         uc=RLM_CHAR;
   526                     }
   527                     else  uc=0;
   528                     if(uc) {
   529                         if(destSize>0) {
   530                             *dest++=uc;
   531                         }
   532                         --destSize;
   533                     }
   535                     runLength=doWriteReverse(src, runLength,
   536                                              dest, destSize,
   537                                              options, pErrorCode);
   538                     if(dest!=NULL) {
   539                       dest+=runLength;
   540                     }
   541                     destSize-=runLength;
   543                     if((pBiDi->isInverse) &&
   544                        (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
   545                         markFlag |= RLM_AFTER;
   546                     }
   547                     if (markFlag & LRM_AFTER) {
   548                         uc=LRM_CHAR;
   549                     }
   550                     else if (markFlag & RLM_AFTER) {
   551                         uc=RLM_CHAR;
   552                     }
   553                     else  uc=0;
   554                     if(uc) {
   555                         if(destSize>0) {
   556                             *dest++=uc;
   557                         }
   558                         --destSize;
   559                     }
   560                 }
   561             }
   562         }
   563     } else {
   564         /* reverse output */
   565         if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
   566             /* do not insert BiDi controls */
   567             for(run=runCount; --run>=0;) {
   568                 if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
   569                     runLength=doWriteReverse(text+logicalStart, runLength,
   570                                              dest, destSize,
   571                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
   572                 } else {
   573                     runLength=doWriteForward(text+logicalStart, runLength,
   574                                              dest, destSize,
   575                                              options, pErrorCode);
   576                 }
   577                 if(dest!=NULL) {
   578                   dest+=runLength;
   579                 }
   580                 destSize-=runLength;
   581             }
   582         } else {
   583             /* insert BiDi controls for "inverse BiDi" */
   584             const DirProp *dirProps=pBiDi->dirProps;
   585             const UChar *src;
   586             UBiDiDirection dir;
   588             for(run=runCount; --run>=0;) {
   589                 /* reverse output */
   590                 dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
   591                 src=text+logicalStart;
   593                 if(UBIDI_LTR==dir) {
   594                     if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
   595                         if(destSize>0) {
   596                             *dest++=LRM_CHAR;
   597                         }
   598                         --destSize;
   599                     }
   601                     runLength=doWriteReverse(src, runLength,
   602                                              dest, destSize,
   603                                              (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
   604                     if(dest!=NULL) {
   605                       dest+=runLength;
   606                     }
   607                     destSize-=runLength;
   609                     if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
   610                         if(destSize>0) {
   611                             *dest++=LRM_CHAR;
   612                         }
   613                         --destSize;
   614                     }
   615                 } else {
   616                     if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
   617                         if(destSize>0) {
   618                             *dest++=RLM_CHAR;
   619                         }
   620                         --destSize;
   621                     }
   623                     runLength=doWriteForward(src, runLength,
   624                                              dest, destSize,
   625                                              options, pErrorCode);
   626                     if(dest!=NULL) {
   627                       dest+=runLength;
   628                     }
   629                     destSize-=runLength;
   631                     if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
   632                         if(destSize>0) {
   633                             *dest++=RLM_CHAR;
   634                         }
   635                         --destSize;
   636                     }
   637                 }
   638             }
   639         }
   640     }
   642     return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
   643 }

mercurial