intl/icu/source/common/propsvec.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2002-2011, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  propsvec.c
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2002feb22
    14 *   created by: Markus W. Scherer
    15 *
    16 *   Store bits (Unicode character properties) in bit set vectors.
    17 */
    19 #include <stdlib.h>
    20 #include "unicode/utypes.h"
    21 #include "cmemory.h"
    22 #include "utrie.h"
    23 #include "utrie2.h"
    24 #include "uarrsort.h"
    25 #include "propsvec.h"
    26 #include "uassert.h"
    28 struct UPropsVectors {
    29     uint32_t *v;
    30     int32_t columns;  /* number of columns, plus two for start & limit values */
    31     int32_t maxRows;
    32     int32_t rows;
    33     int32_t prevRow;  /* search optimization: remember last row seen */
    34     UBool isCompacted;
    35 };
    37 #define UPVEC_INITIAL_ROWS (1<<12)
    38 #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
    39 #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
    41 U_CAPI UPropsVectors * U_EXPORT2
    42 upvec_open(int32_t columns, UErrorCode *pErrorCode) {
    43     UPropsVectors *pv;
    44     uint32_t *v, *row;
    45     uint32_t cp;
    47     if(U_FAILURE(*pErrorCode)) {
    48         return NULL;
    49     }
    50     if(columns<1) {
    51         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    52         return NULL;
    53     }
    54     columns+=2; /* count range start and limit columns */
    56     pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
    57     v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4);
    58     if(pv==NULL || v==NULL) {
    59         uprv_free(pv);
    60         uprv_free(v);
    61         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    62         return NULL;
    63     }
    64     uprv_memset(pv, 0, sizeof(UPropsVectors));
    65     pv->v=v;
    66     pv->columns=columns;
    67     pv->maxRows=UPVEC_INITIAL_ROWS;
    68     pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);
    70     /* set the all-Unicode row and the special-value rows */
    71     row=pv->v;
    72     uprv_memset(row, 0, pv->rows*columns*4);
    73     row[0]=0;
    74     row[1]=0x110000;
    75     row+=columns;
    76     for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) {
    77         row[0]=cp;
    78         row[1]=cp+1;
    79         row+=columns;
    80     }
    81     return pv;
    82 }
    84 U_CAPI void U_EXPORT2
    85 upvec_close(UPropsVectors *pv) {
    86     if(pv!=NULL) {
    87         uprv_free(pv->v);
    88         uprv_free(pv);
    89     }
    90 }
    92 static uint32_t *
    93 _findRow(UPropsVectors *pv, UChar32 rangeStart) {
    94     uint32_t *row;
    95     int32_t columns, i, start, limit, prevRow;
    97     columns=pv->columns;
    98     limit=pv->rows;
    99     prevRow=pv->prevRow;
   101     /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
   102     row=pv->v+prevRow*columns;
   103     if(rangeStart>=(UChar32)row[0]) {
   104         if(rangeStart<(UChar32)row[1]) {
   105             /* same row as last seen */
   106             return row;
   107         } else if(rangeStart<(UChar32)(row+=columns)[1]) {
   108             /* next row after the last one */
   109             pv->prevRow=prevRow+1;
   110             return row;
   111         } else if(rangeStart<(UChar32)(row+=columns)[1]) {
   112             /* second row after the last one */
   113             pv->prevRow=prevRow+2;
   114             return row;
   115         } else if((rangeStart-(UChar32)row[1])<10) {
   116             /* we are close, continue looping */
   117             prevRow+=2;
   118             do {
   119                 ++prevRow;
   120                 row+=columns;
   121             } while(rangeStart>=(UChar32)row[1]);
   122             pv->prevRow=prevRow;
   123             return row;
   124         }
   125     } else if(rangeStart<(UChar32)pv->v[1]) {
   126         /* the very first row */
   127         pv->prevRow=0;
   128         return pv->v;
   129     }
   131     /* do a binary search for the start of the range */
   132     start=0;
   133     while(start<limit-1) {
   134         i=(start+limit)/2;
   135         row=pv->v+i*columns;
   136         if(rangeStart<(UChar32)row[0]) {
   137             limit=i;
   138         } else if(rangeStart<(UChar32)row[1]) {
   139             pv->prevRow=i;
   140             return row;
   141         } else {
   142             start=i;
   143         }
   144     }
   146     /* must be found because all ranges together always cover all of Unicode */
   147     pv->prevRow=start;
   148     return pv->v+start*columns;
   149 }
   151 U_CAPI void U_EXPORT2
   152 upvec_setValue(UPropsVectors *pv,
   153                UChar32 start, UChar32 end,
   154                int32_t column,
   155                uint32_t value, uint32_t mask,
   156                UErrorCode *pErrorCode) {
   157     uint32_t *firstRow, *lastRow;
   158     int32_t columns;
   159     UChar32 limit;
   160     UBool splitFirstRow, splitLastRow;
   162     /* argument checking */
   163     if(U_FAILURE(*pErrorCode)) {
   164         return;
   165     }
   166     if( pv==NULL ||
   167         start<0 || start>end || end>UPVEC_MAX_CP ||
   168         column<0 || column>=(pv->columns-2)
   169     ) {
   170         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   171         return;
   172     }
   173     if(pv->isCompacted) {
   174         *pErrorCode=U_NO_WRITE_PERMISSION;
   175         return;
   176     }
   177     limit=end+1;
   179     /* initialize */
   180     columns=pv->columns;
   181     column+=2; /* skip range start and limit columns */
   182     value&=mask;
   184     /* find the rows whose ranges overlap with the input range */
   186     /* find the first and last rows, always successful */
   187     firstRow=_findRow(pv, start);
   188     lastRow=_findRow(pv, end);
   190     /*
   191      * Rows need to be split if they partially overlap with the
   192      * input range (only possible for the first and last rows)
   193      * and if their value differs from the input value.
   194      */
   195     splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
   196     splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
   198     /* split first/last rows if necessary */
   199     if(splitFirstRow || splitLastRow) {
   200         int32_t count, rows;
   202         rows=pv->rows;
   203         if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
   204             uint32_t *newVectors;
   205             int32_t newMaxRows;
   207             if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
   208                 newMaxRows=UPVEC_MEDIUM_ROWS;
   209             } else if(pv->maxRows<UPVEC_MAX_ROWS) {
   210                 newMaxRows=UPVEC_MAX_ROWS;
   211             } else {
   212                 /* Implementation bug, or UPVEC_MAX_ROWS too low. */
   213                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   214                 return;
   215             }
   216             newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
   217             if(newVectors==NULL) {
   218                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   219                 return;
   220             }
   221             uprv_memcpy(newVectors, pv->v, rows*columns*4);
   222             firstRow=newVectors+(firstRow-pv->v);
   223             lastRow=newVectors+(lastRow-pv->v);
   224             uprv_free(pv->v);
   225             pv->v=newVectors;
   226             pv->maxRows=newMaxRows;
   227         }
   229         /* count the number of row cells to move after the last row, and move them */
   230         count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
   231         if(count>0) {
   232             uprv_memmove(
   233                 lastRow+(1+splitFirstRow+splitLastRow)*columns,
   234                 lastRow+columns,
   235                 count*4);
   236         }
   237         pv->rows=rows+splitFirstRow+splitLastRow;
   239         /* split the first row, and move the firstRow pointer to the second part */
   240         if(splitFirstRow) {
   241             /* copy all affected rows up one and move the lastRow pointer */
   242             count = (int32_t)((lastRow-firstRow)+columns);
   243             uprv_memmove(firstRow+columns, firstRow, count*4);
   244             lastRow+=columns;
   246             /* split the range and move the firstRow pointer */
   247             firstRow[1]=firstRow[columns]=(uint32_t)start;
   248             firstRow+=columns;
   249         }
   251         /* split the last row */
   252         if(splitLastRow) {
   253             /* copy the last row data */
   254             uprv_memcpy(lastRow+columns, lastRow, columns*4);
   256             /* split the range and move the firstRow pointer */
   257             lastRow[1]=lastRow[columns]=(uint32_t)limit;
   258         }
   259     }
   261     /* set the "row last seen" to the last row for the range */
   262     pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
   264     /* set the input value in all remaining rows */
   265     firstRow+=column;
   266     lastRow+=column;
   267     mask=~mask;
   268     for(;;) {
   269         *firstRow=(*firstRow&mask)|value;
   270         if(firstRow==lastRow) {
   271             break;
   272         }
   273         firstRow+=columns;
   274     }
   275 }
   277 U_CAPI uint32_t U_EXPORT2
   278 upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
   279     uint32_t *row;
   280     UPropsVectors *ncpv;
   282     if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) {
   283         return 0;
   284     }
   285     ncpv=(UPropsVectors *)pv;
   286     row=_findRow(ncpv, c);
   287     return row[2+column];
   288 }
   290 U_CAPI uint32_t * U_EXPORT2
   291 upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
   292              UChar32 *pRangeStart, UChar32 *pRangeEnd) {
   293     uint32_t *row;
   294     int32_t columns;
   296     if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
   297         return NULL;
   298     }
   300     columns=pv->columns;
   301     row=pv->v+rowIndex*columns;
   302     if(pRangeStart!=NULL) {
   303         *pRangeStart=(UChar32)row[0];
   304     }
   305     if(pRangeEnd!=NULL) {
   306         *pRangeEnd=(UChar32)row[1]-1;
   307     }
   308     return row+2;
   309 }
   311 static int32_t U_CALLCONV
   312 upvec_compareRows(const void *context, const void *l, const void *r) {
   313     const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
   314     const UPropsVectors *pv=(const UPropsVectors *)context;
   315     int32_t i, count, columns;
   317     count=columns=pv->columns; /* includes start/limit columns */
   319     /* start comparing after start/limit but wrap around to them */
   320     i=2;
   321     do {
   322         if(left[i]!=right[i]) {
   323             return left[i]<right[i] ? -1 : 1;
   324         }
   325         if(++i==columns) {
   326             i=0;
   327         }
   328     } while(--count>0);
   330     return 0;
   331 }
   333 U_CAPI void U_EXPORT2
   334 upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
   335     uint32_t *row;
   336     int32_t i, columns, valueColumns, rows, count;
   337     UChar32 start, limit;
   339     /* argument checking */
   340     if(U_FAILURE(*pErrorCode)) {
   341         return;
   342     }
   343     if(handler==NULL) {
   344         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   345         return;
   346     }
   347     if(pv->isCompacted) {
   348         return;
   349     }
   351     /* Set the flag now: Sorting and compacting destroys the builder data structure. */
   352     pv->isCompacted=TRUE;
   354     rows=pv->rows;
   355     columns=pv->columns;
   356     U_ASSERT(columns>=3); /* upvec_open asserts this */
   357     valueColumns=columns-2; /* not counting start & limit */
   359     /* sort the properties vectors to find unique vector values */
   360     uprv_sortArray(pv->v, rows, columns*4,
   361                    upvec_compareRows, pv, FALSE, pErrorCode);
   362     if(U_FAILURE(*pErrorCode)) {
   363         return;
   364     }
   366     /*
   367      * Find and set the special values.
   368      * This has to do almost the same work as the compaction below,
   369      * to find the indexes where the special-value rows will move.
   370      */
   371     row=pv->v;
   372     count=-valueColumns;
   373     for(i=0; i<rows; ++i) {
   374         start=(UChar32)row[0];
   376         /* count a new values vector if it is different from the current one */
   377         if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
   378             count+=valueColumns;
   379         }
   381         if(start>=UPVEC_FIRST_SPECIAL_CP) {
   382             handler(context, start, start, count, row+2, valueColumns, pErrorCode);
   383             if(U_FAILURE(*pErrorCode)) {
   384                 return;
   385             }
   386         }
   388         row+=columns;
   389     }
   391     /* count is at the beginning of the last vector, add valueColumns to include that last vector */
   392     count+=valueColumns;
   394     /* Call the handler once more to signal the start of delivering real values. */
   395     handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
   396             count, row-valueColumns, valueColumns, pErrorCode);
   397     if(U_FAILURE(*pErrorCode)) {
   398         return;
   399     }
   401     /*
   402      * Move vector contents up to a contiguous array with only unique
   403      * vector values, and call the handler function for each vector.
   404      *
   405      * This destroys the Properties Vector structure and replaces it
   406      * with an array of just vector values.
   407      */
   408     row=pv->v;
   409     count=-valueColumns;
   410     for(i=0; i<rows; ++i) {
   411         /* fetch these first before memmove() may overwrite them */
   412         start=(UChar32)row[0];
   413         limit=(UChar32)row[1];
   415         /* add a new values vector if it is different from the current one */
   416         if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
   417             count+=valueColumns;
   418             uprv_memmove(pv->v+count, row+2, valueColumns*4);
   419         }
   421         if(start<UPVEC_FIRST_SPECIAL_CP) {
   422             handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
   423             if(U_FAILURE(*pErrorCode)) {
   424                 return;
   425             }
   426         }
   428         row+=columns;
   429     }
   431     /* count is at the beginning of the last vector, add one to include that last vector */
   432     pv->rows=count/valueColumns+1;
   433 }
   435 U_CAPI const uint32_t * U_EXPORT2
   436 upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
   437     if(!pv->isCompacted) {
   438         return NULL;
   439     }
   440     if(pRows!=NULL) {
   441         *pRows=pv->rows;
   442     }
   443     if(pColumns!=NULL) {
   444         *pColumns=pv->columns-2;
   445     }
   446     return pv->v;
   447 }
   449 U_CAPI uint32_t * U_EXPORT2
   450 upvec_cloneArray(const UPropsVectors *pv,
   451                  int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
   452     uint32_t *clonedArray;
   453     int32_t byteLength;
   455     if(U_FAILURE(*pErrorCode)) {
   456         return NULL;
   457     }
   458     if(!pv->isCompacted) {
   459         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   460         return NULL;
   461     }
   462     byteLength=pv->rows*(pv->columns-2)*4;
   463     clonedArray=(uint32_t *)uprv_malloc(byteLength);
   464     if(clonedArray==NULL) {
   465         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   466         return NULL;
   467     }
   468     uprv_memcpy(clonedArray, pv->v, byteLength);
   469     if(pRows!=NULL) {
   470         *pRows=pv->rows;
   471     }
   472     if(pColumns!=NULL) {
   473         *pColumns=pv->columns-2;
   474     }
   475     return clonedArray;
   476 }
   478 U_CAPI UTrie2 * U_EXPORT2
   479 upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
   480     UPVecToUTrie2Context toUTrie2={ NULL };
   481     upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode);
   482     utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
   483     if(U_FAILURE(*pErrorCode)) {
   484         utrie2_close(toUTrie2.trie);
   485         toUTrie2.trie=NULL;
   486     }
   487     return toUTrie2.trie;
   488 }
   490 /*
   491  * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
   492  * some 16-bit field and builds and returns a UTrie2.
   493  */
   495 U_CAPI void U_CALLCONV
   496 upvec_compactToUTrie2Handler(void *context,
   497                              UChar32 start, UChar32 end,
   498                              int32_t rowIndex, uint32_t *row, int32_t columns,
   499                              UErrorCode *pErrorCode) {
   500     UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context;
   501     if(start<UPVEC_FIRST_SPECIAL_CP) {
   502         utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
   503     } else {
   504         switch(start) {
   505         case UPVEC_INITIAL_VALUE_CP:
   506             toUTrie2->initialValue=rowIndex;
   507             break;
   508         case UPVEC_ERROR_VALUE_CP:
   509             toUTrie2->errorValue=rowIndex;
   510             break;
   511         case UPVEC_START_REAL_VALUES_CP:
   512             toUTrie2->maxValue=rowIndex;
   513             if(rowIndex>0xffff) {
   514                 /* too many rows for a 16-bit trie */
   515                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   516             } else {
   517                 toUTrie2->trie=utrie2_open(toUTrie2->initialValue,
   518                                            toUTrie2->errorValue, pErrorCode);
   519             }
   520             break;
   521         default:
   522             break;
   523         }
   524     }
   525 }

mercurial