1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/propsvec.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,525 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2002-2011, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: propsvec.c 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2002feb22 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Store bits (Unicode character properties) in bit set vectors. 1.20 +*/ 1.21 + 1.22 +#include <stdlib.h> 1.23 +#include "unicode/utypes.h" 1.24 +#include "cmemory.h" 1.25 +#include "utrie.h" 1.26 +#include "utrie2.h" 1.27 +#include "uarrsort.h" 1.28 +#include "propsvec.h" 1.29 +#include "uassert.h" 1.30 + 1.31 +struct UPropsVectors { 1.32 + uint32_t *v; 1.33 + int32_t columns; /* number of columns, plus two for start & limit values */ 1.34 + int32_t maxRows; 1.35 + int32_t rows; 1.36 + int32_t prevRow; /* search optimization: remember last row seen */ 1.37 + UBool isCompacted; 1.38 +}; 1.39 + 1.40 +#define UPVEC_INITIAL_ROWS (1<<12) 1.41 +#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) 1.42 +#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) 1.43 + 1.44 +U_CAPI UPropsVectors * U_EXPORT2 1.45 +upvec_open(int32_t columns, UErrorCode *pErrorCode) { 1.46 + UPropsVectors *pv; 1.47 + uint32_t *v, *row; 1.48 + uint32_t cp; 1.49 + 1.50 + if(U_FAILURE(*pErrorCode)) { 1.51 + return NULL; 1.52 + } 1.53 + if(columns<1) { 1.54 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.55 + return NULL; 1.56 + } 1.57 + columns+=2; /* count range start and limit columns */ 1.58 + 1.59 + pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); 1.60 + v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); 1.61 + if(pv==NULL || v==NULL) { 1.62 + uprv_free(pv); 1.63 + uprv_free(v); 1.64 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.65 + return NULL; 1.66 + } 1.67 + uprv_memset(pv, 0, sizeof(UPropsVectors)); 1.68 + pv->v=v; 1.69 + pv->columns=columns; 1.70 + pv->maxRows=UPVEC_INITIAL_ROWS; 1.71 + pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); 1.72 + 1.73 + /* set the all-Unicode row and the special-value rows */ 1.74 + row=pv->v; 1.75 + uprv_memset(row, 0, pv->rows*columns*4); 1.76 + row[0]=0; 1.77 + row[1]=0x110000; 1.78 + row+=columns; 1.79 + for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { 1.80 + row[0]=cp; 1.81 + row[1]=cp+1; 1.82 + row+=columns; 1.83 + } 1.84 + return pv; 1.85 +} 1.86 + 1.87 +U_CAPI void U_EXPORT2 1.88 +upvec_close(UPropsVectors *pv) { 1.89 + if(pv!=NULL) { 1.90 + uprv_free(pv->v); 1.91 + uprv_free(pv); 1.92 + } 1.93 +} 1.94 + 1.95 +static uint32_t * 1.96 +_findRow(UPropsVectors *pv, UChar32 rangeStart) { 1.97 + uint32_t *row; 1.98 + int32_t columns, i, start, limit, prevRow; 1.99 + 1.100 + columns=pv->columns; 1.101 + limit=pv->rows; 1.102 + prevRow=pv->prevRow; 1.103 + 1.104 + /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ 1.105 + row=pv->v+prevRow*columns; 1.106 + if(rangeStart>=(UChar32)row[0]) { 1.107 + if(rangeStart<(UChar32)row[1]) { 1.108 + /* same row as last seen */ 1.109 + return row; 1.110 + } else if(rangeStart<(UChar32)(row+=columns)[1]) { 1.111 + /* next row after the last one */ 1.112 + pv->prevRow=prevRow+1; 1.113 + return row; 1.114 + } else if(rangeStart<(UChar32)(row+=columns)[1]) { 1.115 + /* second row after the last one */ 1.116 + pv->prevRow=prevRow+2; 1.117 + return row; 1.118 + } else if((rangeStart-(UChar32)row[1])<10) { 1.119 + /* we are close, continue looping */ 1.120 + prevRow+=2; 1.121 + do { 1.122 + ++prevRow; 1.123 + row+=columns; 1.124 + } while(rangeStart>=(UChar32)row[1]); 1.125 + pv->prevRow=prevRow; 1.126 + return row; 1.127 + } 1.128 + } else if(rangeStart<(UChar32)pv->v[1]) { 1.129 + /* the very first row */ 1.130 + pv->prevRow=0; 1.131 + return pv->v; 1.132 + } 1.133 + 1.134 + /* do a binary search for the start of the range */ 1.135 + start=0; 1.136 + while(start<limit-1) { 1.137 + i=(start+limit)/2; 1.138 + row=pv->v+i*columns; 1.139 + if(rangeStart<(UChar32)row[0]) { 1.140 + limit=i; 1.141 + } else if(rangeStart<(UChar32)row[1]) { 1.142 + pv->prevRow=i; 1.143 + return row; 1.144 + } else { 1.145 + start=i; 1.146 + } 1.147 + } 1.148 + 1.149 + /* must be found because all ranges together always cover all of Unicode */ 1.150 + pv->prevRow=start; 1.151 + return pv->v+start*columns; 1.152 +} 1.153 + 1.154 +U_CAPI void U_EXPORT2 1.155 +upvec_setValue(UPropsVectors *pv, 1.156 + UChar32 start, UChar32 end, 1.157 + int32_t column, 1.158 + uint32_t value, uint32_t mask, 1.159 + UErrorCode *pErrorCode) { 1.160 + uint32_t *firstRow, *lastRow; 1.161 + int32_t columns; 1.162 + UChar32 limit; 1.163 + UBool splitFirstRow, splitLastRow; 1.164 + 1.165 + /* argument checking */ 1.166 + if(U_FAILURE(*pErrorCode)) { 1.167 + return; 1.168 + } 1.169 + if( pv==NULL || 1.170 + start<0 || start>end || end>UPVEC_MAX_CP || 1.171 + column<0 || column>=(pv->columns-2) 1.172 + ) { 1.173 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.174 + return; 1.175 + } 1.176 + if(pv->isCompacted) { 1.177 + *pErrorCode=U_NO_WRITE_PERMISSION; 1.178 + return; 1.179 + } 1.180 + limit=end+1; 1.181 + 1.182 + /* initialize */ 1.183 + columns=pv->columns; 1.184 + column+=2; /* skip range start and limit columns */ 1.185 + value&=mask; 1.186 + 1.187 + /* find the rows whose ranges overlap with the input range */ 1.188 + 1.189 + /* find the first and last rows, always successful */ 1.190 + firstRow=_findRow(pv, start); 1.191 + lastRow=_findRow(pv, end); 1.192 + 1.193 + /* 1.194 + * Rows need to be split if they partially overlap with the 1.195 + * input range (only possible for the first and last rows) 1.196 + * and if their value differs from the input value. 1.197 + */ 1.198 + splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); 1.199 + splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); 1.200 + 1.201 + /* split first/last rows if necessary */ 1.202 + if(splitFirstRow || splitLastRow) { 1.203 + int32_t count, rows; 1.204 + 1.205 + rows=pv->rows; 1.206 + if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { 1.207 + uint32_t *newVectors; 1.208 + int32_t newMaxRows; 1.209 + 1.210 + if(pv->maxRows<UPVEC_MEDIUM_ROWS) { 1.211 + newMaxRows=UPVEC_MEDIUM_ROWS; 1.212 + } else if(pv->maxRows<UPVEC_MAX_ROWS) { 1.213 + newMaxRows=UPVEC_MAX_ROWS; 1.214 + } else { 1.215 + /* Implementation bug, or UPVEC_MAX_ROWS too low. */ 1.216 + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 1.217 + return; 1.218 + } 1.219 + newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); 1.220 + if(newVectors==NULL) { 1.221 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.222 + return; 1.223 + } 1.224 + uprv_memcpy(newVectors, pv->v, rows*columns*4); 1.225 + firstRow=newVectors+(firstRow-pv->v); 1.226 + lastRow=newVectors+(lastRow-pv->v); 1.227 + uprv_free(pv->v); 1.228 + pv->v=newVectors; 1.229 + pv->maxRows=newMaxRows; 1.230 + } 1.231 + 1.232 + /* count the number of row cells to move after the last row, and move them */ 1.233 + count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); 1.234 + if(count>0) { 1.235 + uprv_memmove( 1.236 + lastRow+(1+splitFirstRow+splitLastRow)*columns, 1.237 + lastRow+columns, 1.238 + count*4); 1.239 + } 1.240 + pv->rows=rows+splitFirstRow+splitLastRow; 1.241 + 1.242 + /* split the first row, and move the firstRow pointer to the second part */ 1.243 + if(splitFirstRow) { 1.244 + /* copy all affected rows up one and move the lastRow pointer */ 1.245 + count = (int32_t)((lastRow-firstRow)+columns); 1.246 + uprv_memmove(firstRow+columns, firstRow, count*4); 1.247 + lastRow+=columns; 1.248 + 1.249 + /* split the range and move the firstRow pointer */ 1.250 + firstRow[1]=firstRow[columns]=(uint32_t)start; 1.251 + firstRow+=columns; 1.252 + } 1.253 + 1.254 + /* split the last row */ 1.255 + if(splitLastRow) { 1.256 + /* copy the last row data */ 1.257 + uprv_memcpy(lastRow+columns, lastRow, columns*4); 1.258 + 1.259 + /* split the range and move the firstRow pointer */ 1.260 + lastRow[1]=lastRow[columns]=(uint32_t)limit; 1.261 + } 1.262 + } 1.263 + 1.264 + /* set the "row last seen" to the last row for the range */ 1.265 + pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); 1.266 + 1.267 + /* set the input value in all remaining rows */ 1.268 + firstRow+=column; 1.269 + lastRow+=column; 1.270 + mask=~mask; 1.271 + for(;;) { 1.272 + *firstRow=(*firstRow&mask)|value; 1.273 + if(firstRow==lastRow) { 1.274 + break; 1.275 + } 1.276 + firstRow+=columns; 1.277 + } 1.278 +} 1.279 + 1.280 +U_CAPI uint32_t U_EXPORT2 1.281 +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { 1.282 + uint32_t *row; 1.283 + UPropsVectors *ncpv; 1.284 + 1.285 + if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { 1.286 + return 0; 1.287 + } 1.288 + ncpv=(UPropsVectors *)pv; 1.289 + row=_findRow(ncpv, c); 1.290 + return row[2+column]; 1.291 +} 1.292 + 1.293 +U_CAPI uint32_t * U_EXPORT2 1.294 +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, 1.295 + UChar32 *pRangeStart, UChar32 *pRangeEnd) { 1.296 + uint32_t *row; 1.297 + int32_t columns; 1.298 + 1.299 + if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { 1.300 + return NULL; 1.301 + } 1.302 + 1.303 + columns=pv->columns; 1.304 + row=pv->v+rowIndex*columns; 1.305 + if(pRangeStart!=NULL) { 1.306 + *pRangeStart=(UChar32)row[0]; 1.307 + } 1.308 + if(pRangeEnd!=NULL) { 1.309 + *pRangeEnd=(UChar32)row[1]-1; 1.310 + } 1.311 + return row+2; 1.312 +} 1.313 + 1.314 +static int32_t U_CALLCONV 1.315 +upvec_compareRows(const void *context, const void *l, const void *r) { 1.316 + const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; 1.317 + const UPropsVectors *pv=(const UPropsVectors *)context; 1.318 + int32_t i, count, columns; 1.319 + 1.320 + count=columns=pv->columns; /* includes start/limit columns */ 1.321 + 1.322 + /* start comparing after start/limit but wrap around to them */ 1.323 + i=2; 1.324 + do { 1.325 + if(left[i]!=right[i]) { 1.326 + return left[i]<right[i] ? -1 : 1; 1.327 + } 1.328 + if(++i==columns) { 1.329 + i=0; 1.330 + } 1.331 + } while(--count>0); 1.332 + 1.333 + return 0; 1.334 +} 1.335 + 1.336 +U_CAPI void U_EXPORT2 1.337 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { 1.338 + uint32_t *row; 1.339 + int32_t i, columns, valueColumns, rows, count; 1.340 + UChar32 start, limit; 1.341 + 1.342 + /* argument checking */ 1.343 + if(U_FAILURE(*pErrorCode)) { 1.344 + return; 1.345 + } 1.346 + if(handler==NULL) { 1.347 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.348 + return; 1.349 + } 1.350 + if(pv->isCompacted) { 1.351 + return; 1.352 + } 1.353 + 1.354 + /* Set the flag now: Sorting and compacting destroys the builder data structure. */ 1.355 + pv->isCompacted=TRUE; 1.356 + 1.357 + rows=pv->rows; 1.358 + columns=pv->columns; 1.359 + U_ASSERT(columns>=3); /* upvec_open asserts this */ 1.360 + valueColumns=columns-2; /* not counting start & limit */ 1.361 + 1.362 + /* sort the properties vectors to find unique vector values */ 1.363 + uprv_sortArray(pv->v, rows, columns*4, 1.364 + upvec_compareRows, pv, FALSE, pErrorCode); 1.365 + if(U_FAILURE(*pErrorCode)) { 1.366 + return; 1.367 + } 1.368 + 1.369 + /* 1.370 + * Find and set the special values. 1.371 + * This has to do almost the same work as the compaction below, 1.372 + * to find the indexes where the special-value rows will move. 1.373 + */ 1.374 + row=pv->v; 1.375 + count=-valueColumns; 1.376 + for(i=0; i<rows; ++i) { 1.377 + start=(UChar32)row[0]; 1.378 + 1.379 + /* count a new values vector if it is different from the current one */ 1.380 + if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { 1.381 + count+=valueColumns; 1.382 + } 1.383 + 1.384 + if(start>=UPVEC_FIRST_SPECIAL_CP) { 1.385 + handler(context, start, start, count, row+2, valueColumns, pErrorCode); 1.386 + if(U_FAILURE(*pErrorCode)) { 1.387 + return; 1.388 + } 1.389 + } 1.390 + 1.391 + row+=columns; 1.392 + } 1.393 + 1.394 + /* count is at the beginning of the last vector, add valueColumns to include that last vector */ 1.395 + count+=valueColumns; 1.396 + 1.397 + /* Call the handler once more to signal the start of delivering real values. */ 1.398 + handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, 1.399 + count, row-valueColumns, valueColumns, pErrorCode); 1.400 + if(U_FAILURE(*pErrorCode)) { 1.401 + return; 1.402 + } 1.403 + 1.404 + /* 1.405 + * Move vector contents up to a contiguous array with only unique 1.406 + * vector values, and call the handler function for each vector. 1.407 + * 1.408 + * This destroys the Properties Vector structure and replaces it 1.409 + * with an array of just vector values. 1.410 + */ 1.411 + row=pv->v; 1.412 + count=-valueColumns; 1.413 + for(i=0; i<rows; ++i) { 1.414 + /* fetch these first before memmove() may overwrite them */ 1.415 + start=(UChar32)row[0]; 1.416 + limit=(UChar32)row[1]; 1.417 + 1.418 + /* add a new values vector if it is different from the current one */ 1.419 + if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { 1.420 + count+=valueColumns; 1.421 + uprv_memmove(pv->v+count, row+2, valueColumns*4); 1.422 + } 1.423 + 1.424 + if(start<UPVEC_FIRST_SPECIAL_CP) { 1.425 + handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode); 1.426 + if(U_FAILURE(*pErrorCode)) { 1.427 + return; 1.428 + } 1.429 + } 1.430 + 1.431 + row+=columns; 1.432 + } 1.433 + 1.434 + /* count is at the beginning of the last vector, add one to include that last vector */ 1.435 + pv->rows=count/valueColumns+1; 1.436 +} 1.437 + 1.438 +U_CAPI const uint32_t * U_EXPORT2 1.439 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { 1.440 + if(!pv->isCompacted) { 1.441 + return NULL; 1.442 + } 1.443 + if(pRows!=NULL) { 1.444 + *pRows=pv->rows; 1.445 + } 1.446 + if(pColumns!=NULL) { 1.447 + *pColumns=pv->columns-2; 1.448 + } 1.449 + return pv->v; 1.450 +} 1.451 + 1.452 +U_CAPI uint32_t * U_EXPORT2 1.453 +upvec_cloneArray(const UPropsVectors *pv, 1.454 + int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { 1.455 + uint32_t *clonedArray; 1.456 + int32_t byteLength; 1.457 + 1.458 + if(U_FAILURE(*pErrorCode)) { 1.459 + return NULL; 1.460 + } 1.461 + if(!pv->isCompacted) { 1.462 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.463 + return NULL; 1.464 + } 1.465 + byteLength=pv->rows*(pv->columns-2)*4; 1.466 + clonedArray=(uint32_t *)uprv_malloc(byteLength); 1.467 + if(clonedArray==NULL) { 1.468 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.469 + return NULL; 1.470 + } 1.471 + uprv_memcpy(clonedArray, pv->v, byteLength); 1.472 + if(pRows!=NULL) { 1.473 + *pRows=pv->rows; 1.474 + } 1.475 + if(pColumns!=NULL) { 1.476 + *pColumns=pv->columns-2; 1.477 + } 1.478 + return clonedArray; 1.479 +} 1.480 + 1.481 +U_CAPI UTrie2 * U_EXPORT2 1.482 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { 1.483 + UPVecToUTrie2Context toUTrie2={ NULL }; 1.484 + upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); 1.485 + utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); 1.486 + if(U_FAILURE(*pErrorCode)) { 1.487 + utrie2_close(toUTrie2.trie); 1.488 + toUTrie2.trie=NULL; 1.489 + } 1.490 + return toUTrie2.trie; 1.491 +} 1.492 + 1.493 +/* 1.494 + * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts 1.495 + * some 16-bit field and builds and returns a UTrie2. 1.496 + */ 1.497 + 1.498 +U_CAPI void U_CALLCONV 1.499 +upvec_compactToUTrie2Handler(void *context, 1.500 + UChar32 start, UChar32 end, 1.501 + int32_t rowIndex, uint32_t *row, int32_t columns, 1.502 + UErrorCode *pErrorCode) { 1.503 + UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; 1.504 + if(start<UPVEC_FIRST_SPECIAL_CP) { 1.505 + utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); 1.506 + } else { 1.507 + switch(start) { 1.508 + case UPVEC_INITIAL_VALUE_CP: 1.509 + toUTrie2->initialValue=rowIndex; 1.510 + break; 1.511 + case UPVEC_ERROR_VALUE_CP: 1.512 + toUTrie2->errorValue=rowIndex; 1.513 + break; 1.514 + case UPVEC_START_REAL_VALUES_CP: 1.515 + toUTrie2->maxValue=rowIndex; 1.516 + if(rowIndex>0xffff) { 1.517 + /* too many rows for a 16-bit trie */ 1.518 + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1.519 + } else { 1.520 + toUTrie2->trie=utrie2_open(toUTrie2->initialValue, 1.521 + toUTrie2->errorValue, pErrorCode); 1.522 + } 1.523 + break; 1.524 + default: 1.525 + break; 1.526 + } 1.527 + } 1.528 +}