intl/icu/source/i18n/ucol_cnt.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  *******************************************************************************
     3  *
     4  *   Copyright (C) 2001-2008, International Business Machines
     5  *   Corporation and others.  All Rights Reserved.
     6  *
     7  *******************************************************************************
     8  *   file name:  ucol_cnt.cpp
     9  *   encoding:   US-ASCII
    10  *   tab size:   8 (not used)
    11  *   indentation:4
    12  *
    13  *   created 02/22/2001
    14  *   created by: Vladimir Weinstein
    15  *
    16  * This module maintains a contraction table structure in expanded form
    17  * and provides means to flatten this structure
    18  * 
    19  */
    21 #include "unicode/utypes.h"
    23 #if !UCONFIG_NO_COLLATION
    25 #include "unicode/uchar.h"
    26 #include "ucol_cnt.h"
    27 #include "cmemory.h"
    29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
    30     if(tbl->position == tbl->size) {
    31         uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
    32         if(newData == NULL) {
    33             *status = U_MEMORY_ALLOCATION_ERROR;
    34             return;
    35         }
    36         UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
    37         if(newCPs == NULL) {
    38             uprv_free(newData);
    39             *status = U_MEMORY_ALLOCATION_ERROR;
    40             return;
    41         }
    42         tbl->CEs = newData;
    43         tbl->codePoints = newCPs;
    44         tbl->size *= 2;
    45     }
    46 }
    48 U_CAPI CntTable*  U_EXPORT2
    49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
    50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
    51     if(U_FAILURE(*status)) {
    52         return 0;
    53     }
    54     CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
    55     if(tbl == NULL) {
    56         *status = U_MEMORY_ALLOCATION_ERROR;
    57         return NULL;
    58     }
    59     tbl->mapping = mapping;
    60     tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
    61     if(tbl->elements == NULL) {
    62         *status = U_MEMORY_ALLOCATION_ERROR;
    63         uprv_free(tbl);
    64         return NULL;
    65     }
    66     tbl->capacity = INIT_EXP_TABLE_SIZE;
    67     uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
    68     tbl->size = 0;
    69     tbl->position = 0;
    70     tbl->CEs = NULL;
    71     tbl->codePoints = NULL;
    72     tbl->offsets = NULL;
    73     tbl->currentTag = NOT_FOUND_TAG;
    74     return tbl;
    75 }
    77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
    78     ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
    79     if(el == NULL) {
    80         goto outOfMemory;
    81     }
    82     el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
    83     if(el->CEs == NULL) {
    84         goto outOfMemory;
    85     }
    87     el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
    88     if(el->codePoints == NULL) {
    89         uprv_free(el->CEs);
    90         goto outOfMemory;
    91     }
    93     el->position = 0;
    94     el->size = INIT_EXP_TABLE_SIZE;
    95     uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
    96     uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
    98     table->elements[table->size] = el;
   100     //uhash_put(table->elements, (void *)table->size, el, status);
   102     *key = table->size++;
   104     if(table->size == table->capacity) {
   105         ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
   106         // do realloc
   107         /*        table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
   108         if(newElements == NULL) {
   109             uprv_free(el->codePoints);
   110             uprv_free(el->CEs);
   111             goto outOfMemory;
   112         }
   113         ContractionTable **oldElements = table->elements;
   114         uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
   115         uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
   116         table->capacity *= 2;
   117         table->elements = newElements;
   118         uprv_free(oldElements);
   119     }
   121     return el;
   123 outOfMemory:
   124     *status = U_MEMORY_ALLOCATION_ERROR;
   125     if (el) uprv_free(el);
   126     return NULL;
   127 }
   129 U_CAPI int32_t  U_EXPORT2
   130 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
   131     int32_t i = 0, j = 0;
   132     if(U_FAILURE(*status) || table->size == 0) {
   133         return 0;
   134     }
   136     table->position = 0;
   138     if(table->offsets != NULL) {
   139         uprv_free(table->offsets);
   140     }
   141     table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
   142     if(table->offsets == NULL) {
   143         *status = U_MEMORY_ALLOCATION_ERROR;
   144         return 0;
   145     }
   148     /* See how much memory we need */
   149     for(i = 0; i<table->size; i++) {
   150         table->offsets[i] = table->position+mainOffset;
   151         table->position += table->elements[i]->position;
   152     }
   154     /* Allocate it */
   155     if(table->CEs != NULL) {
   156         uprv_free(table->CEs);
   157     }
   158     table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
   159     if(table->CEs == NULL) {
   160         *status = U_MEMORY_ALLOCATION_ERROR;
   161         uprv_free(table->offsets);
   162         table->offsets = NULL;
   163         return 0;
   164     }
   165     uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
   167     if(table->codePoints != NULL) {
   168         uprv_free(table->codePoints);
   169     }
   170     table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
   171     if(table->codePoints == NULL) {
   172         *status = U_MEMORY_ALLOCATION_ERROR;
   173         uprv_free(table->offsets);
   174         table->offsets = NULL;
   175         uprv_free(table->CEs);
   176         table->CEs = NULL;
   177         return 0;
   178     }
   179     uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
   181     /* Now stuff the things in*/
   183     UChar *cpPointer = table->codePoints;
   184     uint32_t *CEPointer = table->CEs;
   185     for(i = 0; i<table->size; i++) {
   186         int32_t size = table->elements[i]->position;
   187         uint8_t ccMax = 0, ccMin = 255, cc = 0;
   188         for(j = 1; j<size; j++) {
   189             cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
   190             if(cc>ccMax) {
   191                 ccMax = cc;
   192             }
   193             if(cc<ccMin) {
   194                 ccMin = cc;
   195             }
   196             *(cpPointer+j) = table->elements[i]->codePoints[j];
   197         }
   198         *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
   200         uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
   201         for(j = 0; j<size; j++) {
   202             if(isCntTableElement(*(CEPointer+j))) {
   203                 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
   204             }
   205         }
   206         cpPointer += size;
   207         CEPointer += size;
   208     }
   210     // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the 
   211     // start of the flat file). However, what is done below is just wrong and it affects building of 
   212     // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
   213     // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
   214     // feasible.
   215     uint32_t CE;
   216     for(i = 0; i<=0x10FFFF; i++) {
   217         /*CE = ucmpe32_get(table->mapping, i);*/
   218         CE = utrie_get32(table->mapping, i, NULL);
   219         if(isCntTableElement(CE)) {
   220             CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
   221             /*ucmpe32_set(table->mapping, i, CE);*/
   222             utrie_set32(table->mapping, i, CE);
   223         }
   224     }
   227     return table->position;
   228 }
   230 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
   231     ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
   232     if(r == NULL) {
   233         goto outOfMemory;
   234     }
   236     r->position = t->position;
   237     r->size = t->size;
   239     r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
   240     if(r->codePoints == NULL) {
   241         goto outOfMemory;
   242     }
   243     r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
   244     if(r->CEs == NULL) {
   245         uprv_free(r->codePoints);
   246         goto outOfMemory;
   247     }
   248     uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
   249     uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
   251     return r;
   253 outOfMemory:
   254     *status = U_MEMORY_ALLOCATION_ERROR;
   255     if (r) uprv_free(r);
   256     return NULL;
   257 }
   259 U_CAPI CntTable* U_EXPORT2
   260 uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
   261     if(U_FAILURE(*status)) {
   262         return NULL;
   263     }
   264     int32_t i = 0;
   265     CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
   266     /* test for NULL */
   267     if (r == NULL) {
   268         goto outOfMemory;
   269     }
   270     r->position = t->position;
   271     r->size = t->size;
   272     r->capacity = t->capacity;
   274     r->mapping = t->mapping;
   276     r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
   277     /* test for NULL */
   278     if (r->elements == NULL) {
   279         goto outOfMemory;
   280     }
   281     //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
   283     for(i = 0; i<t->size; i++) {
   284         r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
   285     }
   287     if(t->CEs != NULL) {
   288         r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
   289         /* test for NULL */
   290         if (r->CEs == NULL) {
   291             uprv_free(r->elements);
   292             goto outOfMemory;
   293         }
   294         uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
   295     } else {
   296         r->CEs = NULL;
   297     }
   299     if(t->codePoints != NULL) {
   300         r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
   301         /* test for NULL */
   302         if (r->codePoints == NULL) {
   303             uprv_free(r->CEs);
   304             uprv_free(r->elements);
   305             goto outOfMemory;
   306         }
   307         uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
   308     } else {
   309         r->codePoints = NULL;
   310     }
   312     if(t->offsets != NULL) {
   313         r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
   314         /* test for NULL */
   315         if (r->offsets == NULL) {
   316             uprv_free(r->codePoints);
   317             uprv_free(r->CEs);
   318             uprv_free(r->elements);
   319             goto outOfMemory;
   320         }
   321         uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
   322     } else {
   323         r->offsets = NULL;
   324     }
   326     return r;
   328 outOfMemory:
   329     *status = U_MEMORY_ALLOCATION_ERROR;
   330     if (r) uprv_free(r);
   331     return NULL;
   332 }
   334 U_CAPI void  U_EXPORT2
   335 uprv_cnttab_close(CntTable *table) {
   336     int32_t i = 0;
   337     for(i = 0; i<table->size; i++) {
   338         uprv_free(table->elements[i]->CEs);
   339         uprv_free(table->elements[i]->codePoints);
   340         uprv_free(table->elements[i]);
   341     }
   342     uprv_free(table->elements);
   343     uprv_free(table->CEs);
   344     uprv_free(table->offsets);
   345     uprv_free(table->codePoints);
   346     uprv_free(table);
   347 }
   349 /* this is for adding non contractions */
   350 U_CAPI uint32_t  U_EXPORT2
   351 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
   352     element &= 0xFFFFFF;
   354     ContractionTable *tbl = NULL;
   355     if(U_FAILURE(*status)) {
   356         return 0;
   357     }
   359     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
   360         return 0;
   361     }
   363     tbl->CEs[tbl->position-1] = value;
   365     return(constructContractCE(table->currentTag, element));
   366 }
   369 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
   370 U_CAPI uint32_t  U_EXPORT2
   371 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
   373     ContractionTable *tbl = NULL;
   375     if(U_FAILURE(*status)) {
   376         return 0;
   377     }
   378     element &= 0xFFFFFF;
   380     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
   381         tbl = addATableElement(table, &element, status);
   382         if (U_FAILURE(*status)) {
   383             return 0;
   384         }
   385     }
   387     uprv_growTable(tbl, status);
   389     uint32_t offset = 0;
   392     while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
   393         offset++;
   394     }
   396     uint32_t i = tbl->position;
   397     for(i = tbl->position; i > offset; i--) {
   398         tbl->CEs[i] = tbl->CEs[i-1];
   399         tbl->codePoints[i] = tbl->codePoints[i-1];
   400     }
   402     tbl->CEs[offset] = value;
   403     tbl->codePoints[offset] = codePoint;
   405     tbl->position++;
   407     return(constructContractCE(table->currentTag, element));
   408 }
   411 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
   412 U_CAPI uint32_t  U_EXPORT2
   413 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
   415     element &= 0xFFFFFF;
   417     ContractionTable *tbl = NULL;
   419     if(U_FAILURE(*status)) {
   420         return 0;
   421     }
   423     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
   424         tbl = addATableElement(table, &element, status);
   425         if (U_FAILURE(*status)) {
   426             return 0;
   427         }
   428     } 
   430     uprv_growTable(tbl, status);
   432     tbl->CEs[tbl->position] = value;
   433     tbl->codePoints[tbl->position] = codePoint;
   435     tbl->position++;
   437     return(constructContractCE(table->currentTag, element));
   438 }
   440 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
   441 U_CAPI uint32_t  U_EXPORT2
   442 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
   444     element &= 0xFFFFFF;
   445     ContractionTable *tbl = NULL;
   447     if(U_FAILURE(*status)) {
   448         return 0;
   449     }
   451     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
   452         tbl = addATableElement(table, &element, status);
   453         if (U_FAILURE(*status)) {
   454             return 0;
   455         }
   457     }
   459     if(offset >= tbl->size) {
   460         *status = U_INDEX_OUTOFBOUNDS_ERROR;
   461         return 0;
   462     }
   463     tbl->CEs[offset] = value;
   464     tbl->codePoints[offset] = codePoint;
   466     //return(offset);
   467     return(constructContractCE(table->currentTag, element));
   468 }
   470 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
   471     element &= 0xFFFFFF;
   472     ContractionTable *tbl = NULL;
   474     if(element != 0xFFFFFF) {
   475         tbl = table->elements[element]; /* This could also return NULL */
   476     }
   477     return tbl;
   478 }
   480 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
   481     uint32_t position = 0;
   482     if(tbl == NULL) {
   483         return -1;
   484     }
   486     while(codePoint > tbl->codePoints[position]) {
   487         position++;
   488         if(position > tbl->position) {
   489             return -1;
   490         }
   491     }
   492     if (codePoint == tbl->codePoints[position]) {
   493         return position;
   494     } else {
   495         return -1;
   496     }
   497 }
   499 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
   500     if(tbl == NULL) {
   501         return UCOL_NOT_FOUND;
   502     }
   503     if((uint32_t)position > tbl->position || position == -1) {
   504         return UCOL_NOT_FOUND;
   505     } else {
   506         return tbl->CEs[position];
   507     }
   508 }
   510 U_CAPI int32_t  U_EXPORT2
   511 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
   513     if(U_FAILURE(*status)) {
   514         return 0;
   515     }
   517     return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
   518 }
   520 U_CAPI uint32_t  U_EXPORT2
   521 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
   522     if(U_FAILURE(*status)) {
   523         return UCOL_NOT_FOUND;
   524     }
   526     return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
   527 }
   529 U_CAPI uint32_t  U_EXPORT2
   530 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
   531     if(U_FAILURE(*status)) {
   532         return UCOL_NOT_FOUND;
   533     }
   534     ContractionTable *tbl = _cnttab_getContractionTable(table, element);
   535     return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
   536 }
   538 U_CAPI UBool  U_EXPORT2
   539 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
   540     if(U_FAILURE(*status)) {
   541         return FALSE;
   542     }
   544     while(*(ztString)!=0) {
   545         element = uprv_cnttab_findCE(table, element, *(ztString), status);
   546         if(element == UCOL_NOT_FOUND) {
   547             return FALSE;
   548         }
   549         if(!isCntTableElement(element)) {
   550             return TRUE;
   551         }
   552         ztString++;
   553     }
   554     return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
   555 }
   557 U_CAPI uint32_t  U_EXPORT2
   558 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
   560     element &= 0xFFFFFF;
   561     ContractionTable *tbl = NULL;
   563     if(U_FAILURE(*status)) {
   564         return 0;
   565     }
   567     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
   568         return 0;
   569     }
   571     uint32_t position = 0;
   573     while(codePoint > tbl->codePoints[position]) {
   574         position++;
   575         if(position > tbl->position) {
   576             return UCOL_NOT_FOUND;
   577         }
   578     }
   579     if (codePoint == tbl->codePoints[position]) {
   580         tbl->CEs[position] = newCE;
   581         return element;
   582     } else {
   583         return UCOL_NOT_FOUND;
   584     }
   585 }
   587 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial