michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2001-2008, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: ucol_cnt.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created 02/22/2001 michael@0: * created by: Vladimir Weinstein michael@0: * michael@0: * This module maintains a contraction table structure in expanded form michael@0: * and provides means to flatten this structure michael@0: * michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_COLLATION michael@0: michael@0: #include "unicode/uchar.h" michael@0: #include "ucol_cnt.h" michael@0: #include "cmemory.h" michael@0: michael@0: static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) { michael@0: if(tbl->position == tbl->size) { michael@0: uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t)); michael@0: if(newData == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar)); michael@0: if(newCPs == NULL) { michael@0: uprv_free(newData); michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: tbl->CEs = newData; michael@0: tbl->codePoints = newCPs; michael@0: tbl->size *= 2; michael@0: } michael@0: } michael@0: michael@0: U_CAPI CntTable* U_EXPORT2 michael@0: /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/ michael@0: uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) { michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable)); michael@0: if(tbl == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: tbl->mapping = mapping; michael@0: tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *)); michael@0: if(tbl->elements == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(tbl); michael@0: return NULL; michael@0: } michael@0: tbl->capacity = INIT_EXP_TABLE_SIZE; michael@0: uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *)); michael@0: tbl->size = 0; michael@0: tbl->position = 0; michael@0: tbl->CEs = NULL; michael@0: tbl->codePoints = NULL; michael@0: tbl->offsets = NULL; michael@0: tbl->currentTag = NOT_FOUND_TAG; michael@0: return tbl; michael@0: } michael@0: michael@0: static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) { michael@0: ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable)); michael@0: if(el == NULL) { michael@0: goto outOfMemory; michael@0: } michael@0: el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); michael@0: if(el->CEs == NULL) { michael@0: goto outOfMemory; michael@0: } michael@0: michael@0: el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar)); michael@0: if(el->codePoints == NULL) { michael@0: uprv_free(el->CEs); michael@0: goto outOfMemory; michael@0: } michael@0: michael@0: el->position = 0; michael@0: el->size = INIT_EXP_TABLE_SIZE; michael@0: uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); michael@0: uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar)); michael@0: michael@0: table->elements[table->size] = el; michael@0: michael@0: //uhash_put(table->elements, (void *)table->size, el, status); michael@0: michael@0: *key = table->size++; michael@0: michael@0: if(table->size == table->capacity) { michael@0: ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *)); michael@0: // do realloc michael@0: /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/ michael@0: if(newElements == NULL) { michael@0: uprv_free(el->codePoints); michael@0: uprv_free(el->CEs); michael@0: goto outOfMemory; michael@0: } michael@0: ContractionTable **oldElements = table->elements; michael@0: uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *)); michael@0: uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *)); michael@0: table->capacity *= 2; michael@0: table->elements = newElements; michael@0: uprv_free(oldElements); michael@0: } michael@0: michael@0: return el; michael@0: michael@0: outOfMemory: michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: if (el) uprv_free(el); michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) { michael@0: int32_t i = 0, j = 0; michael@0: if(U_FAILURE(*status) || table->size == 0) { michael@0: return 0; michael@0: } michael@0: michael@0: table->position = 0; michael@0: michael@0: if(table->offsets != NULL) { michael@0: uprv_free(table->offsets); michael@0: } michael@0: table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t)); michael@0: if(table->offsets == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: michael@0: /* See how much memory we need */ michael@0: for(i = 0; isize; i++) { michael@0: table->offsets[i] = table->position+mainOffset; michael@0: table->position += table->elements[i]->position; michael@0: } michael@0: michael@0: /* Allocate it */ michael@0: if(table->CEs != NULL) { michael@0: uprv_free(table->CEs); michael@0: } michael@0: table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t)); michael@0: if(table->CEs == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(table->offsets); michael@0: table->offsets = NULL; michael@0: return 0; michael@0: } michael@0: uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t)); michael@0: michael@0: if(table->codePoints != NULL) { michael@0: uprv_free(table->codePoints); michael@0: } michael@0: table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar)); michael@0: if(table->codePoints == NULL) { michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: uprv_free(table->offsets); michael@0: table->offsets = NULL; michael@0: uprv_free(table->CEs); michael@0: table->CEs = NULL; michael@0: return 0; michael@0: } michael@0: uprv_memset(table->codePoints, '?', table->position*sizeof(UChar)); michael@0: michael@0: /* Now stuff the things in*/ michael@0: michael@0: UChar *cpPointer = table->codePoints; michael@0: uint32_t *CEPointer = table->CEs; michael@0: for(i = 0; isize; i++) { michael@0: int32_t size = table->elements[i]->position; michael@0: uint8_t ccMax = 0, ccMin = 255, cc = 0; michael@0: for(j = 1; jelements[i]->codePoints[j]); michael@0: if(cc>ccMax) { michael@0: ccMax = cc; michael@0: } michael@0: if(ccelements[i]->codePoints[j]; michael@0: } michael@0: *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax; michael@0: michael@0: uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t)); michael@0: for(j = 0; joffsets[getContractOffset(*(CEPointer+j))]); michael@0: } michael@0: } michael@0: cpPointer += size; michael@0: CEPointer += size; michael@0: } michael@0: michael@0: // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the michael@0: // start of the flat file). However, what is done below is just wrong and it affects building of michael@0: // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also, michael@0: // keeping a list of code points that are contractions might be smart, although I'm not sure if it's michael@0: // feasible. michael@0: uint32_t CE; michael@0: for(i = 0; i<=0x10FFFF; i++) { michael@0: /*CE = ucmpe32_get(table->mapping, i);*/ michael@0: CE = utrie_get32(table->mapping, i, NULL); michael@0: if(isCntTableElement(CE)) { michael@0: CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]); michael@0: /*ucmpe32_set(table->mapping, i, CE);*/ michael@0: utrie_set32(table->mapping, i, CE); michael@0: } michael@0: } michael@0: michael@0: michael@0: return table->position; michael@0: } michael@0: michael@0: static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) { michael@0: ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable)); michael@0: if(r == NULL) { michael@0: goto outOfMemory; michael@0: } michael@0: michael@0: r->position = t->position; michael@0: r->size = t->size; michael@0: michael@0: r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size); michael@0: if(r->codePoints == NULL) { michael@0: goto outOfMemory; michael@0: } michael@0: r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size); michael@0: if(r->CEs == NULL) { michael@0: uprv_free(r->codePoints); michael@0: goto outOfMemory; michael@0: } michael@0: uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size); michael@0: uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size); michael@0: michael@0: return r; michael@0: michael@0: outOfMemory: michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: if (r) uprv_free(r); michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI CntTable* U_EXPORT2 michael@0: uprv_cnttab_clone(CntTable *t, UErrorCode *status) { michael@0: if(U_FAILURE(*status)) { michael@0: return NULL; michael@0: } michael@0: int32_t i = 0; michael@0: CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable)); michael@0: /* test for NULL */ michael@0: if (r == NULL) { michael@0: goto outOfMemory; michael@0: } michael@0: r->position = t->position; michael@0: r->size = t->size; michael@0: r->capacity = t->capacity; michael@0: michael@0: r->mapping = t->mapping; michael@0: michael@0: r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *)); michael@0: /* test for NULL */ michael@0: if (r->elements == NULL) { michael@0: goto outOfMemory; michael@0: } michael@0: //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *)); michael@0: michael@0: for(i = 0; isize; i++) { michael@0: r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status); michael@0: } michael@0: michael@0: if(t->CEs != NULL) { michael@0: r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t)); michael@0: /* test for NULL */ michael@0: if (r->CEs == NULL) { michael@0: uprv_free(r->elements); michael@0: goto outOfMemory; michael@0: } michael@0: uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t)); michael@0: } else { michael@0: r->CEs = NULL; michael@0: } michael@0: michael@0: if(t->codePoints != NULL) { michael@0: r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar)); michael@0: /* test for NULL */ michael@0: if (r->codePoints == NULL) { michael@0: uprv_free(r->CEs); michael@0: uprv_free(r->elements); michael@0: goto outOfMemory; michael@0: } michael@0: uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar)); michael@0: } else { michael@0: r->codePoints = NULL; michael@0: } michael@0: michael@0: if(t->offsets != NULL) { michael@0: r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t)); michael@0: /* test for NULL */ michael@0: if (r->offsets == NULL) { michael@0: uprv_free(r->codePoints); michael@0: uprv_free(r->CEs); michael@0: uprv_free(r->elements); michael@0: goto outOfMemory; michael@0: } michael@0: uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t)); michael@0: } else { michael@0: r->offsets = NULL; michael@0: } michael@0: michael@0: return r; michael@0: michael@0: outOfMemory: michael@0: *status = U_MEMORY_ALLOCATION_ERROR; michael@0: if (r) uprv_free(r); michael@0: return NULL; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: uprv_cnttab_close(CntTable *table) { michael@0: int32_t i = 0; michael@0: for(i = 0; isize; i++) { michael@0: uprv_free(table->elements[i]->CEs); michael@0: uprv_free(table->elements[i]->codePoints); michael@0: uprv_free(table->elements[i]); michael@0: } michael@0: uprv_free(table->elements); michael@0: uprv_free(table->CEs); michael@0: uprv_free(table->offsets); michael@0: uprv_free(table->codePoints); michael@0: uprv_free(table); michael@0: } michael@0: michael@0: /* this is for adding non contractions */ michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) { michael@0: element &= 0xFFFFFF; michael@0: michael@0: ContractionTable *tbl = NULL; michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { michael@0: return 0; michael@0: } michael@0: michael@0: tbl->CEs[tbl->position-1] = value; michael@0: michael@0: return(constructContractCE(table->currentTag, element)); michael@0: } michael@0: michael@0: michael@0: /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */ michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) { michael@0: michael@0: ContractionTable *tbl = NULL; michael@0: michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: element &= 0xFFFFFF; michael@0: michael@0: if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { michael@0: tbl = addATableElement(table, &element, status); michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: uprv_growTable(tbl, status); michael@0: michael@0: uint32_t offset = 0; michael@0: michael@0: michael@0: while(tbl->codePoints[offset] < codePoint && offsetposition) { michael@0: offset++; michael@0: } michael@0: michael@0: uint32_t i = tbl->position; michael@0: for(i = tbl->position; i > offset; i--) { michael@0: tbl->CEs[i] = tbl->CEs[i-1]; michael@0: tbl->codePoints[i] = tbl->codePoints[i-1]; michael@0: } michael@0: michael@0: tbl->CEs[offset] = value; michael@0: tbl->codePoints[offset] = codePoint; michael@0: michael@0: tbl->position++; michael@0: michael@0: return(constructContractCE(table->currentTag, element)); michael@0: } michael@0: michael@0: michael@0: /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */ michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) { michael@0: michael@0: element &= 0xFFFFFF; michael@0: michael@0: ContractionTable *tbl = NULL; michael@0: michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { michael@0: tbl = addATableElement(table, &element, status); michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: uprv_growTable(tbl, status); michael@0: michael@0: tbl->CEs[tbl->position] = value; michael@0: tbl->codePoints[tbl->position] = codePoint; michael@0: michael@0: tbl->position++; michael@0: michael@0: return(constructContractCE(table->currentTag, element)); michael@0: } michael@0: michael@0: /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */ michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) { michael@0: michael@0: element &= 0xFFFFFF; michael@0: ContractionTable *tbl = NULL; michael@0: michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { michael@0: tbl = addATableElement(table, &element, status); michael@0: if (U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: } michael@0: michael@0: if(offset >= tbl->size) { michael@0: *status = U_INDEX_OUTOFBOUNDS_ERROR; michael@0: return 0; michael@0: } michael@0: tbl->CEs[offset] = value; michael@0: tbl->codePoints[offset] = codePoint; michael@0: michael@0: //return(offset); michael@0: return(constructContractCE(table->currentTag, element)); michael@0: } michael@0: michael@0: static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) { michael@0: element &= 0xFFFFFF; michael@0: ContractionTable *tbl = NULL; michael@0: michael@0: if(element != 0xFFFFFF) { michael@0: tbl = table->elements[element]; /* This could also return NULL */ michael@0: } michael@0: return tbl; michael@0: } michael@0: michael@0: static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) { michael@0: uint32_t position = 0; michael@0: if(tbl == NULL) { michael@0: return -1; michael@0: } michael@0: michael@0: while(codePoint > tbl->codePoints[position]) { michael@0: position++; michael@0: if(position > tbl->position) { michael@0: return -1; michael@0: } michael@0: } michael@0: if (codePoint == tbl->codePoints[position]) { michael@0: return position; michael@0: } else { michael@0: return -1; michael@0: } michael@0: } michael@0: michael@0: static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) { michael@0: if(tbl == NULL) { michael@0: return UCOL_NOT_FOUND; michael@0: } michael@0: if((uint32_t)position > tbl->position || position == -1) { michael@0: return UCOL_NOT_FOUND; michael@0: } else { michael@0: return tbl->CEs[position]; michael@0: } michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) { michael@0: michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint); michael@0: } michael@0: michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) { michael@0: if(U_FAILURE(*status)) { michael@0: return UCOL_NOT_FOUND; michael@0: } michael@0: michael@0: return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position)); michael@0: } michael@0: michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) { michael@0: if(U_FAILURE(*status)) { michael@0: return UCOL_NOT_FOUND; michael@0: } michael@0: ContractionTable *tbl = _cnttab_getContractionTable(table, element); michael@0: return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint)); michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) { michael@0: if(U_FAILURE(*status)) { michael@0: return FALSE; michael@0: } michael@0: michael@0: while(*(ztString)!=0) { michael@0: element = uprv_cnttab_findCE(table, element, *(ztString), status); michael@0: if(element == UCOL_NOT_FOUND) { michael@0: return FALSE; michael@0: } michael@0: if(!isCntTableElement(element)) { michael@0: return TRUE; michael@0: } michael@0: ztString++; michael@0: } michael@0: return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND); michael@0: } michael@0: michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) { michael@0: michael@0: element &= 0xFFFFFF; michael@0: ContractionTable *tbl = NULL; michael@0: michael@0: if(U_FAILURE(*status)) { michael@0: return 0; michael@0: } michael@0: michael@0: if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) { michael@0: return 0; michael@0: } michael@0: michael@0: uint32_t position = 0; michael@0: michael@0: while(codePoint > tbl->codePoints[position]) { michael@0: position++; michael@0: if(position > tbl->position) { michael@0: return UCOL_NOT_FOUND; michael@0: } michael@0: } michael@0: if (codePoint == tbl->codePoints[position]) { michael@0: tbl->CEs[position] = newCE; michael@0: return element; michael@0: } else { michael@0: return UCOL_NOT_FOUND; michael@0: } michael@0: } michael@0: michael@0: #endif /* #if !UCONFIG_NO_COLLATION */