diff -r 000000000000 -r 6474c204b198 intl/icu/source/common/unorm_it.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/intl/icu/source/common/unorm_it.c Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,646 @@ +/* +******************************************************************************* +* +* Copyright (C) 2003-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unorm_it.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jan21 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION + +#include "unicode/uiter.h" +#include "unicode/unorm.h" +#include "unicode/utf.h" +#include "unorm_it.h" +#include "cmemory.h" + +/* UNormIterator ------------------------------------------------------------ */ + +enum { + INITIAL_CAPACITY=100 +}; + +struct UNormIterator { + UCharIterator api; + UCharIterator *iter; + + /* + * chars and states either use the static buffers + * or are allocated in the same memory block + * + * They are parallel arrays with states[] holding the getState() values + * from normalization boundaries, and UITER_NO_STATE in between. + */ + UChar *chars; + uint32_t *states; + + /* + * api.start: first valid character & state in the arrays + * api.index: current position + * api.limit: one past the last valid character in chars[], but states[limit] is valid + * capacity: length of allocated arrays + */ + int32_t capacity; + + /* the current iter->getState(), saved to avoid unnecessary setState() calls; may not correspond to api->index! */ + uint32_t state; + + /* there are UChars available before start or after limit? */ + UBool hasPrevious, hasNext, isStackAllocated; + + UNormalizationMode mode; + + UChar charsBuffer[INITIAL_CAPACITY]; + uint32_t statesBuffer[INITIAL_CAPACITY+1]; /* one more than charsBuffer[]! */ +}; + +static void +initIndexes(UNormIterator *uni, UCharIterator *iter) { + /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ + UCharIterator *api=&uni->api; + + if(!iter->hasPrevious(iter)) { + /* set indexes to the beginning of the arrays */ + api->start=api->index=api->limit=0; + uni->hasPrevious=FALSE; + uni->hasNext=iter->hasNext(iter); + } else if(!iter->hasNext(iter)) { + /* set indexes to the end of the arrays */ + api->start=api->index=api->limit=uni->capacity; + uni->hasNext=FALSE; + uni->hasPrevious=iter->hasPrevious(iter); + } else { + /* set indexes into the middle of the arrays */ + api->start=api->index=api->limit=uni->capacity/2; + uni->hasPrevious=uni->hasNext=TRUE; + } +} + +static UBool +reallocArrays(UNormIterator *uni, int32_t capacity, UBool addAtStart) { + /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ + UCharIterator *api=&uni->api; + + uint32_t *states; + UChar *chars; + int32_t start, limit; + + states=(uint32_t *)uprv_malloc((capacity+1)*4+capacity*2); + if(states==NULL) { + return FALSE; + } + + chars=(UChar *)(states+(capacity+1)); + uni->capacity=capacity; + + start=api->start; + limit=api->limit; + + if(addAtStart) { + /* copy old contents to the end of the new arrays */ + int32_t delta; + + delta=capacity-uni->capacity; + uprv_memcpy(states+delta+start, uni->states+start, (limit-start+1)*4); + uprv_memcpy(chars+delta+start, uni->chars+start, (limit-start)*4); + + api->start=start+delta; + api->index+=delta; + api->limit=limit+delta; + } else { + /* copy old contents to the beginning of the new arrays */ + uprv_memcpy(states+start, uni->states+start, (limit-start+1)*4); + uprv_memcpy(chars+start, uni->chars+start, (limit-start)*4); + } + + uni->chars=chars; + uni->states=states; + + return TRUE; +} + +static void +moveContentsTowardStart(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) { + /* move array contents up to make room */ + int32_t srcIndex, destIndex, limit; + + limit=api->limit; + srcIndex=delta; + if(srcIndex>api->start) { + /* look for a position in the arrays with a known state */ + while(srcIndexstart=destIndex=0; + while(srcIndexlimit=destIndex; +} + +static void +moveContentsTowardEnd(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) { + /* move array contents up to make room */ + int32_t srcIndex, destIndex, start; + + start=api->start; + destIndex=((UNormIterator *)api)->capacity; + srcIndex=destIndex-delta; + if(srcIndexlimit) { + /* look for a position in the arrays with a known state */ + while(srcIndex>start && states[srcIndex]==UITER_NO_STATE) { + --srcIndex; + } + } + + /* now actually move the array contents */ + api->limit=destIndex; + + /* copy states[limit] as well! */ + states[destIndex]=states[srcIndex]; + + while(srcIndex>start) { + chars[--destIndex]=chars[--srcIndex]; + states[destIndex]=states[srcIndex]; + } + + api->start=destIndex; +} + +/* normalize forward from the limit, assume hasNext is true */ +static UBool +readNext(UNormIterator *uni, UCharIterator *iter) { + /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ + UCharIterator *api=&uni->api; + + /* make capacity/4 room at the end of the arrays */ + int32_t limit, capacity, room; + UErrorCode errorCode; + + limit=api->limit; + capacity=uni->capacity; + room=capacity/4; + if(room>(capacity-limit)) { + /* move array contents to make room */ + moveContentsTowardStart(api, uni->chars, uni->states, room); + api->index=limit=api->limit; + uni->hasPrevious=TRUE; + } + + /* normalize starting from the limit position */ + errorCode=U_ZERO_ERROR; + if(uni->state!=uni->states[limit]) { + uiter_setState(iter, uni->states[limit], &errorCode); + if(U_FAILURE(errorCode)) { + uni->state=UITER_NO_STATE; + uni->hasNext=FALSE; + return FALSE; + } + } + + room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + if(room<=capacity) { + /* empty and re-use the arrays */ + uni->states[0]=uni->states[limit]; + api->start=api->index=api->limit=limit=0; + uni->hasPrevious=TRUE; + } else { + capacity+=room+100; + if(!reallocArrays(uni, capacity, FALSE)) { + uni->state=UITER_NO_STATE; + uni->hasNext=FALSE; + return FALSE; + } + limit=api->limit; + } + + errorCode=U_ZERO_ERROR; + uiter_setState(iter, uni->states[limit], &errorCode); + room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode); + } + if(U_FAILURE(errorCode) || room==0) { + uni->state=UITER_NO_STATE; + uni->hasNext=FALSE; + return FALSE; + } + + /* room>0 */ + ++limit; /* leave the known states[limit] alone */ + for(--room; room>0; --room) { + /* set unknown states for all but the normalization boundaries */ + uni->states[limit++]=UITER_NO_STATE; + } + uni->states[limit]=uni->state=uiter_getState(iter); + uni->hasNext=iter->hasNext(iter); + api->limit=limit; + return TRUE; +} + +/* normalize backward from the start, assume hasPrevious is true */ +static UBool +readPrevious(UNormIterator *uni, UCharIterator *iter) { + /* do not pass api so that the compiler knows it's an alias pointer to uni itself */ + UCharIterator *api=&uni->api; + + /* make capacity/4 room at the start of the arrays */ + int32_t start, capacity, room; + UErrorCode errorCode; + + start=api->start; + capacity=uni->capacity; + room=capacity/4; + if(room>start) { + /* move array contents to make room */ + moveContentsTowardEnd(api, uni->chars, uni->states, room); + api->index=start=api->start; + uni->hasNext=TRUE; + } + + /* normalize ending at the start position */ + errorCode=U_ZERO_ERROR; + if(uni->state!=uni->states[start]) { + uiter_setState(iter, uni->states[start], &errorCode); + if(U_FAILURE(errorCode)) { + uni->state=UITER_NO_STATE; + uni->hasPrevious=FALSE; + return FALSE; + } + } + + room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + if(room<=capacity) { + /* empty and re-use the arrays */ + uni->states[capacity]=uni->states[start]; + api->start=api->index=api->limit=start=capacity; + uni->hasNext=TRUE; + } else { + capacity+=room+100; + if(!reallocArrays(uni, capacity, TRUE)) { + uni->state=UITER_NO_STATE; + uni->hasPrevious=FALSE; + return FALSE; + } + start=api->start; + } + + errorCode=U_ZERO_ERROR; + uiter_setState(iter, uni->states[start], &errorCode); + room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode); + } + if(U_FAILURE(errorCode) || room==0) { + uni->state=UITER_NO_STATE; + uni->hasPrevious=FALSE; + return FALSE; + } + + /* room>0 */ + do { + /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */ + uni->chars[--start]=uni->chars[--room]; + /* set unknown states for all but the normalization boundaries */ + uni->states[start]=UITER_NO_STATE; + } while(room>0); + uni->states[start]=uni->state=uiter_getState(iter); + uni->hasPrevious=iter->hasPrevious(iter); + api->start=start; + return TRUE; +} + +/* Iterator runtime API functions ------------------------------------------- */ + +static int32_t U_CALLCONV +unormIteratorGetIndex(UCharIterator *api, UCharIteratorOrigin origin) { + switch(origin) { + case UITER_ZERO: + case UITER_START: + return 0; + case UITER_CURRENT: + case UITER_LIMIT: + case UITER_LENGTH: + return UITER_UNKNOWN_INDEX; + default: + /* not a valid origin */ + /* Should never get here! */ + return -1; + } +} + +static int32_t U_CALLCONV +unormIteratorMove(UCharIterator *api, int32_t delta, UCharIteratorOrigin origin) { + UNormIterator *uni=(UNormIterator *)api; + UCharIterator *iter=uni->iter; + int32_t pos; + + switch(origin) { + case UITER_ZERO: + case UITER_START: + /* restart from the beginning */ + if(uni->hasPrevious) { + iter->move(iter, 0, UITER_START); + api->start=api->index=api->limit=0; + uni->states[api->limit]=uni->state=uiter_getState(iter); + uni->hasPrevious=FALSE; + uni->hasNext=iter->hasNext(iter); + } else { + /* we already have the beginning of the normalized text */ + api->index=api->start; + } + break; + case UITER_CURRENT: + break; + case UITER_LIMIT: + case UITER_LENGTH: + /* restart from the end */ + if(uni->hasNext) { + iter->move(iter, 0, UITER_LIMIT); + api->start=api->index=api->limit=uni->capacity; + uni->states[api->limit]=uni->state=uiter_getState(iter); + uni->hasPrevious=iter->hasPrevious(iter); + uni->hasNext=FALSE; + } else { + /* we already have the end of the normalized text */ + api->index=api->limit; + } + break; + default: + return -1; /* Error */ + } + + /* move relative to the current position by delta normalized UChars */ + if(delta==0) { + /* nothing to do */ + } else if(delta>0) { + /* go forward until the requested position is in the buffer */ + for(;;) { + pos=api->index+delta; /* requested position */ + delta=pos-api->limit; /* remainder beyond buffered text */ + if(delta<=0) { + api->index=pos; /* position reached */ + break; + } + + /* go to end of buffer and normalize further */ + api->index=api->limit; + if(!uni->hasNext || !readNext(uni, iter)) { + break; /* reached end of text */ + } + } + } else /* delta<0 */ { + /* go backward until the requested position is in the buffer */ + for(;;) { + pos=api->index+delta; /* requested position */ + delta=pos-api->start; /* remainder beyond buffered text */ + if(delta>=0) { + api->index=pos; /* position reached */ + break; + } + + /* go to start of buffer and normalize further */ + api->index=api->start; + if(!uni->hasPrevious || !readPrevious(uni, iter)) { + break; /* reached start of text */ + } + } + } + + if(api->index==api->start && !uni->hasPrevious) { + return 0; + } else { + return UITER_UNKNOWN_INDEX; + } +} + +static UBool U_CALLCONV +unormIteratorHasNext(UCharIterator *api) { + return api->indexlimit || ((UNormIterator *)api)->hasNext; +} + +static UBool U_CALLCONV +unormIteratorHasPrevious(UCharIterator *api) { + return api->index>api->start || ((UNormIterator *)api)->hasPrevious; +} + +static UChar32 U_CALLCONV +unormIteratorCurrent(UCharIterator *api) { + UNormIterator *uni=(UNormIterator *)api; + + if( api->indexlimit || + (uni->hasNext && readNext(uni, uni->iter)) + ) { + return uni->chars[api->index]; + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +unormIteratorNext(UCharIterator *api) { + UNormIterator *uni=(UNormIterator *)api; + + if( api->indexlimit || + (uni->hasNext && readNext(uni, uni->iter)) + ) { + return uni->chars[api->index++]; + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +unormIteratorPrevious(UCharIterator *api) { + UNormIterator *uni=(UNormIterator *)api; + + if( api->index>api->start || + (uni->hasPrevious && readPrevious(uni, uni->iter)) + ) { + return uni->chars[--api->index]; + } else { + return U_SENTINEL; + } +} + +static uint32_t U_CALLCONV +unormIteratorGetState(const UCharIterator *api) { + /* not uni->state because that may not be at api->index */ + return ((UNormIterator *)api)->states[api->index]; +} + +static void U_CALLCONV +unormIteratorSetState(UCharIterator *api, uint32_t state, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + /* do nothing */ + } else if(api==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else if(state==UITER_NO_STATE) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + UNormIterator *uni=(UNormIterator *)api; + UCharIterator *iter=((UNormIterator *)api)->iter; + if(state!=uni->state) { + uni->state=state; + uiter_setState(iter, state, pErrorCode); + } + + /* + * Try shortcuts: If the requested state is in the array contents + * then just set the index there. + * + * We assume that the state is unique per position! + */ + if(state==uni->states[api->index]) { + return; + } else if(state==uni->states[api->limit]) { + api->index=api->limit; + return; + } else { + /* search for the index with this state */ + int32_t i; + + for(i=api->start; ilimit; ++i) { + if(state==uni->states[i]) { + api->index=i; + return; + } + } + } + + /* there is no array index for this state, reset for fresh contents */ + initIndexes((UNormIterator *)api, iter); + uni->states[api->limit]=state; + } +} + +static const UCharIterator unormIterator={ + NULL, 0, 0, 0, 0, 0, + unormIteratorGetIndex, + unormIteratorMove, + unormIteratorHasNext, + unormIteratorHasPrevious, + unormIteratorCurrent, + unormIteratorNext, + unormIteratorPrevious, + NULL, + unormIteratorGetState, + unormIteratorSetState +}; + +/* Setup functions ---------------------------------------------------------- */ + +U_CAPI UNormIterator * U_EXPORT2 +unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode) { + UNormIterator *uni; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + + /* allocate */ + uni=NULL; + if(stackMem!=NULL && stackMemSize>=sizeof(UNormIterator)) { + if(U_ALIGNMENT_OFFSET(stackMem)==0) { + /* already aligned */ + uni=(UNormIterator *)stackMem; + } else { + int32_t align=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem); + if((stackMemSize-=align)>=(int32_t)sizeof(UNormIterator)) { + /* needs alignment */ + uni=(UNormIterator *)((char *)stackMem+align); + } + } + /* else does not fit */ + } + + if(uni!=NULL) { + uni->isStackAllocated=TRUE; + } else { + uni=(UNormIterator *)uprv_malloc(sizeof(UNormIterator)); + if(uni==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uni->isStackAllocated=FALSE; + } + + /* + * initialize + * do not memset because that would unnecessarily initialize the arrays + */ + uni->iter=NULL; + uni->chars=uni->charsBuffer; + uni->states=uni->statesBuffer; + uni->capacity=INITIAL_CAPACITY; + uni->state=UITER_NO_STATE; + uni->hasPrevious=uni->hasNext=FALSE; + uni->mode=UNORM_NONE; + + /* set a no-op iterator into the api */ + uiter_setString(&uni->api, NULL, 0); + return uni; +} + +U_CAPI void U_EXPORT2 +unorm_closeIter(UNormIterator *uni) { + if(uni!=NULL) { + if(uni->states!=uni->statesBuffer) { + /* chars and states are allocated in the same memory block */ + uprv_free(uni->states); + } + if(!uni->isStackAllocated) { + uprv_free(uni); + } + } +} + +U_CAPI UCharIterator * U_EXPORT2 +unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode) { + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + if(uni==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if( iter==NULL || iter->getState==NULL || iter->setState==NULL || + modeapi, NULL, 0); + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + /* set the iterator and initialize */ + uprv_memcpy(&uni->api, &unormIterator, sizeof(unormIterator)); + + uni->iter=iter; + uni->mode=mode; + + initIndexes(uni, iter); + uni->states[uni->api.limit]=uni->state=uiter_getState(iter); + + return &uni->api; +} + +#endif /* uconfig.h switches */