intl/icu/source/common/unorm_it.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2003-2011, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  unorm_it.c
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2003jan21
    14 *   created by: Markus W. Scherer
    15 */
    17 #include "unicode/utypes.h"
    19 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
    21 #include "unicode/uiter.h"
    22 #include "unicode/unorm.h"
    23 #include "unicode/utf.h"
    24 #include "unorm_it.h"
    25 #include "cmemory.h"
    27 /* UNormIterator ------------------------------------------------------------ */
    29 enum {
    30     INITIAL_CAPACITY=100
    31 };
    33 struct UNormIterator {
    34     UCharIterator api;
    35     UCharIterator *iter;
    37     /*
    38      * chars and states either use the static buffers
    39      * or are allocated in the same memory block
    40      *
    41      * They are parallel arrays with states[] holding the getState() values
    42      * from normalization boundaries, and UITER_NO_STATE in between.
    43      */
    44     UChar *chars;
    45     uint32_t *states;
    47     /*
    48      * api.start: first valid character & state in the arrays
    49      * api.index: current position
    50      * api.limit: one past the last valid character in chars[], but states[limit] is valid
    51      * capacity: length of allocated arrays
    52      */
    53     int32_t capacity;
    55     /* the current iter->getState(), saved to avoid unnecessary setState() calls; may not correspond to api->index! */
    56     uint32_t state;
    58     /* there are UChars available before start or after limit? */
    59     UBool hasPrevious, hasNext, isStackAllocated;
    61     UNormalizationMode mode;
    63     UChar charsBuffer[INITIAL_CAPACITY];
    64     uint32_t statesBuffer[INITIAL_CAPACITY+1]; /* one more than charsBuffer[]! */
    65 };
    67 static void
    68 initIndexes(UNormIterator *uni, UCharIterator *iter) {
    69     /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
    70     UCharIterator *api=&uni->api;
    72     if(!iter->hasPrevious(iter)) {
    73         /* set indexes to the beginning of the arrays */
    74         api->start=api->index=api->limit=0;
    75         uni->hasPrevious=FALSE;
    76         uni->hasNext=iter->hasNext(iter);
    77     } else if(!iter->hasNext(iter)) {
    78         /* set indexes to the end of the arrays */
    79         api->start=api->index=api->limit=uni->capacity;
    80         uni->hasNext=FALSE;
    81         uni->hasPrevious=iter->hasPrevious(iter);
    82     } else {
    83         /* set indexes into the middle of the arrays */
    84         api->start=api->index=api->limit=uni->capacity/2;
    85         uni->hasPrevious=uni->hasNext=TRUE;
    86     }
    87 }
    89 static UBool
    90 reallocArrays(UNormIterator *uni, int32_t capacity, UBool addAtStart) {
    91     /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
    92     UCharIterator *api=&uni->api;
    94     uint32_t *states;
    95     UChar *chars;
    96     int32_t start, limit;
    98     states=(uint32_t *)uprv_malloc((capacity+1)*4+capacity*2);
    99     if(states==NULL) {
   100         return FALSE;
   101     }
   103     chars=(UChar *)(states+(capacity+1));
   104     uni->capacity=capacity;
   106     start=api->start;
   107     limit=api->limit;
   109     if(addAtStart) {
   110         /* copy old contents to the end of the new arrays */
   111         int32_t delta;
   113         delta=capacity-uni->capacity;
   114         uprv_memcpy(states+delta+start, uni->states+start, (limit-start+1)*4);
   115         uprv_memcpy(chars+delta+start, uni->chars+start, (limit-start)*4);
   117         api->start=start+delta;
   118         api->index+=delta;
   119         api->limit=limit+delta;
   120     } else {
   121         /* copy old contents to the beginning of the new arrays */
   122         uprv_memcpy(states+start, uni->states+start, (limit-start+1)*4);
   123         uprv_memcpy(chars+start, uni->chars+start, (limit-start)*4);
   124     }
   126     uni->chars=chars;
   127     uni->states=states;
   129     return TRUE;
   130 }
   132 static void
   133 moveContentsTowardStart(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) {
   134     /* move array contents up to make room */
   135     int32_t srcIndex, destIndex, limit;
   137     limit=api->limit;
   138     srcIndex=delta;
   139     if(srcIndex>api->start) {
   140         /* look for a position in the arrays with a known state */
   141         while(srcIndex<limit && states[srcIndex]==UITER_NO_STATE) {
   142             ++srcIndex;
   143         }
   144     }
   146     /* now actually move the array contents */
   147     api->start=destIndex=0;
   148     while(srcIndex<limit) {
   149         chars[destIndex]=chars[srcIndex];
   150         states[destIndex++]=states[srcIndex++];
   151     }
   153     /* copy states[limit] as well! */
   154     states[destIndex]=states[srcIndex];
   156     api->limit=destIndex;
   157 }
   159 static void
   160 moveContentsTowardEnd(UCharIterator *api, UChar chars[], uint32_t states[], int32_t delta) {
   161     /* move array contents up to make room */
   162     int32_t srcIndex, destIndex, start;
   164     start=api->start;
   165     destIndex=((UNormIterator *)api)->capacity;
   166     srcIndex=destIndex-delta;
   167     if(srcIndex<api->limit) {
   168         /* look for a position in the arrays with a known state */
   169         while(srcIndex>start && states[srcIndex]==UITER_NO_STATE) {
   170             --srcIndex;
   171         }
   172     }
   174     /* now actually move the array contents */
   175     api->limit=destIndex;
   177     /* copy states[limit] as well! */
   178     states[destIndex]=states[srcIndex];
   180     while(srcIndex>start) {
   181         chars[--destIndex]=chars[--srcIndex];
   182         states[destIndex]=states[srcIndex];
   183     }
   185     api->start=destIndex;
   186 }
   188 /* normalize forward from the limit, assume hasNext is true */
   189 static UBool
   190 readNext(UNormIterator *uni, UCharIterator *iter) {
   191     /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
   192     UCharIterator *api=&uni->api;
   194     /* make capacity/4 room at the end of the arrays */
   195     int32_t limit, capacity, room;
   196     UErrorCode errorCode;
   198     limit=api->limit;
   199     capacity=uni->capacity;
   200     room=capacity/4;
   201     if(room>(capacity-limit)) {
   202         /* move array contents to make room */
   203         moveContentsTowardStart(api, uni->chars, uni->states, room);
   204         api->index=limit=api->limit;
   205         uni->hasPrevious=TRUE;
   206     }
   208     /* normalize starting from the limit position */
   209     errorCode=U_ZERO_ERROR;
   210     if(uni->state!=uni->states[limit]) {
   211         uiter_setState(iter, uni->states[limit], &errorCode);
   212         if(U_FAILURE(errorCode)) {
   213             uni->state=UITER_NO_STATE;
   214             uni->hasNext=FALSE;
   215             return FALSE;
   216         }
   217     }
   219     room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode);
   220     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
   221         if(room<=capacity) {
   222             /* empty and re-use the arrays */
   223             uni->states[0]=uni->states[limit];
   224             api->start=api->index=api->limit=limit=0;
   225             uni->hasPrevious=TRUE;
   226         } else {
   227             capacity+=room+100;
   228             if(!reallocArrays(uni, capacity, FALSE)) {
   229                 uni->state=UITER_NO_STATE;
   230                 uni->hasNext=FALSE;
   231                 return FALSE;
   232             }
   233             limit=api->limit;
   234         }
   236         errorCode=U_ZERO_ERROR;
   237         uiter_setState(iter, uni->states[limit], &errorCode);
   238         room=unorm_next(iter, uni->chars+limit, capacity-limit, uni->mode, 0, TRUE, NULL, &errorCode);
   239     }
   240     if(U_FAILURE(errorCode) || room==0) {
   241         uni->state=UITER_NO_STATE;
   242         uni->hasNext=FALSE;
   243         return FALSE;
   244     }
   246     /* room>0 */
   247     ++limit; /* leave the known states[limit] alone */
   248     for(--room; room>0; --room) {
   249         /* set unknown states for all but the normalization boundaries */
   250         uni->states[limit++]=UITER_NO_STATE;
   251     }
   252     uni->states[limit]=uni->state=uiter_getState(iter);
   253     uni->hasNext=iter->hasNext(iter);
   254     api->limit=limit;
   255     return TRUE;
   256 }
   258 /* normalize backward from the start, assume hasPrevious is true */
   259 static UBool
   260 readPrevious(UNormIterator *uni, UCharIterator *iter) {
   261     /* do not pass api so that the compiler knows it's an alias pointer to uni itself */
   262     UCharIterator *api=&uni->api;
   264     /* make capacity/4 room at the start of the arrays */
   265     int32_t start, capacity, room;
   266     UErrorCode errorCode;
   268     start=api->start;
   269     capacity=uni->capacity;
   270     room=capacity/4;
   271     if(room>start) {
   272         /* move array contents to make room */
   273         moveContentsTowardEnd(api, uni->chars, uni->states, room);
   274         api->index=start=api->start;
   275         uni->hasNext=TRUE;
   276     }
   278     /* normalize ending at the start position */
   279     errorCode=U_ZERO_ERROR;
   280     if(uni->state!=uni->states[start]) {
   281         uiter_setState(iter, uni->states[start], &errorCode);
   282         if(U_FAILURE(errorCode)) {
   283             uni->state=UITER_NO_STATE;
   284             uni->hasPrevious=FALSE;
   285             return FALSE;
   286         }
   287     }
   289     room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode);
   290     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
   291         if(room<=capacity) {
   292             /* empty and re-use the arrays */
   293             uni->states[capacity]=uni->states[start];
   294             api->start=api->index=api->limit=start=capacity;
   295             uni->hasNext=TRUE;
   296         } else {
   297             capacity+=room+100;
   298             if(!reallocArrays(uni, capacity, TRUE)) {
   299                 uni->state=UITER_NO_STATE;
   300                 uni->hasPrevious=FALSE;
   301                 return FALSE;
   302             }
   303             start=api->start;
   304         }
   306         errorCode=U_ZERO_ERROR;
   307         uiter_setState(iter, uni->states[start], &errorCode);
   308         room=unorm_previous(iter, uni->chars, start, uni->mode, 0, TRUE, NULL, &errorCode);
   309     }
   310     if(U_FAILURE(errorCode) || room==0) {
   311         uni->state=UITER_NO_STATE;
   312         uni->hasPrevious=FALSE;
   313         return FALSE;
   314     }
   316     /* room>0 */
   317     do {
   318         /* copy the UChars from chars[0..room[ to chars[(start-room)..start[ */
   319         uni->chars[--start]=uni->chars[--room];
   320         /* set unknown states for all but the normalization boundaries */
   321         uni->states[start]=UITER_NO_STATE;
   322     } while(room>0);
   323     uni->states[start]=uni->state=uiter_getState(iter);
   324     uni->hasPrevious=iter->hasPrevious(iter);
   325     api->start=start;
   326     return TRUE;
   327 }
   329 /* Iterator runtime API functions ------------------------------------------- */
   331 static int32_t U_CALLCONV
   332 unormIteratorGetIndex(UCharIterator *api, UCharIteratorOrigin origin) {
   333     switch(origin) {
   334     case UITER_ZERO:
   335     case UITER_START:
   336         return 0;
   337     case UITER_CURRENT:
   338     case UITER_LIMIT:
   339     case UITER_LENGTH:
   340         return UITER_UNKNOWN_INDEX;
   341     default:
   342         /* not a valid origin */
   343         /* Should never get here! */
   344         return -1;
   345     }
   346 }
   348 static int32_t U_CALLCONV
   349 unormIteratorMove(UCharIterator *api, int32_t delta, UCharIteratorOrigin origin) {
   350     UNormIterator *uni=(UNormIterator *)api;
   351     UCharIterator *iter=uni->iter;
   352     int32_t pos;
   354     switch(origin) {
   355     case UITER_ZERO:
   356     case UITER_START:
   357         /* restart from the beginning */
   358         if(uni->hasPrevious) {
   359             iter->move(iter, 0, UITER_START);
   360             api->start=api->index=api->limit=0;
   361             uni->states[api->limit]=uni->state=uiter_getState(iter);
   362             uni->hasPrevious=FALSE;
   363             uni->hasNext=iter->hasNext(iter);
   364         } else {
   365             /* we already have the beginning of the normalized text */
   366             api->index=api->start;
   367         }
   368         break;
   369     case UITER_CURRENT:
   370         break;
   371     case UITER_LIMIT:
   372     case UITER_LENGTH:
   373         /* restart from the end */
   374         if(uni->hasNext) {
   375             iter->move(iter, 0, UITER_LIMIT);
   376             api->start=api->index=api->limit=uni->capacity;
   377             uni->states[api->limit]=uni->state=uiter_getState(iter);
   378             uni->hasPrevious=iter->hasPrevious(iter);
   379             uni->hasNext=FALSE;
   380         } else {
   381             /* we already have the end of the normalized text */
   382             api->index=api->limit;
   383         }
   384         break;
   385     default:
   386         return -1;  /* Error */
   387     }
   389     /* move relative to the current position by delta normalized UChars */
   390     if(delta==0) {
   391         /* nothing to do */
   392     } else if(delta>0) {
   393         /* go forward until the requested position is in the buffer */
   394         for(;;) {
   395             pos=api->index+delta;   /* requested position */
   396             delta=pos-api->limit;   /* remainder beyond buffered text */
   397             if(delta<=0) {
   398                 api->index=pos;     /* position reached */
   399                 break;
   400             }
   402             /* go to end of buffer and normalize further */
   403             api->index=api->limit;
   404             if(!uni->hasNext || !readNext(uni, iter)) {
   405                 break;              /* reached end of text */
   406             }
   407         }
   408     } else /* delta<0 */ {
   409         /* go backward until the requested position is in the buffer */
   410         for(;;) {
   411             pos=api->index+delta;   /* requested position */
   412             delta=pos-api->start;   /* remainder beyond buffered text */
   413             if(delta>=0) {
   414                 api->index=pos;     /* position reached */
   415                 break;
   416             }
   418             /* go to start of buffer and normalize further */
   419             api->index=api->start;
   420             if(!uni->hasPrevious || !readPrevious(uni, iter)) {
   421                 break;              /* reached start of text */
   422             }
   423         }
   424     }
   426     if(api->index==api->start && !uni->hasPrevious) {
   427         return 0;
   428     } else {
   429         return UITER_UNKNOWN_INDEX;
   430     }
   431 }
   433 static UBool U_CALLCONV
   434 unormIteratorHasNext(UCharIterator *api) {
   435     return api->index<api->limit || ((UNormIterator *)api)->hasNext;
   436 }
   438 static UBool U_CALLCONV
   439 unormIteratorHasPrevious(UCharIterator *api) {
   440     return api->index>api->start || ((UNormIterator *)api)->hasPrevious;
   441 }
   443 static UChar32 U_CALLCONV
   444 unormIteratorCurrent(UCharIterator *api) {
   445     UNormIterator *uni=(UNormIterator *)api;
   447     if( api->index<api->limit ||
   448         (uni->hasNext && readNext(uni, uni->iter))
   449     ) {
   450         return uni->chars[api->index];
   451     } else {
   452         return U_SENTINEL;
   453     }
   454 }
   456 static UChar32 U_CALLCONV
   457 unormIteratorNext(UCharIterator *api) {
   458     UNormIterator *uni=(UNormIterator *)api;
   460     if( api->index<api->limit ||
   461         (uni->hasNext && readNext(uni, uni->iter))
   462     ) {
   463         return uni->chars[api->index++];
   464     } else {
   465         return U_SENTINEL;
   466     }
   467 }
   469 static UChar32 U_CALLCONV
   470 unormIteratorPrevious(UCharIterator *api) {
   471     UNormIterator *uni=(UNormIterator *)api;
   473     if( api->index>api->start ||
   474         (uni->hasPrevious && readPrevious(uni, uni->iter))
   475     ) {
   476         return uni->chars[--api->index];
   477     } else {
   478         return U_SENTINEL;
   479     }
   480 }
   482 static uint32_t U_CALLCONV
   483 unormIteratorGetState(const UCharIterator *api) {
   484     /* not uni->state because that may not be at api->index */
   485     return ((UNormIterator *)api)->states[api->index];
   486 }
   488 static void U_CALLCONV
   489 unormIteratorSetState(UCharIterator *api, uint32_t state, UErrorCode *pErrorCode) {
   490     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   491         /* do nothing */
   492     } else if(api==NULL) {
   493         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   494     } else if(state==UITER_NO_STATE) {
   495         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   496     } else {
   497         UNormIterator *uni=(UNormIterator *)api;
   498         UCharIterator *iter=((UNormIterator *)api)->iter;
   499         if(state!=uni->state) {
   500             uni->state=state;
   501             uiter_setState(iter, state, pErrorCode);
   502         }
   504         /*
   505          * Try shortcuts: If the requested state is in the array contents
   506          * then just set the index there.
   507          *
   508          * We assume that the state is unique per position!
   509          */
   510         if(state==uni->states[api->index]) {
   511             return;
   512         } else if(state==uni->states[api->limit]) {
   513             api->index=api->limit;
   514             return;
   515         } else {
   516             /* search for the index with this state */
   517             int32_t i;
   519             for(i=api->start; i<api->limit; ++i) {
   520                 if(state==uni->states[i]) {
   521                     api->index=i;
   522                     return;
   523                 }
   524             }
   525         }
   527         /* there is no array index for this state, reset for fresh contents */
   528         initIndexes((UNormIterator *)api, iter);
   529         uni->states[api->limit]=state;
   530     }
   531 }
   533 static const UCharIterator unormIterator={
   534     NULL, 0, 0, 0, 0, 0,
   535     unormIteratorGetIndex,
   536     unormIteratorMove,
   537     unormIteratorHasNext,
   538     unormIteratorHasPrevious,
   539     unormIteratorCurrent,
   540     unormIteratorNext,
   541     unormIteratorPrevious,
   542     NULL,
   543     unormIteratorGetState,
   544     unormIteratorSetState
   545 };
   547 /* Setup functions ---------------------------------------------------------- */
   549 U_CAPI UNormIterator * U_EXPORT2
   550 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode) {
   551     UNormIterator *uni;
   553     /* argument checking */
   554     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   555         return NULL;
   556     }
   558     /* allocate */
   559     uni=NULL;
   560     if(stackMem!=NULL && stackMemSize>=sizeof(UNormIterator)) {
   561         if(U_ALIGNMENT_OFFSET(stackMem)==0) {
   562             /* already aligned */
   563             uni=(UNormIterator *)stackMem;
   564         } else {
   565             int32_t align=(int32_t)U_ALIGNMENT_OFFSET_UP(stackMem);
   566             if((stackMemSize-=align)>=(int32_t)sizeof(UNormIterator)) {
   567                 /* needs alignment */
   568                 uni=(UNormIterator *)((char *)stackMem+align);
   569             }
   570         }
   571         /* else does not fit */
   572     }
   574     if(uni!=NULL) {
   575         uni->isStackAllocated=TRUE;
   576     } else {
   577         uni=(UNormIterator *)uprv_malloc(sizeof(UNormIterator));
   578         if(uni==NULL) {
   579             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   580             return NULL;
   581         }
   582         uni->isStackAllocated=FALSE;
   583     }
   585     /*
   586      * initialize
   587      * do not memset because that would unnecessarily initialize the arrays
   588      */
   589     uni->iter=NULL;
   590     uni->chars=uni->charsBuffer;
   591     uni->states=uni->statesBuffer;
   592     uni->capacity=INITIAL_CAPACITY;
   593     uni->state=UITER_NO_STATE;
   594     uni->hasPrevious=uni->hasNext=FALSE;
   595     uni->mode=UNORM_NONE;
   597     /* set a no-op iterator into the api */
   598     uiter_setString(&uni->api, NULL, 0);
   599     return uni;
   600 }
   602 U_CAPI void U_EXPORT2
   603 unorm_closeIter(UNormIterator *uni) {
   604     if(uni!=NULL) {
   605         if(uni->states!=uni->statesBuffer) {
   606             /* chars and states are allocated in the same memory block */
   607             uprv_free(uni->states);
   608         }
   609         if(!uni->isStackAllocated) {
   610             uprv_free(uni);
   611         }
   612     }
   613 }
   615 U_CAPI UCharIterator * U_EXPORT2
   616 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode) {
   617     /* argument checking */
   618     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   619         return NULL;
   620     }
   621     if(uni==NULL) {
   622         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   623         return NULL;
   624     }
   625     if( iter==NULL || iter->getState==NULL || iter->setState==NULL ||
   626         mode<UNORM_NONE || UNORM_MODE_COUNT<=mode
   627     ) {
   628         /* set a no-op iterator into the api */
   629         uiter_setString(&uni->api, NULL, 0);
   630         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   631         return NULL;
   632     }
   634     /* set the iterator and initialize */
   635     uprv_memcpy(&uni->api, &unormIterator, sizeof(unormIterator));
   637     uni->iter=iter;
   638     uni->mode=mode;
   640     initIndexes(uni, iter);
   641     uni->states[uni->api.limit]=uni->state=uiter_getState(iter);
   643     return &uni->api;
   644 }
   646 #endif /* uconfig.h switches */

mercurial