michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 1999-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * file name: ubidiln.c michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 1999aug06 michael@0: * created by: Markus W. Scherer, updated by Matitiahu Allouche michael@0: */ michael@0: michael@0: #include "cmemory.h" michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/ubidi.h" michael@0: #include "ubidiimp.h" michael@0: #include "uassert.h" michael@0: michael@0: #ifndef U_COMMON_IMPLEMENTATION michael@0: #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu michael@0: #endif michael@0: michael@0: /* michael@0: * General remarks about the functions in this file: michael@0: * michael@0: * These functions deal with the aspects of potentially mixed-directional michael@0: * text in a single paragraph or in a line of a single paragraph michael@0: * which has already been processed according to michael@0: * the Unicode 6.3 BiDi algorithm as defined in michael@0: * http://www.unicode.org/unicode/reports/tr9/ , version 28, michael@0: * also described in The Unicode Standard, Version 6.3.0 . michael@0: * michael@0: * This means that there is a UBiDi object with a levels michael@0: * and a dirProps array. michael@0: * paraLevel and direction are also set. michael@0: * Only if the length of the text is zero, then levels==dirProps==NULL. 
michael@0: * michael@0: * The overall directionality of the paragraph michael@0: * or line is used to bypass the reordering steps if possible. michael@0: * Even purely RTL text does not need reordering there because michael@0: * the ubidi_getLogical/VisualIndex() functions can compute the michael@0: * index on the fly in such a case. michael@0: * michael@0: * The implementation of the access to same-level-runs and of the reordering michael@0: * does attempt to provide better performance and less memory usage compared to michael@0: * a direct implementation, especially of rule (L2), with an array of michael@0: * one (32-bit) integer per text character. michael@0: * michael@0: * Here, the levels array is scanned as soon as necessary, and a vector of michael@0: * same-level-runs is created. Reordering then is done on this vector. michael@0: * For each run of text positions that were resolved to the same level, michael@0: * only 8 bytes are stored: the first text position of the run and the visual michael@0: * position behind the run after reordering. michael@0: * One sign bit is used to hold the directionality of the run. michael@0: * This is inefficient if there are many very short runs. If the average run michael@0: * length is <2, then this uses more memory. michael@0: * michael@0: * In a further attempt to save memory, the levels array is never changed michael@0: * after all the resolution rules (Xn, Wn, Nn, In). michael@0: * Many functions have to consider the field trailingWSStart: michael@0: * if it is less than length, then there is an implicit trailing run michael@0: * at the paraLevel, michael@0: * which is not reflected in the levels array. michael@0: * This allows a line UBiDi object to use the same levels array as michael@0: * its paragraph parent object. 
michael@0: * michael@0: * When a UBiDi object is created for a line of a paragraph, then the michael@0: * paragraph's levels and dirProps arrays are reused by way of setting michael@0: * a pointer into them, not by copying. This again saves memory and forbids to michael@0: * change the now shared levels for (L1). michael@0: */ michael@0: michael@0: /* handle trailing WS (L1) -------------------------------------------------- */ michael@0: michael@0: /* michael@0: * setTrailingWSStart() sets the start index for a trailing michael@0: * run of WS in the line. This is necessary because we do not modify michael@0: * the paragraph's levels array that we just point into. michael@0: * Using trailingWSStart is another form of performing (L1). michael@0: * michael@0: * To make subsequent operations easier, we also include the run michael@0: * before the WS if it is at the paraLevel - we merge the two here. michael@0: * michael@0: * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is michael@0: * set correctly for the line even when contextual multiple paragraphs. michael@0: */ michael@0: static void michael@0: setTrailingWSStart(UBiDi *pBiDi) { michael@0: /* pBiDi->direction!=UBIDI_MIXED */ michael@0: michael@0: const DirProp *dirProps=pBiDi->dirProps; michael@0: UBiDiLevel *levels=pBiDi->levels; michael@0: int32_t start=pBiDi->length; michael@0: UBiDiLevel paraLevel=pBiDi->paraLevel; michael@0: michael@0: /* If the line is terminated by a block separator, all preceding WS etc... michael@0: are already set to paragraph level. michael@0: Setting trailingWSStart to pBidi->length will avoid changing the michael@0: level of B chars from 0 to paraLevel in ubidi_getLevels when michael@0: orderParagraphsLTR==TRUE. 
michael@0: */ michael@0: if(dirProps[start-1]==B) { michael@0: pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ michael@0: return; michael@0: } michael@0: /* go backwards across all WS, BN, explicit codes */ michael@0: while(start>0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps[start-1]))&MASK_WS) { michael@0: --start; michael@0: } michael@0: michael@0: /* if the WS run can be merged with the previous run then do so here */ michael@0: while(start>0 && levels[start-1]==paraLevel) { michael@0: --start; michael@0: } michael@0: michael@0: pBiDi->trailingWSStart=start; michael@0: } michael@0: michael@0: /* ubidi_setLine ------------------------------------------------------------ */ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_setLine(const UBiDi *pParaBiDi, michael@0: int32_t start, int32_t limit, michael@0: UBiDi *pLineBiDi, michael@0: UErrorCode *pErrorCode) { michael@0: int32_t length; michael@0: michael@0: /* check the argument values */ michael@0: RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); michael@0: RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode); michael@0: RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); michael@0: RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); michael@0: if(pLineBiDi==NULL) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != michael@0: ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { michael@0: /* the line crosses a paragraph boundary */ michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: michael@0: /* set the values in pLineBiDi from its pParaBiDi parent */ michael@0: pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ michael@0: pLineBiDi->text=pParaBiDi->text+start; michael@0: length=pLineBiDi->length=limit-start; michael@0: pLineBiDi->resultLength=pLineBiDi->originalLength=length; michael@0: 
pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); michael@0: pLineBiDi->paraCount=pParaBiDi->paraCount; michael@0: pLineBiDi->runs=NULL; michael@0: pLineBiDi->flags=0; michael@0: pLineBiDi->reorderingMode=pParaBiDi->reorderingMode; michael@0: pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions; michael@0: pLineBiDi->controlCount=0; michael@0: if(pParaBiDi->controlCount>0) { michael@0: int32_t j; michael@0: for(j=start; jtext[j])) { michael@0: pLineBiDi->controlCount++; michael@0: } michael@0: } michael@0: pLineBiDi->resultLength-=pLineBiDi->controlCount; michael@0: } michael@0: michael@0: pLineBiDi->dirProps=pParaBiDi->dirProps+start; michael@0: pLineBiDi->levels=pParaBiDi->levels+start; michael@0: pLineBiDi->runCount=-1; michael@0: michael@0: if(pParaBiDi->direction!=UBIDI_MIXED) { michael@0: /* the parent is already trivial */ michael@0: pLineBiDi->direction=pParaBiDi->direction; michael@0: michael@0: /* michael@0: * The parent's levels are all either michael@0: * implicitly or explicitly ==paraLevel; michael@0: * do the same here. 
michael@0: */ michael@0: if(pParaBiDi->trailingWSStart<=start) { michael@0: pLineBiDi->trailingWSStart=0; michael@0: } else if(pParaBiDi->trailingWSStarttrailingWSStart=pParaBiDi->trailingWSStart-start; michael@0: } else { michael@0: pLineBiDi->trailingWSStart=length; michael@0: } michael@0: } else { michael@0: const UBiDiLevel *levels=pLineBiDi->levels; michael@0: int32_t i, trailingWSStart; michael@0: UBiDiLevel level; michael@0: michael@0: setTrailingWSStart(pLineBiDi); michael@0: trailingWSStart=pLineBiDi->trailingWSStart; michael@0: michael@0: /* recalculate pLineBiDi->direction */ michael@0: if(trailingWSStart==0) { michael@0: /* all levels are at paraLevel */ michael@0: pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); michael@0: } else { michael@0: /* get the level of the first character */ michael@0: level=(UBiDiLevel)(levels[0]&1); michael@0: michael@0: /* if there is anything of a different level, then the line is mixed */ michael@0: if(trailingWSStartparaLevel&1)!=level) { michael@0: /* the trailing WS is at paraLevel, which differs from levels[0] */ michael@0: pLineBiDi->direction=UBIDI_MIXED; michael@0: } else { michael@0: /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ michael@0: i=1; michael@0: for(;;) { michael@0: if(i==trailingWSStart) { michael@0: /* the direction values match those in level */ michael@0: pLineBiDi->direction=(UBiDiDirection)level; michael@0: break; michael@0: } else if((levels[i]&1)!=level) { michael@0: pLineBiDi->direction=UBIDI_MIXED; michael@0: break; michael@0: } michael@0: ++i; michael@0: } michael@0: } michael@0: } michael@0: michael@0: switch(pLineBiDi->direction) { michael@0: case UBIDI_LTR: michael@0: /* make sure paraLevel is even */ michael@0: pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); michael@0: michael@0: /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ michael@0: pLineBiDi->trailingWSStart=0; michael@0: 
break; michael@0: case UBIDI_RTL: michael@0: /* make sure paraLevel is odd */ michael@0: pLineBiDi->paraLevel|=1; michael@0: michael@0: /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ michael@0: pLineBiDi->trailingWSStart=0; michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ michael@0: return; michael@0: } michael@0: michael@0: U_CAPI UBiDiLevel U_EXPORT2 michael@0: ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { michael@0: /* return paraLevel if in the trailing WS run, otherwise the real level */ michael@0: if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { michael@0: return 0; michael@0: } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { michael@0: return GET_PARALEVEL(pBiDi, charIndex); michael@0: } else { michael@0: return pBiDi->levels[charIndex]; michael@0: } michael@0: } michael@0: michael@0: U_CAPI const UBiDiLevel * U_EXPORT2 michael@0: ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { michael@0: int32_t start, length; michael@0: michael@0: RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); michael@0: RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); michael@0: if((length=pBiDi->length)<=0) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: if((start=pBiDi->trailingWSStart)==length) { michael@0: /* the current levels array reflects the WS run */ michael@0: return pBiDi->levels; michael@0: } michael@0: michael@0: /* michael@0: * After the previous if(), we know that the levels array michael@0: * has an implicit trailing WS run and therefore does not fully michael@0: * reflect itself all the levels. michael@0: * This must be a UBiDi object for a line, and michael@0: * we need to create a new levels array. 
michael@0: */ michael@0: if(getLevelsMemory(pBiDi, length)) { michael@0: UBiDiLevel *levels=pBiDi->levelsMemory; michael@0: michael@0: if(start>0 && levels!=pBiDi->levels) { michael@0: uprv_memcpy(levels, pBiDi->levels, start); michael@0: } michael@0: /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, michael@0: since pBidi is a line object */ michael@0: uprv_memset(levels+start, pBiDi->paraLevel, length-start); michael@0: michael@0: /* this new levels array is set for the line and reflects the WS run */ michael@0: pBiDi->trailingWSStart=length; michael@0: return pBiDi->levels=levels; michael@0: } else { michael@0: /* out of memory */ michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, michael@0: int32_t *pLogicalLimit, UBiDiLevel *pLevel) { michael@0: UErrorCode errorCode; michael@0: int32_t runCount, visualStart, logicalLimit, logicalFirst, i; michael@0: Run iRun; michael@0: michael@0: errorCode=U_ZERO_ERROR; michael@0: RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode); michael@0: /* ubidi_countRuns will check VALID_PARA_OR_LINE */ michael@0: runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: /* this is done based on runs rather than on levels since levels have michael@0: a special interpretation when UBIDI_REORDER_RUNS_ONLY michael@0: */ michael@0: visualStart=logicalLimit=0; michael@0: iRun=pBiDi->runs[0]; michael@0: michael@0: for(i=0; iruns[i]; michael@0: logicalFirst=GET_INDEX(iRun.logicalStart); michael@0: logicalLimit=logicalFirst+iRun.visualLimit-visualStart; michael@0: if((logicalPosition>=logicalFirst) && michael@0: (logicalPositionreorderingMode==UBIDI_REORDER_RUNS_ONLY) { michael@0: *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart); michael@0: } michael@0: else 
if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) { michael@0: *pLevel=GET_PARALEVEL(pBiDi, logicalPosition); michael@0: } else { michael@0: *pLevel=pBiDi->levels[logicalPosition]; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* runs API functions ------------------------------------------------------- */ michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { michael@0: RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); michael@0: RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); michael@0: ubidi_getRuns(pBiDi, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return -1; michael@0: } michael@0: return pBiDi->runCount; michael@0: } michael@0: michael@0: U_CAPI UBiDiDirection U_EXPORT2 michael@0: ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, michael@0: int32_t *pLogicalStart, int32_t *pLength) michael@0: { michael@0: int32_t start; michael@0: UErrorCode errorCode = U_ZERO_ERROR; michael@0: RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); michael@0: ubidi_getRuns(pBiDi, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return UBIDI_LTR; michael@0: } michael@0: RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); michael@0: michael@0: start=pBiDi->runs[runIndex].logicalStart; michael@0: if(pLogicalStart!=NULL) { michael@0: *pLogicalStart=GET_INDEX(start); michael@0: } michael@0: if(pLength!=NULL) { michael@0: if(runIndex>0) { michael@0: *pLength=pBiDi->runs[runIndex].visualLimit- michael@0: pBiDi->runs[runIndex-1].visualLimit; michael@0: } else { michael@0: *pLength=pBiDi->runs[0].visualLimit; michael@0: } michael@0: } michael@0: return (UBiDiDirection)GET_ODD_BIT(start); michael@0: } michael@0: michael@0: /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ michael@0: static void michael@0: getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { michael@0: /* simple, single-run 
case */ michael@0: pBiDi->runs=pBiDi->simpleRuns; michael@0: pBiDi->runCount=1; michael@0: michael@0: /* fill and reorder the single run */ michael@0: pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); michael@0: pBiDi->runs[0].visualLimit=pBiDi->length; michael@0: pBiDi->runs[0].insertRemove=0; michael@0: } michael@0: michael@0: /* reorder the runs array (L2) ---------------------------------------------- */ michael@0: michael@0: /* michael@0: * Reorder the same-level runs in the runs array. michael@0: * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. michael@0: * All the visualStart fields=logical start before reordering. michael@0: * The "odd" bits are not set yet. michael@0: * michael@0: * Reordering with this data structure lends itself to some handy shortcuts: michael@0: * michael@0: * Since each run is moved but not modified, and since at the initial maxLevel michael@0: * each sequence of same-level runs consists of only one run each, we michael@0: * don't need to do anything there and can predecrement maxLevel. michael@0: * In many simple cases, the reordering is thus done entirely in the michael@0: * index mapping. michael@0: * Also, reordering occurs only down to the lowest odd level that occurs, michael@0: * which is minLevel|1. However, if the lowest level itself is odd, then michael@0: * in the last reordering the sequence of the runs at this level or higher michael@0: * will be all runs, and we don't need the elaborate loop to search for them. michael@0: * This is covered by ++minLevel instead of minLevel|=1 followed michael@0: * by an extra reorder-all after the reorder-some loop. michael@0: * About a trailing WS run: michael@0: * Such a run would need special treatment because its level is not michael@0: * reflected in levels[] if this is not a paragraph object. michael@0: * Instead, all characters from trailingWSStart on are implicitly at michael@0: * paraLevel. 
michael@0: * However, for all maxLevel>paraLevel, this run will never be reordered michael@0: * and does not need to be taken into account. maxLevel==paraLevel is only reordered michael@0: * if minLevel==paraLevel is odd, which is done in the extra segment. michael@0: * This means that for the main reordering loop we don't need to consider michael@0: * this run and can --runCount. If it is later part of the all-runs michael@0: * reordering, then runCount is adjusted accordingly. michael@0: */ michael@0: static void michael@0: reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { michael@0: Run *runs, tempRun; michael@0: UBiDiLevel *levels; michael@0: int32_t firstRun, endRun, limitRun, runCount; michael@0: michael@0: /* nothing to do? */ michael@0: if(maxLevel<=(minLevel|1)) { michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Reorder only down to the lowest odd level michael@0: * and reorder at an odd minLevel in a separate, simpler loop. michael@0: * See comments above for why minLevel is always incremented. 
michael@0: */ michael@0: ++minLevel; michael@0: michael@0: runs=pBiDi->runs; michael@0: levels=pBiDi->levels; michael@0: runCount=pBiDi->runCount; michael@0: michael@0: /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ michael@0: if(pBiDi->trailingWSStartlength) { michael@0: --runCount; michael@0: } michael@0: michael@0: while(--maxLevel>=minLevel) { michael@0: firstRun=0; michael@0: michael@0: /* loop for all sequences of runs */ michael@0: for(;;) { michael@0: /* look for a sequence of runs that are all at >=maxLevel */ michael@0: /* look for the first run of such a sequence */ michael@0: while(firstRun=runCount) { michael@0: break; /* no more such runs */ michael@0: } michael@0: michael@0: /* look for the limit run of such a sequence (the run behind it) */ michael@0: for(limitRun=firstRun; ++limitRun=maxLevel;) {} michael@0: michael@0: /* Swap the entire sequence of runs from firstRun to limitRun-1. */ michael@0: endRun=limitRun-1; michael@0: while(firstRuntrailingWSStart==pBiDi->length) { michael@0: --runCount; michael@0: } michael@0: michael@0: /* Swap the entire sequence of all runs. (endRun==runCount) */ michael@0: while(firstRunruns; michael@0: int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; michael@0: michael@0: for(i=0; i=logicalStart) && (logicalIndex<(logicalStart+length))) { michael@0: return i; michael@0: } michael@0: visualStart+=length; michael@0: } michael@0: /* we should never get here */ michael@0: U_ASSERT(FALSE); michael@0: *pErrorCode = U_INVALID_STATE_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* michael@0: * Compute the runs array from the levels array. michael@0: * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 michael@0: * and the runs are reordered. michael@0: * Odd-level runs have visualStart on their visual right edge and michael@0: * they progress visually to the left. 
michael@0: * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the michael@0: * sum of appropriate LRM/RLM_BEFORE/AFTER flags. michael@0: * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the michael@0: * negative number of BiDi control characters within this run. michael@0: */ michael@0: U_CFUNC UBool michael@0: ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { michael@0: /* michael@0: * This method returns immediately if the runs are already set. This michael@0: * includes the case of length==0 (handled in setPara).. michael@0: */ michael@0: if (pBiDi->runCount>=0) { michael@0: return TRUE; michael@0: } michael@0: michael@0: if(pBiDi->direction!=UBIDI_MIXED) { michael@0: /* simple, single-run case - this covers length==0 */ michael@0: /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ michael@0: getSingleRun(pBiDi, pBiDi->paraLevel); michael@0: } else /* UBIDI_MIXED, length>0 */ { michael@0: /* mixed directionality */ michael@0: int32_t length=pBiDi->length, limit; michael@0: UBiDiLevel *levels=pBiDi->levels; michael@0: int32_t i, runCount; michael@0: UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ michael@0: /* michael@0: * If there are WS characters at the end of the line michael@0: * and the run preceding them has a level different from michael@0: * paraLevel, then they will form their own run at paraLevel (L1). michael@0: * Count them separately. michael@0: * We need some special treatment for this in order to not michael@0: * modify the levels array which a line UBiDi object shares michael@0: * with its paragraph parent and its other line siblings. michael@0: * In other words, for the trailing WS, it may be michael@0: * levels[]!=paraLevel but we have to treat it like it were so. 
michael@0: */ michael@0: limit=pBiDi->trailingWSStart; michael@0: /* count the runs, there is at least one non-WS run, and limit>0 */ michael@0: runCount=0; michael@0: for(i=0; i1 || limit1 */ michael@0: if(getRunsMemory(pBiDi, runCount)) { michael@0: runs=pBiDi->runsMemory; michael@0: } else { michael@0: return FALSE; michael@0: } michael@0: michael@0: /* set the runs */ michael@0: /* FOOD FOR THOUGHT: this could be optimized, e.g.: michael@0: * 464->444, 484->444, 575->555, 595->555 michael@0: * However, that would take longer. Check also how it would michael@0: * interact with BiDi control removal and inserting Marks. michael@0: */ michael@0: runIndex=0; michael@0: michael@0: /* search for the run limits and initialize visualLimit values with the run lengths */ michael@0: i=0; michael@0: do { michael@0: /* prepare this run */ michael@0: start=i; michael@0: level=levels[i]; michael@0: if(levelmaxLevel) { michael@0: maxLevel=level; michael@0: } michael@0: michael@0: /* look for the run limit */ michael@0: while(++iparaLevel is ok even michael@0: if contextual multiple paragraphs. */ michael@0: if(pBiDi->paraLevelparaLevel; michael@0: } michael@0: } michael@0: michael@0: /* set the object fields */ michael@0: pBiDi->runs=runs; michael@0: pBiDi->runCount=runCount; michael@0: michael@0: reorderLine(pBiDi, minLevel, maxLevel); michael@0: michael@0: /* now add the direction flags and adjust the visualLimit's to be just that */ michael@0: /* this loop will also handle the trailing WS run */ michael@0: limit=0; michael@0: for(i=0; iparaLevel is ok even if michael@0: contextual multiple paragraphs. */ michael@0: if(runIndexparaLevel & 1) != 0)? 
0 : runIndex; michael@0: michael@0: ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* handle insert LRM/RLM BEFORE/AFTER run */ michael@0: if(pBiDi->insertPoints.size>0) { michael@0: Point *point, *start=pBiDi->insertPoints.points, michael@0: *limit=start+pBiDi->insertPoints.size; michael@0: int32_t runIndex; michael@0: for(point=start; pointpos, pErrorCode); michael@0: pBiDi->runs[runIndex].insertRemove|=point->flag; michael@0: } michael@0: } michael@0: michael@0: /* handle remove BiDi control characters */ michael@0: if(pBiDi->controlCount>0) { michael@0: int32_t runIndex; michael@0: const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu; michael@0: for(pu=start; puruns[runIndex].insertRemove--; michael@0: } michael@0: } michael@0: } michael@0: michael@0: return TRUE; michael@0: } michael@0: michael@0: static UBool michael@0: prepareReorder(const UBiDiLevel *levels, int32_t length, michael@0: int32_t *indexMap, michael@0: UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { michael@0: int32_t start; michael@0: UBiDiLevel level, minLevel, maxLevel; michael@0: michael@0: if(levels==NULL || length<=0) { michael@0: return FALSE; michael@0: } michael@0: michael@0: /* determine minLevel and maxLevel */ michael@0: minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; michael@0: maxLevel=0; michael@0: for(start=length; start>0;) { michael@0: level=levels[--start]; michael@0: if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { michael@0: return FALSE; michael@0: } michael@0: if(levelmaxLevel) { michael@0: maxLevel=level; michael@0: } michael@0: } michael@0: *pMinLevel=minLevel; michael@0: *pMaxLevel=maxLevel; michael@0: michael@0: /* initialize the index map */ michael@0: for(start=length; start>0;) { michael@0: --start; michael@0: indexMap[start]=start; michael@0: } michael@0: michael@0: return TRUE; michael@0: } michael@0: michael@0: /* reorder a line based on a levels array (L2) 
------------------------------ */ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { michael@0: int32_t start, limit, sumOfSosEos; michael@0: UBiDiLevel minLevel = 0, maxLevel = 0; michael@0: michael@0: if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { michael@0: return; michael@0: } michael@0: michael@0: /* nothing to do? */ michael@0: if(minLevel==maxLevel && (minLevel&1)==0) { michael@0: return; michael@0: } michael@0: michael@0: /* reorder only down to the lowest odd level */ michael@0: minLevel|=1; michael@0: michael@0: /* loop maxLevel..minLevel */ michael@0: do { michael@0: start=0; michael@0: michael@0: /* loop for all sequences of levels to reorder at the current maxLevel */ michael@0: for(;;) { michael@0: /* look for a sequence of levels that are all at >=maxLevel */ michael@0: /* look for the first index of such a sequence */ michael@0: while(start=length) { michael@0: break; /* no more such sequences */ michael@0: } michael@0: michael@0: /* look for the limit of such a sequence (the index behind it) */ michael@0: for(limit=start; ++limit=maxLevel;) {} michael@0: michael@0: /* michael@0: * sos=start of sequence, eos=end of sequence michael@0: * michael@0: * The closed (inclusive) interval from sos to eos includes all the logical michael@0: * and visual indexes within this sequence. They are logically and michael@0: * visually contiguous and in the same range. 
michael@0: * michael@0: * For each run, the new visual index=sos+eos-old visual index; michael@0: * we pre-add sos+eos into sumOfSosEos -> michael@0: * new visual index=sumOfSosEos-old visual index; michael@0: */ michael@0: sumOfSosEos=start+limit-1; michael@0: michael@0: /* reorder each index in the sequence */ michael@0: do { michael@0: indexMap[start]=sumOfSosEos-indexMap[start]; michael@0: } while(++start=minLevel); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { michael@0: int32_t start, end, limit, temp; michael@0: UBiDiLevel minLevel = 0, maxLevel = 0; michael@0: michael@0: if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { michael@0: return; michael@0: } michael@0: michael@0: /* nothing to do? */ michael@0: if(minLevel==maxLevel && (minLevel&1)==0) { michael@0: return; michael@0: } michael@0: michael@0: /* reorder only down to the lowest odd level */ michael@0: minLevel|=1; michael@0: michael@0: /* loop maxLevel..minLevel */ michael@0: do { michael@0: start=0; michael@0: michael@0: /* loop for all sequences of levels to reorder at the current maxLevel */ michael@0: for(;;) { michael@0: /* look for a sequence of levels that are all at >=maxLevel */ michael@0: /* look for the first index of such a sequence */ michael@0: while(start=length) { michael@0: break; /* no more such runs */ michael@0: } michael@0: michael@0: /* look for the limit of such a sequence (the index behind it) */ michael@0: for(limit=start; ++limit=maxLevel;) {} michael@0: michael@0: /* michael@0: * Swap the entire interval of indexes from start to limit-1. michael@0: * We don't need to swap the levels for the purpose of this michael@0: * algorithm: the sequence of levels that we look at does not michael@0: * move anyway. 
michael@0: */ michael@0: end=limit-1; michael@0: while(start=minLevel); michael@0: } michael@0: michael@0: /* API functions for logical<->visual mapping ------------------------------- */ michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { michael@0: int32_t visualIndex=UBIDI_MAP_NOWHERE; michael@0: RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); michael@0: RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); michael@0: RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); michael@0: michael@0: /* we can do the trivial cases without the runs array */ michael@0: switch(pBiDi->direction) { michael@0: case UBIDI_LTR: michael@0: visualIndex=logicalIndex; michael@0: break; michael@0: case UBIDI_RTL: michael@0: visualIndex=pBiDi->length-logicalIndex-1; michael@0: break; michael@0: default: michael@0: if(!ubidi_getRuns(pBiDi, pErrorCode)) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return -1; michael@0: } else { michael@0: Run *runs=pBiDi->runs; michael@0: int32_t i, visualStart=0, offset, length; michael@0: michael@0: /* linear search for the run, search on the visual runs */ michael@0: for(i=0; irunCount; ++i) { michael@0: length=runs[i].visualLimit-visualStart; michael@0: offset=logicalIndex-GET_INDEX(runs[i].logicalStart); michael@0: if(offset>=0 && offset=pBiDi->runCount) { michael@0: return UBIDI_MAP_NOWHERE; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if(pBiDi->insertPoints.size>0) { michael@0: /* add the number of added marks until the calculated visual index */ michael@0: Run *runs=pBiDi->runs; michael@0: int32_t i, length, insertRemove; michael@0: int32_t visualStart=0, markFound=0; michael@0: for(i=0; ; i++, visualStart+=length) { michael@0: length=runs[i].visualLimit-visualStart; michael@0: insertRemove=runs[i].insertRemove; michael@0: if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) { michael@0: markFound++; michael@0: 
} michael@0: /* is it the run containing the visual index? */ michael@0: if(visualIndexcontrolCount>0) { michael@0: /* subtract the number of controls until the calculated visual index */ michael@0: Run *runs=pBiDi->runs; michael@0: int32_t i, j, start, limit, length, insertRemove; michael@0: int32_t visualStart=0, controlFound=0; michael@0: UChar uchar=pBiDi->text[logicalIndex]; michael@0: /* is the logical index pointing to a control ? */ michael@0: if(IS_BIDI_CONTROL_CHAR(uchar)) { michael@0: return UBIDI_MAP_NOWHERE; michael@0: } michael@0: /* loop on runs */ michael@0: for(i=0; ; i++, visualStart+=length) { michael@0: length=runs[i].visualLimit-visualStart; michael@0: insertRemove=runs[i].insertRemove; michael@0: /* calculated visual index is beyond this run? */ michael@0: if(visualIndex>=runs[i].visualLimit) { michael@0: controlFound-=insertRemove; michael@0: continue; michael@0: } michael@0: /* calculated visual index must be within current run */ michael@0: if(insertRemove==0) { michael@0: return visualIndex-controlFound; michael@0: } michael@0: if(IS_EVEN_RUN(runs[i].logicalStart)) { michael@0: /* LTR: check from run start to logical index */ michael@0: start=runs[i].logicalStart; michael@0: limit=logicalIndex; michael@0: } else { michael@0: /* RTL: check from logical index to run end */ michael@0: start=logicalIndex+1; michael@0: limit=GET_INDEX(runs[i].logicalStart)+length; michael@0: } michael@0: for(j=start; jtext[j]; michael@0: if(IS_BIDI_CONTROL_CHAR(uchar)) { michael@0: controlFound++; michael@0: } michael@0: } michael@0: return visualIndex-controlFound; michael@0: } michael@0: } michael@0: michael@0: return visualIndex; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { michael@0: Run *runs; michael@0: int32_t i, runCount, start; michael@0: RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); michael@0: RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, 
*pErrorCode, -1); michael@0: RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); michael@0: /* we can do the trivial cases without the runs array */ michael@0: if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { michael@0: if(pBiDi->direction==UBIDI_LTR) { michael@0: return visualIndex; michael@0: } michael@0: else if(pBiDi->direction==UBIDI_RTL) { michael@0: return pBiDi->length-visualIndex-1; michael@0: } michael@0: } michael@0: if(!ubidi_getRuns(pBiDi, pErrorCode)) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return -1; michael@0: } michael@0: michael@0: runs=pBiDi->runs; michael@0: runCount=pBiDi->runCount; michael@0: if(pBiDi->insertPoints.size>0) { michael@0: /* handle inserted LRM/RLM */ michael@0: int32_t markFound=0, insertRemove; michael@0: int32_t visualStart=0, length; michael@0: runs=pBiDi->runs; michael@0: /* subtract number of marks until visual index */ michael@0: for(i=0; ; i++, visualStart+=length) { michael@0: length=runs[i].visualLimit-visualStart; michael@0: insertRemove=runs[i].insertRemove; michael@0: if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { michael@0: if(visualIndex<=(visualStart+markFound)) { michael@0: return UBIDI_MAP_NOWHERE; michael@0: } michael@0: markFound++; michael@0: } michael@0: /* is adjusted visual index within this run? 
*/ michael@0: if(visualIndex<(runs[i].visualLimit+markFound)) { michael@0: visualIndex-=markFound; michael@0: break; michael@0: } michael@0: if(insertRemove&(LRM_AFTER|RLM_AFTER)) { michael@0: if(visualIndex==(visualStart+length+markFound)) { michael@0: return UBIDI_MAP_NOWHERE; michael@0: } michael@0: markFound++; michael@0: } michael@0: } michael@0: } michael@0: else if(pBiDi->controlCount>0) { michael@0: /* handle removed BiDi control characters */ michael@0: int32_t controlFound=0, insertRemove, length; michael@0: int32_t logicalStart, logicalEnd, visualStart=0, j, k; michael@0: UChar uchar; michael@0: UBool evenRun; michael@0: /* add number of controls until visual index */ michael@0: for(i=0; ; i++, visualStart+=length) { michael@0: length=runs[i].visualLimit-visualStart; michael@0: insertRemove=runs[i].insertRemove; michael@0: /* is adjusted visual index beyond current run? */ michael@0: if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) { michael@0: controlFound-=insertRemove; michael@0: continue; michael@0: } michael@0: /* adjusted visual index is within current run */ michael@0: if(insertRemove==0) { michael@0: visualIndex+=controlFound; michael@0: break; michael@0: } michael@0: /* count non-control chars until visualIndex */ michael@0: logicalStart=runs[i].logicalStart; michael@0: evenRun=IS_EVEN_RUN(logicalStart); michael@0: REMOVE_ODD_BIT(logicalStart); michael@0: logicalEnd=logicalStart+length-1; michael@0: for(j=0; jtext[k]; michael@0: if(IS_BIDI_CONTROL_CHAR(uchar)) { michael@0: controlFound++; michael@0: } michael@0: if((visualIndex+controlFound)==(visualStart+j)) { michael@0: break; michael@0: } michael@0: } michael@0: visualIndex+=controlFound; michael@0: break; michael@0: } michael@0: } michael@0: /* handle all cases */ michael@0: if(runCount<=10) { michael@0: /* linear search for the run */ michael@0: for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} michael@0: } else { michael@0: /* binary search for the run */ michael@0: 
int32_t begin=0, limit=runCount; michael@0: michael@0: /* the middle if() is guaranteed to find the run, we don't need a loop limit */ michael@0: for(;;) { michael@0: i=(begin+limit)/2; michael@0: if(visualIndex>=runs[i].visualLimit) { michael@0: begin=i+1; michael@0: } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { michael@0: break; michael@0: } else { michael@0: limit=i; michael@0: } michael@0: } michael@0: } michael@0: michael@0: start=runs[i].logicalStart; michael@0: if(IS_EVEN_RUN(start)) { michael@0: /* LTR */ michael@0: /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ michael@0: if(i>0) { michael@0: visualIndex-=runs[i-1].visualLimit; michael@0: } michael@0: return start+visualIndex; michael@0: } else { michael@0: /* RTL */ michael@0: return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { michael@0: RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); michael@0: /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ michael@0: ubidi_countRuns(pBiDi, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: /* no op */ michael@0: } else if(indexMap==NULL) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: } else { michael@0: /* fill a logical-to-visual index map using the runs[] */ michael@0: int32_t visualStart, visualLimit, i, j, k; michael@0: int32_t logicalStart, logicalLimit; michael@0: Run *runs=pBiDi->runs; michael@0: if (pBiDi->length<=0) { michael@0: return; michael@0: } michael@0: if (pBiDi->length>pBiDi->resultLength) { michael@0: uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); michael@0: } michael@0: michael@0: visualStart=0; michael@0: for(j=0; jrunCount; ++j) { michael@0: logicalStart=GET_INDEX(runs[j].logicalStart); michael@0: visualLimit=runs[j].visualLimit; michael@0: if(IS_EVEN_RUN(runs[j].logicalStart)) { michael@0: do { /* LTR */ 
michael@0: indexMap[logicalStart++]=visualStart++; michael@0: } while(visualStartinsertPoints.size>0) { michael@0: int32_t markFound=0, runCount=pBiDi->runCount; michael@0: int32_t length, insertRemove; michael@0: visualStart=0; michael@0: /* add number of marks found until each index */ michael@0: for(i=0; i0) { michael@0: logicalStart=GET_INDEX(runs[i].logicalStart); michael@0: logicalLimit=logicalStart+length; michael@0: for(j=logicalStart; jcontrolCount>0) { michael@0: int32_t controlFound=0, runCount=pBiDi->runCount; michael@0: int32_t length, insertRemove; michael@0: UBool evenRun; michael@0: UChar uchar; michael@0: visualStart=0; michael@0: /* subtract number of controls found until each index */ michael@0: for(i=0; itext[k]; michael@0: if(IS_BIDI_CONTROL_CHAR(uchar)) { michael@0: controlFound++; michael@0: indexMap[k]=UBIDI_MAP_NOWHERE; michael@0: continue; michael@0: } michael@0: indexMap[k]-=controlFound; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { michael@0: RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); michael@0: if(indexMap==NULL) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ michael@0: ubidi_countRuns(pBiDi, pErrorCode); michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: /* fill a visual-to-logical index map using the runs[] */ michael@0: Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; michael@0: int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; michael@0: michael@0: if (pBiDi->resultLength<=0) { michael@0: return; michael@0: } michael@0: visualStart=0; michael@0: for(; runslogicalStart; michael@0: visualLimit=runs->visualLimit; michael@0: if(IS_EVEN_RUN(logicalStart)) { michael@0: do { /* LTR */ michael@0: *pi++ = logicalStart++; michael@0: } 
while(++visualStartinsertPoints.size>0) { michael@0: int32_t markFound=0, runCount=pBiDi->runCount; michael@0: int32_t insertRemove, i, j, k; michael@0: runs=pBiDi->runs; michael@0: /* count all inserted marks */ michael@0: for(i=0; iresultLength; michael@0: for(i=runCount-1; i>=0 && markFound>0; i--) { michael@0: insertRemove=runs[i].insertRemove; michael@0: if(insertRemove&(LRM_AFTER|RLM_AFTER)) { michael@0: indexMap[--k]= UBIDI_MAP_NOWHERE; michael@0: markFound--; michael@0: } michael@0: visualStart= i>0 ? runs[i-1].visualLimit : 0; michael@0: for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { michael@0: indexMap[--k]=indexMap[j]; michael@0: } michael@0: if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { michael@0: indexMap[--k]= UBIDI_MAP_NOWHERE; michael@0: markFound--; michael@0: } michael@0: } michael@0: } michael@0: else if(pBiDi->controlCount>0) { michael@0: int32_t runCount=pBiDi->runCount, logicalEnd; michael@0: int32_t insertRemove, length, i, j, k, m; michael@0: UChar uchar; michael@0: UBool evenRun; michael@0: runs=pBiDi->runs; michael@0: visualStart=0; michael@0: /* move forward indexes by number of preceding controls */ michael@0: k=0; michael@0: for(i=0; itext[m]; michael@0: if(!IS_BIDI_CONTROL_CHAR(uchar)) { michael@0: indexMap[k++]=m; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { michael@0: if(srcMap!=NULL && destMap!=NULL && length>0) { michael@0: const int32_t *pi; michael@0: int32_t destLength=-1, count=0; michael@0: /* find highest value and count positive indexes in srcMap */ michael@0: pi=srcMap+length; michael@0: while(pi>srcMap) { michael@0: if(*--pi>destLength) { michael@0: destLength=*pi; michael@0: } michael@0: if(*pi>=0) { michael@0: count++; michael@0: } michael@0: } michael@0: destLength++; /* add 1 for origin 0 */ michael@0: if(count0) { michael@0: if(*--pi>=0) { 
michael@0: destMap[*pi]=--length; michael@0: } else { michael@0: --length; michael@0: } michael@0: } michael@0: } michael@0: }