intl/icu/source/common/ubidiln.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: ubidiln.c
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 1999aug06
michael@0 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
michael@0 15 */
michael@0 16
michael@0 17 #include "cmemory.h"
michael@0 18 #include "unicode/utypes.h"
michael@0 19 #include "unicode/ustring.h"
michael@0 20 #include "unicode/uchar.h"
michael@0 21 #include "unicode/ubidi.h"
michael@0 22 #include "ubidiimp.h"
michael@0 23 #include "uassert.h"
michael@0 24
michael@0 25 #ifndef U_COMMON_IMPLEMENTATION
michael@0 26 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
michael@0 27 #endif
michael@0 28
michael@0 29 /*
michael@0 30 * General remarks about the functions in this file:
michael@0 31 *
michael@0 32 * These functions deal with the aspects of potentially mixed-directional
michael@0 33 * text in a single paragraph or in a line of a single paragraph
michael@0 34 * which has already been processed according to
michael@0 35 * the Unicode 6.3 BiDi algorithm as defined in
michael@0 36 * http://www.unicode.org/unicode/reports/tr9/ , version 28,
michael@0 37 * also described in The Unicode Standard, Version 6.3.0 .
michael@0 38 *
michael@0 39 * This means that there is a UBiDi object with a levels
michael@0 40 * and a dirProps array.
michael@0 41 * paraLevel and direction are also set.
michael@0 42 * Only if the length of the text is zero, then levels==dirProps==NULL.
michael@0 43 *
michael@0 44 * The overall directionality of the paragraph
michael@0 45 * or line is used to bypass the reordering steps if possible.
michael@0 46 * Even purely RTL text does not need reordering there because
michael@0 47 * the ubidi_getLogical/VisualIndex() functions can compute the
michael@0 48 * index on the fly in such a case.
michael@0 49 *
michael@0 50 * The implementation of the access to same-level-runs and of the reordering
michael@0 51 * do attempt to provide better performance and less memory usage compared to
michael@0 52 * a direct implementation of especially rule (L2) with an array of
michael@0 53 * one (32-bit) integer per text character.
michael@0 54 *
michael@0 55 * Here, the levels array is scanned as soon as necessary, and a vector of
michael@0 56 * same-level-runs is created. Reordering then is done on this vector.
michael@0 57 * For each run of text positions that were resolved to the same level,
michael@0 58 * only 8 bytes are stored: the first text position of the run and the visual
michael@0 59 * position behind the run after reordering.
michael@0 60 * One sign bit is used to hold the directionality of the run.
michael@0 61 * This is inefficient if there are many very short runs. If the average run
michael@0 62 * length is <2, then this uses more memory.
michael@0 63 *
michael@0 64 * In a further attempt to save memory, the levels array is never changed
michael@0 65 * after all the resolution rules (Xn, Wn, Nn, In).
michael@0 66 * Many functions have to consider the field trailingWSStart:
michael@0 67 * if it is less than length, then there is an implicit trailing run
michael@0 68 * at the paraLevel,
michael@0 69 * which is not reflected in the levels array.
michael@0 70 * This allows a line UBiDi object to use the same levels array as
michael@0 71 * its paragraph parent object.
michael@0 72 *
michael@0 73 * When a UBiDi object is created for a line of a paragraph, then the
michael@0 74 * paragraph's levels and dirProps arrays are reused by way of setting
michael@0 75 * a pointer into them, not by copying. This again saves memory and forbids to
michael@0 76 * change the now shared levels for (L1).
michael@0 77 */
michael@0 78
michael@0 79 /* handle trailing WS (L1) -------------------------------------------------- */
michael@0 80
michael@0 81 /*
michael@0 82 * setTrailingWSStart() sets the start index for a trailing
michael@0 83 * run of WS in the line. This is necessary because we do not modify
michael@0 84 * the paragraph's levels array that we just point into.
michael@0 85 * Using trailingWSStart is another form of performing (L1).
michael@0 86 *
michael@0 87 * To make subsequent operations easier, we also include the run
michael@0 88 * before the WS if it is at the paraLevel - we merge the two here.
michael@0 89 *
michael@0 90 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
michael@0 91 * set correctly for the line even when contextual multiple paragraphs.
michael@0 92 */
michael@0 93 static void
michael@0 94 setTrailingWSStart(UBiDi *pBiDi) {
michael@0 95 /* pBiDi->direction!=UBIDI_MIXED */
michael@0 96
michael@0 97 const DirProp *dirProps=pBiDi->dirProps;
michael@0 98 UBiDiLevel *levels=pBiDi->levels;
michael@0 99 int32_t start=pBiDi->length;
michael@0 100 UBiDiLevel paraLevel=pBiDi->paraLevel;
michael@0 101
michael@0 102 /* If the line is terminated by a block separator, all preceding WS etc...
michael@0 103 are already set to paragraph level.
michael@0 104 Setting trailingWSStart to pBidi->length will avoid changing the
michael@0 105 level of B chars from 0 to paraLevel in ubidi_getLevels when
michael@0 106 orderParagraphsLTR==TRUE.
michael@0 107 */
michael@0 108 if(dirProps[start-1]==B) {
michael@0 109 pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
michael@0 110 return;
michael@0 111 }
michael@0 112 /* go backwards across all WS, BN, explicit codes */
michael@0 113 while(start>0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps[start-1]))&MASK_WS) {
michael@0 114 --start;
michael@0 115 }
michael@0 116
michael@0 117 /* if the WS run can be merged with the previous run then do so here */
michael@0 118 while(start>0 && levels[start-1]==paraLevel) {
michael@0 119 --start;
michael@0 120 }
michael@0 121
michael@0 122 pBiDi->trailingWSStart=start;
michael@0 123 }
michael@0 124
michael@0 125 /* ubidi_setLine ------------------------------------------------------------ */
michael@0 126
michael@0 127 U_CAPI void U_EXPORT2
michael@0 128 ubidi_setLine(const UBiDi *pParaBiDi,
michael@0 129 int32_t start, int32_t limit,
michael@0 130 UBiDi *pLineBiDi,
michael@0 131 UErrorCode *pErrorCode) {
michael@0 132 int32_t length;
michael@0 133
michael@0 134 /* check the argument values */
michael@0 135 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 136 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
michael@0 137 RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode);
michael@0 138 RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode);
michael@0 139 if(pLineBiDi==NULL) {
michael@0 140 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 141 return;
michael@0 142 }
michael@0 143 if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) !=
michael@0 144 ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) {
michael@0 145 /* the line crosses a paragraph boundary */
michael@0 146 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 147 return;
michael@0 148 }
michael@0 149
michael@0 150 /* set the values in pLineBiDi from its pParaBiDi parent */
michael@0 151 pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */
michael@0 152 pLineBiDi->text=pParaBiDi->text+start;
michael@0 153 length=pLineBiDi->length=limit-start;
michael@0 154 pLineBiDi->resultLength=pLineBiDi->originalLength=length;
michael@0 155 pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start);
michael@0 156 pLineBiDi->paraCount=pParaBiDi->paraCount;
michael@0 157 pLineBiDi->runs=NULL;
michael@0 158 pLineBiDi->flags=0;
michael@0 159 pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
michael@0 160 pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
michael@0 161 pLineBiDi->controlCount=0;
michael@0 162 if(pParaBiDi->controlCount>0) {
michael@0 163 int32_t j;
michael@0 164 for(j=start; j<limit; j++) {
michael@0 165 if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
michael@0 166 pLineBiDi->controlCount++;
michael@0 167 }
michael@0 168 }
michael@0 169 pLineBiDi->resultLength-=pLineBiDi->controlCount;
michael@0 170 }
michael@0 171
michael@0 172 pLineBiDi->dirProps=pParaBiDi->dirProps+start;
michael@0 173 pLineBiDi->levels=pParaBiDi->levels+start;
michael@0 174 pLineBiDi->runCount=-1;
michael@0 175
michael@0 176 if(pParaBiDi->direction!=UBIDI_MIXED) {
michael@0 177 /* the parent is already trivial */
michael@0 178 pLineBiDi->direction=pParaBiDi->direction;
michael@0 179
michael@0 180 /*
michael@0 181 * The parent's levels are all either
michael@0 182 * implicitly or explicitly ==paraLevel;
michael@0 183 * do the same here.
michael@0 184 */
michael@0 185 if(pParaBiDi->trailingWSStart<=start) {
michael@0 186 pLineBiDi->trailingWSStart=0;
michael@0 187 } else if(pParaBiDi->trailingWSStart<limit) {
michael@0 188 pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start;
michael@0 189 } else {
michael@0 190 pLineBiDi->trailingWSStart=length;
michael@0 191 }
michael@0 192 } else {
michael@0 193 const UBiDiLevel *levels=pLineBiDi->levels;
michael@0 194 int32_t i, trailingWSStart;
michael@0 195 UBiDiLevel level;
michael@0 196
michael@0 197 setTrailingWSStart(pLineBiDi);
michael@0 198 trailingWSStart=pLineBiDi->trailingWSStart;
michael@0 199
michael@0 200 /* recalculate pLineBiDi->direction */
michael@0 201 if(trailingWSStart==0) {
michael@0 202 /* all levels are at paraLevel */
michael@0 203 pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
michael@0 204 } else {
michael@0 205 /* get the level of the first character */
michael@0 206 level=(UBiDiLevel)(levels[0]&1);
michael@0 207
michael@0 208 /* if there is anything of a different level, then the line is mixed */
michael@0 209 if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
michael@0 210 /* the trailing WS is at paraLevel, which differs from levels[0] */
michael@0 211 pLineBiDi->direction=UBIDI_MIXED;
michael@0 212 } else {
michael@0 213 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
michael@0 214 i=1;
michael@0 215 for(;;) {
michael@0 216 if(i==trailingWSStart) {
michael@0 217 /* the direction values match those in level */
michael@0 218 pLineBiDi->direction=(UBiDiDirection)level;
michael@0 219 break;
michael@0 220 } else if((levels[i]&1)!=level) {
michael@0 221 pLineBiDi->direction=UBIDI_MIXED;
michael@0 222 break;
michael@0 223 }
michael@0 224 ++i;
michael@0 225 }
michael@0 226 }
michael@0 227 }
michael@0 228
michael@0 229 switch(pLineBiDi->direction) {
michael@0 230 case UBIDI_LTR:
michael@0 231 /* make sure paraLevel is even */
michael@0 232 pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1);
michael@0 233
michael@0 234 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
michael@0 235 pLineBiDi->trailingWSStart=0;
michael@0 236 break;
michael@0 237 case UBIDI_RTL:
michael@0 238 /* make sure paraLevel is odd */
michael@0 239 pLineBiDi->paraLevel|=1;
michael@0 240
michael@0 241 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
michael@0 242 pLineBiDi->trailingWSStart=0;
michael@0 243 break;
michael@0 244 default:
michael@0 245 break;
michael@0 246 }
michael@0 247 }
michael@0 248 pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
michael@0 249 return;
michael@0 250 }
michael@0 251
michael@0 252 U_CAPI UBiDiLevel U_EXPORT2
michael@0 253 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
michael@0 254 /* return paraLevel if in the trailing WS run, otherwise the real level */
michael@0 255 if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) {
michael@0 256 return 0;
michael@0 257 } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) {
michael@0 258 return GET_PARALEVEL(pBiDi, charIndex);
michael@0 259 } else {
michael@0 260 return pBiDi->levels[charIndex];
michael@0 261 }
michael@0 262 }
michael@0 263
michael@0 264 U_CAPI const UBiDiLevel * U_EXPORT2
michael@0 265 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
michael@0 266 int32_t start, length;
michael@0 267
michael@0 268 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL);
michael@0 269 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL);
michael@0 270 if((length=pBiDi->length)<=0) {
michael@0 271 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 272 return NULL;
michael@0 273 }
michael@0 274 if((start=pBiDi->trailingWSStart)==length) {
michael@0 275 /* the current levels array reflects the WS run */
michael@0 276 return pBiDi->levels;
michael@0 277 }
michael@0 278
michael@0 279 /*
michael@0 280 * After the previous if(), we know that the levels array
michael@0 281 * has an implicit trailing WS run and therefore does not fully
michael@0 282 * reflect itself all the levels.
michael@0 283 * This must be a UBiDi object for a line, and
michael@0 284 * we need to create a new levels array.
michael@0 285 */
michael@0 286 if(getLevelsMemory(pBiDi, length)) {
michael@0 287 UBiDiLevel *levels=pBiDi->levelsMemory;
michael@0 288
michael@0 289 if(start>0 && levels!=pBiDi->levels) {
michael@0 290 uprv_memcpy(levels, pBiDi->levels, start);
michael@0 291 }
michael@0 292 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
michael@0 293 since pBidi is a line object */
michael@0 294 uprv_memset(levels+start, pBiDi->paraLevel, length-start);
michael@0 295
michael@0 296 /* this new levels array is set for the line and reflects the WS run */
michael@0 297 pBiDi->trailingWSStart=length;
michael@0 298 return pBiDi->levels=levels;
michael@0 299 } else {
michael@0 300 /* out of memory */
michael@0 301 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 302 return NULL;
michael@0 303 }
michael@0 304 }
michael@0 305
michael@0 306 U_CAPI void U_EXPORT2
michael@0 307 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
michael@0 308 int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
michael@0 309 UErrorCode errorCode;
michael@0 310 int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
michael@0 311 Run iRun;
michael@0 312
michael@0 313 errorCode=U_ZERO_ERROR;
michael@0 314 RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
michael@0 315 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
michael@0 316 runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
michael@0 317 if(U_FAILURE(errorCode)) {
michael@0 318 return;
michael@0 319 }
michael@0 320 /* this is done based on runs rather than on levels since levels have
michael@0 321 a special interpretation when UBIDI_REORDER_RUNS_ONLY
michael@0 322 */
michael@0 323 visualStart=logicalLimit=0;
michael@0 324 iRun=pBiDi->runs[0];
michael@0 325
michael@0 326 for(i=0; i<runCount; i++) {
michael@0 327 iRun = pBiDi->runs[i];
michael@0 328 logicalFirst=GET_INDEX(iRun.logicalStart);
michael@0 329 logicalLimit=logicalFirst+iRun.visualLimit-visualStart;
michael@0 330 if((logicalPosition>=logicalFirst) &&
michael@0 331 (logicalPosition<logicalLimit)) {
michael@0 332 break;
michael@0 333 }
michael@0 334 visualStart = iRun.visualLimit;
michael@0 335 }
michael@0 336 if(pLogicalLimit) {
michael@0 337 *pLogicalLimit=logicalLimit;
michael@0 338 }
michael@0 339 if(pLevel) {
michael@0 340 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
michael@0 341 *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
michael@0 342 }
michael@0 343 else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
michael@0 344 *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
michael@0 345 } else {
michael@0 346 *pLevel=pBiDi->levels[logicalPosition];
michael@0 347 }
michael@0 348 }
michael@0 349 }
michael@0 350
michael@0 351 /* runs API functions ------------------------------------------------------- */
michael@0 352
michael@0 353 U_CAPI int32_t U_EXPORT2
michael@0 354 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
michael@0 355 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
michael@0 356 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
michael@0 357 ubidi_getRuns(pBiDi, pErrorCode);
michael@0 358 if(U_FAILURE(*pErrorCode)) {
michael@0 359 return -1;
michael@0 360 }
michael@0 361 return pBiDi->runCount;
michael@0 362 }
michael@0 363
michael@0 364 U_CAPI UBiDiDirection U_EXPORT2
michael@0 365 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
michael@0 366 int32_t *pLogicalStart, int32_t *pLength)
michael@0 367 {
michael@0 368 int32_t start;
michael@0 369 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 370 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR);
michael@0 371 ubidi_getRuns(pBiDi, &errorCode);
michael@0 372 if(U_FAILURE(errorCode)) {
michael@0 373 return UBIDI_LTR;
michael@0 374 }
michael@0 375 RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR);
michael@0 376
michael@0 377 start=pBiDi->runs[runIndex].logicalStart;
michael@0 378 if(pLogicalStart!=NULL) {
michael@0 379 *pLogicalStart=GET_INDEX(start);
michael@0 380 }
michael@0 381 if(pLength!=NULL) {
michael@0 382 if(runIndex>0) {
michael@0 383 *pLength=pBiDi->runs[runIndex].visualLimit-
michael@0 384 pBiDi->runs[runIndex-1].visualLimit;
michael@0 385 } else {
michael@0 386 *pLength=pBiDi->runs[0].visualLimit;
michael@0 387 }
michael@0 388 }
michael@0 389 return (UBiDiDirection)GET_ODD_BIT(start);
michael@0 390 }
michael@0 391
michael@0 392 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
michael@0 393 static void
michael@0 394 getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
michael@0 395 /* simple, single-run case */
michael@0 396 pBiDi->runs=pBiDi->simpleRuns;
michael@0 397 pBiDi->runCount=1;
michael@0 398
michael@0 399 /* fill and reorder the single run */
michael@0 400 pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
michael@0 401 pBiDi->runs[0].visualLimit=pBiDi->length;
michael@0 402 pBiDi->runs[0].insertRemove=0;
michael@0 403 }
michael@0 404
michael@0 405 /* reorder the runs array (L2) ---------------------------------------------- */
michael@0 406
michael@0 407 /*
michael@0 408 * Reorder the same-level runs in the runs array.
michael@0 409 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
michael@0 410 * All the visualStart fields=logical start before reordering.
michael@0 411 * The "odd" bits are not set yet.
michael@0 412 *
michael@0 413 * Reordering with this data structure lends itself to some handy shortcuts:
michael@0 414 *
michael@0 415 * Since each run is moved but not modified, and since at the initial maxLevel
michael@0 416 * each sequence of same-level runs consists of only one run each, we
michael@0 417 * don't need to do anything there and can predecrement maxLevel.
michael@0 418 * In many simple cases, the reordering is thus done entirely in the
michael@0 419 * index mapping.
michael@0 420 * Also, reordering occurs only down to the lowest odd level that occurs,
michael@0 421 * which is minLevel|1. However, if the lowest level itself is odd, then
michael@0 422 * in the last reordering the sequence of the runs at this level or higher
michael@0 423 * will be all runs, and we don't need the elaborate loop to search for them.
michael@0 424 * This is covered by ++minLevel instead of minLevel|=1 followed
michael@0 425 * by an extra reorder-all after the reorder-some loop.
michael@0 426 * About a trailing WS run:
michael@0 427 * Such a run would need special treatment because its level is not
michael@0 428 * reflected in levels[] if this is not a paragraph object.
michael@0 429 * Instead, all characters from trailingWSStart on are implicitly at
michael@0 430 * paraLevel.
michael@0 431 * However, for all maxLevel>paraLevel, this run will never be reordered
michael@0 432 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
michael@0 433 * if minLevel==paraLevel is odd, which is done in the extra segment.
michael@0 434 * This means that for the main reordering loop we don't need to consider
michael@0 435 * this run and can --runCount. If it is later part of the all-runs
michael@0 436 * reordering, then runCount is adjusted accordingly.
michael@0 437 */
michael@0 438 static void
michael@0 439 reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
michael@0 440 Run *runs, tempRun;
michael@0 441 UBiDiLevel *levels;
michael@0 442 int32_t firstRun, endRun, limitRun, runCount;
michael@0 443
michael@0 444 /* nothing to do? */
michael@0 445 if(maxLevel<=(minLevel|1)) {
michael@0 446 return;
michael@0 447 }
michael@0 448
michael@0 449 /*
michael@0 450 * Reorder only down to the lowest odd level
michael@0 451 * and reorder at an odd minLevel in a separate, simpler loop.
michael@0 452 * See comments above for why minLevel is always incremented.
michael@0 453 */
michael@0 454 ++minLevel;
michael@0 455
michael@0 456 runs=pBiDi->runs;
michael@0 457 levels=pBiDi->levels;
michael@0 458 runCount=pBiDi->runCount;
michael@0 459
michael@0 460 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
michael@0 461 if(pBiDi->trailingWSStart<pBiDi->length) {
michael@0 462 --runCount;
michael@0 463 }
michael@0 464
michael@0 465 while(--maxLevel>=minLevel) {
michael@0 466 firstRun=0;
michael@0 467
michael@0 468 /* loop for all sequences of runs */
michael@0 469 for(;;) {
michael@0 470 /* look for a sequence of runs that are all at >=maxLevel */
michael@0 471 /* look for the first run of such a sequence */
michael@0 472 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
michael@0 473 ++firstRun;
michael@0 474 }
michael@0 475 if(firstRun>=runCount) {
michael@0 476 break; /* no more such runs */
michael@0 477 }
michael@0 478
michael@0 479 /* look for the limit run of such a sequence (the run behind it) */
michael@0 480 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
michael@0 481
michael@0 482 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
michael@0 483 endRun=limitRun-1;
michael@0 484 while(firstRun<endRun) {
michael@0 485 tempRun = runs[firstRun];
michael@0 486 runs[firstRun]=runs[endRun];
michael@0 487 runs[endRun]=tempRun;
michael@0 488 ++firstRun;
michael@0 489 --endRun;
michael@0 490 }
michael@0 491
michael@0 492 if(limitRun==runCount) {
michael@0 493 break; /* no more such runs */
michael@0 494 } else {
michael@0 495 firstRun=limitRun+1;
michael@0 496 }
michael@0 497 }
michael@0 498 }
michael@0 499
michael@0 500 /* now do maxLevel==old minLevel (==odd!), see above */
michael@0 501 if(!(minLevel&1)) {
michael@0 502 firstRun=0;
michael@0 503
michael@0 504 /* include the trailing WS run in this complete reordering */
michael@0 505 if(pBiDi->trailingWSStart==pBiDi->length) {
michael@0 506 --runCount;
michael@0 507 }
michael@0 508
michael@0 509 /* Swap the entire sequence of all runs. (endRun==runCount) */
michael@0 510 while(firstRun<runCount) {
michael@0 511 tempRun=runs[firstRun];
michael@0 512 runs[firstRun]=runs[runCount];
michael@0 513 runs[runCount]=tempRun;
michael@0 514 ++firstRun;
michael@0 515 --runCount;
michael@0 516 }
michael@0 517 }
michael@0 518 }
michael@0 519
michael@0 520 /* compute the runs array --------------------------------------------------- */
michael@0 521
michael@0 522 static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
michael@0 523 Run *runs=pBiDi->runs;
michael@0 524 int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart;
michael@0 525
michael@0 526 for(i=0; i<runCount; i++) {
michael@0 527 length=runs[i].visualLimit-visualStart;
michael@0 528 logicalStart=GET_INDEX(runs[i].logicalStart);
michael@0 529 if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) {
michael@0 530 return i;
michael@0 531 }
michael@0 532 visualStart+=length;
michael@0 533 }
michael@0 534 /* we should never get here */
michael@0 535 U_ASSERT(FALSE);
michael@0 536 *pErrorCode = U_INVALID_STATE_ERROR;
michael@0 537 return 0;
michael@0 538 }
michael@0 539
michael@0 540 /*
michael@0 541 * Compute the runs array from the levels array.
michael@0 542 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
michael@0 543 * and the runs are reordered.
michael@0 544 * Odd-level runs have visualStart on their visual right edge and
michael@0 545 * they progress visually to the left.
michael@0 546 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
michael@0 547 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
michael@0 548 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
michael@0 549 * negative number of BiDi control characters within this run.
michael@0 550 */
michael@0 551 U_CFUNC UBool
michael@0 552 ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
michael@0 553 /*
michael@0 554 * This method returns immediately if the runs are already set. This
michael@0 555 * includes the case of length==0 (handled in setPara)..
michael@0 556 */
michael@0 557 if (pBiDi->runCount>=0) {
michael@0 558 return TRUE;
michael@0 559 }
michael@0 560
michael@0 561 if(pBiDi->direction!=UBIDI_MIXED) {
michael@0 562 /* simple, single-run case - this covers length==0 */
michael@0 563 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
michael@0 564 getSingleRun(pBiDi, pBiDi->paraLevel);
michael@0 565 } else /* UBIDI_MIXED, length>0 */ {
michael@0 566 /* mixed directionality */
michael@0 567 int32_t length=pBiDi->length, limit;
michael@0 568 UBiDiLevel *levels=pBiDi->levels;
michael@0 569 int32_t i, runCount;
michael@0 570 UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */
michael@0 571 /*
michael@0 572 * If there are WS characters at the end of the line
michael@0 573 * and the run preceding them has a level different from
michael@0 574 * paraLevel, then they will form their own run at paraLevel (L1).
michael@0 575 * Count them separately.
michael@0 576 * We need some special treatment for this in order to not
michael@0 577 * modify the levels array which a line UBiDi object shares
michael@0 578 * with its paragraph parent and its other line siblings.
michael@0 579 * In other words, for the trailing WS, it may be
michael@0 580 * levels[]!=paraLevel but we have to treat it like it were so.
michael@0 581 */
michael@0 582 limit=pBiDi->trailingWSStart;
michael@0 583 /* count the runs, there is at least one non-WS run, and limit>0 */
michael@0 584 runCount=0;
michael@0 585 for(i=0; i<limit; ++i) {
michael@0 586 /* increment runCount at the start of each run */
michael@0 587 if(levels[i]!=level) {
michael@0 588 ++runCount;
michael@0 589 level=levels[i];
michael@0 590 }
michael@0 591 }
michael@0 592
michael@0 593 /*
michael@0 594 * We don't need to see if the last run can be merged with a trailing
michael@0 595 * WS run because setTrailingWSStart() would have done that.
michael@0 596 */
michael@0 597 if(runCount==1 && limit==length) {
michael@0 598 /* There is only one non-WS run and no trailing WS-run. */
michael@0 599 getSingleRun(pBiDi, levels[0]);
michael@0 600 } else /* runCount>1 || limit<length */ {
michael@0 601 /* allocate and set the runs */
michael@0 602 Run *runs;
michael@0 603 int32_t runIndex, start;
michael@0 604 UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
michael@0 605
michael@0 606 /* now, count a (non-mergeable) WS run */
michael@0 607 if(limit<length) {
michael@0 608 ++runCount;
michael@0 609 }
michael@0 610
michael@0 611 /* runCount>1 */
michael@0 612 if(getRunsMemory(pBiDi, runCount)) {
michael@0 613 runs=pBiDi->runsMemory;
michael@0 614 } else {
michael@0 615 return FALSE;
michael@0 616 }
michael@0 617
michael@0 618 /* set the runs */
michael@0 619 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
michael@0 620 * 464->444, 484->444, 575->555, 595->555
michael@0 621 * However, that would take longer. Check also how it would
michael@0 622 * interact with BiDi control removal and inserting Marks.
michael@0 623 */
michael@0 624 runIndex=0;
michael@0 625
michael@0 626 /* search for the run limits and initialize visualLimit values with the run lengths */
michael@0 627 i=0;
michael@0 628 do {
michael@0 629 /* prepare this run */
michael@0 630 start=i;
michael@0 631 level=levels[i];
michael@0 632 if(level<minLevel) {
michael@0 633 minLevel=level;
michael@0 634 }
michael@0 635 if(level>maxLevel) {
michael@0 636 maxLevel=level;
michael@0 637 }
michael@0 638
michael@0 639 /* look for the run limit */
michael@0 640 while(++i<limit && levels[i]==level) {}
michael@0 641
michael@0 642 /* i is another run limit */
michael@0 643 runs[runIndex].logicalStart=start;
michael@0 644 runs[runIndex].visualLimit=i-start;
michael@0 645 runs[runIndex].insertRemove=0;
michael@0 646 ++runIndex;
michael@0 647 } while(i<limit);
michael@0 648
michael@0 649 if(limit<length) {
michael@0 650 /* there is a separate WS run */
michael@0 651 runs[runIndex].logicalStart=limit;
michael@0 652 runs[runIndex].visualLimit=length-limit;
michael@0 653 /* For the trailing WS run, pBiDi->paraLevel is ok even
michael@0 654 if contextual multiple paragraphs. */
michael@0 655 if(pBiDi->paraLevel<minLevel) {
michael@0 656 minLevel=pBiDi->paraLevel;
michael@0 657 }
michael@0 658 }
michael@0 659
michael@0 660 /* set the object fields */
michael@0 661 pBiDi->runs=runs;
michael@0 662 pBiDi->runCount=runCount;
michael@0 663
michael@0 664 reorderLine(pBiDi, minLevel, maxLevel);
michael@0 665
michael@0 666 /* now add the direction flags and adjust the visualLimit's to be just that */
michael@0 667 /* this loop will also handle the trailing WS run */
michael@0 668 limit=0;
michael@0 669 for(i=0; i<runCount; ++i) {
michael@0 670 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
michael@0 671 limit+=runs[i].visualLimit;
michael@0 672 runs[i].visualLimit=limit;
michael@0 673 }
michael@0 674
michael@0 675 /* Set the "odd" bit for the trailing WS run. */
michael@0 676 /* For a RTL paragraph, it will be the *first* run in visual order. */
michael@0 677 /* For the trailing WS run, pBiDi->paraLevel is ok even if
michael@0 678 contextual multiple paragraphs. */
michael@0 679 if(runIndex<runCount) {
michael@0 680 int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
michael@0 681
michael@0 682 ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
michael@0 683 }
michael@0 684 }
michael@0 685 }
michael@0 686
michael@0 687 /* handle insert LRM/RLM BEFORE/AFTER run */
michael@0 688 if(pBiDi->insertPoints.size>0) {
michael@0 689 Point *point, *start=pBiDi->insertPoints.points,
michael@0 690 *limit=start+pBiDi->insertPoints.size;
michael@0 691 int32_t runIndex;
michael@0 692 for(point=start; point<limit; point++) {
michael@0 693 runIndex=getRunFromLogicalIndex(pBiDi, point->pos, pErrorCode);
michael@0 694 pBiDi->runs[runIndex].insertRemove|=point->flag;
michael@0 695 }
michael@0 696 }
michael@0 697
michael@0 698 /* handle remove BiDi control characters */
michael@0 699 if(pBiDi->controlCount>0) {
michael@0 700 int32_t runIndex;
michael@0 701 const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
michael@0 702 for(pu=start; pu<limit; pu++) {
michael@0 703 if(IS_BIDI_CONTROL_CHAR(*pu)) {
michael@0 704 runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start), pErrorCode);
michael@0 705 pBiDi->runs[runIndex].insertRemove--;
michael@0 706 }
michael@0 707 }
michael@0 708 }
michael@0 709
michael@0 710 return TRUE;
michael@0 711 }
michael@0 712
michael@0 713 static UBool
michael@0 714 prepareReorder(const UBiDiLevel *levels, int32_t length,
michael@0 715 int32_t *indexMap,
michael@0 716 UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
michael@0 717 int32_t start;
michael@0 718 UBiDiLevel level, minLevel, maxLevel;
michael@0 719
michael@0 720 if(levels==NULL || length<=0) {
michael@0 721 return FALSE;
michael@0 722 }
michael@0 723
michael@0 724 /* determine minLevel and maxLevel */
michael@0 725 minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1;
michael@0 726 maxLevel=0;
michael@0 727 for(start=length; start>0;) {
michael@0 728 level=levels[--start];
michael@0 729 if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) {
michael@0 730 return FALSE;
michael@0 731 }
michael@0 732 if(level<minLevel) {
michael@0 733 minLevel=level;
michael@0 734 }
michael@0 735 if(level>maxLevel) {
michael@0 736 maxLevel=level;
michael@0 737 }
michael@0 738 }
michael@0 739 *pMinLevel=minLevel;
michael@0 740 *pMaxLevel=maxLevel;
michael@0 741
michael@0 742 /* initialize the index map */
michael@0 743 for(start=length; start>0;) {
michael@0 744 --start;
michael@0 745 indexMap[start]=start;
michael@0 746 }
michael@0 747
michael@0 748 return TRUE;
michael@0 749 }
michael@0 750
michael@0 751 /* reorder a line based on a levels array (L2) ------------------------------ */
michael@0 752
michael@0 753 U_CAPI void U_EXPORT2
michael@0 754 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
michael@0 755 int32_t start, limit, sumOfSosEos;
michael@0 756 UBiDiLevel minLevel = 0, maxLevel = 0;
michael@0 757
michael@0 758 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
michael@0 759 return;
michael@0 760 }
michael@0 761
michael@0 762 /* nothing to do? */
michael@0 763 if(minLevel==maxLevel && (minLevel&1)==0) {
michael@0 764 return;
michael@0 765 }
michael@0 766
michael@0 767 /* reorder only down to the lowest odd level */
michael@0 768 minLevel|=1;
michael@0 769
michael@0 770 /* loop maxLevel..minLevel */
michael@0 771 do {
michael@0 772 start=0;
michael@0 773
michael@0 774 /* loop for all sequences of levels to reorder at the current maxLevel */
michael@0 775 for(;;) {
michael@0 776 /* look for a sequence of levels that are all at >=maxLevel */
michael@0 777 /* look for the first index of such a sequence */
michael@0 778 while(start<length && levels[start]<maxLevel) {
michael@0 779 ++start;
michael@0 780 }
michael@0 781 if(start>=length) {
michael@0 782 break; /* no more such sequences */
michael@0 783 }
michael@0 784
michael@0 785 /* look for the limit of such a sequence (the index behind it) */
michael@0 786 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
michael@0 787
michael@0 788 /*
michael@0 789 * sos=start of sequence, eos=end of sequence
michael@0 790 *
michael@0 791 * The closed (inclusive) interval from sos to eos includes all the logical
michael@0 792 * and visual indexes within this sequence. They are logically and
michael@0 793 * visually contiguous and in the same range.
michael@0 794 *
michael@0 795 * For each run, the new visual index=sos+eos-old visual index;
michael@0 796 * we pre-add sos+eos into sumOfSosEos ->
michael@0 797 * new visual index=sumOfSosEos-old visual index;
michael@0 798 */
michael@0 799 sumOfSosEos=start+limit-1;
michael@0 800
michael@0 801 /* reorder each index in the sequence */
michael@0 802 do {
michael@0 803 indexMap[start]=sumOfSosEos-indexMap[start];
michael@0 804 } while(++start<limit);
michael@0 805
michael@0 806 /* start==limit */
michael@0 807 if(limit==length) {
michael@0 808 break; /* no more such sequences */
michael@0 809 } else {
michael@0 810 start=limit+1;
michael@0 811 }
michael@0 812 }
michael@0 813 } while(--maxLevel>=minLevel);
michael@0 814 }
michael@0 815
michael@0 816 U_CAPI void U_EXPORT2
michael@0 817 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
michael@0 818 int32_t start, end, limit, temp;
michael@0 819 UBiDiLevel minLevel = 0, maxLevel = 0;
michael@0 820
michael@0 821 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
michael@0 822 return;
michael@0 823 }
michael@0 824
michael@0 825 /* nothing to do? */
michael@0 826 if(minLevel==maxLevel && (minLevel&1)==0) {
michael@0 827 return;
michael@0 828 }
michael@0 829
michael@0 830 /* reorder only down to the lowest odd level */
michael@0 831 minLevel|=1;
michael@0 832
michael@0 833 /* loop maxLevel..minLevel */
michael@0 834 do {
michael@0 835 start=0;
michael@0 836
michael@0 837 /* loop for all sequences of levels to reorder at the current maxLevel */
michael@0 838 for(;;) {
michael@0 839 /* look for a sequence of levels that are all at >=maxLevel */
michael@0 840 /* look for the first index of such a sequence */
michael@0 841 while(start<length && levels[start]<maxLevel) {
michael@0 842 ++start;
michael@0 843 }
michael@0 844 if(start>=length) {
michael@0 845 break; /* no more such runs */
michael@0 846 }
michael@0 847
michael@0 848 /* look for the limit of such a sequence (the index behind it) */
michael@0 849 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
michael@0 850
michael@0 851 /*
michael@0 852 * Swap the entire interval of indexes from start to limit-1.
michael@0 853 * We don't need to swap the levels for the purpose of this
michael@0 854 * algorithm: the sequence of levels that we look at does not
michael@0 855 * move anyway.
michael@0 856 */
michael@0 857 end=limit-1;
michael@0 858 while(start<end) {
michael@0 859 temp=indexMap[start];
michael@0 860 indexMap[start]=indexMap[end];
michael@0 861 indexMap[end]=temp;
michael@0 862
michael@0 863 ++start;
michael@0 864 --end;
michael@0 865 }
michael@0 866
michael@0 867 if(limit==length) {
michael@0 868 break; /* no more such sequences */
michael@0 869 } else {
michael@0 870 start=limit+1;
michael@0 871 }
michael@0 872 }
michael@0 873 } while(--maxLevel>=minLevel);
michael@0 874 }
michael@0 875
michael@0 876 /* API functions for logical<->visual mapping ------------------------------- */
michael@0 877
michael@0 878 U_CAPI int32_t U_EXPORT2
michael@0 879 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
michael@0 880 int32_t visualIndex=UBIDI_MAP_NOWHERE;
michael@0 881 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
michael@0 882 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
michael@0 883 RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
michael@0 884
michael@0 885 /* we can do the trivial cases without the runs array */
michael@0 886 switch(pBiDi->direction) {
michael@0 887 case UBIDI_LTR:
michael@0 888 visualIndex=logicalIndex;
michael@0 889 break;
michael@0 890 case UBIDI_RTL:
michael@0 891 visualIndex=pBiDi->length-logicalIndex-1;
michael@0 892 break;
michael@0 893 default:
michael@0 894 if(!ubidi_getRuns(pBiDi, pErrorCode)) {
michael@0 895 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 896 return -1;
michael@0 897 } else {
michael@0 898 Run *runs=pBiDi->runs;
michael@0 899 int32_t i, visualStart=0, offset, length;
michael@0 900
michael@0 901 /* linear search for the run, search on the visual runs */
michael@0 902 for(i=0; i<pBiDi->runCount; ++i) {
michael@0 903 length=runs[i].visualLimit-visualStart;
michael@0 904 offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
michael@0 905 if(offset>=0 && offset<length) {
michael@0 906 if(IS_EVEN_RUN(runs[i].logicalStart)) {
michael@0 907 /* LTR */
michael@0 908 visualIndex=visualStart+offset;
michael@0 909 } else {
michael@0 910 /* RTL */
michael@0 911 visualIndex=visualStart+length-offset-1;
michael@0 912 }
michael@0 913 break; /* exit for loop */
michael@0 914 }
michael@0 915 visualStart+=length;
michael@0 916 }
michael@0 917 if(i>=pBiDi->runCount) {
michael@0 918 return UBIDI_MAP_NOWHERE;
michael@0 919 }
michael@0 920 }
michael@0 921 }
michael@0 922
michael@0 923 if(pBiDi->insertPoints.size>0) {
michael@0 924 /* add the number of added marks until the calculated visual index */
michael@0 925 Run *runs=pBiDi->runs;
michael@0 926 int32_t i, length, insertRemove;
michael@0 927 int32_t visualStart=0, markFound=0;
michael@0 928 for(i=0; ; i++, visualStart+=length) {
michael@0 929 length=runs[i].visualLimit-visualStart;
michael@0 930 insertRemove=runs[i].insertRemove;
michael@0 931 if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
michael@0 932 markFound++;
michael@0 933 }
michael@0 934 /* is it the run containing the visual index? */
michael@0 935 if(visualIndex<runs[i].visualLimit) {
michael@0 936 return visualIndex+markFound;
michael@0 937 }
michael@0 938 if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
michael@0 939 markFound++;
michael@0 940 }
michael@0 941 }
michael@0 942 }
michael@0 943 else if(pBiDi->controlCount>0) {
michael@0 944 /* subtract the number of controls until the calculated visual index */
michael@0 945 Run *runs=pBiDi->runs;
michael@0 946 int32_t i, j, start, limit, length, insertRemove;
michael@0 947 int32_t visualStart=0, controlFound=0;
michael@0 948 UChar uchar=pBiDi->text[logicalIndex];
michael@0 949 /* is the logical index pointing to a control ? */
michael@0 950 if(IS_BIDI_CONTROL_CHAR(uchar)) {
michael@0 951 return UBIDI_MAP_NOWHERE;
michael@0 952 }
michael@0 953 /* loop on runs */
michael@0 954 for(i=0; ; i++, visualStart+=length) {
michael@0 955 length=runs[i].visualLimit-visualStart;
michael@0 956 insertRemove=runs[i].insertRemove;
michael@0 957 /* calculated visual index is beyond this run? */
michael@0 958 if(visualIndex>=runs[i].visualLimit) {
michael@0 959 controlFound-=insertRemove;
michael@0 960 continue;
michael@0 961 }
michael@0 962 /* calculated visual index must be within current run */
michael@0 963 if(insertRemove==0) {
michael@0 964 return visualIndex-controlFound;
michael@0 965 }
michael@0 966 if(IS_EVEN_RUN(runs[i].logicalStart)) {
michael@0 967 /* LTR: check from run start to logical index */
michael@0 968 start=runs[i].logicalStart;
michael@0 969 limit=logicalIndex;
michael@0 970 } else {
michael@0 971 /* RTL: check from logical index to run end */
michael@0 972 start=logicalIndex+1;
michael@0 973 limit=GET_INDEX(runs[i].logicalStart)+length;
michael@0 974 }
michael@0 975 for(j=start; j<limit; j++) {
michael@0 976 uchar=pBiDi->text[j];
michael@0 977 if(IS_BIDI_CONTROL_CHAR(uchar)) {
michael@0 978 controlFound++;
michael@0 979 }
michael@0 980 }
michael@0 981 return visualIndex-controlFound;
michael@0 982 }
michael@0 983 }
michael@0 984
michael@0 985 return visualIndex;
michael@0 986 }
michael@0 987
michael@0 988 U_CAPI int32_t U_EXPORT2
michael@0 989 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
michael@0 990 Run *runs;
michael@0 991 int32_t i, runCount, start;
michael@0 992 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
michael@0 993 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
michael@0 994 RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
michael@0 995 /* we can do the trivial cases without the runs array */
michael@0 996 if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
michael@0 997 if(pBiDi->direction==UBIDI_LTR) {
michael@0 998 return visualIndex;
michael@0 999 }
michael@0 1000 else if(pBiDi->direction==UBIDI_RTL) {
michael@0 1001 return pBiDi->length-visualIndex-1;
michael@0 1002 }
michael@0 1003 }
michael@0 1004 if(!ubidi_getRuns(pBiDi, pErrorCode)) {
michael@0 1005 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1006 return -1;
michael@0 1007 }
michael@0 1008
michael@0 1009 runs=pBiDi->runs;
michael@0 1010 runCount=pBiDi->runCount;
michael@0 1011 if(pBiDi->insertPoints.size>0) {
michael@0 1012 /* handle inserted LRM/RLM */
michael@0 1013 int32_t markFound=0, insertRemove;
michael@0 1014 int32_t visualStart=0, length;
michael@0 1015 runs=pBiDi->runs;
michael@0 1016 /* subtract number of marks until visual index */
michael@0 1017 for(i=0; ; i++, visualStart+=length) {
michael@0 1018 length=runs[i].visualLimit-visualStart;
michael@0 1019 insertRemove=runs[i].insertRemove;
michael@0 1020 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
michael@0 1021 if(visualIndex<=(visualStart+markFound)) {
michael@0 1022 return UBIDI_MAP_NOWHERE;
michael@0 1023 }
michael@0 1024 markFound++;
michael@0 1025 }
michael@0 1026 /* is adjusted visual index within this run? */
michael@0 1027 if(visualIndex<(runs[i].visualLimit+markFound)) {
michael@0 1028 visualIndex-=markFound;
michael@0 1029 break;
michael@0 1030 }
michael@0 1031 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
michael@0 1032 if(visualIndex==(visualStart+length+markFound)) {
michael@0 1033 return UBIDI_MAP_NOWHERE;
michael@0 1034 }
michael@0 1035 markFound++;
michael@0 1036 }
michael@0 1037 }
michael@0 1038 }
michael@0 1039 else if(pBiDi->controlCount>0) {
michael@0 1040 /* handle removed BiDi control characters */
michael@0 1041 int32_t controlFound=0, insertRemove, length;
michael@0 1042 int32_t logicalStart, logicalEnd, visualStart=0, j, k;
michael@0 1043 UChar uchar;
michael@0 1044 UBool evenRun;
michael@0 1045 /* add number of controls until visual index */
michael@0 1046 for(i=0; ; i++, visualStart+=length) {
michael@0 1047 length=runs[i].visualLimit-visualStart;
michael@0 1048 insertRemove=runs[i].insertRemove;
michael@0 1049 /* is adjusted visual index beyond current run? */
michael@0 1050 if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
michael@0 1051 controlFound-=insertRemove;
michael@0 1052 continue;
michael@0 1053 }
michael@0 1054 /* adjusted visual index is within current run */
michael@0 1055 if(insertRemove==0) {
michael@0 1056 visualIndex+=controlFound;
michael@0 1057 break;
michael@0 1058 }
michael@0 1059 /* count non-control chars until visualIndex */
michael@0 1060 logicalStart=runs[i].logicalStart;
michael@0 1061 evenRun=IS_EVEN_RUN(logicalStart);
michael@0 1062 REMOVE_ODD_BIT(logicalStart);
michael@0 1063 logicalEnd=logicalStart+length-1;
michael@0 1064 for(j=0; j<length; j++) {
michael@0 1065 k= evenRun ? logicalStart+j : logicalEnd-j;
michael@0 1066 uchar=pBiDi->text[k];
michael@0 1067 if(IS_BIDI_CONTROL_CHAR(uchar)) {
michael@0 1068 controlFound++;
michael@0 1069 }
michael@0 1070 if((visualIndex+controlFound)==(visualStart+j)) {
michael@0 1071 break;
michael@0 1072 }
michael@0 1073 }
michael@0 1074 visualIndex+=controlFound;
michael@0 1075 break;
michael@0 1076 }
michael@0 1077 }
michael@0 1078 /* handle all cases */
michael@0 1079 if(runCount<=10) {
michael@0 1080 /* linear search for the run */
michael@0 1081 for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
michael@0 1082 } else {
michael@0 1083 /* binary search for the run */
michael@0 1084 int32_t begin=0, limit=runCount;
michael@0 1085
michael@0 1086 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
michael@0 1087 for(;;) {
michael@0 1088 i=(begin+limit)/2;
michael@0 1089 if(visualIndex>=runs[i].visualLimit) {
michael@0 1090 begin=i+1;
michael@0 1091 } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
michael@0 1092 break;
michael@0 1093 } else {
michael@0 1094 limit=i;
michael@0 1095 }
michael@0 1096 }
michael@0 1097 }
michael@0 1098
michael@0 1099 start=runs[i].logicalStart;
michael@0 1100 if(IS_EVEN_RUN(start)) {
michael@0 1101 /* LTR */
michael@0 1102 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
michael@0 1103 if(i>0) {
michael@0 1104 visualIndex-=runs[i-1].visualLimit;
michael@0 1105 }
michael@0 1106 return start+visualIndex;
michael@0 1107 } else {
michael@0 1108 /* RTL */
michael@0 1109 return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
michael@0 1110 }
michael@0 1111 }
michael@0 1112
michael@0 1113 U_CAPI void U_EXPORT2
michael@0 1114 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
michael@0 1115 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 1116 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
michael@0 1117 ubidi_countRuns(pBiDi, pErrorCode);
michael@0 1118 if(U_FAILURE(*pErrorCode)) {
michael@0 1119 /* no op */
michael@0 1120 } else if(indexMap==NULL) {
michael@0 1121 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1122 } else {
michael@0 1123 /* fill a logical-to-visual index map using the runs[] */
michael@0 1124 int32_t visualStart, visualLimit, i, j, k;
michael@0 1125 int32_t logicalStart, logicalLimit;
michael@0 1126 Run *runs=pBiDi->runs;
michael@0 1127 if (pBiDi->length<=0) {
michael@0 1128 return;
michael@0 1129 }
michael@0 1130 if (pBiDi->length>pBiDi->resultLength) {
michael@0 1131 uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
michael@0 1132 }
michael@0 1133
michael@0 1134 visualStart=0;
michael@0 1135 for(j=0; j<pBiDi->runCount; ++j) {
michael@0 1136 logicalStart=GET_INDEX(runs[j].logicalStart);
michael@0 1137 visualLimit=runs[j].visualLimit;
michael@0 1138 if(IS_EVEN_RUN(runs[j].logicalStart)) {
michael@0 1139 do { /* LTR */
michael@0 1140 indexMap[logicalStart++]=visualStart++;
michael@0 1141 } while(visualStart<visualLimit);
michael@0 1142 } else {
michael@0 1143 logicalStart+=visualLimit-visualStart; /* logicalLimit */
michael@0 1144 do { /* RTL */
michael@0 1145 indexMap[--logicalStart]=visualStart++;
michael@0 1146 } while(visualStart<visualLimit);
michael@0 1147 }
michael@0 1148 /* visualStart==visualLimit; */
michael@0 1149 }
michael@0 1150
michael@0 1151 if(pBiDi->insertPoints.size>0) {
michael@0 1152 int32_t markFound=0, runCount=pBiDi->runCount;
michael@0 1153 int32_t length, insertRemove;
michael@0 1154 visualStart=0;
michael@0 1155 /* add number of marks found until each index */
michael@0 1156 for(i=0; i<runCount; i++, visualStart+=length) {
michael@0 1157 length=runs[i].visualLimit-visualStart;
michael@0 1158 insertRemove=runs[i].insertRemove;
michael@0 1159 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
michael@0 1160 markFound++;
michael@0 1161 }
michael@0 1162 if(markFound>0) {
michael@0 1163 logicalStart=GET_INDEX(runs[i].logicalStart);
michael@0 1164 logicalLimit=logicalStart+length;
michael@0 1165 for(j=logicalStart; j<logicalLimit; j++) {
michael@0 1166 indexMap[j]+=markFound;
michael@0 1167 }
michael@0 1168 }
michael@0 1169 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
michael@0 1170 markFound++;
michael@0 1171 }
michael@0 1172 }
michael@0 1173 }
michael@0 1174 else if(pBiDi->controlCount>0) {
michael@0 1175 int32_t controlFound=0, runCount=pBiDi->runCount;
michael@0 1176 int32_t length, insertRemove;
michael@0 1177 UBool evenRun;
michael@0 1178 UChar uchar;
michael@0 1179 visualStart=0;
michael@0 1180 /* subtract number of controls found until each index */
michael@0 1181 for(i=0; i<runCount; i++, visualStart+=length) {
michael@0 1182 length=runs[i].visualLimit-visualStart;
michael@0 1183 insertRemove=runs[i].insertRemove;
michael@0 1184 /* no control found within previous runs nor within this run */
michael@0 1185 if((controlFound-insertRemove)==0) {
michael@0 1186 continue;
michael@0 1187 }
michael@0 1188 logicalStart=runs[i].logicalStart;
michael@0 1189 evenRun=IS_EVEN_RUN(logicalStart);
michael@0 1190 REMOVE_ODD_BIT(logicalStart);
michael@0 1191 logicalLimit=logicalStart+length;
michael@0 1192 /* if no control within this run */
michael@0 1193 if(insertRemove==0) {
michael@0 1194 for(j=logicalStart; j<logicalLimit; j++) {
michael@0 1195 indexMap[j]-=controlFound;
michael@0 1196 }
michael@0 1197 continue;
michael@0 1198 }
michael@0 1199 for(j=0; j<length; j++) {
michael@0 1200 k= evenRun ? logicalStart+j : logicalLimit-j-1;
michael@0 1201 uchar=pBiDi->text[k];
michael@0 1202 if(IS_BIDI_CONTROL_CHAR(uchar)) {
michael@0 1203 controlFound++;
michael@0 1204 indexMap[k]=UBIDI_MAP_NOWHERE;
michael@0 1205 continue;
michael@0 1206 }
michael@0 1207 indexMap[k]-=controlFound;
michael@0 1208 }
michael@0 1209 }
michael@0 1210 }
michael@0 1211 }
michael@0 1212 }
michael@0 1213
michael@0 1214 U_CAPI void U_EXPORT2
michael@0 1215 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
michael@0 1216 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 1217 if(indexMap==NULL) {
michael@0 1218 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1219 return;
michael@0 1220 }
michael@0 1221 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
michael@0 1222 ubidi_countRuns(pBiDi, pErrorCode);
michael@0 1223 if(U_SUCCESS(*pErrorCode)) {
michael@0 1224 /* fill a visual-to-logical index map using the runs[] */
michael@0 1225 Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
michael@0 1226 int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
michael@0 1227
michael@0 1228 if (pBiDi->resultLength<=0) {
michael@0 1229 return;
michael@0 1230 }
michael@0 1231 visualStart=0;
michael@0 1232 for(; runs<runsLimit; ++runs) {
michael@0 1233 logicalStart=runs->logicalStart;
michael@0 1234 visualLimit=runs->visualLimit;
michael@0 1235 if(IS_EVEN_RUN(logicalStart)) {
michael@0 1236 do { /* LTR */
michael@0 1237 *pi++ = logicalStart++;
michael@0 1238 } while(++visualStart<visualLimit);
michael@0 1239 } else {
michael@0 1240 REMOVE_ODD_BIT(logicalStart);
michael@0 1241 logicalStart+=visualLimit-visualStart; /* logicalLimit */
michael@0 1242 do { /* RTL */
michael@0 1243 *pi++ = --logicalStart;
michael@0 1244 } while(++visualStart<visualLimit);
michael@0 1245 }
michael@0 1246 /* visualStart==visualLimit; */
michael@0 1247 }
michael@0 1248
michael@0 1249 if(pBiDi->insertPoints.size>0) {
michael@0 1250 int32_t markFound=0, runCount=pBiDi->runCount;
michael@0 1251 int32_t insertRemove, i, j, k;
michael@0 1252 runs=pBiDi->runs;
michael@0 1253 /* count all inserted marks */
michael@0 1254 for(i=0; i<runCount; i++) {
michael@0 1255 insertRemove=runs[i].insertRemove;
michael@0 1256 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
michael@0 1257 markFound++;
michael@0 1258 }
michael@0 1259 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
michael@0 1260 markFound++;
michael@0 1261 }
michael@0 1262 }
michael@0 1263 /* move back indexes by number of preceding marks */
michael@0 1264 k=pBiDi->resultLength;
michael@0 1265 for(i=runCount-1; i>=0 && markFound>0; i--) {
michael@0 1266 insertRemove=runs[i].insertRemove;
michael@0 1267 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
michael@0 1268 indexMap[--k]= UBIDI_MAP_NOWHERE;
michael@0 1269 markFound--;
michael@0 1270 }
michael@0 1271 visualStart= i>0 ? runs[i-1].visualLimit : 0;
michael@0 1272 for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
michael@0 1273 indexMap[--k]=indexMap[j];
michael@0 1274 }
michael@0 1275 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
michael@0 1276 indexMap[--k]= UBIDI_MAP_NOWHERE;
michael@0 1277 markFound--;
michael@0 1278 }
michael@0 1279 }
michael@0 1280 }
michael@0 1281 else if(pBiDi->controlCount>0) {
michael@0 1282 int32_t runCount=pBiDi->runCount, logicalEnd;
michael@0 1283 int32_t insertRemove, length, i, j, k, m;
michael@0 1284 UChar uchar;
michael@0 1285 UBool evenRun;
michael@0 1286 runs=pBiDi->runs;
michael@0 1287 visualStart=0;
michael@0 1288 /* move forward indexes by number of preceding controls */
michael@0 1289 k=0;
michael@0 1290 for(i=0; i<runCount; i++, visualStart+=length) {
michael@0 1291 length=runs[i].visualLimit-visualStart;
michael@0 1292 insertRemove=runs[i].insertRemove;
michael@0 1293 /* if no control found yet, nothing to do in this run */
michael@0 1294 if((insertRemove==0)&&(k==visualStart)) {
michael@0 1295 k+=length;
michael@0 1296 continue;
michael@0 1297 }
michael@0 1298 /* if no control in this run */
michael@0 1299 if(insertRemove==0) {
michael@0 1300 visualLimit=runs[i].visualLimit;
michael@0 1301 for(j=visualStart; j<visualLimit; j++) {
michael@0 1302 indexMap[k++]=indexMap[j];
michael@0 1303 }
michael@0 1304 continue;
michael@0 1305 }
michael@0 1306 logicalStart=runs[i].logicalStart;
michael@0 1307 evenRun=IS_EVEN_RUN(logicalStart);
michael@0 1308 REMOVE_ODD_BIT(logicalStart);
michael@0 1309 logicalEnd=logicalStart+length-1;
michael@0 1310 for(j=0; j<length; j++) {
michael@0 1311 m= evenRun ? logicalStart+j : logicalEnd-j;
michael@0 1312 uchar=pBiDi->text[m];
michael@0 1313 if(!IS_BIDI_CONTROL_CHAR(uchar)) {
michael@0 1314 indexMap[k++]=m;
michael@0 1315 }
michael@0 1316 }
michael@0 1317 }
michael@0 1318 }
michael@0 1319 }
michael@0 1320 }
michael@0 1321
michael@0 1322 U_CAPI void U_EXPORT2
michael@0 1323 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
michael@0 1324 if(srcMap!=NULL && destMap!=NULL && length>0) {
michael@0 1325 const int32_t *pi;
michael@0 1326 int32_t destLength=-1, count=0;
michael@0 1327 /* find highest value and count positive indexes in srcMap */
michael@0 1328 pi=srcMap+length;
michael@0 1329 while(pi>srcMap) {
michael@0 1330 if(*--pi>destLength) {
michael@0 1331 destLength=*pi;
michael@0 1332 }
michael@0 1333 if(*pi>=0) {
michael@0 1334 count++;
michael@0 1335 }
michael@0 1336 }
michael@0 1337 destLength++; /* add 1 for origin 0 */
michael@0 1338 if(count<destLength) {
michael@0 1339 /* we must fill unmatched destMap entries with -1 */
michael@0 1340 uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
michael@0 1341 }
michael@0 1342 pi=srcMap+length;
michael@0 1343 while(length>0) {
michael@0 1344 if(*--pi>=0) {
michael@0 1345 destMap[*pi]=--length;
michael@0 1346 } else {
michael@0 1347 --length;
michael@0 1348 }
michael@0 1349 }
michael@0 1350 }
michael@0 1351 }

mercurial