intl/icu/source/common/ubidi.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1999-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: ubidi.c
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 1999jul27
michael@0 14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
michael@0 15 *
michael@0 16 */
michael@0 17
michael@0 18 #include "cmemory.h"
michael@0 19 #include "unicode/utypes.h"
michael@0 20 #include "unicode/ustring.h"
michael@0 21 #include "unicode/uchar.h"
michael@0 22 #include "unicode/ubidi.h"
michael@0 23 #include "unicode/utf16.h"
michael@0 24 #include "ubidi_props.h"
michael@0 25 #include "ubidiimp.h"
michael@0 26 #include "uassert.h"
michael@0 27
michael@0 28 /*
michael@0 29 * General implementation notes:
michael@0 30 *
michael@0 31 * Throughout the implementation, there are comments like (W2) that refer to
michael@0 32 * rules of the BiDi algorithm in its version 5, in this example to the second
michael@0 33 * rule of the resolution of weak types.
michael@0 34 *
michael@0 35 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
michael@0 36 * character according to UTF-16, the second UChar gets the directional property of
michael@0 37 * the entire character assigned, while the first one gets a BN, a boundary
michael@0 38 * neutral, type, which is ignored by most of the algorithm according to
michael@0 39 * rule (X9) and the implementation suggestions of the BiDi algorithm.
michael@0 40 *
michael@0 41 * Later, adjustWSLevels() will set the level for each BN to that of the
michael@0 42 * following character (UChar), which results in surrogate pairs getting the
michael@0 43 * same level on each of their surrogates.
michael@0 44 *
michael@0 45 * In a UTF-8 implementation, the same thing could be done: the last byte of
michael@0 46 * a multi-byte sequence would get the "real" property, while all previous
michael@0 47 * bytes of that sequence would get BN.
michael@0 48 *
michael@0 49 * It is not possible to assign all those parts of a character the same real
michael@0 50 * property because this would fail in the resolution of weak types with rules
michael@0 51 * that look at immediately surrounding types.
michael@0 52 *
michael@0 53 * As a related topic, this implementation does not remove Boundary Neutral
michael@0 54 * types from the input, but ignores them wherever this is relevant.
michael@0 55 * For example, the loop for the resolution of the weak types reads
michael@0 56 * types until it finds a non-BN.
michael@0 57 * Also, explicit embedding codes are neither changed into BN nor removed.
michael@0 58 * They are only treated the same way real BNs are.
michael@0 59 * As stated before, adjustWSLevels() takes care of them at the end.
michael@0 60 * For the purpose of conformance, the levels of all these codes
michael@0 61 * do not matter.
michael@0 62 *
michael@0 63 * Note that this implementation never modifies the dirProps
michael@0 64 * after the initial setup, except for FSI which is changed to either
michael@0 65 * LRI or RLI in getDirProps(), and paired brackets which may be changed
michael@0 66 * to L or R according to N0.
michael@0 67 *
michael@0 68 *
michael@0 69 * In this implementation, the resolution of weak types (Wn),
michael@0 70 * neutrals (Nn), and the assignment of the resolved level (In)
michael@0 71 * are all done in one single loop, in resolveImplicitLevels().
michael@0 72 * Changes of dirProp values are done on the fly, without writing
michael@0 73 * them back to the dirProps array.
michael@0 74 *
michael@0 75 *
michael@0 76 * This implementation contains code that makes it possible to bypass steps
michael@0 77 * of the algorithm that are not needed for the specific paragraph,
michael@0 78 * in order to speed up the most common cases considerably,
michael@0 79 * like text that is entirely LTR, or RTL text without numbers.
michael@0 80 *
michael@0 81 * Most of this is done by setting a bit for each directional property
michael@0 82 * in a flags variable and later checking for whether there are
michael@0 83 * any LTR characters or any RTL characters, or both, whether
michael@0 84 * there are any explicit embedding codes, etc.
michael@0 85 *
michael@0 86 * If the (Xn) steps are performed, then the flags are re-evaluated,
michael@0 87 * because they will then not contain the embedding codes any more
michael@0 88 * and will be adjusted for override codes, so that subsequently
michael@0 89 * more bypassing may be possible than what the initial flags suggested.
michael@0 90 *
michael@0 91 * If the text is not mixed-directional, then the
michael@0 92 * algorithm steps for the weak type resolution are not performed,
michael@0 93 * and all levels are set to the paragraph level.
michael@0 94 *
michael@0 95 * If there are no explicit embedding codes, then the (Xn) steps
michael@0 96 * are not performed.
michael@0 97 *
michael@0 98 * If embedding levels are supplied as a parameter, then all
michael@0 99 * explicit embedding codes are ignored, and the (Xn) steps
michael@0 100 * are not performed.
michael@0 101 *
michael@0 102 * White Space types could get the level of the run they belong to,
michael@0 103 * and are checked with a test of (flags&MASK_EMBEDDING) to
michael@0 104 * decide whether the paragraph direction should be taken into
michael@0 105 * account in the flags variable.
michael@0 106 *
michael@0 107 * If there are no White Space types in the paragraph, then
michael@0 108 * (L1) is not necessary in adjustWSLevels().
michael@0 109 */
michael@0 110
michael@0 111 /* to avoid some conditional statements, use tiny constant arrays */
michael@0 112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
michael@0 113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
michael@0 114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
michael@0 115
michael@0 116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
michael@0 117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
michael@0 118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
michael@0 119
michael@0 120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
michael@0 121
michael@0 122 /* UBiDi object management -------------------------------------------------- */
michael@0 123
michael@0 124 U_CAPI UBiDi * U_EXPORT2
michael@0 125 ubidi_open(void)
michael@0 126 {
michael@0 127 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 128 return ubidi_openSized(0, 0, &errorCode);
michael@0 129 }
michael@0 130
michael@0 131 U_CAPI UBiDi * U_EXPORT2
michael@0 132 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
michael@0 133 UBiDi *pBiDi;
michael@0 134
michael@0 135 /* check the argument values */
michael@0 136 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
michael@0 137 return NULL;
michael@0 138 } else if(maxLength<0 || maxRunCount<0) {
michael@0 139 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 140 return NULL; /* invalid arguments */
michael@0 141 }
michael@0 142
michael@0 143 /* allocate memory for the object */
michael@0 144 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
michael@0 145 if(pBiDi==NULL) {
michael@0 146 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 147 return NULL;
michael@0 148 }
michael@0 149
michael@0 150 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
michael@0 151 uprv_memset(pBiDi, 0, sizeof(UBiDi));
michael@0 152
michael@0 153 /* get BiDi properties */
michael@0 154 pBiDi->bdp=ubidi_getSingleton();
michael@0 155
michael@0 156 /* allocate memory for arrays as requested */
michael@0 157 if(maxLength>0) {
michael@0 158 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
michael@0 159 !getInitialLevelsMemory(pBiDi, maxLength)
michael@0 160 ) {
michael@0 161 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 162 }
michael@0 163 } else {
michael@0 164 pBiDi->mayAllocateText=TRUE;
michael@0 165 }
michael@0 166
michael@0 167 if(maxRunCount>0) {
michael@0 168 if(maxRunCount==1) {
michael@0 169 /* use simpleRuns[] */
michael@0 170 pBiDi->runsSize=sizeof(Run);
michael@0 171 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
michael@0 172 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 173 }
michael@0 174 } else {
michael@0 175 pBiDi->mayAllocateRuns=TRUE;
michael@0 176 }
michael@0 177
michael@0 178 if(U_SUCCESS(*pErrorCode)) {
michael@0 179 return pBiDi;
michael@0 180 } else {
michael@0 181 ubidi_close(pBiDi);
michael@0 182 return NULL;
michael@0 183 }
michael@0 184 }
michael@0 185
michael@0 186 /*
michael@0 187 * We are allowed to allocate memory if memory==NULL or
michael@0 188 * mayAllocate==TRUE for each array that we need.
michael@0 189 * We also try to grow memory as needed if we
michael@0 190 * allocate it.
michael@0 191 *
michael@0 192 * Assume sizeNeeded>0.
michael@0 193 * If *pMemory!=NULL, then assume *pSize>0.
michael@0 194 *
michael@0 195 * ### this realloc() may unnecessarily copy the old data,
michael@0 196 * which we know we don't need any more;
michael@0 197 * is this the best way to do this??
michael@0 198 */
michael@0 199 U_CFUNC UBool
michael@0 200 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
michael@0 201 void **pMemory = (void **)bidiMem;
michael@0 202 /* check for existing memory */
michael@0 203 if(*pMemory==NULL) {
michael@0 204 /* we need to allocate memory */
michael@0 205 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
michael@0 206 *pSize=sizeNeeded;
michael@0 207 return TRUE;
michael@0 208 } else {
michael@0 209 return FALSE;
michael@0 210 }
michael@0 211 } else {
michael@0 212 if(sizeNeeded<=*pSize) {
michael@0 213 /* there is already enough memory */
michael@0 214 return TRUE;
michael@0 215 }
michael@0 216 else if(!mayAllocate) {
michael@0 217 /* not enough memory, and we must not allocate */
michael@0 218 return FALSE;
michael@0 219 } else {
michael@0 220 /* we try to grow */
michael@0 221 void *memory;
michael@0 222 /* in most cases, we do not need the copy-old-data part of
michael@0 223 * realloc, but it is needed when adding runs using getRunsMemory()
michael@0 224 * in setParaRunsOnly()
michael@0 225 */
michael@0 226 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
michael@0 227 *pMemory=memory;
michael@0 228 *pSize=sizeNeeded;
michael@0 229 return TRUE;
michael@0 230 } else {
michael@0 231 /* we failed to grow */
michael@0 232 return FALSE;
michael@0 233 }
michael@0 234 }
michael@0 235 }
michael@0 236 }
michael@0 237
michael@0 238 U_CAPI void U_EXPORT2
michael@0 239 ubidi_close(UBiDi *pBiDi) {
michael@0 240 if(pBiDi!=NULL) {
michael@0 241 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
michael@0 242 if(pBiDi->dirPropsMemory!=NULL) {
michael@0 243 uprv_free(pBiDi->dirPropsMemory);
michael@0 244 }
michael@0 245 if(pBiDi->levelsMemory!=NULL) {
michael@0 246 uprv_free(pBiDi->levelsMemory);
michael@0 247 }
michael@0 248 if(pBiDi->openingsMemory!=NULL) {
michael@0 249 uprv_free(pBiDi->openingsMemory);
michael@0 250 }
michael@0 251 if(pBiDi->parasMemory!=NULL) {
michael@0 252 uprv_free(pBiDi->parasMemory);
michael@0 253 }
michael@0 254 if(pBiDi->runsMemory!=NULL) {
michael@0 255 uprv_free(pBiDi->runsMemory);
michael@0 256 }
michael@0 257 if(pBiDi->isolatesMemory!=NULL) {
michael@0 258 uprv_free(pBiDi->isolatesMemory);
michael@0 259 }
michael@0 260 if(pBiDi->insertPoints.points!=NULL) {
michael@0 261 uprv_free(pBiDi->insertPoints.points);
michael@0 262 }
michael@0 263
michael@0 264 uprv_free(pBiDi);
michael@0 265 }
michael@0 266 }
michael@0 267
michael@0 268 /* set to approximate "inverse BiDi" ---------------------------------------- */
michael@0 269
michael@0 270 U_CAPI void U_EXPORT2
michael@0 271 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
michael@0 272 if(pBiDi!=NULL) {
michael@0 273 pBiDi->isInverse=isInverse;
michael@0 274 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
michael@0 275 : UBIDI_REORDER_DEFAULT;
michael@0 276 }
michael@0 277 }
michael@0 278
michael@0 279 U_CAPI UBool U_EXPORT2
michael@0 280 ubidi_isInverse(UBiDi *pBiDi) {
michael@0 281 if(pBiDi!=NULL) {
michael@0 282 return pBiDi->isInverse;
michael@0 283 } else {
michael@0 284 return FALSE;
michael@0 285 }
michael@0 286 }
michael@0 287
michael@0 288 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
michael@0 289 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
michael@0 290 * concept of RUNS_ONLY which is a double operation.
michael@0 291 * It could be advantageous to divide this into 3 concepts:
michael@0 292 * a) Operation: direct / inverse / RUNS_ONLY
michael@0 293 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
michael@0 294 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
michael@0 295 * This would allow combinations not possible today like RUNS_ONLY with
michael@0 296 * NUMBERS_SPECIAL.
michael@0 297 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
michael@0 298 * REMOVE_CONTROLS for the inverse step.
michael@0 299 * Not all combinations would be supported, and probably not all do make sense.
michael@0 300 * This would need to document which ones are supported and what are the
michael@0 301 * fallbacks for unsupported combinations.
michael@0 302 */
michael@0 303 U_CAPI void U_EXPORT2
michael@0 304 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
michael@0 305 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
michael@0 306 && (reorderingMode < UBIDI_REORDER_COUNT)) {
michael@0 307 pBiDi->reorderingMode = reorderingMode;
michael@0 308 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
michael@0 309 }
michael@0 310 }
michael@0 311
michael@0 312 U_CAPI UBiDiReorderingMode U_EXPORT2
michael@0 313 ubidi_getReorderingMode(UBiDi *pBiDi) {
michael@0 314 if (pBiDi!=NULL) {
michael@0 315 return pBiDi->reorderingMode;
michael@0 316 } else {
michael@0 317 return UBIDI_REORDER_DEFAULT;
michael@0 318 }
michael@0 319 }
michael@0 320
michael@0 321 U_CAPI void U_EXPORT2
michael@0 322 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
michael@0 323 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
michael@0 324 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
michael@0 325 }
michael@0 326 if (pBiDi!=NULL) {
michael@0 327 pBiDi->reorderingOptions=reorderingOptions;
michael@0 328 }
michael@0 329 }
michael@0 330
michael@0 331 U_CAPI uint32_t U_EXPORT2
michael@0 332 ubidi_getReorderingOptions(UBiDi *pBiDi) {
michael@0 333 if (pBiDi!=NULL) {
michael@0 334 return pBiDi->reorderingOptions;
michael@0 335 } else {
michael@0 336 return 0;
michael@0 337 }
michael@0 338 }
michael@0 339
michael@0 340 U_CAPI UBiDiDirection U_EXPORT2
michael@0 341 ubidi_getBaseDirection(const UChar *text,
michael@0 342 int32_t length){
michael@0 343
michael@0 344 int32_t i;
michael@0 345 UChar32 uchar;
michael@0 346 UCharDirection dir;
michael@0 347
michael@0 348 if( text==NULL || length<-1 ){
michael@0 349 return UBIDI_NEUTRAL;
michael@0 350 }
michael@0 351
michael@0 352 if(length==-1) {
michael@0 353 length=u_strlen(text);
michael@0 354 }
michael@0 355
michael@0 356 for( i = 0 ; i < length; ) {
michael@0 357 /* i is incremented by U16_NEXT */
michael@0 358 U16_NEXT(text, i, length, uchar);
michael@0 359 dir = u_charDirection(uchar);
michael@0 360 if( dir == U_LEFT_TO_RIGHT )
michael@0 361 return UBIDI_LTR;
michael@0 362 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
michael@0 363 return UBIDI_RTL;
michael@0 364 }
michael@0 365 return UBIDI_NEUTRAL;
michael@0 366 }
michael@0 367
michael@0 368 /* perform (P2)..(P3) ------------------------------------------------------- */
michael@0 369
michael@0 370 /**
michael@0 371 * Returns the directionality of the first strong character
michael@0 372 * after the last B in prologue, if any.
michael@0 373 * Requires prologue!=null.
michael@0 374 */
michael@0 375 static DirProp
michael@0 376 firstL_R_AL(UBiDi *pBiDi) {
michael@0 377 const UChar *text=pBiDi->prologue;
michael@0 378 int32_t length=pBiDi->proLength;
michael@0 379 int32_t i;
michael@0 380 UChar32 uchar;
michael@0 381 DirProp dirProp, result=ON;
michael@0 382 for(i=0; i<length; ) {
michael@0 383 /* i is incremented by U16_NEXT */
michael@0 384 U16_NEXT(text, i, length, uchar);
michael@0 385 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
michael@0 386 if(result==ON) {
michael@0 387 if(dirProp==L || dirProp==R || dirProp==AL) {
michael@0 388 result=dirProp;
michael@0 389 }
michael@0 390 } else {
michael@0 391 if(dirProp==B) {
michael@0 392 result=ON;
michael@0 393 }
michael@0 394 }
michael@0 395 }
michael@0 396 return result;
michael@0 397 }
michael@0 398
michael@0 399 /*
michael@0 400 * Check that there are enough entries in the array pointed to by pBiDi->paras
michael@0 401 */
michael@0 402 static UBool
michael@0 403 checkParaCount(UBiDi *pBiDi) {
michael@0 404 int32_t count=pBiDi->paraCount;
michael@0 405 if(pBiDi->paras==pBiDi->simpleParas) {
michael@0 406 if(count<=SIMPLE_PARAS_SIZE)
michael@0 407 return TRUE;
michael@0 408 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2))
michael@0 409 return FALSE;
michael@0 410 pBiDi->paras=pBiDi->parasMemory;
michael@0 411 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para));
michael@0 412 return TRUE;
michael@0 413 }
michael@0 414 if(!getInitialParasMemory(pBiDi, count * 2))
michael@0 415 return FALSE;
michael@0 416 pBiDi->paras=pBiDi->parasMemory;
michael@0 417 return TRUE;
michael@0 418 }
michael@0 419
michael@0 420 /*
michael@0 421 * Get the directional properties for the text, calculate the flags bit-set, and
michael@0 422 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
michael@0 423 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
michael@0 424 */
michael@0 425 static UBool
michael@0 426 getDirProps(UBiDi *pBiDi) {
michael@0 427 const UChar *text=pBiDi->text;
michael@0 428 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
michael@0 429
michael@0 430 int32_t i=0, originalLength=pBiDi->originalLength;
michael@0 431 Flags flags=0; /* collect all directionalities in the text */
michael@0 432 UChar32 uchar;
michael@0 433 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
michael@0 434 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
michael@0 435 /* for inverse BiDi, the default para level is set to RTL if there is a
michael@0 436 strong R or AL character at either end of the text */
michael@0 437 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
michael@0 438 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
michael@0 439 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
michael@0 440 int32_t lastArabicPos=-1;
michael@0 441 int32_t controlCount=0;
michael@0 442 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
michael@0 443 UBIDI_OPTION_REMOVE_CONTROLS);
michael@0 444
michael@0 445 typedef enum {
michael@0 446 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
michael@0 447 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
michael@0 448 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
michael@0 449 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
michael@0 450 } State;
michael@0 451 State state;
michael@0 452 DirProp lastStrong=ON; /* for default level & inverse BiDi */
michael@0 453 /* The following stacks are used to manage isolate sequences. Those
michael@0 454 sequences may be nested, but obviously never more deeply than the
michael@0 455 maximum explicit embedding level.
michael@0 456 lastStack is the index of the last used entry in the stack. A value of -1
michael@0 457 means that there is no open isolate sequence.
michael@0 458 lastStack is reset to -1 on paragraph boundaries. */
michael@0 459 /* The following stack contains the position of the initiator of
michael@0 460 each open isolate sequence */
michael@0 461 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
michael@0 462 /* The following stack contains the last known state before
michael@0 463 encountering the initiator of an isolate sequence */
michael@0 464 int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
michael@0 465 int32_t stackLast=-1;
michael@0 466
michael@0 467 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
michael@0 468 pBiDi->length=0;
michael@0 469 defaultParaLevel=pBiDi->paraLevel&1;
michael@0 470 if(isDefaultLevel) {
michael@0 471 pBiDi->paras[0].level=defaultParaLevel;
michael@0 472 lastStrong=defaultParaLevel;
michael@0 473 if(pBiDi->proLength>0 && /* there is a prologue */
michael@0 474 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
michael@0 475 if(dirProp==L)
michael@0 476 pBiDi->paras[0].level=0; /* set the default para level */
michael@0 477 else
michael@0 478 pBiDi->paras[0].level=1; /* set the default para level */
michael@0 479 state=NOT_SEEKING_STRONG;
michael@0 480 } else {
michael@0 481 state=SEEKING_STRONG_FOR_PARA;
michael@0 482 }
michael@0 483 } else {
michael@0 484 pBiDi->paras[0].level=pBiDi->paraLevel;
michael@0 485 state=NOT_SEEKING_STRONG;
michael@0 486 }
michael@0 487 /* count paragraphs and determine the paragraph level (P2..P3) */
michael@0 488 /*
michael@0 489 * see comment in ubidi.h:
michael@0 490 * the UBIDI_DEFAULT_XXX values are designed so that
michael@0 491 * their bit 0 alone yields the intended default
michael@0 492 */
michael@0 493 for( /* i=0 above */ ; i<originalLength; ) {
michael@0 494 /* i is incremented by U16_NEXT */
michael@0 495 U16_NEXT(text, i, originalLength, uchar);
michael@0 496 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
michael@0 497 dirProps[i-1]=dirProp;
michael@0 498 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
michael@0 499 flags|=DIRPROP_FLAG(BN);
michael@0 500 dirProps[i-2]=BN;
michael@0 501 }
michael@0 502 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
michael@0 503 controlCount++;
michael@0 504 if(dirProp==L) {
michael@0 505 if(state==SEEKING_STRONG_FOR_PARA) {
michael@0 506 pBiDi->paras[pBiDi->paraCount-1].level=0;
michael@0 507 state=NOT_SEEKING_STRONG;
michael@0 508 }
michael@0 509 else if(state==SEEKING_STRONG_FOR_FSI) {
michael@0 510 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 511 dirProps[isolateStartStack[stackLast]]=LRI;
michael@0 512 flags|=DIRPROP_FLAG(LRI);
michael@0 513 }
michael@0 514 state=LOOKING_FOR_PDI;
michael@0 515 }
michael@0 516 lastStrong=L;
michael@0 517 continue;
michael@0 518 }
michael@0 519 if(dirProp==R || dirProp==AL) {
michael@0 520 if(state==SEEKING_STRONG_FOR_PARA) {
michael@0 521 pBiDi->paras[pBiDi->paraCount-1].level=1;
michael@0 522 state=NOT_SEEKING_STRONG;
michael@0 523 }
michael@0 524 else if(state==SEEKING_STRONG_FOR_FSI) {
michael@0 525 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 526 dirProps[isolateStartStack[stackLast]]=RLI;
michael@0 527 flags|=DIRPROP_FLAG(RLI);
michael@0 528 }
michael@0 529 state=LOOKING_FOR_PDI;
michael@0 530 }
michael@0 531 lastStrong=R;
michael@0 532 if(dirProp==AL)
michael@0 533 lastArabicPos=i-1;
michael@0 534 continue;
michael@0 535 }
michael@0 536 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
michael@0 537 stackLast++;
michael@0 538 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 539 isolateStartStack[stackLast]=i-1;
michael@0 540 previousStateStack[stackLast]=state;
michael@0 541 }
michael@0 542 if(dirProp==FSI)
michael@0 543 state=SEEKING_STRONG_FOR_FSI;
michael@0 544 else
michael@0 545 state=LOOKING_FOR_PDI;
michael@0 546 continue;
michael@0 547 }
michael@0 548 if(dirProp==PDI) {
michael@0 549 if(state==SEEKING_STRONG_FOR_FSI) {
michael@0 550 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 551 dirProps[isolateStartStack[stackLast]]=LRI;
michael@0 552 flags|=DIRPROP_FLAG(LRI);
michael@0 553 }
michael@0 554 }
michael@0 555 if(stackLast>=0) {
michael@0 556 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
michael@0 557 state=previousStateStack[stackLast];
michael@0 558 stackLast--;
michael@0 559 }
michael@0 560 continue;
michael@0 561 }
michael@0 562 if(dirProp==B) {
michael@0 563 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
michael@0 564 continue;
michael@0 565 pBiDi->paras[pBiDi->paraCount-1].limit=i;
michael@0 566 if(isDefaultLevelInverse && lastStrong==R)
michael@0 567 pBiDi->paras[pBiDi->paraCount-1].level=1;
michael@0 568 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
michael@0 569 /* When streaming, we only process whole paragraphs
michael@0 570 thus some updates are only done on paragraph boundaries */
michael@0 571 pBiDi->length=i; /* i is index to next character */
michael@0 572 pBiDi->controlCount=controlCount;
michael@0 573 }
michael@0 574 if(i<originalLength) { /* B not last char in text */
michael@0 575 pBiDi->paraCount++;
michael@0 576 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
michael@0 577 return FALSE;
michael@0 578 if(isDefaultLevel) {
michael@0 579 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
michael@0 580 state=SEEKING_STRONG_FOR_PARA;
michael@0 581 lastStrong=defaultParaLevel;
michael@0 582 } else {
michael@0 583 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
michael@0 584 state=NOT_SEEKING_STRONG;
michael@0 585 }
michael@0 586 stackLast=-1;
michael@0 587 }
michael@0 588 continue;
michael@0 589 }
michael@0 590 }
michael@0 591 /* Ignore still open isolate sequences with overflow */
michael@0 592 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 593 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
michael@0 594 if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI)
michael@0 595 state=LOOKING_FOR_PDI;
michael@0 596 }
michael@0 597 /* Resolve direction of still unresolved open FSI sequences */
michael@0 598 while(stackLast>=0) {
michael@0 599 if(state==SEEKING_STRONG_FOR_FSI) {
michael@0 600 dirProps[isolateStartStack[stackLast]]=LRI;
michael@0 601 flags|=DIRPROP_FLAG(LRI);
michael@0 602 }
michael@0 603 state=previousStateStack[stackLast];
michael@0 604 stackLast--;
michael@0 605 }
michael@0 606 /* When streaming, ignore text after the last paragraph separator */
michael@0 607 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
michael@0 608 if(pBiDi->length<originalLength)
michael@0 609 pBiDi->paraCount--;
michael@0 610 } else {
michael@0 611 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
michael@0 612 pBiDi->controlCount=controlCount;
michael@0 613 }
michael@0 614 /* For inverse bidi, default para direction is RTL if there is
michael@0 615 a strong R or AL at either end of the paragraph */
michael@0 616 if(isDefaultLevelInverse && lastStrong==R) {
michael@0 617 pBiDi->paras[pBiDi->paraCount-1].level=1;
michael@0 618 }
michael@0 619 if(isDefaultLevel) {
michael@0 620 pBiDi->paraLevel=pBiDi->paras[0].level;
michael@0 621 }
michael@0 622 /* The following is needed to resolve the text direction for default level
michael@0 623 paragraphs containing no strong character */
michael@0 624 for(i=0; i<pBiDi->paraCount; i++)
michael@0 625 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
michael@0 626
michael@0 627 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
michael@0 628 flags|=DIRPROP_FLAG(L);
michael@0 629 }
michael@0 630 pBiDi->flags=flags;
michael@0 631 pBiDi->lastArabicPos=lastArabicPos;
michael@0 632 return TRUE;
michael@0 633 }
michael@0 634
michael@0 635 /* determine the paragraph level at position index */
michael@0 636 U_CFUNC UBiDiLevel
michael@0 637 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
michael@0 638 int32_t i;
michael@0 639 for(i=0; i<pBiDi->paraCount; i++)
michael@0 640 if(pindex<pBiDi->paras[i].limit)
michael@0 641 break;
michael@0 642 if(i>=pBiDi->paraCount)
michael@0 643 i=pBiDi->paraCount-1;
michael@0 644 return (UBiDiLevel)(pBiDi->paras[i].level);
michael@0 645 }
michael@0 646
michael@0 647 /* Functions for handling paired brackets ----------------------------------- */
michael@0 648
michael@0 649 /* In the isoRuns array, the first entry is used for text outside of any
michael@0 650 isolate sequence. Higher entries are used for each more deeply nested
michael@0 651 isolate sequence. isoRunLast is the index of the last used entry. The
michael@0 652 openings array is used to note the data of opening brackets not yet
michael@0 653 matched by a closing bracket, or matched but still susceptible to change
michael@0 654 level.
michael@0 655 Each isoRun entry contains the index of the first and
michael@0 656 one-after-last openings entries for pending opening brackets it
michael@0 657 contains. The next openings entry to use is the one-after-last of the
michael@0 658 most deeply nested isoRun entry.
michael@0 659 isoRun entries also contain their current embedding level and the last
michael@0 660 encountered strong character, since these will be needed to resolve
michael@0 661 the level of paired brackets. */
michael@0 662
michael@0 663 static void
michael@0 664 bracketInit(UBiDi *pBiDi, BracketData *bd) {
michael@0 665 bd->pBiDi=pBiDi;
michael@0 666 bd->isoRunLast=0;
michael@0 667 bd->isoRuns[0].start=0;
michael@0 668 bd->isoRuns[0].limit=0;
michael@0 669 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
michael@0 670 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1;
michael@0 671 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
michael@0 672 if(pBiDi->openingsMemory) {
michael@0 673 bd->openings=pBiDi->openingsMemory;
michael@0 674 bd->openingsSize=pBiDi->openingsSize;
michael@0 675 } else {
michael@0 676 bd->openings=bd->simpleOpenings;
michael@0 677 bd->openingsSize=SIMPLE_OPENINGS_SIZE;
michael@0 678 }
michael@0 679 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
michael@0 680 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
michael@0 681 }
michael@0 682
michael@0 683 /* paragraph boundary */
michael@0 684 static void
michael@0 685 bracketProcessB(BracketData *bd, UBiDiLevel level) {
michael@0 686 bd->isoRunLast=0;
michael@0 687 bd->isoRuns[0].limit=0;
michael@0 688 bd->isoRuns[0].level=level;
michael@0 689 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1;
michael@0 690 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0;
michael@0 691 }
michael@0 692
michael@0 693 /* LRE, LRO, RLE, RLO, PDF */
michael@0 694 static void
michael@0 695 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
michael@0 696 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
michael@0 697 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
michael@0 698 DirProp *dirProps=bd->pBiDi->dirProps;
michael@0 699 if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */
michael@0 700 return;
michael@0 701 if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)>
michael@0 702 (contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */
michael@0 703 contextLevel=embeddingLevel;
michael@0 704 pLastIsoRun->limit=pLastIsoRun->start;
michael@0 705 pLastIsoRun->level=embeddingLevel;
michael@0 706 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1;
michael@0 707 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos;
michael@0 708 }
michael@0 709
michael@0 710 /* LRI or RLI */
michael@0 711 static void
michael@0 712 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
michael@0 713 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
michael@0 714 int16_t lastLimit;
michael@0 715 lastLimit=pLastIsoRun->limit;
michael@0 716 bd->isoRunLast++;
michael@0 717 pLastIsoRun++;
michael@0 718 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
michael@0 719 pLastIsoRun->level=level;
michael@0 720 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1;
michael@0 721 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0;
michael@0 722 }
michael@0 723
michael@0 724 /* PDI */
michael@0 725 static void
michael@0 726 bracketProcessPDI(BracketData *bd) {
michael@0 727 bd->isoRunLast--;
michael@0 728 }
michael@0 729
michael@0 730 /* newly found opening bracket: create an openings entry */
michael@0 731 static UBool /* return TRUE if success */
michael@0 732 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
michael@0 733 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
michael@0 734 Opening *pOpening;
michael@0 735 if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */
michael@0 736 UBiDi *pBiDi=bd->pBiDi;
michael@0 737 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
michael@0 738 return FALSE;
michael@0 739 if(bd->openings==bd->simpleOpenings)
michael@0 740 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
michael@0 741 SIMPLE_OPENINGS_SIZE * sizeof(Opening));
michael@0 742 bd->openings=pBiDi->openingsMemory; /* may have changed */
michael@0 743 bd->openingsSize=pBiDi->openingsSize;
michael@0 744 }
michael@0 745 pOpening=&bd->openings[pLastIsoRun->limit];
michael@0 746 pOpening->position=position;
michael@0 747 pOpening->match=match;
michael@0 748 pOpening->contextDir=pLastIsoRun->contextDir;
michael@0 749 pOpening->contextPos=pLastIsoRun->contextPos;
michael@0 750 pOpening->flags=0;
michael@0 751 pLastIsoRun->limit++;
michael@0 752 return TRUE;
michael@0 753 }
michael@0 754
michael@0 755 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
michael@0 756 static void
michael@0 757 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
michael@0 758 /* This function calls itself recursively */
michael@0 759 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
michael@0 760 Opening *qOpening;
michael@0 761 DirProp *dirProps=bd->pBiDi->dirProps;
michael@0 762 int32_t k, openingPosition, closingPosition;
michael@0 763 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
michael@0 764 if(qOpening->match>=0) /* not an N0c match */
michael@0 765 continue;
michael@0 766 if(newPropPosition<qOpening->contextPos)
michael@0 767 break;
michael@0 768 if(newPropPosition>=qOpening->position)
michael@0 769 continue;
michael@0 770 if(newProp==qOpening->contextDir)
michael@0 771 break;
michael@0 772 openingPosition=qOpening->position;
michael@0 773 dirProps[openingPosition]=dirProps[newPropPosition];
michael@0 774 closingPosition=-(qOpening->match);
michael@0 775 dirProps[closingPosition]= newProp; /* can never be AL */
michael@0 776 qOpening->match=0; /* prevent further changes */
michael@0 777 fixN0c(bd, k, openingPosition, newProp);
michael@0 778 fixN0c(bd, k, closingPosition, newProp);
michael@0 779 }
michael@0 780 }
michael@0 781
michael@0 782 /* handle strong characters, digits and candidates for closing brackets */
michael@0 783 static UBool /* return TRUE if success */
michael@0 784 bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) {
michael@0 785 IsoRun *pLastIsoRun;
michael@0 786 Opening *pOpening, *qOpening;
michael@0 787 DirProp *dirProps, newProp;
michael@0 788 UBiDiDirection direction;
michael@0 789 uint16_t flag;
michael@0 790 int32_t i, k;
michael@0 791 UBool stable;
michael@0 792 UChar c, match;
michael@0 793 dirProps=bd->pBiDi->dirProps;
michael@0 794 if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */
michael@0 795 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
michael@0 796 /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */
michael@0 797 if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL))
michael@0 798 dirProp=pLastIsoRun->lastStrong;
michael@0 799 /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */
michael@0 800 if(dirProp==EN) {
michael@0 801 if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) {
michael@0 802 dirProp=L;
michael@0 803 if(!bd->isNumbersSpecial)
michael@0 804 dirProps[position]=ENL;
michael@0 805 }
michael@0 806 else {
michael@0 807 dirProp=pLastIsoRun->lastStrong; /* may be R or AL */
michael@0 808 if(!bd->isNumbersSpecial)
michael@0 809 dirProps[position]= dirProp==AL ? AN : ENR;
michael@0 810 }
michael@0 811 }
michael@0 812 pLastIsoRun->lastStrong=dirProp;
michael@0 813 pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp);
michael@0 814 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position;
michael@0 815 if(dirProp==AL || dirProp==AN)
michael@0 816 dirProp=R;
michael@0 817 flag=DIRPROP_FLAG(dirProp);
michael@0 818 /* strong characters found after an unmatched opening bracket
michael@0 819 must be noted for possibly applying N0b */
michael@0 820 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
michael@0 821 bd->openings[i].flags|=flag;
michael@0 822 return TRUE;
michael@0 823 }
michael@0 824 if(dirProp!=ON)
michael@0 825 return TRUE;
michael@0 826 /* First see if it is a matching closing bracket. Hopefully, this is more
michael@0 827 efficient than checking if it is a closing bracket at all */
michael@0 828 c=bd->pBiDi->text[position];
michael@0 829 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
michael@0 830 for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) {
michael@0 831 if(bd->openings[i].match!=c)
michael@0 832 continue;
michael@0 833 /* We have a match */
michael@0 834 pOpening=&bd->openings[i];
michael@0 835 direction=pLastIsoRun->level&1;
michael@0 836 stable=TRUE; /* assume stable until proved otherwise */
michael@0 837
michael@0 838 /* The stable flag is set when brackets are paired and their
michael@0 839 level is resolved and cannot be changed by what will be
michael@0 840 found later in the source string.
michael@0 841 An unstable match can occur only when applying N0c, where
michael@0 842 the resolved level depends on the preceding context, and
michael@0 843 this context may be affected by text occurring later.
michael@0 844 Example: RTL paragraph containing: abc[(latin) HEBREW]
michael@0 845 When the closing parenthesis is encountered, it appears
michael@0 846 that N0c1 must be applied since 'abc' sets an opposite
michael@0 847 direction context and both parentheses receive level 2.
michael@0 848 However, when the closing square bracket is processed,
michael@0 849 N0b applies because of 'HEBREW' being included within the
michael@0 850 brackets, thus the square brackets are treated like R and
michael@0 851 receive level 1. However, this changes the preceding
michael@0 852 context of the opening parenthesis, and it now appears
michael@0 853 that N0c2 must be applied to the parentheses rather than
michael@0 854 N0c1. */
michael@0 855
michael@0 856 if((direction==0 && pOpening->flags&FOUND_L) ||
michael@0 857 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
michael@0 858 newProp=direction;
michael@0 859 }
michael@0 860 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
michael@0 861 if(direction!=pOpening->contextDir) {
michael@0 862 newProp=pOpening->contextDir; /* N0c1 */
michael@0 863 /* it is stable if there is no preceding text or in
michael@0 864 conditions too complicated and not worth checking */
michael@0 865 stable=(i==pLastIsoRun->start);
michael@0 866 }
michael@0 867 else
michael@0 868 newProp=direction; /* N0c2 */
michael@0 869 }
michael@0 870 else {
michael@0 871 newProp=BN; /* N0d */
michael@0 872 }
michael@0 873 if(newProp!=BN) {
michael@0 874 dirProps[pOpening->position]=newProp;
michael@0 875 dirProps[position]=newProp;
michael@0 876 pLastIsoRun->contextDir=newProp;
michael@0 877 pLastIsoRun->contextPos=position;
michael@0 878 }
michael@0 879 /* Update nested N0c pairs that may be affected */
michael@0 880 if(newProp==direction)
michael@0 881 fixN0c(bd, i, pOpening->position, newProp);
michael@0 882 if(stable) {
michael@0 883 pLastIsoRun->limit=i; /* forget any brackets nested within this pair */
michael@0 884 /* remove lower located synonyms if any */
michael@0 885 while(pLastIsoRun->limit>pLastIsoRun->start &&
michael@0 886 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
michael@0 887 pLastIsoRun->limit--;
michael@0 888 }
michael@0 889 else {
michael@0 890 pOpening->match=-position;
michael@0 891 /* neutralize lower located synonyms if any */
michael@0 892 k=i-1;
michael@0 893 while(k>=pLastIsoRun->start &&
michael@0 894 bd->openings[k].position==pOpening->position)
michael@0 895 bd->openings[k--].match=0;
michael@0 896 /* neutralize any unmatched opening between the current pair;
michael@0 897 this will also neutralize higher located synonyms if any */
michael@0 898 for(k=i+1; k<pLastIsoRun->limit; k++) {
michael@0 899 qOpening=&bd->openings[k];
michael@0 900 if(qOpening->position>=position)
michael@0 901 break;
michael@0 902 if(qOpening->match>0)
michael@0 903 qOpening->match=0;
michael@0 904 }
michael@0 905 }
michael@0 906 return TRUE;
michael@0 907 }
michael@0 908 /* We get here only if the ON character was not a matching closing bracket */
michael@0 909 /* Now see if it is an opening bracket */
michael@0 910 match=u_getBidiPairedBracket(c); /* get the matching char */
michael@0 911 if(match==c) /* if no matching char */
michael@0 912 return TRUE;
michael@0 913 if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN)
michael@0 914 return TRUE; /* not an opening bracket */
michael@0 915 /* special case: process synonyms
michael@0 916 create an opening entry for each synonym */
michael@0 917 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
michael@0 918 if(!bracketAddOpening(bd, 0x3009, position))
michael@0 919 return FALSE;
michael@0 920 }
michael@0 921 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
michael@0 922 if(!bracketAddOpening(bd, 0x232A, position))
michael@0 923 return FALSE;
michael@0 924 }
michael@0 925 return bracketAddOpening(bd, match, position);
michael@0 926 }
michael@0 927
michael@0 928 /* perform (X1)..(X9) ------------------------------------------------------- */
michael@0 929
michael@0 930 /* determine if the text is mixed-directional or single-directional */
michael@0 931 static UBiDiDirection
michael@0 932 directionFromFlags(UBiDi *pBiDi) {
michael@0 933 Flags flags=pBiDi->flags;
michael@0 934 /* if the text contains AN and neutrals, then some neutrals may become RTL */
michael@0 935 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
michael@0 936 return UBIDI_LTR;
michael@0 937 } else if(!(flags&MASK_LTR)) {
michael@0 938 return UBIDI_RTL;
michael@0 939 } else {
michael@0 940 return UBIDI_MIXED;
michael@0 941 }
michael@0 942 }
michael@0 943
michael@0 944 /*
michael@0 945 * Resolve the explicit levels as specified by explicit embedding codes.
michael@0 946 * Recalculate the flags to have them reflect the real properties
michael@0 947 * after taking the explicit embeddings into account.
michael@0 948 *
michael@0 949 * The BiDi algorithm is designed to result in the same behavior whether embedding
michael@0 950 * levels are externally specified (from "styled text", supposedly the preferred
michael@0 951 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
michael@0 952 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
michael@0 953 * However, in a real implementation, the removal of these codes and their index
michael@0 954 * positions in the plain text is undesirable since it would result in
michael@0 955 * reallocated, reindexed text.
michael@0 956 * Instead, this implementation leaves the codes in there and just ignores them
michael@0 957 * in the subsequent processing.
michael@0 958 * In order to get the same reordering behavior, positions with a BN or a not-isolate
michael@0 959 * explicit embedding code just get the same level assigned as the last "real"
michael@0 960 * character.
michael@0 961 *
michael@0 962 * Some implementations, not this one, then overwrite some of these
michael@0 963 * directionality properties at "real" same-level-run boundaries by
michael@0 964 * L or R codes so that the resolution of weak types can be performed on the
michael@0 965 * entire paragraph at once instead of having to parse it once more and
michael@0 966 * perform that resolution on same-level-runs.
michael@0 967 * This limits the scope of the implicit rules in effectively
michael@0 968 * the same way as the run limits.
michael@0 969 *
michael@0 970 * Instead, this implementation does not modify these codes, except for
michael@0 971 * paired brackets whose properties (ON) may be replaced by L or R.
michael@0 972 * On one hand, the paragraph has to be scanned for same-level-runs, but
michael@0 973 * on the other hand, this saves another loop to reset these codes,
michael@0 974 * or saves making and modifying a copy of dirProps[].
michael@0 975 *
michael@0 976 *
michael@0 977 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
michael@0 978 *
michael@0 979 *
michael@0 980 * Handling the stack of explicit levels (Xn):
michael@0 981 *
michael@0 982 * With the BiDi stack of explicit levels, as pushed with each
michael@0 983 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
michael@0 984 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
michael@0 985 *
michael@0 986 * In order to have a correct push-pop semantics even in the case of overflows,
michael@0 987 * overflow counters and a valid isolate counter are used as described in UAX#9
michael@0 988 * section 3.3.2 "Explicit Levels and Directions".
michael@0 989 *
michael@0 990 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
michael@0 991 */
michael@0 992 static UBiDiDirection
michael@0 993 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
michael@0 994 DirProp *dirProps=pBiDi->dirProps;
michael@0 995 UBiDiLevel *levels=pBiDi->levels;
michael@0 996 const UChar *text=pBiDi->text;
michael@0 997
michael@0 998 int32_t i=0, length=pBiDi->length;
michael@0 999 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
michael@0 1000 DirProp dirProp;
michael@0 1001 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
michael@0 1002 UBiDiDirection direction;
michael@0 1003 pBiDi->isolateCount=0;
michael@0 1004
michael@0 1005 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
michael@0 1006
michael@0 1007 /* determine if the text is mixed-directional or single-directional */
michael@0 1008 direction=directionFromFlags(pBiDi);
michael@0 1009
michael@0 1010 /* we may not need to resolve any explicit levels */
michael@0 1011 if((direction!=UBIDI_MIXED)) {
michael@0 1012 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
michael@0 1013 return direction;
michael@0 1014 }
michael@0 1015 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
michael@0 1016 /* inverse BiDi: mixed, but all characters are at the same embedding level */
michael@0 1017 /* set all levels to the paragraph level */
michael@0 1018 int32_t paraIndex, start, limit;
michael@0 1019 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
michael@0 1020 if(paraIndex==0)
michael@0 1021 start=0;
michael@0 1022 else
michael@0 1023 start=pBiDi->paras[paraIndex-1].limit;
michael@0 1024 limit=pBiDi->paras[paraIndex].limit;
michael@0 1025 level=pBiDi->paras[paraIndex].level;
michael@0 1026 for(i=start; i<limit; i++)
michael@0 1027 levels[i]=level;
michael@0 1028 }
michael@0 1029 return direction; /* no bracket matching for inverse BiDi */
michael@0 1030 }
michael@0 1031 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
michael@0 1032 /* no embeddings, set all levels to the paragraph level */
michael@0 1033 /* we still have to perform bracket matching */
michael@0 1034 int32_t paraIndex, start, limit;
michael@0 1035 BracketData bracketData;
michael@0 1036 bracketInit(pBiDi, &bracketData);
michael@0 1037 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
michael@0 1038 if(paraIndex==0)
michael@0 1039 start=0;
michael@0 1040 else
michael@0 1041 start=pBiDi->paras[paraIndex-1].limit;
michael@0 1042 limit=pBiDi->paras[paraIndex].limit;
michael@0 1043 level=pBiDi->paras[paraIndex].level;
michael@0 1044 for(i=start; i<limit; i++) {
michael@0 1045 levels[i]=level;
michael@0 1046 dirProp=dirProps[i];
michael@0 1047 if(dirProp==B) {
michael@0 1048 if((i+1)<length) {
michael@0 1049 if(text[i]==CR && text[i+1]==LF)
michael@0 1050 continue; /* skip CR when followed by LF */
michael@0 1051 bracketProcessB(&bracketData, level);
michael@0 1052 }
michael@0 1053 continue;
michael@0 1054 }
michael@0 1055 if(!bracketProcessChar(&bracketData, i, dirProp)) {
michael@0 1056 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1057 return UBIDI_LTR;
michael@0 1058 }
michael@0 1059 }
michael@0 1060 }
michael@0 1061 return direction;
michael@0 1062 }
michael@0 1063 {
michael@0 1064 /* continue to perform (Xn) */
michael@0 1065
michael@0 1066 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
michael@0 1067 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
michael@0 1068 UBiDiLevel embeddingLevel=level, newLevel;
michael@0 1069 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
michael@0 1070 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
michael@0 1071
michael@0 1072 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
michael@0 1073 but we need one more entry as base */
michael@0 1074 uint32_t stackLast=0;
michael@0 1075 int32_t overflowIsolateCount=0;
michael@0 1076 int32_t overflowEmbeddingCount=0;
michael@0 1077 int32_t validIsolateCount=0;
michael@0 1078 BracketData bracketData;
michael@0 1079 bracketInit(pBiDi, &bracketData);
michael@0 1080 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
michael@0 1081
michael@0 1082 /* recalculate the flags */
michael@0 1083 flags=0;
michael@0 1084
michael@0 1085 for(i=0; i<length; ++i) {
michael@0 1086 dirProp=dirProps[i];
michael@0 1087 switch(dirProp) {
michael@0 1088 case LRE:
michael@0 1089 case RLE:
michael@0 1090 case LRO:
michael@0 1091 case RLO:
michael@0 1092 /* (X2, X3, X4, X5) */
michael@0 1093 flags|=DIRPROP_FLAG(BN);
michael@0 1094 if (dirProp==LRE || dirProp==LRO)
michael@0 1095 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
michael@0 1096 else
michael@0 1097 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
michael@0 1098 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
michael@0 1099 overflowEmbeddingCount==0) {
michael@0 1100 lastCcPos=i;
michael@0 1101 embeddingLevel=newLevel;
michael@0 1102 if(dirProp==LRO || dirProp==RLO)
michael@0 1103 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
michael@0 1104 stackLast++;
michael@0 1105 stack[stackLast]=embeddingLevel;
michael@0 1106 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
michael@0 1107 since this has already been done for newLevel which is
michael@0 1108 the source for embeddingLevel.
michael@0 1109 */
michael@0 1110 } else {
michael@0 1111 dirProps[i]|=IGNORE_CC;
michael@0 1112 if(overflowIsolateCount==0)
michael@0 1113 overflowEmbeddingCount++;
michael@0 1114 }
michael@0 1115 break;
michael@0 1116 case PDF:
michael@0 1117 /* (X7) */
michael@0 1118 flags|=DIRPROP_FLAG(BN);
michael@0 1119 /* handle all the overflow cases first */
michael@0 1120 if(overflowIsolateCount) {
michael@0 1121 dirProps[i]|=IGNORE_CC;
michael@0 1122 break;
michael@0 1123 }
michael@0 1124 if(overflowEmbeddingCount) {
michael@0 1125 dirProps[i]|=IGNORE_CC;
michael@0 1126 overflowEmbeddingCount--;
michael@0 1127 break;
michael@0 1128 }
michael@0 1129 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
michael@0 1130 lastCcPos=i;
michael@0 1131 stackLast--;
michael@0 1132 embeddingLevel=(UBiDiLevel)stack[stackLast];
michael@0 1133 } else
michael@0 1134 dirProps[i]|=IGNORE_CC;
michael@0 1135 break;
michael@0 1136 case LRI:
michael@0 1137 case RLI:
michael@0 1138 if(embeddingLevel!=previousLevel) {
michael@0 1139 bracketProcessBoundary(&bracketData, lastCcPos,
michael@0 1140 previousLevel, embeddingLevel);
michael@0 1141 previousLevel=embeddingLevel;
michael@0 1142 }
michael@0 1143 /* (X5a, X5b) */
michael@0 1144 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
michael@0 1145 level=embeddingLevel;
michael@0 1146 if(dirProp==LRI)
michael@0 1147 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
michael@0 1148 else
michael@0 1149 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
michael@0 1150 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
michael@0 1151 overflowEmbeddingCount==0) {
michael@0 1152 lastCcPos=i;
michael@0 1153 previousLevel=embeddingLevel;
michael@0 1154 validIsolateCount++;
michael@0 1155 if(validIsolateCount>pBiDi->isolateCount)
michael@0 1156 pBiDi->isolateCount=validIsolateCount;
michael@0 1157 embeddingLevel=newLevel;
michael@0 1158 stackLast++;
michael@0 1159 stack[stackLast]=embeddingLevel+ISOLATE;
michael@0 1160 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
michael@0 1161 } else {
michael@0 1162 dirProps[i]|=IGNORE_CC;
michael@0 1163 overflowIsolateCount++;
michael@0 1164 }
michael@0 1165 break;
michael@0 1166 case PDI:
michael@0 1167 if(embeddingLevel!=previousLevel) {
michael@0 1168 bracketProcessBoundary(&bracketData, lastCcPos,
michael@0 1169 previousLevel, embeddingLevel);
michael@0 1170 }
michael@0 1171 /* (X6a) */
michael@0 1172 if(overflowIsolateCount) {
michael@0 1173 dirProps[i]|=IGNORE_CC;
michael@0 1174 overflowIsolateCount--;
michael@0 1175 }
michael@0 1176 else if(validIsolateCount) {
michael@0 1177 lastCcPos=i;
michael@0 1178 overflowEmbeddingCount=0;
michael@0 1179 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
michael@0 1180 stackLast--; /* until the last isolate entry */
michael@0 1181 stackLast--; /* pop also the last isolate entry */
michael@0 1182 validIsolateCount--;
michael@0 1183 bracketProcessPDI(&bracketData);
michael@0 1184 } else
michael@0 1185 dirProps[i]|=IGNORE_CC;
michael@0 1186 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
michael@0 1187 previousLevel=level=embeddingLevel;
michael@0 1188 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel);
michael@0 1189 break;
michael@0 1190 case B:
michael@0 1191 level=GET_PARALEVEL(pBiDi, i);
michael@0 1192 if((i+1)<length) {
michael@0 1193 if(text[i]==CR && text[i+1]==LF)
michael@0 1194 break; /* skip CR when followed by LF */
michael@0 1195 overflowEmbeddingCount=overflowIsolateCount=0;
michael@0 1196 validIsolateCount=0;
michael@0 1197 stackLast=0;
michael@0 1198 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
michael@0 1199 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
michael@0 1200 bracketProcessB(&bracketData, embeddingLevel);
michael@0 1201 }
michael@0 1202 flags|=DIRPROP_FLAG(B);
michael@0 1203 break;
michael@0 1204 case BN:
michael@0 1205 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
michael@0 1206 /* they will get their levels set correctly in adjustWSLevels() */
michael@0 1207 flags|=DIRPROP_FLAG(BN);
michael@0 1208 break;
michael@0 1209 default:
michael@0 1210 /* all other types get the "real" level */
michael@0 1211 level=embeddingLevel;
michael@0 1212 if(embeddingLevel!=previousLevel) {
michael@0 1213 bracketProcessBoundary(&bracketData, lastCcPos,
michael@0 1214 previousLevel, embeddingLevel);
michael@0 1215 previousLevel=embeddingLevel;
michael@0 1216 }
michael@0 1217 if(level&UBIDI_LEVEL_OVERRIDE)
michael@0 1218 flags|=DIRPROP_FLAG_LR(level);
michael@0 1219 else
michael@0 1220 flags|=DIRPROP_FLAG(dirProp);
michael@0 1221 if(!bracketProcessChar(&bracketData, i, dirProp))
michael@0 1222 return -1;
michael@0 1223 break;
michael@0 1224 }
michael@0 1225
michael@0 1226 /*
michael@0 1227 * We need to set reasonable levels even on BN codes and
michael@0 1228 * explicit codes because we will later look at same-level runs (X10).
michael@0 1229 */
michael@0 1230 levels[i]=level;
michael@0 1231 if(i>0 && levels[i-1]!=level) {
michael@0 1232 flags|=DIRPROP_FLAG_MULTI_RUNS;
michael@0 1233 if(level&UBIDI_LEVEL_OVERRIDE)
michael@0 1234 flags|=DIRPROP_FLAG_O(level);
michael@0 1235 else
michael@0 1236 flags|=DIRPROP_FLAG_E(level);
michael@0 1237 }
michael@0 1238 if(DIRPROP_FLAG(dirProp)&MASK_ISO)
michael@0 1239 level=embeddingLevel;
michael@0 1240 }
michael@0 1241 if(flags&MASK_EMBEDDING) {
michael@0 1242 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
michael@0 1243 }
michael@0 1244 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
michael@0 1245 flags|=DIRPROP_FLAG(L);
michael@0 1246 }
michael@0 1247
michael@0 1248 /* subsequently, ignore the explicit codes and BN (X9) */
michael@0 1249
michael@0 1250 /* again, determine if the text is mixed-directional or single-directional */
michael@0 1251 pBiDi->flags=flags;
michael@0 1252 direction=directionFromFlags(pBiDi);
michael@0 1253 }
michael@0 1254 return direction;
michael@0 1255 }
michael@0 1256
michael@0 1257 /*
michael@0 1258 * Use a pre-specified embedding levels array:
michael@0 1259 *
michael@0 1260 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
michael@0 1261 * ignore all explicit codes (X9),
michael@0 1262 * and check all the preset levels.
michael@0 1263 *
michael@0 1264 * Recalculate the flags to have them reflect the real properties
michael@0 1265 * after taking the explicit embeddings into account.
michael@0 1266 */
michael@0 1267 static UBiDiDirection
michael@0 1268 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
michael@0 1269 DirProp *dirProps=pBiDi->dirProps;
michael@0 1270 DirProp dirProp;
michael@0 1271 UBiDiLevel *levels=pBiDi->levels;
michael@0 1272 int32_t isolateCount=0;
michael@0 1273
michael@0 1274 int32_t i, length=pBiDi->length;
michael@0 1275 Flags flags=0; /* collect all directionalities in the text */
michael@0 1276 UBiDiLevel level;
michael@0 1277 pBiDi->isolateCount=0;
michael@0 1278
michael@0 1279 for(i=0; i<length; ++i) {
michael@0 1280 level=levels[i];
michael@0 1281 dirProp=dirProps[i];
michael@0 1282 if(dirProp==LRI || dirProp==RLI) {
michael@0 1283 isolateCount++;
michael@0 1284 if(isolateCount>pBiDi->isolateCount)
michael@0 1285 pBiDi->isolateCount=isolateCount;
michael@0 1286 }
michael@0 1287 else if(dirProp==PDI)
michael@0 1288 isolateCount--;
michael@0 1289 else if(dirProp==B)
michael@0 1290 isolateCount=0;
michael@0 1291 if(level&UBIDI_LEVEL_OVERRIDE) {
michael@0 1292 /* keep the override flag in levels[i] but adjust the flags */
michael@0 1293 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
michael@0 1294 flags|=DIRPROP_FLAG_O(level);
michael@0 1295 } else {
michael@0 1296 /* set the flags */
michael@0 1297 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
michael@0 1298 }
michael@0 1299 if((level<GET_PARALEVEL(pBiDi, i) &&
michael@0 1300 !((0==level)&&(dirProp==B))) ||
michael@0 1301 (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
michael@0 1302 /* level out of bounds */
michael@0 1303 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 1304 return UBIDI_LTR;
michael@0 1305 }
michael@0 1306 }
michael@0 1307 if(flags&MASK_EMBEDDING) {
michael@0 1308 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
michael@0 1309 }
michael@0 1310
michael@0 1311 /* determine if the text is mixed-directional or single-directional */
michael@0 1312 pBiDi->flags=flags;
michael@0 1313 return directionFromFlags(pBiDi);
michael@0 1314 }
michael@0 1315
michael@0 1316 /******************************************************************
michael@0 1317 The Properties state machine table
michael@0 1318 *******************************************************************
michael@0 1319
michael@0 1320 All table cells are 8 bits:
michael@0 1321 bits 0..4: next state
michael@0 1322 bits 5..7: action to perform (if > 0)
michael@0 1323
michael@0 1324 Cells may be of format "n" where n represents the next state
michael@0 1325 (except for the rightmost column).
michael@0 1326 Cells may also be of format "s(x,y)" where x represents an action
michael@0 1327 to perform and y represents the next state.
michael@0 1328
michael@0 1329 *******************************************************************
michael@0 1330 Definitions and type for properties state table
michael@0 1331 *******************************************************************
michael@0 1332 */
/*
 * Helpers for impTabProps cells.
 *   IMPTABPROPS_COLUMNS  number of columns per table row
 *   IMPTABPROPS_RES      index of the rightmost ("Res") column
 *   GET_STATEPROPS       next state of a cell   (bits 0..4)
 *   GET_ACTIONPROPS      action of a cell       (bits 5..7)
 *   s(action, newState)  build a cell from an action and a next state
 * Every macro argument is parenthesized so that expression arguments
 * expand as expected.
 */
#define IMPTABPROPS_COLUMNS 16
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
michael@0 1338
/*
 * dirProp regrouped: maps each directional property, in the order listed
 * in the per-entry comments, to its column in impTabProps.
 */
static const uint8_t groupProp[] =
{
    0,      /* L   */
    1,      /* R   */
    2,      /* EN  */
    7,      /* ES  */
    8,      /* ET  */
    3,      /* AN  */
    9,      /* CS  */
    6,      /* B   */
    5,      /* S   */
    4,      /* WS  */
    4,      /* ON  */
    10,     /* LRE */
    10,     /* LRO */
    12,     /* AL  */
    10,     /* RLE */
    10,     /* RLO */
    10,     /* PDF */
    11,     /* NSM */
    10,     /* BN  */
    4,      /* FSI */
    4,      /* LRI */
    4,      /* RLI */
    4,      /* PDI */
    13,     /* ENL */
    14      /* ENR */
};

/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
michael@0 1345
michael@0 1346 /******************************************************************
michael@0 1347
michael@0 1348 PROPERTIES STATE TABLE
michael@0 1349
michael@0 1350 In table impTabProps,
michael@0 1351 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
michael@0 1352 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
michael@0 1353 - the Res column is the reduced property assigned to a run
michael@0 1354
michael@0 1355 Action 1: process current run1, init new run1
michael@0 1356 2: init new run2
michael@0 1357 3: process run1, process run2, init new run1
michael@0 1358 4: process run1, set run1=run2, init new run2
michael@0 1359
michael@0 1360 Notes:
michael@0 1361 1) This table is used in resolveImplicitLevels().
michael@0 1362 2) This table triggers actions when there is a change in the Bidi
michael@0 1363 property of incoming characters (action 1).
michael@0 1364 3) Most such property sequences are processed immediately (in
michael@0 1365 fact, passed to processPropertySeq()).
michael@0 1366 4) However, numbers are assembled as one sequence. This means
michael@0 1367 that undefined situations (like CS following digits, until
michael@0 1368 it is known if the next char will be a digit) are held until
michael@0 1369 following chars define them.
michael@0 1370 Example: digits followed by CS, then comes another CS or ON;
michael@0 1371 the digits will be processed, then the CS assigned
michael@0 1372 as the start of an ON sequence (action 3).
michael@0 1373 5) There are cases where more than one sequence must be
michael@0 1374 processed, for instance digits followed by CS followed by L:
michael@0 1375 the digits must be processed as one sequence, and the CS
michael@0 1376 must be processed as an ON sequence, all this before starting
michael@0 1377 assembling chars for the opening L sequence.
michael@0 1378
michael@0 1379
michael@0 1380 */
/* Properties state table (see the notes in the comment just above).
 * Each row is a state; each of the first 15 columns is an input property and
 * the last column (Res) is the reduced property assigned to a run.
 * A cell is either a bare next state or s(action, next state), built with the
 * s() macro in effect at this point (it is #undef'ed right after the table). */
michael@0 1381 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
michael@0 1382 {
michael@0 1383 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
michael@0 1384 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
michael@0 1385 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
michael@0 1386 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
michael@0 1387 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
michael@0 1388 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
michael@0 1389 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
michael@0 1390 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
michael@0 1391 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
michael@0 1392 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
michael@0 1393 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
michael@0 1394 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
michael@0 1395 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
michael@0 1396 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
michael@0 1397 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
michael@0 1398 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
michael@0 1399 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
michael@0 1400 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
michael@0 1401 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
michael@0 1402 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
michael@0 1403 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
michael@0 1404 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
michael@0 1405 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
michael@0 1406 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
michael@0 1407 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
michael@0 1408 };
michael@0 1409
michael@0 1410 /* we must undef macro s because the levels tables have a different
michael@0 1411 * structure (4 bits for action and 4 bits for next state).
michael@0 1412 */
michael@0 1413 #undef s
michael@0 1414
michael@0 1415 /******************************************************************
michael@0 1416 The levels state machine tables
michael@0 1417 *******************************************************************
michael@0 1418
michael@0 1419 All table cells are 8 bits:
michael@0 1420 bits 0..3: next state
michael@0 1421 bits 4..7: action to perform (if > 0)
michael@0 1422
michael@0 1423 Cells may be of format "n" where n represents the next state
michael@0 1424 (except for the rightmost column).
michael@0 1425 Cells may also be of format "s(x,y)" where x represents an action
michael@0 1426 to perform and y represents the next state.
michael@0 1427
michael@0 1428 This format limits each table to 16 states and to 15 actions.
michael@0 1429
michael@0 1430 *******************************************************************
michael@0 1431 Definitions and type for levels state tables
michael@0 1432 *******************************************************************
michael@0 1433 */
michael@0 1434 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2) /* input columns up to DirProp_B, plus the Res column */
michael@0 1435 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) /* index of the rightmost (Res) column */
michael@0 1436 #define GET_STATE(cell) ((cell)&0x0f) /* bits 0..3 of a cell: next state */
michael@0 1437 #define GET_ACTION(cell) ((cell)>>4) /* bits 4..7 of a cell: action to perform */
michael@0 1438 #define s(action, newState) ((uint8_t)(newState+(action<<4))) /* packs a cell; NOTE(review): arguments are not parenthesized, so pass simple values only */
michael@0 1439
michael@0 1440 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; /* levels state table: one row per state */
michael@0 1441 typedef uint8_t ImpAct[]; /* maps a table's local action numbers to the full list of actions */
michael@0 1442
michael@0 1443 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
michael@0 1444 * instead of having a pair of ImpTab and a pair of ImpAct.
michael@0 1445 */
/* Groups two levels tables with their two action maps.
 * resolveImplicitLevels() picks entry [runLevel&1] of each array through
 * pBiDi->pImpTabPair, so index 0 serves even run levels and index 1 odd ones. */
michael@0 1446 typedef struct ImpTabPair {
michael@0 1447 const void * pImpTab[2]; /* cast to const ImpTab * by the user */
michael@0 1448 const void * pImpAct[2]; /* cast to const ImpAct * by the user */
michael@0 1449 } ImpTabPair;
michael@0 1450
michael@0 1451 /******************************************************************
michael@0 1452
michael@0 1453 LEVELS STATE TABLES
michael@0 1454
michael@0 1455 In all levels state tables,
michael@0 1456 - state 0 is the initial state
michael@0 1457 - the Res column is the increment to add to the text level
michael@0 1458 for this property sequence.
michael@0 1459
michael@0 1460 The impAct arrays for each table of a pair map the local action
michael@0 1461 numbers of the table to the total list of actions. For instance,
michael@0 1462 action 2 in a given table corresponds to the action number which
michael@0 1463 appears in entry [2] of the impAct array for that table.
michael@0 1464 The first entry of all impAct arrays must be 0.
michael@0 1465
michael@0 1466 Action 1: init conditional sequence
michael@0 1467 2: prepend conditional sequence to current sequence
michael@0 1468 3: set ON sequence to new level - 1
michael@0 1469 4: init EN/AN/ON sequence
michael@0 1470 5: fix EN/AN/ON sequence followed by R
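michael@0 1471 6: set previous level sequence to level 2
NOTE: processPropertySeq() implements actions 1 to 12; see the case
comments in its switch statement for what each action does.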
michael@0 1472
michael@0 1473 Notes:
michael@0 1474 1) These tables are used in processPropertySeq(). The input
michael@0 1475 is property sequences as determined by resolveImplicitLevels.
michael@0 1476 2) Most such property sequences are processed immediately
michael@0 1477 (levels are assigned).
michael@0 1478 3) However, some sequences cannot be assigned a final level till
michael@0 1479 one or more following sequences are received. For instance,
michael@0 1480 ON following an R sequence within an even-level paragraph.
michael@0 1481 If the following sequence is R, the ON sequence will be
michael@0 1482 assigned basic run level+1, and so will the R sequence.
michael@0 1483 4) S is generally handled like ON, since its level will be fixed
michael@0 1484 to paragraph level in adjustWSLevels().
michael@0 1485
michael@0 1486 */
michael@0 1487
michael@0 1488 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
michael@0 1489 /* In this table, conditional sequences receive the higher possible level
michael@0 1490 until proven otherwise. Only actions 1 and 2 appear in the cells below.
michael@0 1491 */
michael@0 1492 {
michael@0 1493 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1494 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
michael@0 1495 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
michael@0 1496 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
michael@0 1497 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
michael@0 1498 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
michael@0 1499 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
michael@0 1500 };
michael@0 1501 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
michael@0 1502 /* In this table, conditional sequences receive the lower possible level
michael@0 1503 until proven otherwise. Only actions 1 and 2 appear in the cells below.
michael@0 1504 */
michael@0 1505 {
michael@0 1506 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1507 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
michael@0 1508 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
michael@0 1509 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
michael@0 1510 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
michael@0 1511 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
michael@0 1512 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
michael@0 1513 };
michael@0 1514 static const ImpAct impAct0 = {0,1,2,3,4,5,6}; /* identity map: local action n is action n */
michael@0 1515 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, /* [0]: selected when runLevel&1 is 0 */
michael@0 1516 &impTabR_DEFAULT}, /* [1]: selected when runLevel&1 is 1 */
michael@0 1517 {&impAct0, &impAct0}};
michael@0 1518
michael@0 1519 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
michael@0 1520 /* In this table, conditional sequences receive the higher possible level
michael@0 1521 until proven otherwise. Only actions 1 and 2 appear in the cells below.
michael@0 1522 */
michael@0 1523 {
michael@0 1524 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1525 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
michael@0 1526 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
michael@0 1527 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 },
michael@0 1528 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 },
michael@0 1529 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
michael@0 1530 };
michael@0 1531 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
michael@0 1532 &impTabR_DEFAULT}, /* the odd-level table is shared with impTab_DEFAULT */
michael@0 1533 {&impAct0, &impAct0}};
michael@0 1534
michael@0 1535 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
michael@0 1536 /* In this table, EN/AN+ON sequences receive levels as if associated with R
michael@0 1537 until proven that there is L or sor/eor on both sides. AN is handled like EN.
michael@0 1538 Only actions 1 and 2 appear in the cells below. */
michael@0 1539 {
michael@0 1540 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1541 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
michael@0 1542 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
michael@0 1543 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
michael@0 1544 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
michael@0 1545 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
michael@0 1546 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
michael@0 1547 };
michael@0 1548 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
michael@0 1549 /* In this table, EN/AN+ON sequences receive levels as if associated with R
michael@0 1550 until proven that there is L on both sides. AN is handled like EN.
michael@0 1551 Only actions 1 and 2 appear in the cells below. */
michael@0 1552 {
michael@0 1553 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1554 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
michael@0 1555 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
michael@0 1556 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
michael@0 1557 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
michael@0 1558 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
michael@0 1559 };
michael@0 1560 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
michael@0 1561 {&impTabL_GROUP_NUMBERS_WITH_R,
michael@0 1562 &impTabR_GROUP_NUMBERS_WITH_R},
michael@0 1563 {&impAct0, &impAct0}}; /* both tables use the identity action map */
michael@0 1564
michael@0 1565
michael@0 1566 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
michael@0 1567 /* This table is identical to the Default LTR table except that EN and AN are
michael@0 1568 handled like L: in every row the EN and AN cells equal the L cell.
michael@0 1569 */
michael@0 1570 {
michael@0 1571 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1572 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
michael@0 1573 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
michael@0 1574 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
michael@0 1575 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
michael@0 1576 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
michael@0 1577 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
michael@0 1578 };
michael@0 1579 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
michael@0 1580 /* This table is identical to the Default RTL table except that EN and AN are
michael@0 1581 handled like L: in every row the EN and AN cells equal the L cell.
michael@0 1582 */
michael@0 1583 {
michael@0 1584 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1585 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
michael@0 1586 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
michael@0 1587 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
michael@0 1588 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
michael@0 1589 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
michael@0 1590 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
michael@0 1591 };
michael@0 1592 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
michael@0 1593 {&impTabL_INVERSE_NUMBERS_AS_L,
michael@0 1594 &impTabR_INVERSE_NUMBERS_AS_L},
michael@0 1595 {&impAct0, &impAct0}}; /* both tables use the identity action map */
michael@0 1596
michael@0 1597 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
michael@0 1598 /* In this table, conditional sequences receive the lower possible level
michael@0 1599 until proven otherwise. The cells use local actions 1 to 3; the pairs that
michael@0 1600 reference this table translate them through impAct1 = {0,1,11,12}. */
michael@0 1601 {
michael@0 1602 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1603 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
michael@0 1604 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
michael@0 1605 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
michael@0 1606 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
michael@0 1607 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
michael@0 1608 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
michael@0 1609 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
michael@0 1610 };
michael@0 1611 static const ImpAct impAct1 = {0,1,11,12}; /* local actions 2 and 3 map to actions 11 and 12 */
michael@0 1612 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
michael@0 1613 */
michael@0 1614 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
michael@0 1615 {&impTabL_DEFAULT,
michael@0 1616 &impTabR_INVERSE_LIKE_DIRECT},
michael@0 1617 {&impAct0, &impAct1}}; /* impAct1 goes with the odd-level table */
michael@0 1618
michael@0 1619 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
michael@0 1620 /* The case handled in this table is (visually): R EN L
michael@0 1621 Actions 1 to 6 appear in the cells below. */
michael@0 1622 {
michael@0 1623 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1624 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
michael@0 1625 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
michael@0 1626 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
michael@0 1627 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
michael@0 1628 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
michael@0 1629 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
michael@0 1630 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
michael@0 1631 };
michael@0 1632 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
michael@0 1633 /* The cases handled in this table are (visually): R EN L
michael@0 1634 R L AN L
michael@0 1635 The cells use local actions 1 to 5; the pairs that reference this table translate them through impAct2 = {0,1,7,8,9,10}. */
michael@0 1636 {
michael@0 1637 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1638 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
michael@0 1639 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
michael@0 1640 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
michael@0 1641 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
michael@0 1642 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
michael@0 1643 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
michael@0 1644 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
michael@0 1645 };
michael@0 1646 static const ImpAct impAct2 = {0,1,7,8,9,10}; /* local actions 2 to 5 map to actions 7 to 10 */
michael@0 1647 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
michael@0 1648 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
michael@0 1649 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
michael@0 1650 {&impAct0, &impAct2}}; /* impAct2 goes with the odd-level table */
michael@0 1651
/* No dedicated tables: combines the NUMBERS_SPECIAL even-level table with the
 * INVERSE_LIKE_DIRECT odd-level table and its action map impAct1. */
michael@0 1652 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
michael@0 1653 {&impTabL_NUMBERS_SPECIAL,
michael@0 1654 &impTabR_INVERSE_LIKE_DIRECT},
michael@0 1655 {&impAct0, &impAct1}};
michael@0 1656
michael@0 1657 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
michael@0 1658 /* The case handled in this table is (visually): R EN L
michael@0 1659 Actions 1 and 3 to 6 appear in the cells below. */
michael@0 1660 {
michael@0 1661 /* L , R , EN , AN , ON , S , B , Res */
michael@0 1662 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
michael@0 1663 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
michael@0 1664 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
michael@0 1665 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
michael@0 1666 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
michael@0 1667 };
michael@0 1668 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
michael@0 1669 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
michael@0 1670 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, /* odd-level table shared with impTab_INVERSE_LIKE_DIRECT_WITH_MARKS */
michael@0 1671 {&impAct0, &impAct2}};
michael@0 1672
michael@0 1673 #undef s /* the levels-table cell macro s(action, newState) is not available past this point */
michael@0 1674
michael@0 1675 typedef struct { /* levels state machine context, set up in resolveImplicitLevels() and updated by processPropertySeq() */
michael@0 1676 const ImpTab * pImpTab; /* level table pointer */
michael@0 1677 const ImpAct * pImpAct; /* action map array */
michael@0 1678 int32_t startON; /* start of ON sequence, initialized to -1 */
michael@0 1679 int32_t startL2EN; /* start of level 2 sequence; initialized to -1, and -2 is also stored by action 5 */
michael@0 1680 int32_t lastStrongRTL; /* index of last found R or AL, initialized to -1 */
michael@0 1681 int32_t state; /* current state (row index into *pImpTab) */
michael@0 1682 int32_t runStart; /* start position of the run */
michael@0 1683 UBiDiLevel runLevel; /* run level before implicit solving */
michael@0 1684 } LevState;
michael@0 1685
michael@0 1686 /*------------------------------------------------------------------------*/
michael@0 1687
/*
 * Appends a Point {pos, flag} to pBiDi->insertPoints.
 * The array is allocated with FIRSTALLOC entries on first use and doubled
 * whenever it is full. The function returns void: on allocation failure it
 * stores U_MEMORY_ALLOCATION_ERROR in insertPoints.errorCode and returns
 * without adding the point.
 */
michael@0 1688 static void
michael@0 1689 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
michael@0 1690 /* param pos: position where to insert
michael@0 1691 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
michael@0 1692 */
michael@0 1693 {
michael@0 1694 #define FIRSTALLOC 10
michael@0 1695 Point point;
michael@0 1696 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
michael@0 1697
michael@0 1698 if (pInsertPoints->capacity == 0) /* nothing allocated yet */
michael@0 1699 {
michael@0 1700 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
michael@0 1701 if (pInsertPoints->points == NULL)
michael@0 1702 {
michael@0 1703 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1704 return;
michael@0 1705 }
michael@0 1706 pInsertPoints->capacity=FIRSTALLOC;
michael@0 1707 }
michael@0 1708 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
michael@0 1709 {
michael@0 1710 void * savePoints=pInsertPoints->points; /* kept so the old array survives a failed realloc */
michael@0 1711 pInsertPoints->points=uprv_realloc(pInsertPoints->points,
michael@0 1712 pInsertPoints->capacity*2*sizeof(Point)); /* NOTE(review): the size computation is not checked for overflow */
michael@0 1713 if (pInsertPoints->points == NULL)
michael@0 1714 {
michael@0 1715 pInsertPoints->points=savePoints;
michael@0 1716 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 1717 return;
michael@0 1718 }
michael@0 1719 else pInsertPoints->capacity*=2;
michael@0 1720 }
michael@0 1721 point.pos=pos;
michael@0 1722 point.flag=flag;
michael@0 1723 pInsertPoints->points[pInsertPoints->size]=point;
michael@0 1724 pInsertPoints->size++;
michael@0 1725 #undef FIRSTALLOC
michael@0 1726 }
michael@0 1727
michael@0 1728 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
michael@0 1729
michael@0 1730 /*
michael@0 1731 * This implementation of the (Wn) rules applies all rules in one pass.
michael@0 1732 * In order to do so, it needs a look-ahead of typically 1 character
michael@0 1733 * (except for W5: sequences of ET) and keeps track of changes
michael@0 1734 * in a rule Wp that affect a later Wq (p<q).
michael@0 1735 *
michael@0 1736 * The (Nn) and (In) rules are also performed in that same single loop,
michael@0 1737 * but effectively one iteration behind for white space.
michael@0 1738 *
michael@0 1739 * Since all implicit rules are performed in one step, it is not necessary
michael@0 1740 * to actually store the intermediate directional properties in dirProps[].
michael@0 1741 */
michael@0 1742
/*
 * Feeds one property sequence [start, limit) into the levels state machine.
 * The cell at (current state, _prop) of pLevState->pImpTab gives the next
 * state and a local action number; the action is translated through
 * pLevState->pImpAct and executed by the switch below. Afterwards, if the
 * new state's Res increment is nonzero or an action moved start back before
 * the original start, levels[start..limit) are set to runLevel + increment.
 *
 * param _prop: column index into the levels table for this sequence
 * param start, limit: bounds of the sequence in pBiDi->levels
 */
michael@0 1743 static void
michael@0 1744 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
michael@0 1745 int32_t start, int32_t limit) {
michael@0 1746 uint8_t cell, oldStateSeq, actionSeq;
michael@0 1747 const ImpTab * pImpTab=pLevState->pImpTab;
michael@0 1748 const ImpAct * pImpAct=pLevState->pImpAct;
michael@0 1749 UBiDiLevel * levels=pBiDi->levels;
michael@0 1750 UBiDiLevel level, addLevel;
michael@0 1751 InsertPoints * pInsertPoints;
michael@0 1752 int32_t start0, k;
michael@0 1753
michael@0 1754 start0=start; /* save original start position */
michael@0 1755 oldStateSeq=(uint8_t)pLevState->state;
michael@0 1756 cell=(*pImpTab)[oldStateSeq][_prop];
michael@0 1757 pLevState->state=GET_STATE(cell); /* isolate the new state */
michael@0 1758 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
michael@0 1759 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; /* level increment (Res) of the new state */
michael@0 1760
michael@0 1761 if(actionSeq) {
michael@0 1762 switch(actionSeq) {
michael@0 1763 case 1: /* init ON seq */
michael@0 1764 pLevState->startON=start0;
michael@0 1765 break;
michael@0 1766
michael@0 1767 case 2: /* prepend ON seq to current seq */
michael@0 1768 start=pLevState->startON;
michael@0 1769 break;
michael@0 1770
michael@0 1771 case 3: /* L or S after possible relevant EN/AN */
michael@0 1772 /* check if we had EN after R/AL */
michael@0 1773 if (pLevState->startL2EN >= 0) {
michael@0 1774 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
michael@0 1775 }
michael@0 1776 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
michael@0 1777 /* check if we had any relevant EN/AN after R/AL */
michael@0 1778 pInsertPoints=&(pBiDi->insertPoints);
michael@0 1779 if ((pInsertPoints->capacity == 0) ||
michael@0 1780 (pInsertPoints->size <= pInsertPoints->confirmed))
michael@0 1781 {
michael@0 1782 /* nothing, just clean up */
michael@0 1783 pLevState->lastStrongRTL=-1;
michael@0 1784 /* check if we have a pending conditional segment */
michael@0 1785 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
michael@0 1786 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
michael@0 1787 start=pLevState->startON; /* reset to basic run level */
michael@0 1788 }
michael@0 1789 if (_prop == DirProp_S) /* add LRM before S */
michael@0 1790 {
michael@0 1791 addPoint(pBiDi, start0, LRM_BEFORE);
michael@0 1792 pInsertPoints->confirmed=pInsertPoints->size;
michael@0 1793 }
michael@0 1794 break;
michael@0 1795 }
michael@0 1796 /* reset previous RTL cont to level for LTR text */
michael@0 1797 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
michael@0 1798 {
michael@0 1799 /* reset odd level, leave runLevel+2 as is */
michael@0 1800 levels[k]=(levels[k] - 2) & ~1;
michael@0 1801 }
michael@0 1802 /* mark insert points as confirmed */
michael@0 1803 pInsertPoints->confirmed=pInsertPoints->size;
michael@0 1804 pLevState->lastStrongRTL=-1;
michael@0 1805 if (_prop == DirProp_S) /* add LRM before S */
michael@0 1806 {
michael@0 1807 addPoint(pBiDi, start0, LRM_BEFORE);
michael@0 1808 pInsertPoints->confirmed=pInsertPoints->size;
michael@0 1809 }
michael@0 1810 break;
michael@0 1811
michael@0 1812 case 4: /* R/AL after possible relevant EN/AN */
michael@0 1813 /* just clean up */
michael@0 1814 pInsertPoints=&(pBiDi->insertPoints);
michael@0 1815 if (pInsertPoints->capacity > 0)
michael@0 1816 /* remove all non confirmed insert points */
michael@0 1817 pInsertPoints->size=pInsertPoints->confirmed;
michael@0 1818 pLevState->startON=-1;
michael@0 1819 pLevState->startL2EN=-1;
michael@0 1820 pLevState->lastStrongRTL=limit - 1;
michael@0 1821 break;
michael@0 1822
michael@0 1823 case 5: /* EN/AN after R/AL + possible cont */
michael@0 1824 /* check for real AN */
michael@0 1825 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
michael@0 1826 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
michael@0 1827 {
michael@0 1828 /* real AN */
michael@0 1829 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
michael@0 1830 {
michael@0 1831 /* just note the rightmost digit as a strong RTL */
michael@0 1832 pLevState->lastStrongRTL=limit - 1;
michael@0 1833 break;
michael@0 1834 }
michael@0 1835 if (pLevState->startL2EN >= 0) /* after EN, no AN */
michael@0 1836 {
michael@0 1837 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
michael@0 1838 pLevState->startL2EN=-2;
michael@0 1839 }
michael@0 1840 /* note AN */
michael@0 1841 addPoint(pBiDi, start0, LRM_BEFORE);
michael@0 1842 break;
michael@0 1843 }
michael@0 1844 /* if first EN/AN after R/AL */
michael@0 1845 if (pLevState->startL2EN == -1) {
michael@0 1846 pLevState->startL2EN=start0;
michael@0 1847 }
michael@0 1848 break;
michael@0 1849
michael@0 1850 case 6: /* note location of latest R/AL */
michael@0 1851 pLevState->lastStrongRTL=limit - 1;
michael@0 1852 pLevState->startON=-1;
michael@0 1853 break;
michael@0 1854
michael@0 1855 case 7: /* L after R+ON/EN/AN */
michael@0 1856 /* include possible adjacent number on the left */
michael@0 1857 for (k=start0-1; k>=0 && !(levels[k]&1); k--); /* empty body: scans back to the nearest odd level */
michael@0 1858 if(k>=0) {
michael@0 1859 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
michael@0 1860 pInsertPoints=&(pBiDi->insertPoints);
michael@0 1861 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
michael@0 1862 }
michael@0 1863 pLevState->startON=start0;
michael@0 1864 break;
michael@0 1865
michael@0 1866 case 8: /* AN after L */
michael@0 1867 /* AN numbers between L text on both sides may be trouble. */
michael@0 1868 /* tentatively bracket with LRMs; will be confirmed if followed by L */
michael@0 1869 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
michael@0 1870 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
michael@0 1871 break;
michael@0 1872
michael@0 1873 case 9: /* R after L+ON/EN/AN */
michael@0 1874 /* false alert, infirm LRMs around previous AN */
michael@0 1875 pInsertPoints=&(pBiDi->insertPoints);
michael@0 1876 pInsertPoints->size=pInsertPoints->confirmed;
michael@0 1877 if (_prop == DirProp_S) /* add RLM before S */
michael@0 1878 {
michael@0 1879 addPoint(pBiDi, start0, RLM_BEFORE);
michael@0 1880 pInsertPoints->confirmed=pInsertPoints->size;
michael@0 1881 }
michael@0 1882 break;
michael@0 1883
michael@0 1884 case 10: /* L after L+ON/AN */
michael@0 1885 level=pLevState->runLevel + addLevel;
michael@0 1886 for(k=pLevState->startON; k<start0; k++) {
michael@0 1887 if (levels[k]<level)
michael@0 1888 levels[k]=level;
michael@0 1889 }
michael@0 1890 pInsertPoints=&(pBiDi->insertPoints);
michael@0 1891 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
michael@0 1892 pLevState->startON=start0;
michael@0 1893 break;
michael@0 1894
michael@0 1895 case 11: /* L after L+ON+EN/AN/ON */
michael@0 1896 level=pLevState->runLevel;
michael@0 1897 for(k=start0-1; k>=pLevState->startON; k--) {
michael@0 1898 if(levels[k]==level+3) {
michael@0 1899 while(levels[k]==level+3) {
michael@0 1900 levels[k--]-=2;
michael@0 1901 }
michael@0 1902 while(levels[k]==level) {
michael@0 1903 k--;
michael@0 1904 }
michael@0 1905 }
michael@0 1906 if(levels[k]==level+2) {
michael@0 1907 levels[k]=level;
michael@0 1908 continue;
michael@0 1909 }
michael@0 1910 levels[k]=level+1;
michael@0 1911 }
michael@0 1912 break;
michael@0 1913
michael@0 1914 case 12: /* R after L+ON+EN/AN/ON */
michael@0 1915 level=pLevState->runLevel+1;
michael@0 1916 for(k=start0-1; k>=pLevState->startON; k--) {
michael@0 1917 if(levels[k]>level) {
michael@0 1918 levels[k]-=2;
michael@0 1919 }
michael@0 1920 }
michael@0 1921 break;
michael@0 1922
michael@0 1923 default: /* we should never get here */
michael@0 1924 U_ASSERT(FALSE);
michael@0 1925 break;
michael@0 1926 }
michael@0 1927 }
michael@0 1928 if((addLevel) || (start < start0)) { /* nonzero increment, or an earlier sequence was prepended */
michael@0 1929 level=pLevState->runLevel + addLevel;
michael@0 1930 if(start>=pLevState->runStart) {
michael@0 1931 for(k=start; k<limit; k++) {
michael@0 1932 levels[k]=level;
michael@0 1933 }
michael@0 1934 } else { /* range begins before this run: leave text nested inside LRI/RLI..PDI untouched */
michael@0 1935 DirProp *dirProps=pBiDi->dirProps, dirProp;
michael@0 1936 int32_t isolateCount=0;
michael@0 1937 for(k=start; k<limit; k++) {
michael@0 1938 dirProp=dirProps[k];
michael@0 1939 if(dirProp==PDI)
michael@0 1940 isolateCount--;
michael@0 1941 if(isolateCount==0)
michael@0 1942 levels[k]=level;
michael@0 1943 if(dirProp==LRI || dirProp==RLI)
michael@0 1944 isolateCount++;
michael@0 1945 }
michael@0 1946 }
michael@0 1947 }
michael@0 1948 }
michael@0 1949
michael@0 1950 /**
michael@0 1951 * Returns the directionality of the last strong character at the end of the prologue, if any.
 * The prologue is scanned backward from its end: DirProp_L is returned for L and
 * DirProp_R for R or AL. DirProp_ON is returned if a B character is met first,
 * or if the scan reaches the start without finding a strong character.
michael@0 1952 * Requires prologue!=null.
michael@0 1953 */
michael@0 1954 static DirProp
michael@0 1955 lastL_R_AL(UBiDi *pBiDi) {
michael@0 1956 const UChar *text=pBiDi->prologue;
michael@0 1957 int32_t length=pBiDi->proLength;
michael@0 1958 int32_t i;
michael@0 1959 UChar32 uchar;
michael@0 1960 DirProp dirProp;
michael@0 1961 for(i=length; i>0; ) {
michael@0 1962 /* i is decremented by U16_PREV */
michael@0 1963 U16_PREV(text, 0, i, uchar);
michael@0 1964 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
michael@0 1965 if(dirProp==L) {
michael@0 1966 return DirProp_L;
michael@0 1967 }
michael@0 1968 if(dirProp==R || dirProp==AL) {
michael@0 1969 return DirProp_R;
michael@0 1970 }
michael@0 1971 if(dirProp==B) { /* stop at B: nothing before it is examined */
michael@0 1972 return DirProp_ON;
michael@0 1973 }
michael@0 1974 }
michael@0 1975 return DirProp_ON;
michael@0 1976 }
michael@0 1977
michael@0 1978 /**
michael@0 1979 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
 * The epilogue is scanned forward from its start: DirProp_L is returned for L,
 * DirProp_R for R or AL, DirProp_EN for EN and DirProp_AN for AN.
 * DirProp_ON is returned if none of these is found. Unlike lastL_R_AL(),
 * the scan does not stop at a B character.
michael@0 1980 * Requires epilogue!=null.
michael@0 1981 */
michael@0 1982 static DirProp
michael@0 1983 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
michael@0 1984 const UChar *text=pBiDi->epilogue;
michael@0 1985 int32_t length=pBiDi->epiLength;
michael@0 1986 int32_t i;
michael@0 1987 UChar32 uchar;
michael@0 1988 DirProp dirProp;
michael@0 1989 for(i=0; i<length; ) {
michael@0 1990 /* i is incremented by U16_NEXT */
michael@0 1991 U16_NEXT(text, i, length, uchar);
michael@0 1992 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
michael@0 1993 if(dirProp==L) {
michael@0 1994 return DirProp_L;
michael@0 1995 }
michael@0 1996 if(dirProp==R || dirProp==AL) {
michael@0 1997 return DirProp_R;
michael@0 1998 }
michael@0 1999 if(dirProp==EN) {
michael@0 2000 return DirProp_EN;
michael@0 2001 }
michael@0 2002 if(dirProp==AN) {
michael@0 2003 return DirProp_AN;
michael@0 2004 }
michael@0 2005 }
michael@0 2006 return DirProp_ON;
michael@0 2007 }
michael@0 2008
michael@0 2009 static void
michael@0 2010 resolveImplicitLevels(UBiDi *pBiDi,
michael@0 2011 int32_t start, int32_t limit,
michael@0 2012 DirProp sor, DirProp eor) {
michael@0 2013 const DirProp *dirProps=pBiDi->dirProps;
michael@0 2014 DirProp dirProp;
michael@0 2015 LevState levState;
michael@0 2016 int32_t i, start1, start2;
michael@0 2017 uint16_t oldStateImp, stateImp, actionImp;
michael@0 2018 uint8_t gprop, resProp, cell;
michael@0 2019 UBool inverseRTL;
michael@0 2020 DirProp nextStrongProp=R;
michael@0 2021 int32_t nextStrongPos=-1;
michael@0 2022
michael@0 2023 /* check for RTL inverse BiDi mode */
michael@0 2024 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
michael@0 2025 * loop on the text characters from end to start.
michael@0 2026 * This would need a different properties state table (at least different
michael@0 2027 * actions) and different levels state tables (maybe very similar to the
michael@0 2028 * LTR corresponding ones.
michael@0 2029 */
michael@0 2030 inverseRTL=(UBool)
michael@0 2031 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
michael@0 2032 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
michael@0 2033 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
michael@0 2034
michael@0 2035 /* initialize for property and levels state tables */
michael@0 2036 levState.startON=-1;
michael@0 2037 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
michael@0 2038 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
michael@0 2039 levState.runStart=start;
michael@0 2040 levState.runLevel=pBiDi->levels[start];
michael@0 2041 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
michael@0 2042 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
michael@0 2043 if(start==0 && pBiDi->proLength>0) {
michael@0 2044 DirProp lastStrong=lastL_R_AL(pBiDi);
michael@0 2045 if(lastStrong!=DirProp_ON) {
michael@0 2046 sor=lastStrong;
michael@0 2047 }
michael@0 2048 }
michael@0 2049 /* The isolates[] entries contain enough information to
michael@0 2050 resume the bidi algorithm in the same state as it was
michael@0 2051 when it was interrupted by an isolate sequence. */
michael@0 2052 if(dirProps[start]==PDI) {
michael@0 2053 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
michael@0 2054 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
michael@0 2055 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
michael@0 2056 pBiDi->isolateCount--;
michael@0 2057 } else {
michael@0 2058 start1=start;
michael@0 2059 if(dirProps[start]==NSM)
michael@0 2060 stateImp = 1 + sor;
michael@0 2061 else
michael@0 2062 stateImp=0;
michael@0 2063 levState.state=0;
michael@0 2064 processPropertySeq(pBiDi, &levState, sor, start, start);
michael@0 2065 }
michael@0 2066 start2=start;
michael@0 2067
michael@0 2068 for(i=start; i<=limit; i++) {
michael@0 2069 if(i>=limit) {
michael@0 2070 if(limit>start) {
michael@0 2071 dirProp=pBiDi->dirProps[limit-1];
michael@0 2072 if(dirProp==LRI || dirProp==RLI)
michael@0 2073 break; /* no forced closing for sequence ending with LRI/RLI */
michael@0 2074 }
michael@0 2075 gprop=eor;
michael@0 2076 } else {
michael@0 2077 DirProp prop, prop1;
michael@0 2078 prop=PURE_DIRPROP(dirProps[i]);
michael@0 2079 if(inverseRTL) {
michael@0 2080 if(prop==AL) {
michael@0 2081 /* AL before EN does not make it AN */
michael@0 2082 prop=R;
michael@0 2083 } else if(prop==EN) {
michael@0 2084 if(nextStrongPos<=i) {
michael@0 2085 /* look for next strong char (L/R/AL) */
michael@0 2086 int32_t j;
michael@0 2087 nextStrongProp=R; /* set default */
michael@0 2088 nextStrongPos=limit;
michael@0 2089 for(j=i+1; j<limit; j++) {
michael@0 2090 prop1=dirProps[j];
michael@0 2091 if(prop1==L || prop1==R || prop1==AL) {
michael@0 2092 nextStrongProp=prop1;
michael@0 2093 nextStrongPos=j;
michael@0 2094 break;
michael@0 2095 }
michael@0 2096 }
michael@0 2097 }
michael@0 2098 if(nextStrongProp==AL) {
michael@0 2099 prop=AN;
michael@0 2100 }
michael@0 2101 }
michael@0 2102 }
michael@0 2103 gprop=groupProp[prop];
michael@0 2104 }
michael@0 2105 oldStateImp=stateImp;
michael@0 2106 cell=impTabProps[oldStateImp][gprop];
michael@0 2107 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
michael@0 2108 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
michael@0 2109 if((i==limit) && (actionImp==0)) {
michael@0 2110 /* there is an unprocessed sequence if its property == eor */
michael@0 2111 actionImp=1; /* process the last sequence */
michael@0 2112 }
michael@0 2113 if(actionImp) {
michael@0 2114 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
michael@0 2115 switch(actionImp) {
michael@0 2116 case 1: /* process current seq1, init new seq1 */
michael@0 2117 processPropertySeq(pBiDi, &levState, resProp, start1, i);
michael@0 2118 start1=i;
michael@0 2119 break;
michael@0 2120 case 2: /* init new seq2 */
michael@0 2121 start2=i;
michael@0 2122 break;
michael@0 2123 case 3: /* process seq1, process seq2, init new seq1 */
michael@0 2124 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
michael@0 2125 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
michael@0 2126 start1=i;
michael@0 2127 break;
michael@0 2128 case 4: /* process seq1, set seq1=seq2, init new seq2 */
michael@0 2129 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
michael@0 2130 start1=start2;
michael@0 2131 start2=i;
michael@0 2132 break;
michael@0 2133 default: /* we should never get here */
michael@0 2134 U_ASSERT(FALSE);
michael@0 2135 break;
michael@0 2136 }
michael@0 2137 }
michael@0 2138 }
michael@0 2139
michael@0 2140 /* flush possible pending sequence, e.g. ON */
michael@0 2141 if(limit==pBiDi->length && pBiDi->epiLength>0) {
michael@0 2142 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
michael@0 2143 if(firstStrong!=DirProp_ON) {
michael@0 2144 eor=firstStrong;
michael@0 2145 }
michael@0 2146 }
michael@0 2147
michael@0 2148 dirProp=dirProps[limit-1];
michael@0 2149 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
michael@0 2150 pBiDi->isolateCount++;
michael@0 2151 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
michael@0 2152 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
michael@0 2153 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
michael@0 2154 }
michael@0 2155 else
michael@0 2156 processPropertySeq(pBiDi, &levState, eor, limit, limit);
michael@0 2157 }
michael@0 2158
michael@0 2159 /* perform (L1) and (X9) ---------------------------------------------------- */
michael@0 2160
michael@0 2161 /*
michael@0 2162 * Reset the embedding levels for some non-graphic characters (L1).
michael@0 2163 * This function also sets appropriate levels for BN, and
michael@0 2164 * explicit embedding types that are supposed to have been removed
michael@0 2165 * from the paragraph in (X9).
michael@0 2166 */
michael@0 2167 static void
michael@0 2168 adjustWSLevels(UBiDi *pBiDi) {
michael@0 2169 const DirProp *dirProps=pBiDi->dirProps;
michael@0 2170 UBiDiLevel *levels=pBiDi->levels;
michael@0 2171 int32_t i;
michael@0 2172
michael@0 2173 if(pBiDi->flags&MASK_WS) {
michael@0 2174 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
michael@0 2175 Flags flag;
michael@0 2176
michael@0 2177 i=pBiDi->trailingWSStart;
michael@0 2178 while(i>0) {
michael@0 2179 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
michael@0 2180 while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) {
michael@0 2181 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
michael@0 2182 levels[i]=0;
michael@0 2183 } else {
michael@0 2184 levels[i]=GET_PARALEVEL(pBiDi, i);
michael@0 2185 }
michael@0 2186 }
michael@0 2187
michael@0 2188 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
michael@0 2189 /* here, i+1 is guaranteed to be <length */
michael@0 2190 while(i>0) {
michael@0 2191 flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i]));
michael@0 2192 if(flag&MASK_BN_EXPLICIT) {
michael@0 2193 levels[i]=levels[i+1];
michael@0 2194 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
michael@0 2195 levels[i]=0;
michael@0 2196 break;
michael@0 2197 } else if(flag&MASK_B_S) {
michael@0 2198 levels[i]=GET_PARALEVEL(pBiDi, i);
michael@0 2199 break;
michael@0 2200 }
michael@0 2201 }
michael@0 2202 }
michael@0 2203 }
michael@0 2204 }
michael@0 2205
michael@0 2206 U_CAPI void U_EXPORT2
michael@0 2207 ubidi_setContext(UBiDi *pBiDi,
michael@0 2208 const UChar *prologue, int32_t proLength,
michael@0 2209 const UChar *epilogue, int32_t epiLength,
michael@0 2210 UErrorCode *pErrorCode) {
michael@0 2211 /* check the argument values */
michael@0 2212 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 2213 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
michael@0 2214 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
michael@0 2215 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2216 return;
michael@0 2217 }
michael@0 2218
michael@0 2219 if(proLength==-1) {
michael@0 2220 pBiDi->proLength=u_strlen(prologue);
michael@0 2221 } else {
michael@0 2222 pBiDi->proLength=proLength;
michael@0 2223 }
michael@0 2224 if(epiLength==-1) {
michael@0 2225 pBiDi->epiLength=u_strlen(epilogue);
michael@0 2226 } else {
michael@0 2227 pBiDi->epiLength=epiLength;
michael@0 2228 }
michael@0 2229 pBiDi->prologue=prologue;
michael@0 2230 pBiDi->epilogue=epilogue;
michael@0 2231 }
michael@0 2232
michael@0 2233 static void
michael@0 2234 setParaSuccess(UBiDi *pBiDi) {
michael@0 2235 pBiDi->proLength=0; /* forget the last context */
michael@0 2236 pBiDi->epiLength=0;
michael@0 2237 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
michael@0 2238 }
michael@0 2239
/* Smaller of two values. An argument may be evaluated twice. */
#define BIDI_MIN(x, y)  ((y)>(x) ? (x) : (y))
/* Absolute value. The argument may be evaluated twice. */
#define BIDI_ABS(x)     ((x)<0 ? (-(x)) : (x))
michael@0 2242
michael@0 2243 static void
michael@0 2244 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
michael@0 2245 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
michael@0 2246 void *runsOnlyMemory;
michael@0 2247 int32_t *visualMap;
michael@0 2248 UChar *visualText;
michael@0 2249 int32_t saveLength, saveTrailingWSStart;
michael@0 2250 const UBiDiLevel *levels;
michael@0 2251 UBiDiLevel *saveLevels;
michael@0 2252 UBiDiDirection saveDirection;
michael@0 2253 UBool saveMayAllocateText;
michael@0 2254 Run *runs;
michael@0 2255 int32_t visualLength, i, j, visualStart, logicalStart,
michael@0 2256 runCount, runLength, addedRuns, insertRemove,
michael@0 2257 start, limit, step, indexOddBit, logicalPos,
michael@0 2258 index0, index1;
michael@0 2259 uint32_t saveOptions;
michael@0 2260
michael@0 2261 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
michael@0 2262 if(length==0) {
michael@0 2263 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
michael@0 2264 goto cleanup3;
michael@0 2265 }
michael@0 2266 /* obtain memory for mapping table and visual text */
michael@0 2267 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
michael@0 2268 if(runsOnlyMemory==NULL) {
michael@0 2269 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 2270 goto cleanup3;
michael@0 2271 }
michael@0 2272 visualMap=runsOnlyMemory;
michael@0 2273 visualText=(UChar *)&visualMap[length];
michael@0 2274 saveLevels=(UBiDiLevel *)&visualText[length];
michael@0 2275 saveOptions=pBiDi->reorderingOptions;
michael@0 2276 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
michael@0 2277 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
michael@0 2278 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
michael@0 2279 }
michael@0 2280 paraLevel&=1; /* accept only 0 or 1 */
michael@0 2281 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
michael@0 2282 if(U_FAILURE(*pErrorCode)) {
michael@0 2283 goto cleanup3;
michael@0 2284 }
michael@0 2285 /* we cannot access directly pBiDi->levels since it is not yet set if
michael@0 2286 * direction is not MIXED
michael@0 2287 */
michael@0 2288 levels=ubidi_getLevels(pBiDi, pErrorCode);
michael@0 2289 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
michael@0 2290 saveTrailingWSStart=pBiDi->trailingWSStart;
michael@0 2291 saveLength=pBiDi->length;
michael@0 2292 saveDirection=pBiDi->direction;
michael@0 2293
michael@0 2294 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
michael@0 2295 * the visual map and the dirProps array to drive the second call
michael@0 2296 * to ubidi_setPara (but must make provision for possible removal of
michael@0 2297 * BiDi controls. Alternatively, only use the dirProps array via
michael@0 2298 * customized classifier callback.
michael@0 2299 */
michael@0 2300 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
michael@0 2301 UBIDI_DO_MIRRORING, pErrorCode);
michael@0 2302 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
michael@0 2303 if(U_FAILURE(*pErrorCode)) {
michael@0 2304 goto cleanup2;
michael@0 2305 }
michael@0 2306 pBiDi->reorderingOptions=saveOptions;
michael@0 2307
michael@0 2308 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
michael@0 2309 paraLevel^=1;
michael@0 2310 /* Because what we did with reorderingOptions, visualText may be shorter
michael@0 2311 * than the original text. But we don't want the levels memory to be
michael@0 2312 * reallocated shorter than the original length, since we need to restore
michael@0 2313 * the levels as after the first call to ubidi_setpara() before returning.
michael@0 2314 * We will force mayAllocateText to FALSE before the second call to
michael@0 2315 * ubidi_setpara(), and will restore it afterwards.
michael@0 2316 */
michael@0 2317 saveMayAllocateText=pBiDi->mayAllocateText;
michael@0 2318 pBiDi->mayAllocateText=FALSE;
michael@0 2319 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
michael@0 2320 pBiDi->mayAllocateText=saveMayAllocateText;
michael@0 2321 ubidi_getRuns(pBiDi, pErrorCode);
michael@0 2322 if(U_FAILURE(*pErrorCode)) {
michael@0 2323 goto cleanup1;
michael@0 2324 }
michael@0 2325 /* check if some runs must be split, count how many splits */
michael@0 2326 addedRuns=0;
michael@0 2327 runCount=pBiDi->runCount;
michael@0 2328 runs=pBiDi->runs;
michael@0 2329 visualStart=0;
michael@0 2330 for(i=0; i<runCount; i++, visualStart+=runLength) {
michael@0 2331 runLength=runs[i].visualLimit-visualStart;
michael@0 2332 if(runLength<2) {
michael@0 2333 continue;
michael@0 2334 }
michael@0 2335 logicalStart=GET_INDEX(runs[i].logicalStart);
michael@0 2336 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
michael@0 2337 index0=visualMap[j];
michael@0 2338 index1=visualMap[j-1];
michael@0 2339 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
michael@0 2340 addedRuns++;
michael@0 2341 }
michael@0 2342 }
michael@0 2343 }
michael@0 2344 if(addedRuns) {
michael@0 2345 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
michael@0 2346 if(runCount==1) {
michael@0 2347 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
michael@0 2348 pBiDi->runsMemory[0]=runs[0];
michael@0 2349 }
michael@0 2350 runs=pBiDi->runs=pBiDi->runsMemory;
michael@0 2351 pBiDi->runCount+=addedRuns;
michael@0 2352 } else {
michael@0 2353 goto cleanup1;
michael@0 2354 }
michael@0 2355 }
michael@0 2356 /* split runs which are not consecutive in source text */
michael@0 2357 for(i=runCount-1; i>=0; i--) {
michael@0 2358 runLength= i==0 ? runs[0].visualLimit :
michael@0 2359 runs[i].visualLimit-runs[i-1].visualLimit;
michael@0 2360 logicalStart=runs[i].logicalStart;
michael@0 2361 indexOddBit=GET_ODD_BIT(logicalStart);
michael@0 2362 logicalStart=GET_INDEX(logicalStart);
michael@0 2363 if(runLength<2) {
michael@0 2364 if(addedRuns) {
michael@0 2365 runs[i+addedRuns]=runs[i];
michael@0 2366 }
michael@0 2367 logicalPos=visualMap[logicalStart];
michael@0 2368 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
michael@0 2369 saveLevels[logicalPos]^indexOddBit);
michael@0 2370 continue;
michael@0 2371 }
michael@0 2372 if(indexOddBit) {
michael@0 2373 start=logicalStart;
michael@0 2374 limit=logicalStart+runLength-1;
michael@0 2375 step=1;
michael@0 2376 } else {
michael@0 2377 start=logicalStart+runLength-1;
michael@0 2378 limit=logicalStart;
michael@0 2379 step=-1;
michael@0 2380 }
michael@0 2381 for(j=start; j!=limit; j+=step) {
michael@0 2382 index0=visualMap[j];
michael@0 2383 index1=visualMap[j+step];
michael@0 2384 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
michael@0 2385 logicalPos=BIDI_MIN(visualMap[start], index0);
michael@0 2386 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
michael@0 2387 saveLevels[logicalPos]^indexOddBit);
michael@0 2388 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
michael@0 2389 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
michael@0 2390 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
michael@0 2391 runs[i+addedRuns].insertRemove=insertRemove;
michael@0 2392 runs[i].insertRemove&=~insertRemove;
michael@0 2393 start=j+step;
michael@0 2394 addedRuns--;
michael@0 2395 }
michael@0 2396 }
michael@0 2397 if(addedRuns) {
michael@0 2398 runs[i+addedRuns]=runs[i];
michael@0 2399 }
michael@0 2400 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
michael@0 2401 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
michael@0 2402 saveLevels[logicalPos]^indexOddBit);
michael@0 2403 }
michael@0 2404
michael@0 2405 cleanup1:
michael@0 2406 /* restore initial paraLevel */
michael@0 2407 pBiDi->paraLevel^=1;
michael@0 2408 cleanup2:
michael@0 2409 /* restore real text */
michael@0 2410 pBiDi->text=text;
michael@0 2411 pBiDi->length=saveLength;
michael@0 2412 pBiDi->originalLength=length;
michael@0 2413 pBiDi->direction=saveDirection;
michael@0 2414 /* the saved levels should never excess levelsSize, but we check anyway */
michael@0 2415 if(saveLength>pBiDi->levelsSize) {
michael@0 2416 saveLength=pBiDi->levelsSize;
michael@0 2417 }
michael@0 2418 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
michael@0 2419 pBiDi->trailingWSStart=saveTrailingWSStart;
michael@0 2420 /* free memory for mapping table and visual text */
michael@0 2421 uprv_free(runsOnlyMemory);
michael@0 2422 if(pBiDi->runCount>1) {
michael@0 2423 pBiDi->direction=UBIDI_MIXED;
michael@0 2424 }
michael@0 2425 cleanup3:
michael@0 2426 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
michael@0 2427 }
michael@0 2428
michael@0 2429 /* ubidi_setPara ------------------------------------------------------------ */
michael@0 2430
michael@0 2431 U_CAPI void U_EXPORT2
michael@0 2432 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
michael@0 2433 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
michael@0 2434 UErrorCode *pErrorCode) {
michael@0 2435 UBiDiDirection direction;
michael@0 2436
michael@0 2437 /* check the argument values */
michael@0 2438 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 2439 if(pBiDi==NULL || text==NULL || length<-1 ||
michael@0 2440 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
michael@0 2441 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2442 return;
michael@0 2443 }
michael@0 2444
michael@0 2445 if(length==-1) {
michael@0 2446 length=u_strlen(text);
michael@0 2447 }
michael@0 2448
michael@0 2449 /* special treatment for RUNS_ONLY mode */
michael@0 2450 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
michael@0 2451 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
michael@0 2452 return;
michael@0 2453 }
michael@0 2454
michael@0 2455 /* initialize the UBiDi structure */
michael@0 2456 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
michael@0 2457 pBiDi->text=text;
michael@0 2458 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
michael@0 2459 pBiDi->paraLevel=paraLevel;
michael@0 2460 pBiDi->direction=paraLevel&1;
michael@0 2461 pBiDi->paraCount=1;
michael@0 2462
michael@0 2463 pBiDi->dirProps=NULL;
michael@0 2464 pBiDi->levels=NULL;
michael@0 2465 pBiDi->runs=NULL;
michael@0 2466 pBiDi->insertPoints.size=0; /* clean up from last call */
michael@0 2467 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
michael@0 2468
michael@0 2469 /*
michael@0 2470 * Save the original paraLevel if contextual; otherwise, set to 0.
michael@0 2471 */
michael@0 2472 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
michael@0 2473
michael@0 2474 if(length==0) {
michael@0 2475 /*
michael@0 2476 * For an empty paragraph, create a UBiDi object with the paraLevel and
michael@0 2477 * the flags and the direction set but without allocating zero-length arrays.
michael@0 2478 * There is nothing more to do.
michael@0 2479 */
michael@0 2480 if(IS_DEFAULT_LEVEL(paraLevel)) {
michael@0 2481 pBiDi->paraLevel&=1;
michael@0 2482 pBiDi->defaultParaLevel=0;
michael@0 2483 }
michael@0 2484 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
michael@0 2485 pBiDi->runCount=0;
michael@0 2486 pBiDi->paraCount=0;
michael@0 2487 setParaSuccess(pBiDi); /* mark successful setPara */
michael@0 2488 return;
michael@0 2489 }
michael@0 2490
michael@0 2491 pBiDi->runCount=-1;
michael@0 2492
michael@0 2493 /* allocate paras memory */
michael@0 2494 if(pBiDi->parasMemory)
michael@0 2495 pBiDi->paras=pBiDi->parasMemory;
michael@0 2496 else
michael@0 2497 pBiDi->paras=pBiDi->simpleParas;
michael@0 2498
michael@0 2499 /*
michael@0 2500 * Get the directional properties,
michael@0 2501 * the flags bit-set, and
michael@0 2502 * determine the paragraph level if necessary.
michael@0 2503 */
michael@0 2504 if(getDirPropsMemory(pBiDi, length)) {
michael@0 2505 pBiDi->dirProps=pBiDi->dirPropsMemory;
michael@0 2506 if(!getDirProps(pBiDi)) {
michael@0 2507 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 2508 return;
michael@0 2509 }
michael@0 2510 } else {
michael@0 2511 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 2512 return;
michael@0 2513 }
michael@0 2514 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
michael@0 2515 length= pBiDi->length;
michael@0 2516 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
michael@0 2517
michael@0 2518 /* are explicit levels specified? */
michael@0 2519 if(embeddingLevels==NULL) {
michael@0 2520 /* no: determine explicit levels according to the (Xn) rules */\
michael@0 2521 if(getLevelsMemory(pBiDi, length)) {
michael@0 2522 pBiDi->levels=pBiDi->levelsMemory;
michael@0 2523 direction=resolveExplicitLevels(pBiDi, pErrorCode);
michael@0 2524 if(U_FAILURE(*pErrorCode)) {
michael@0 2525 return;
michael@0 2526 }
michael@0 2527 } else {
michael@0 2528 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 2529 return;
michael@0 2530 }
michael@0 2531 } else {
michael@0 2532 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
michael@0 2533 pBiDi->levels=embeddingLevels;
michael@0 2534 direction=checkExplicitLevels(pBiDi, pErrorCode);
michael@0 2535 if(U_FAILURE(*pErrorCode)) {
michael@0 2536 return;
michael@0 2537 }
michael@0 2538 }
michael@0 2539
michael@0 2540 /* allocate isolate memory */
michael@0 2541 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE)
michael@0 2542 pBiDi->isolates=pBiDi->simpleIsolates;
michael@0 2543 else
michael@0 2544 if(pBiDi->isolateCount<=pBiDi->isolatesSize)
michael@0 2545 pBiDi->isolates=pBiDi->isolatesMemory;
michael@0 2546 else {
michael@0 2547 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
michael@0 2548 pBiDi->isolates=pBiDi->isolatesMemory;
michael@0 2549 } else {
michael@0 2550 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 2551 return;
michael@0 2552 }
michael@0 2553 }
michael@0 2554 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
michael@0 2555
michael@0 2556 /*
michael@0 2557 * The steps after (X9) in the UBiDi algorithm are performed only if
michael@0 2558 * the paragraph text has mixed directionality!
michael@0 2559 */
michael@0 2560 pBiDi->direction=direction;
michael@0 2561 switch(direction) {
michael@0 2562 case UBIDI_LTR:
michael@0 2563 /* make sure paraLevel is even */
michael@0 2564 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
michael@0 2565
michael@0 2566 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
michael@0 2567 pBiDi->trailingWSStart=0;
michael@0 2568 break;
michael@0 2569 case UBIDI_RTL:
michael@0 2570 /* make sure paraLevel is odd */
michael@0 2571 pBiDi->paraLevel|=1;
michael@0 2572
michael@0 2573 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
michael@0 2574 pBiDi->trailingWSStart=0;
michael@0 2575 break;
michael@0 2576 default:
michael@0 2577 /*
michael@0 2578 * Choose the right implicit state table
michael@0 2579 */
michael@0 2580 switch(pBiDi->reorderingMode) {
michael@0 2581 case UBIDI_REORDER_DEFAULT:
michael@0 2582 pBiDi->pImpTabPair=&impTab_DEFAULT;
michael@0 2583 break;
michael@0 2584 case UBIDI_REORDER_NUMBERS_SPECIAL:
michael@0 2585 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
michael@0 2586 break;
michael@0 2587 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
michael@0 2588 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
michael@0 2589 break;
michael@0 2590 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
michael@0 2591 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
michael@0 2592 break;
michael@0 2593 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
michael@0 2594 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
michael@0 2595 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
michael@0 2596 } else {
michael@0 2597 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
michael@0 2598 }
michael@0 2599 break;
michael@0 2600 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
michael@0 2601 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
michael@0 2602 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
michael@0 2603 } else {
michael@0 2604 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
michael@0 2605 }
michael@0 2606 break;
michael@0 2607 default:
michael@0 2608 /* we should never get here */
michael@0 2609 U_ASSERT(FALSE);
michael@0 2610 break;
michael@0 2611 }
michael@0 2612 /*
michael@0 2613 * If there are no external levels specified and there
michael@0 2614 * are no significant explicit level codes in the text,
michael@0 2615 * then we can treat the entire paragraph as one run.
michael@0 2616 * Otherwise, we need to perform the following rules on runs of
michael@0 2617 * the text with the same embedding levels. (X10)
michael@0 2618 * "Significant" explicit level codes are ones that actually
michael@0 2619 * affect non-BN characters.
michael@0 2620 * Examples for "insignificant" ones are empty embeddings
michael@0 2621 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
michael@0 2622 */
michael@0 2623 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
michael@0 2624 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
michael@0 2625 resolveImplicitLevels(pBiDi, 0, length,
michael@0 2626 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
michael@0 2627 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
michael@0 2628 } else {
michael@0 2629 /* sor, eor: start and end types of same-level-run */
michael@0 2630 UBiDiLevel *levels=pBiDi->levels;
michael@0 2631 int32_t start, limit=0;
michael@0 2632 UBiDiLevel level, nextLevel;
michael@0 2633 DirProp sor, eor;
michael@0 2634
michael@0 2635 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
michael@0 2636 level=GET_PARALEVEL(pBiDi, 0);
michael@0 2637 nextLevel=levels[0];
michael@0 2638 if(level<nextLevel) {
michael@0 2639 eor=GET_LR_FROM_LEVEL(nextLevel);
michael@0 2640 } else {
michael@0 2641 eor=GET_LR_FROM_LEVEL(level);
michael@0 2642 }
michael@0 2643
michael@0 2644 do {
michael@0 2645 /* determine start and limit of the run (end points just behind the run) */
michael@0 2646
michael@0 2647 /* the values for this run's start are the same as for the previous run's end */
michael@0 2648 start=limit;
michael@0 2649 level=nextLevel;
michael@0 2650 if((start>0) && (pBiDi->dirProps[start-1]==B)) {
michael@0 2651 /* except if this is a new paragraph, then set sor = para level */
michael@0 2652 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
michael@0 2653 } else {
michael@0 2654 sor=eor;
michael@0 2655 }
michael@0 2656
michael@0 2657 /* search for the limit of this run */
michael@0 2658 while(++limit<length && levels[limit]==level) {}
michael@0 2659
michael@0 2660 /* get the correct level of the next run */
michael@0 2661 if(limit<length) {
michael@0 2662 nextLevel=levels[limit];
michael@0 2663 } else {
michael@0 2664 nextLevel=GET_PARALEVEL(pBiDi, length-1);
michael@0 2665 }
michael@0 2666
michael@0 2667 /* determine eor from max(level, nextLevel); sor is last run's eor */
michael@0 2668 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
michael@0 2669 eor=GET_LR_FROM_LEVEL(nextLevel);
michael@0 2670 } else {
michael@0 2671 eor=GET_LR_FROM_LEVEL(level);
michael@0 2672 }
michael@0 2673
michael@0 2674 /* if the run consists of overridden directional types, then there
michael@0 2675 are no implicit types to be resolved */
michael@0 2676 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
michael@0 2677 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
michael@0 2678 } else {
michael@0 2679 /* remove the UBIDI_LEVEL_OVERRIDE flags */
michael@0 2680 do {
michael@0 2681 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
michael@0 2682 } while(start<limit);
michael@0 2683 }
michael@0 2684 } while(limit<length);
michael@0 2685 }
michael@0 2686 /* check if we got any memory shortage while adding insert points */
michael@0 2687 if (U_FAILURE(pBiDi->insertPoints.errorCode))
michael@0 2688 {
michael@0 2689 *pErrorCode=pBiDi->insertPoints.errorCode;
michael@0 2690 return;
michael@0 2691 }
michael@0 2692 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
michael@0 2693 adjustWSLevels(pBiDi);
michael@0 2694 break;
michael@0 2695 }
michael@0 2696 /* add RLM for inverse Bidi with contextual orientation resolving
michael@0 2697 * to RTL which would not round-trip otherwise
michael@0 2698 */
michael@0 2699 if((pBiDi->defaultParaLevel>0) &&
michael@0 2700 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
michael@0 2701 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
michael@0 2702 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
michael@0 2703 int32_t i, j, start, last;
michael@0 2704 UBiDiLevel level;
michael@0 2705 DirProp dirProp;
michael@0 2706 for(i=0; i<pBiDi->paraCount; i++) {
michael@0 2707 last=(pBiDi->paras[i].limit)-1;
michael@0 2708 level=pBiDi->paras[i].level;
michael@0 2709 if(level==0)
michael@0 2710 continue; /* LTR paragraph */
michael@0 2711 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
michael@0 2712 for(j=last; j>=start; j--) {
michael@0 2713 dirProp=pBiDi->dirProps[j];
michael@0 2714 if(dirProp==L) {
michael@0 2715 if(j<last) {
michael@0 2716 while(pBiDi->dirProps[last]==B) {
michael@0 2717 last--;
michael@0 2718 }
michael@0 2719 }
michael@0 2720 addPoint(pBiDi, last, RLM_BEFORE);
michael@0 2721 break;
michael@0 2722 }
michael@0 2723 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
michael@0 2724 break;
michael@0 2725 }
michael@0 2726 }
michael@0 2727 }
michael@0 2728 }
michael@0 2729
michael@0 2730 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
michael@0 2731 pBiDi->resultLength -= pBiDi->controlCount;
michael@0 2732 } else {
michael@0 2733 pBiDi->resultLength += pBiDi->insertPoints.size;
michael@0 2734 }
michael@0 2735 setParaSuccess(pBiDi); /* mark successful setPara */
michael@0 2736 }
michael@0 2737
michael@0 2738 U_CAPI void U_EXPORT2
michael@0 2739 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
michael@0 2740 if(pBiDi!=NULL) {
michael@0 2741 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
michael@0 2742 }
michael@0 2743 }
michael@0 2744
michael@0 2745 U_CAPI UBool U_EXPORT2
michael@0 2746 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
michael@0 2747 if(pBiDi!=NULL) {
michael@0 2748 return pBiDi->orderParagraphsLTR;
michael@0 2749 } else {
michael@0 2750 return FALSE;
michael@0 2751 }
michael@0 2752 }
michael@0 2753
michael@0 2754 U_CAPI UBiDiDirection U_EXPORT2
michael@0 2755 ubidi_getDirection(const UBiDi *pBiDi) {
michael@0 2756 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2757 return pBiDi->direction;
michael@0 2758 } else {
michael@0 2759 return UBIDI_LTR;
michael@0 2760 }
michael@0 2761 }
michael@0 2762
michael@0 2763 U_CAPI const UChar * U_EXPORT2
michael@0 2764 ubidi_getText(const UBiDi *pBiDi) {
michael@0 2765 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2766 return pBiDi->text;
michael@0 2767 } else {
michael@0 2768 return NULL;
michael@0 2769 }
michael@0 2770 }
michael@0 2771
michael@0 2772 U_CAPI int32_t U_EXPORT2
michael@0 2773 ubidi_getLength(const UBiDi *pBiDi) {
michael@0 2774 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2775 return pBiDi->originalLength;
michael@0 2776 } else {
michael@0 2777 return 0;
michael@0 2778 }
michael@0 2779 }
michael@0 2780
michael@0 2781 U_CAPI int32_t U_EXPORT2
michael@0 2782 ubidi_getProcessedLength(const UBiDi *pBiDi) {
michael@0 2783 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2784 return pBiDi->length;
michael@0 2785 } else {
michael@0 2786 return 0;
michael@0 2787 }
michael@0 2788 }
michael@0 2789
michael@0 2790 U_CAPI int32_t U_EXPORT2
michael@0 2791 ubidi_getResultLength(const UBiDi *pBiDi) {
michael@0 2792 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2793 return pBiDi->resultLength;
michael@0 2794 } else {
michael@0 2795 return 0;
michael@0 2796 }
michael@0 2797 }
michael@0 2798
michael@0 2799 /* paragraphs API functions ------------------------------------------------- */
michael@0 2800
michael@0 2801 U_CAPI UBiDiLevel U_EXPORT2
michael@0 2802 ubidi_getParaLevel(const UBiDi *pBiDi) {
michael@0 2803 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2804 return pBiDi->paraLevel;
michael@0 2805 } else {
michael@0 2806 return 0;
michael@0 2807 }
michael@0 2808 }
michael@0 2809
michael@0 2810 U_CAPI int32_t U_EXPORT2
michael@0 2811 ubidi_countParagraphs(UBiDi *pBiDi) {
michael@0 2812 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0 2813 return 0;
michael@0 2814 } else {
michael@0 2815 return pBiDi->paraCount;
michael@0 2816 }
michael@0 2817 }
michael@0 2818
michael@0 2819 U_CAPI void U_EXPORT2
michael@0 2820 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
michael@0 2821 int32_t *pParaStart, int32_t *pParaLimit,
michael@0 2822 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
michael@0 2823 int32_t paraStart;
michael@0 2824
michael@0 2825 /* check the argument values */
michael@0 2826 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 2827 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
michael@0 2828 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
michael@0 2829
michael@0 2830 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
michael@0 2831 if(paraIndex) {
michael@0 2832 paraStart=pBiDi->paras[paraIndex-1].limit;
michael@0 2833 } else {
michael@0 2834 paraStart=0;
michael@0 2835 }
michael@0 2836 if(pParaStart!=NULL) {
michael@0 2837 *pParaStart=paraStart;
michael@0 2838 }
michael@0 2839 if(pParaLimit!=NULL) {
michael@0 2840 *pParaLimit=pBiDi->paras[paraIndex].limit;
michael@0 2841 }
michael@0 2842 if(pParaLevel!=NULL) {
michael@0 2843 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
michael@0 2844 }
michael@0 2845 }
michael@0 2846
michael@0 2847 U_CAPI int32_t U_EXPORT2
michael@0 2848 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
michael@0 2849 int32_t *pParaStart, int32_t *pParaLimit,
michael@0 2850 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
michael@0 2851 int32_t paraIndex;
michael@0 2852
michael@0 2853 /* check the argument values */
michael@0 2854 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
michael@0 2855 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
michael@0 2856 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
michael@0 2857 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
michael@0 2858 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
michael@0 2859
michael@0 2860 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
michael@0 2861 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
michael@0 2862 return paraIndex;
michael@0 2863 }
michael@0 2864
michael@0 2865 U_CAPI void U_EXPORT2
michael@0 2866 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
michael@0 2867 const void *newContext, UBiDiClassCallback **oldFn,
michael@0 2868 const void **oldContext, UErrorCode *pErrorCode)
michael@0 2869 {
michael@0 2870 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0 2871 if(pBiDi==NULL) {
michael@0 2872 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 2873 return;
michael@0 2874 }
michael@0 2875 if( oldFn )
michael@0 2876 {
michael@0 2877 *oldFn = pBiDi->fnClassCallback;
michael@0 2878 }
michael@0 2879 if( oldContext )
michael@0 2880 {
michael@0 2881 *oldContext = pBiDi->coClassCallback;
michael@0 2882 }
michael@0 2883 pBiDi->fnClassCallback = newFn;
michael@0 2884 pBiDi->coClassCallback = newContext;
michael@0 2885 }
michael@0 2886
michael@0 2887 U_CAPI void U_EXPORT2
michael@0 2888 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
michael@0 2889 {
michael@0 2890 if(pBiDi==NULL) {
michael@0 2891 return;
michael@0 2892 }
michael@0 2893 if( fn )
michael@0 2894 {
michael@0 2895 *fn = pBiDi->fnClassCallback;
michael@0 2896 }
michael@0 2897 if( context )
michael@0 2898 {
michael@0 2899 *context = pBiDi->coClassCallback;
michael@0 2900 }
michael@0 2901 }
michael@0 2902
michael@0 2903 U_CAPI UCharDirection U_EXPORT2
michael@0 2904 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
michael@0 2905 {
michael@0 2906 UCharDirection dir;
michael@0 2907
michael@0 2908 if( pBiDi->fnClassCallback == NULL ||
michael@0 2909 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
michael@0 2910 {
michael@0 2911 dir = ubidi_getClass(pBiDi->bdp, c);
michael@0 2912 }
michael@0 2913 if(dir >= U_CHAR_DIRECTION_COUNT) {
michael@0 2914 dir = ON;
michael@0 2915 }
michael@0 2916 return dir;
michael@0 2917 }

mercurial