layout/base/nsBidi.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
michael@0 2 *
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include "nsBidi.h"
michael@0 8 #include "nsUnicodeProperties.h"
michael@0 9 #include "nsCRTGlue.h"
michael@0 10
michael@0 11 using namespace mozilla::unicode;
michael@0 12
michael@0 13 // These are #defined in <sys/regset.h> under Solaris 10 x86
michael@0 14 #undef CS
michael@0 15 #undef ES
michael@0 16
michael@0 17 /* Comparing the description of the Bidi algorithm with this implementation
michael@0 18 is easier with the same names for the Bidi types in the code as there.
michael@0 19 */
michael@0 20 enum {
michael@0 21 L = eCharType_LeftToRight,
michael@0 22 R = eCharType_RightToLeft,
michael@0 23 EN = eCharType_EuropeanNumber,
michael@0 24 ES = eCharType_EuropeanNumberSeparator,
michael@0 25 ET = eCharType_EuropeanNumberTerminator,
michael@0 26 AN = eCharType_ArabicNumber,
michael@0 27 CS = eCharType_CommonNumberSeparator,
michael@0 28 B = eCharType_BlockSeparator,
michael@0 29 S = eCharType_SegmentSeparator,
michael@0 30 WS = eCharType_WhiteSpaceNeutral,
michael@0 31 O_N = eCharType_OtherNeutral,
michael@0 32 LRE = eCharType_LeftToRightEmbedding,
michael@0 33 LRO = eCharType_LeftToRightOverride,
michael@0 34 AL = eCharType_RightToLeftArabic,
michael@0 35 RLE = eCharType_RightToLeftEmbedding,
michael@0 36 RLO = eCharType_RightToLeftOverride,
michael@0 37 PDF = eCharType_PopDirectionalFormat,
michael@0 38 NSM = eCharType_DirNonSpacingMark,
michael@0 39 BN = eCharType_BoundaryNeutral,
michael@0 40 dirPropCount
michael@0 41 };
michael@0 42
michael@0 43 /* to avoid some conditional statements, use tiny constant arrays */
michael@0 44 static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
michael@0 45 static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
michael@0 46 static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
michael@0 47
michael@0 48 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
michael@0 49 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
michael@0 50 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
michael@0 51
michael@0 52 /*
michael@0 53 * General implementation notes:
michael@0 54 *
michael@0 55 * Throughout the implementation, there are comments like (W2) that refer to
michael@0 56 * rules of the Bidi algorithm in its version 5, in this example to the second
michael@0 57 * rule of the resolution of weak types.
michael@0 58 *
michael@0 59 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
michael@0 60 * character according to UTF-16, the second UChar gets the directional property of
michael@0 61 * the entire character assigned, while the first one gets a BN, a boundary
michael@0 62 * neutral, type, which is ignored by most of the algorithm according to
michael@0 63 * rule (X9) and the implementation suggestions of the Bidi algorithm.
michael@0 64 *
michael@0 65 * Later, AdjustWSLevels() will set the level for each BN to that of the
michael@0 66 * following character (UChar), which results in surrogate pairs getting the
michael@0 67 * same level on each of their surrogates.
michael@0 68 *
michael@0 69 * In a UTF-8 implementation, the same thing could be done: the last byte of
michael@0 70 * a multi-byte sequence would get the "real" property, while all previous
michael@0 71 * bytes of that sequence would get BN.
michael@0 72 *
michael@0 73 * It is not possible to assign all those parts of a character the same real
michael@0 74 * property because this would fail in the resolution of weak types with rules
michael@0 75 * that look at immediately surrounding types.
michael@0 76 *
michael@0 77 * As a related topic, this implementation does not remove Boundary Neutral
michael@0 78 * types from the input, but ignores them whenever this is relevant.
michael@0 79 * For example, the loop for the resolution of the weak types reads
michael@0 80 * types until it finds a non-BN.
michael@0 81 * Also, explicit embedding codes are neither changed into BN nor removed.
michael@0 82 * They are only treated the same way real BNs are.
michael@0 83 * As stated before, AdjustWSLevels() takes care of them at the end.
michael@0 84 * For the purpose of conformance, the levels of all these codes
michael@0 85 * do not matter.
michael@0 86 *
michael@0 87 * Note that this implementation never modifies the dirProps
michael@0 88 * after the initial setup.
michael@0 89 *
michael@0 90 *
michael@0 91 * In this implementation, the resolution of weak types (Wn),
michael@0 92 * neutrals (Nn), and the assignment of the resolved level (In)
michael@0 93 * are all done in one single loop, in ResolveImplicitLevels().
michael@0 94 * Changes of dirProp values are done on the fly, without writing
michael@0 95 * them back to the dirProps array.
michael@0 96 *
michael@0 97 *
michael@0 98 * This implementation contains code that allows to bypass steps of the
michael@0 99 * algorithm that are not needed on the specific paragraph
michael@0 100 * in order to speed up the most common cases considerably,
michael@0 101 * like text that is entirely LTR, or RTL text without numbers.
michael@0 102 *
michael@0 103 * Most of this is done by setting a bit for each directional property
michael@0 104 * in a flags variable and later checking for whether there are
michael@0 105 * any LTR characters or any RTL characters, or both, whether
michael@0 106 * there are any explicit embedding codes, etc.
michael@0 107 *
michael@0 108 * If the (Xn) steps are performed, then the flags are re-evaluated,
michael@0 109 * because they will then not contain the embedding codes any more
michael@0 110 * and will be adjusted for override codes, so that subsequently
michael@0 111 * more bypassing may be possible than what the initial flags suggested.
michael@0 112 *
michael@0 113 * If the text is not mixed-directional, then the
michael@0 114 * algorithm steps for the weak type resolution are not performed,
michael@0 115 * and all levels are set to the paragraph level.
michael@0 116 *
michael@0 117 * If there are no explicit embedding codes, then the (Xn) steps
michael@0 118 * are not performed.
michael@0 119 *
michael@0 120 * If embedding levels are supplied as a parameter, then all
michael@0 121 * explicit embedding codes are ignored, and the (Xn) steps
michael@0 122 * are not performed.
michael@0 123 *
michael@0 124 * White Space types could get the level of the run they belong to,
michael@0 125 * and are checked with a test of (flags&MASK_EMBEDDING) to
michael@0 126 * consider if the paragraph direction should be considered in
michael@0 127 * the flags variable.
michael@0 128 *
michael@0 129 * If there are no White Space types in the paragraph, then
michael@0 130 * (L1) is not necessary in AdjustWSLevels().
michael@0 131 */
michael@0 132 nsBidi::nsBidi()
michael@0 133 {
michael@0 134 Init();
michael@0 135
michael@0 136 mMayAllocateText=true;
michael@0 137 mMayAllocateRuns=true;
michael@0 138 }
michael@0 139
michael@0 140 nsBidi::~nsBidi()
michael@0 141 {
michael@0 142 Free();
michael@0 143 }
michael@0 144
michael@0 145 void nsBidi::Init()
michael@0 146 {
michael@0 147 /* reset the object, all pointers nullptr, all flags false, all sizes 0 */
michael@0 148 mLength = 0;
michael@0 149 mParaLevel = 0;
michael@0 150 mFlags = 0;
michael@0 151 mDirection = NSBIDI_LTR;
michael@0 152 mTrailingWSStart = 0;
michael@0 153
michael@0 154 mDirPropsSize = 0;
michael@0 155 mLevelsSize = 0;
michael@0 156 mRunsSize = 0;
michael@0 157 mRunCount = -1;
michael@0 158
michael@0 159 mDirProps=nullptr;
michael@0 160 mLevels=nullptr;
michael@0 161 mRuns=nullptr;
michael@0 162
michael@0 163 mDirPropsMemory=nullptr;
michael@0 164 mLevelsMemory=nullptr;
michael@0 165 mRunsMemory=nullptr;
michael@0 166
michael@0 167 mMayAllocateText=false;
michael@0 168 mMayAllocateRuns=false;
michael@0 169
michael@0 170 }
michael@0 171
michael@0 172 /*
michael@0 173 * We are allowed to allocate memory if aMemory==nullptr or
michael@0 174 * aMayAllocate==true for each array that we need.
michael@0 175 * We also try to grow and shrink memory as needed if we
michael@0 176 * allocate it.
michael@0 177 *
michael@0 178 * Assume aSizeNeeded>0.
michael@0 179 * If *aMemory!=nullptr, then assume *aSize>0.
michael@0 180 *
michael@0 181 * ### this realloc() may unnecessarily copy the old data,
michael@0 182 * which we know we don't need any more;
michael@0 183 * is this the best way to do this??
michael@0 184 */
michael@0 185 bool nsBidi::GetMemory(void **aMemory, size_t *aSize, bool aMayAllocate, size_t aSizeNeeded)
michael@0 186 {
michael@0 187 /* check for existing memory */
michael@0 188 if(*aMemory==nullptr) {
michael@0 189 /* we need to allocate memory */
michael@0 190 if(!aMayAllocate) {
michael@0 191 return false;
michael@0 192 } else {
michael@0 193 *aMemory=moz_malloc(aSizeNeeded);
michael@0 194 if (*aMemory!=nullptr) {
michael@0 195 *aSize=aSizeNeeded;
michael@0 196 return true;
michael@0 197 } else {
michael@0 198 *aSize=0;
michael@0 199 return false;
michael@0 200 }
michael@0 201 }
michael@0 202 } else {
michael@0 203 /* there is some memory, is it enough or too much? */
michael@0 204 if(aSizeNeeded>*aSize && !aMayAllocate) {
michael@0 205 /* not enough memory, and we must not allocate */
michael@0 206 return false;
michael@0 207 } else if(aSizeNeeded!=*aSize && aMayAllocate) {
michael@0 208 /* we may try to grow or shrink */
michael@0 209 void *memory=moz_realloc(*aMemory, aSizeNeeded);
michael@0 210
michael@0 211 if(memory!=nullptr) {
michael@0 212 *aMemory=memory;
michael@0 213 *aSize=aSizeNeeded;
michael@0 214 return true;
michael@0 215 } else {
michael@0 216 /* we failed to grow */
michael@0 217 return false;
michael@0 218 }
michael@0 219 } else {
michael@0 220 /* we have at least enough memory and must not allocate */
michael@0 221 return true;
michael@0 222 }
michael@0 223 }
michael@0 224 }
michael@0 225
michael@0 226 void nsBidi::Free()
michael@0 227 {
michael@0 228 moz_free(mDirPropsMemory);
michael@0 229 mDirPropsMemory = nullptr;
michael@0 230 moz_free(mLevelsMemory);
michael@0 231 mLevelsMemory = nullptr;
michael@0 232 moz_free(mRunsMemory);
michael@0 233 mRunsMemory = nullptr;
michael@0 234 }
michael@0 235
michael@0 236 /* SetPara ------------------------------------------------------------ */
michael@0 237
michael@0 238 nsresult nsBidi::SetPara(const char16_t *aText, int32_t aLength,
michael@0 239 nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels)
michael@0 240 {
michael@0 241 nsBidiDirection direction;
michael@0 242
michael@0 243 /* check the argument values */
michael@0 244 if(aText==nullptr ||
michael@0 245 ((NSBIDI_MAX_EXPLICIT_LEVEL<aParaLevel) && !IS_DEFAULT_LEVEL(aParaLevel)) ||
michael@0 246 aLength<-1
michael@0 247 ) {
michael@0 248 return NS_ERROR_INVALID_ARG;
michael@0 249 }
michael@0 250
michael@0 251 if(aLength==-1) {
michael@0 252 aLength = NS_strlen(aText);
michael@0 253 }
michael@0 254
michael@0 255 /* initialize member data */
michael@0 256 mLength=aLength;
michael@0 257 mParaLevel=aParaLevel;
michael@0 258 mDirection=NSBIDI_LTR;
michael@0 259 mTrailingWSStart=aLength; /* the levels[] will reflect the WS run */
michael@0 260
michael@0 261 mDirProps=nullptr;
michael@0 262 mLevels=nullptr;
michael@0 263 mRuns=nullptr;
michael@0 264
michael@0 265 if(aLength==0) {
michael@0 266 /*
michael@0 267 * For an empty paragraph, create an nsBidi object with the aParaLevel and
michael@0 268 * the flags and the direction set but without allocating zero-length arrays.
michael@0 269 * There is nothing more to do.
michael@0 270 */
michael@0 271 if(IS_DEFAULT_LEVEL(aParaLevel)) {
michael@0 272 mParaLevel&=1;
michael@0 273 }
michael@0 274 if(aParaLevel&1) {
michael@0 275 mFlags=DIRPROP_FLAG(R);
michael@0 276 mDirection=NSBIDI_RTL;
michael@0 277 } else {
michael@0 278 mFlags=DIRPROP_FLAG(L);
michael@0 279 mDirection=NSBIDI_LTR;
michael@0 280 }
michael@0 281
michael@0 282 mRunCount=0;
michael@0 283 return NS_OK;
michael@0 284 }
michael@0 285
michael@0 286 mRunCount=-1;
michael@0 287
michael@0 288 /*
michael@0 289 * Get the directional properties,
michael@0 290 * the flags bit-set, and
michael@0 291 * determine the partagraph level if necessary.
michael@0 292 */
michael@0 293 if(GETDIRPROPSMEMORY(aLength)) {
michael@0 294 mDirProps=mDirPropsMemory;
michael@0 295 GetDirProps(aText);
michael@0 296 } else {
michael@0 297 return NS_ERROR_OUT_OF_MEMORY;
michael@0 298 }
michael@0 299
michael@0 300 /* are explicit levels specified? */
michael@0 301 if(aEmbeddingLevels==nullptr) {
michael@0 302 /* no: determine explicit levels according to the (Xn) rules */\
michael@0 303 if(GETLEVELSMEMORY(aLength)) {
michael@0 304 mLevels=mLevelsMemory;
michael@0 305 direction=ResolveExplicitLevels();
michael@0 306 } else {
michael@0 307 return NS_ERROR_OUT_OF_MEMORY;
michael@0 308 }
michael@0 309 } else {
michael@0 310 /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
michael@0 311 mLevels=aEmbeddingLevels;
michael@0 312 nsresult rv = CheckExplicitLevels(&direction);
michael@0 313 if(NS_FAILED(rv)) {
michael@0 314 return rv;
michael@0 315 }
michael@0 316 }
michael@0 317
michael@0 318 /*
michael@0 319 * The steps after (X9) in the Bidi algorithm are performed only if
michael@0 320 * the paragraph text has mixed directionality!
michael@0 321 */
michael@0 322 switch(direction) {
michael@0 323 case NSBIDI_LTR:
michael@0 324 /* make sure paraLevel is even */
michael@0 325 mParaLevel=(mParaLevel+1)&~1;
michael@0 326
michael@0 327 /* all levels are implicitly at paraLevel (important for GetLevels()) */
michael@0 328 mTrailingWSStart=0;
michael@0 329 break;
michael@0 330 case NSBIDI_RTL:
michael@0 331 /* make sure paraLevel is odd */
michael@0 332 mParaLevel|=1;
michael@0 333
michael@0 334 /* all levels are implicitly at paraLevel (important for GetLevels()) */
michael@0 335 mTrailingWSStart=0;
michael@0 336 break;
michael@0 337 default:
michael@0 338 /*
michael@0 339 * If there are no external levels specified and there
michael@0 340 * are no significant explicit level codes in the text,
michael@0 341 * then we can treat the entire paragraph as one run.
michael@0 342 * Otherwise, we need to perform the following rules on runs of
michael@0 343 * the text with the same embedding levels. (X10)
michael@0 344 * "Significant" explicit level codes are ones that actually
michael@0 345 * affect non-BN characters.
michael@0 346 * Examples for "insignificant" ones are empty embeddings
michael@0 347 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
michael@0 348 */
michael@0 349 if(aEmbeddingLevels==nullptr && !(mFlags&DIRPROP_FLAG_MULTI_RUNS)) {
michael@0 350 ResolveImplicitLevels(0, aLength,
michael@0 351 GET_LR_FROM_LEVEL(mParaLevel),
michael@0 352 GET_LR_FROM_LEVEL(mParaLevel));
michael@0 353 } else {
michael@0 354 /* sor, eor: start and end types of same-level-run */
michael@0 355 nsBidiLevel *levels=mLevels;
michael@0 356 int32_t start, limit=0;
michael@0 357 nsBidiLevel level, nextLevel;
michael@0 358 DirProp sor, eor;
michael@0 359
michael@0 360 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
michael@0 361 level=mParaLevel;
michael@0 362 nextLevel=levels[0];
michael@0 363 if(level<nextLevel) {
michael@0 364 eor=GET_LR_FROM_LEVEL(nextLevel);
michael@0 365 } else {
michael@0 366 eor=GET_LR_FROM_LEVEL(level);
michael@0 367 }
michael@0 368
michael@0 369 do {
michael@0 370 /* determine start and limit of the run (end points just behind the run) */
michael@0 371
michael@0 372 /* the values for this run's start are the same as for the previous run's end */
michael@0 373 sor=eor;
michael@0 374 start=limit;
michael@0 375 level=nextLevel;
michael@0 376
michael@0 377 /* search for the limit of this run */
michael@0 378 while(++limit<aLength && levels[limit]==level) {}
michael@0 379
michael@0 380 /* get the correct level of the next run */
michael@0 381 if(limit<aLength) {
michael@0 382 nextLevel=levels[limit];
michael@0 383 } else {
michael@0 384 nextLevel=mParaLevel;
michael@0 385 }
michael@0 386
michael@0 387 /* determine eor from max(level, nextLevel); sor is last run's eor */
michael@0 388 if((level&~NSBIDI_LEVEL_OVERRIDE)<(nextLevel&~NSBIDI_LEVEL_OVERRIDE)) {
michael@0 389 eor=GET_LR_FROM_LEVEL(nextLevel);
michael@0 390 } else {
michael@0 391 eor=GET_LR_FROM_LEVEL(level);
michael@0 392 }
michael@0 393
michael@0 394 /* if the run consists of overridden directional types, then there
michael@0 395 are no implicit types to be resolved */
michael@0 396 if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
michael@0 397 ResolveImplicitLevels(start, limit, sor, eor);
michael@0 398 }
michael@0 399 } while(limit<aLength);
michael@0 400 }
michael@0 401
michael@0 402 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
michael@0 403 AdjustWSLevels();
michael@0 404 break;
michael@0 405 }
michael@0 406
michael@0 407 mDirection=direction;
michael@0 408 return NS_OK;
michael@0 409 }
michael@0 410
michael@0 411 /* perform (P2)..(P3) ------------------------------------------------------- */
michael@0 412
michael@0 413 /*
michael@0 414 * Get the directional properties for the text,
michael@0 415 * calculate the flags bit-set, and
michael@0 416 * determine the partagraph level if necessary.
michael@0 417 */
michael@0 418 void nsBidi::GetDirProps(const char16_t *aText)
michael@0 419 {
michael@0 420 DirProp *dirProps=mDirPropsMemory; /* mDirProps is const */
michael@0 421
michael@0 422 int32_t i=0, length=mLength;
michael@0 423 Flags flags=0; /* collect all directionalities in the text */
michael@0 424 char16_t uchar;
michael@0 425 DirProp dirProp;
michael@0 426
michael@0 427 if(IS_DEFAULT_LEVEL(mParaLevel)) {
michael@0 428 /* determine the paragraph level (P2..P3) */
michael@0 429 for(;;) {
michael@0 430 uchar=aText[i];
michael@0 431 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
michael@0 432 /* not a surrogate pair */
michael@0 433 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetBidiCat((uint32_t)uchar));
michael@0 434 } else {
michael@0 435 /* a surrogate pair */
michael@0 436 dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */
michael@0 437 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetBidiCat(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
michael@0 438 }
michael@0 439 ++i;
michael@0 440 if(dirProp==L) {
michael@0 441 mParaLevel=0;
michael@0 442 break;
michael@0 443 } else if(dirProp==R || dirProp==AL) {
michael@0 444 mParaLevel=1;
michael@0 445 break;
michael@0 446 } else if(i==length) {
michael@0 447 /*
michael@0 448 * see comment in nsIBidi.h:
michael@0 449 * the DEFAULT_XXX values are designed so that
michael@0 450 * their bit 0 alone yields the intended default
michael@0 451 */
michael@0 452 mParaLevel&=1;
michael@0 453 break;
michael@0 454 }
michael@0 455 }
michael@0 456 }
michael@0 457
michael@0 458 /* get the rest of the directional properties and the flags bits */
michael@0 459 while(i<length) {
michael@0 460 uchar=aText[i];
michael@0 461 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
michael@0 462 /* not a surrogate pair */
michael@0 463 flags|=DIRPROP_FLAG(dirProps[i]=GetBidiCat((uint32_t)uchar));
michael@0 464 } else {
michael@0 465 /* a surrogate pair */
michael@0 466 dirProps[i++]=BN; /* second surrogate in the pair gets the BN type */
michael@0 467 flags|=DIRPROP_FLAG(dirProps[i]=GetBidiCat(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
michael@0 468 }
michael@0 469 ++i;
michael@0 470 }
michael@0 471 if(flags&MASK_EMBEDDING) {
michael@0 472 flags|=DIRPROP_FLAG_LR(mParaLevel);
michael@0 473 }
michael@0 474 mFlags=flags;
michael@0 475 }
michael@0 476
michael@0 477 /* perform (X1)..(X9) ------------------------------------------------------- */
michael@0 478
michael@0 479 /*
michael@0 480 * Resolve the explicit levels as specified by explicit embedding codes.
michael@0 481 * Recalculate the flags to have them reflect the real properties
michael@0 482 * after taking the explicit embeddings into account.
michael@0 483 *
michael@0 484 * The Bidi algorithm is designed to result in the same behavior whether embedding
michael@0 485 * levels are externally specified (from "styled text", supposedly the preferred
michael@0 486 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
michael@0 487 * That is why (X9) instructs to remove all explicit codes (and BN).
michael@0 488 * However, in a real implementation, this removal of these codes and their index
michael@0 489 * positions in the plain text is undesirable since it would result in
michael@0 490 * reallocated, reindexed text.
michael@0 491 * Instead, this implementation leaves the codes in there and just ignores them
michael@0 492 * in the subsequent processing.
michael@0 493 * In order to get the same reordering behavior, positions with a BN or an
michael@0 494 * explicit embedding code just get the same level assigned as the last "real"
michael@0 495 * character.
michael@0 496 *
michael@0 497 * Some implementations, not this one, then overwrite some of these
michael@0 498 * directionality properties at "real" same-level-run boundaries by
michael@0 499 * L or R codes so that the resolution of weak types can be performed on the
michael@0 500 * entire paragraph at once instead of having to parse it once more and
michael@0 501 * perform that resolution on same-level-runs.
michael@0 502 * This limits the scope of the implicit rules in effectively
michael@0 503 * the same way as the run limits.
michael@0 504 *
michael@0 505 * Instead, this implementation does not modify these codes.
michael@0 506 * On one hand, the paragraph has to be scanned for same-level-runs, but
michael@0 507 * on the other hand, this saves another loop to reset these codes,
michael@0 508 * or saves making and modifying a copy of dirProps[].
michael@0 509 *
michael@0 510 *
michael@0 511 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
michael@0 512 *
michael@0 513 *
michael@0 514 * Handling the stack of explicit levels (Xn):
michael@0 515 *
michael@0 516 * With the Bidi stack of explicit levels,
michael@0 517 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
michael@0 518 * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL==61.
michael@0 519 *
michael@0 520 * In order to have a correct push-pop semantics even in the case of overflows,
michael@0 521 * there are two overflow counters:
michael@0 522 * - countOver60 is incremented with each LRx at level 60
michael@0 523 * - from level 60, one RLx increases the level to 61
michael@0 524 * - countOver61 is incremented with each LRx and RLx at level 61
michael@0 525 *
michael@0 526 * Popping levels with PDF must work in the opposite order so that level 61
michael@0 527 * is correct at the correct point. Underflows (too many PDFs) must be checked.
michael@0 528 *
michael@0 529 * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
michael@0 530 */
michael@0 531
michael@0 532 nsBidiDirection nsBidi::ResolveExplicitLevels()
michael@0 533 {
michael@0 534 const DirProp *dirProps=mDirProps;
michael@0 535 nsBidiLevel *levels=mLevels;
michael@0 536
michael@0 537 int32_t i=0, length=mLength;
michael@0 538 Flags flags=mFlags; /* collect all directionalities in the text */
michael@0 539 DirProp dirProp;
michael@0 540 nsBidiLevel level=mParaLevel;
michael@0 541
michael@0 542 nsBidiDirection direction;
michael@0 543
michael@0 544 /* determine if the text is mixed-directional or single-directional */
michael@0 545 direction=DirectionFromFlags(flags);
michael@0 546
michael@0 547 /* we may not need to resolve any explicit levels */
michael@0 548 if(direction!=NSBIDI_MIXED) {
michael@0 549 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
michael@0 550 } else if(!(flags&MASK_EXPLICIT)) {
michael@0 551 /* mixed, but all characters are at the same embedding level */
michael@0 552 /* set all levels to the paragraph level */
michael@0 553 for(i=0; i<length; ++i) {
michael@0 554 levels[i]=level;
michael@0 555 }
michael@0 556 } else {
michael@0 557 /* continue to perform (Xn) */
michael@0 558
michael@0 559 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
michael@0 560 /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
michael@0 561 nsBidiLevel embeddingLevel=level, newLevel, stackTop=0;
michael@0 562
michael@0 563 nsBidiLevel stack[NSBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL */
michael@0 564 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */
michael@0 565
michael@0 566 /* recalculate the flags */
michael@0 567 flags=0;
michael@0 568
michael@0 569 /* since we assume that this is a single paragraph, we ignore (X8) */
michael@0 570 for(i=0; i<length; ++i) {
michael@0 571 dirProp=dirProps[i];
michael@0 572 switch(dirProp) {
michael@0 573 case LRE:
michael@0 574 case LRO:
michael@0 575 /* (X3, X5) */
michael@0 576 newLevel=(embeddingLevel+2)&~(NSBIDI_LEVEL_OVERRIDE|1); /* least greater even level */
michael@0 577 if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 578 stack[stackTop]=embeddingLevel;
michael@0 579 ++stackTop;
michael@0 580 embeddingLevel=newLevel;
michael@0 581 if(dirProp==LRO) {
michael@0 582 embeddingLevel|=NSBIDI_LEVEL_OVERRIDE;
michael@0 583 } else {
michael@0 584 embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE;
michael@0 585 }
michael@0 586 } else if((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 587 ++countOver61;
michael@0 588 } else /* (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL-1 */ {
michael@0 589 ++countOver60;
michael@0 590 }
michael@0 591 flags|=DIRPROP_FLAG(BN);
michael@0 592 break;
michael@0 593 case RLE:
michael@0 594 case RLO:
michael@0 595 /* (X2, X4) */
michael@0 596 newLevel=((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)+1)|1; /* least greater odd level */
michael@0 597 if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 598 stack[stackTop]=embeddingLevel;
michael@0 599 ++stackTop;
michael@0 600 embeddingLevel=newLevel;
michael@0 601 if(dirProp==RLO) {
michael@0 602 embeddingLevel|=NSBIDI_LEVEL_OVERRIDE;
michael@0 603 } else {
michael@0 604 embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE;
michael@0 605 }
michael@0 606 } else {
michael@0 607 ++countOver61;
michael@0 608 }
michael@0 609 flags|=DIRPROP_FLAG(BN);
michael@0 610 break;
michael@0 611 case PDF:
michael@0 612 /* (X7) */
michael@0 613 /* handle all the overflow cases first */
michael@0 614 if(countOver61>0) {
michael@0 615 --countOver61;
michael@0 616 } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) {
michael@0 617 /* handle LRx overflows from level 60 */
michael@0 618 --countOver60;
michael@0 619 } else if(stackTop>0) {
michael@0 620 /* this is the pop operation; it also pops level 61 while countOver60>0 */
michael@0 621 --stackTop;
michael@0 622 embeddingLevel=stack[stackTop];
michael@0 623 /* } else { (underflow) */
michael@0 624 }
michael@0 625 flags|=DIRPROP_FLAG(BN);
michael@0 626 break;
michael@0 627 case B:
michael@0 628 /*
michael@0 629 * We do not really expect to see a paragraph separator (B),
michael@0 630 * but we should do something reasonable with it,
michael@0 631 * especially at the end of the text.
michael@0 632 */
michael@0 633 stackTop=0;
michael@0 634 countOver60=countOver61=0;
michael@0 635 embeddingLevel=level=mParaLevel;
michael@0 636 flags|=DIRPROP_FLAG(B);
michael@0 637 break;
michael@0 638 case BN:
michael@0 639 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
michael@0 640 /* they will get their levels set correctly in AdjustWSLevels() */
michael@0 641 flags|=DIRPROP_FLAG(BN);
michael@0 642 break;
michael@0 643 default:
michael@0 644 /* all other types get the "real" level */
michael@0 645 if(level!=embeddingLevel) {
michael@0 646 level=embeddingLevel;
michael@0 647 if(level&NSBIDI_LEVEL_OVERRIDE) {
michael@0 648 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
michael@0 649 } else {
michael@0 650 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
michael@0 651 }
michael@0 652 }
michael@0 653 if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
michael@0 654 flags|=DIRPROP_FLAG(dirProp);
michael@0 655 }
michael@0 656 break;
michael@0 657 }
michael@0 658
michael@0 659 /*
michael@0 660 * We need to set reasonable levels even on BN codes and
michael@0 661 * explicit codes because we will later look at same-level runs (X10).
michael@0 662 */
michael@0 663 levels[i]=level;
michael@0 664 }
michael@0 665 if(flags&MASK_EMBEDDING) {
michael@0 666 flags|=DIRPROP_FLAG_LR(mParaLevel);
michael@0 667 }
michael@0 668
michael@0 669 /* subsequently, ignore the explicit codes and BN (X9) */
michael@0 670
michael@0 671 /* again, determine if the text is mixed-directional or single-directional */
michael@0 672 mFlags=flags;
michael@0 673 direction=DirectionFromFlags(flags);
michael@0 674 }
michael@0 675 return direction;
michael@0 676 }
michael@0 677
michael@0 678 /*
michael@0 679 * Use a pre-specified embedding levels array:
michael@0 680 *
michael@0 681 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
michael@0 682 * ignore all explicit codes (X9),
michael@0 683 * and check all the preset levels.
michael@0 684 *
michael@0 685 * Recalculate the flags to have them reflect the real properties
michael@0 686 * after taking the explicit embeddings into account.
michael@0 687 */
michael@0 688 nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection)
michael@0 689 {
michael@0 690 const DirProp *dirProps=mDirProps;
michael@0 691 nsBidiLevel *levels=mLevels;
michael@0 692
michael@0 693 int32_t i, length=mLength;
michael@0 694 Flags flags=0; /* collect all directionalities in the text */
michael@0 695 nsBidiLevel level, paraLevel=mParaLevel;
michael@0 696
michael@0 697 for(i=0; i<length; ++i) {
michael@0 698 level=levels[i];
michael@0 699 if(level&NSBIDI_LEVEL_OVERRIDE) {
michael@0 700 /* keep the override flag in levels[i] but adjust the flags */
michael@0 701 level&=~NSBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
michael@0 702 flags|=DIRPROP_FLAG_O(level);
michael@0 703 } else {
michael@0 704 /* set the flags */
michael@0 705 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]);
michael@0 706 }
michael@0 707 if(level<paraLevel || NSBIDI_MAX_EXPLICIT_LEVEL<level) {
michael@0 708 /* level out of bounds */
michael@0 709 *aDirection = NSBIDI_LTR;
michael@0 710 return NS_ERROR_INVALID_ARG;
michael@0 711 }
michael@0 712 }
michael@0 713 if(flags&MASK_EMBEDDING) {
michael@0 714 flags|=DIRPROP_FLAG_LR(mParaLevel);
michael@0 715 }
michael@0 716
michael@0 717 /* determine if the text is mixed-directional or single-directional */
michael@0 718 mFlags=flags;
michael@0 719 *aDirection = DirectionFromFlags(flags);
michael@0 720 return NS_OK;
michael@0 721 }
michael@0 722
michael@0 723 /* determine if the text is mixed-directional or single-directional */
michael@0 724 nsBidiDirection nsBidi::DirectionFromFlags(Flags aFlags)
michael@0 725 {
michael@0 726 /* if the text contains AN and neutrals, then some neutrals may become RTL */
michael@0 727 if(!(aFlags&MASK_RTL || (aFlags&DIRPROP_FLAG(AN) && aFlags&MASK_POSSIBLE_N))) {
michael@0 728 return NSBIDI_LTR;
michael@0 729 } else if(!(aFlags&MASK_LTR)) {
michael@0 730 return NSBIDI_RTL;
michael@0 731 } else {
michael@0 732 return NSBIDI_MIXED;
michael@0 733 }
michael@0 734 }
michael@0 735
michael@0 736 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
michael@0 737
michael@0 738 /*
michael@0 739 * This implementation of the (Wn) rules applies all rules in one pass.
michael@0 740 * In order to do so, it needs a look-ahead of typically 1 character
michael@0 741 * (except for W5: sequences of ET) and keeps track of changes
michael@0 742 * in a rule Wp that affect a later Wq (p<q).
michael@0 743 *
michael@0 744 * historyOfEN is a variable-saver: it contains 4 boolean states;
michael@0 745 * a bit in it set to 1 means:
michael@0 746 * bit 0: the current code is an EN after W2
michael@0 747 * bit 1: the current code is an EN after W4
michael@0 748 * bit 2: the previous code was an EN after W2
michael@0 749 * bit 3: the previous code was an EN after W4
michael@0 750 * In other words, b0..1 have transitions of EN in the current iteration,
michael@0 751 * while b2..3 have the transitions of EN in the previous iteration.
michael@0 752 * A simple historyOfEN<<=2 suffices for the propagation.
michael@0 753 *
michael@0 754 * The (Nn) and (In) rules are also performed in that same single loop,
michael@0 755 * but effectively one iteration behind for white space.
michael@0 756 *
michael@0 757 * Since all implicit rules are performed in one step, it is not necessary
michael@0 758 * to actually store the intermediate directional properties in dirProps[].
michael@0 759 */
michael@0 760
michael@0 761 #define EN_SHIFT 2
michael@0 762 #define EN_AFTER_W2 1
michael@0 763 #define EN_AFTER_W4 2
michael@0 764 #define EN_ALL 3
michael@0 765 #define PREV_EN_AFTER_W2 4
michael@0 766 #define PREV_EN_AFTER_W4 8
michael@0 767
michael@0 768 void nsBidi::ResolveImplicitLevels(int32_t aStart, int32_t aLimit,
michael@0 769 DirProp aSOR, DirProp aEOR)
michael@0 770 {
michael@0 771 const DirProp *dirProps=mDirProps;
michael@0 772 nsBidiLevel *levels=mLevels;
michael@0 773
michael@0 774 int32_t i, next, neutralStart=-1;
michael@0 775 DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral;
michael@0 776 uint8_t historyOfEN;
michael@0 777
michael@0 778 /* initialize: current at aSOR, next at aStart (it is aStart<aLimit) */
michael@0 779 next=aStart;
michael@0 780 beforeNeutral=dirProp=lastStrong=aSOR;
michael@0 781 nextDirProp=dirProps[next];
michael@0 782 historyOfEN=0;
michael@0 783
michael@0 784 /*
michael@0 785 * In all steps of this implementation, BN and explicit embedding codes
michael@0 786 * must be treated as if they didn't exist (X9).
michael@0 787 * They will get levels set before a non-neutral character, and remain
michael@0 788 * undefined before a neutral one, but AdjustWSLevels() will take care
michael@0 789 * of all of them.
michael@0 790 */
michael@0 791 while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) {
michael@0 792 if(++next<aLimit) {
michael@0 793 nextDirProp=dirProps[next];
michael@0 794 } else {
michael@0 795 nextDirProp=aEOR;
michael@0 796 break;
michael@0 797 }
michael@0 798 }
michael@0 799
michael@0 800 /* loop for entire run */
michael@0 801 while(next<aLimit) {
michael@0 802 /* advance */
michael@0 803 prevDirProp=dirProp;
michael@0 804 dirProp=nextDirProp;
michael@0 805 i=next;
michael@0 806 do {
michael@0 807 if(++next<aLimit) {
michael@0 808 nextDirProp=dirProps[next];
michael@0 809 } else {
michael@0 810 nextDirProp=aEOR;
michael@0 811 break;
michael@0 812 }
michael@0 813 } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT);
michael@0 814 historyOfEN<<=EN_SHIFT;
michael@0 815
michael@0 816 /* (W1..W7) */
michael@0 817 switch(dirProp) {
michael@0 818 case L:
michael@0 819 lastStrong=L;
michael@0 820 break;
michael@0 821 case R:
michael@0 822 lastStrong=R;
michael@0 823 break;
michael@0 824 case AL:
michael@0 825 /* (W3) */
michael@0 826 lastStrong=AL;
michael@0 827 dirProp=R;
michael@0 828 break;
michael@0 829 case EN:
michael@0 830 /* we have to set historyOfEN correctly */
michael@0 831 if(lastStrong==AL) {
michael@0 832 /* (W2) */
michael@0 833 dirProp=AN;
michael@0 834 } else {
michael@0 835 if(lastStrong==L) {
michael@0 836 /* (W7) */
michael@0 837 dirProp=L;
michael@0 838 }
michael@0 839 /* this EN stays after (W2) and (W4) - at least before (W7) */
michael@0 840 historyOfEN|=EN_ALL;
michael@0 841 }
michael@0 842 break;
michael@0 843 case ES:
michael@0 844 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
michael@0 845 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
michael@0 846 ) {
michael@0 847 /* (W4) */
michael@0 848 if(lastStrong!=L) {
michael@0 849 dirProp=EN;
michael@0 850 } else {
michael@0 851 /* (W7) */
michael@0 852 dirProp=L;
michael@0 853 }
michael@0 854 historyOfEN|=EN_AFTER_W4;
michael@0 855 } else {
michael@0 856 /* (W6) */
michael@0 857 dirProp=O_N;
michael@0 858 }
michael@0 859 break;
michael@0 860 case CS:
michael@0 861 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
michael@0 862 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
michael@0 863 ) {
michael@0 864 /* (W4) */
michael@0 865 if(lastStrong!=L) {
michael@0 866 dirProp=EN;
michael@0 867 } else {
michael@0 868 /* (W7) */
michael@0 869 dirProp=L;
michael@0 870 }
michael@0 871 historyOfEN|=EN_AFTER_W4;
michael@0 872 } else if(prevDirProp==AN && /* previous was AN */
michael@0 873 (nextDirProp==AN || /* next is AN */
michael@0 874 (nextDirProp==EN && lastStrong==AL)) /* or (W2) will make it one */
michael@0 875 ) {
michael@0 876 /* (W4) */
michael@0 877 dirProp=AN;
michael@0 878 } else {
michael@0 879 /* (W6) */
michael@0 880 dirProp=O_N;
michael@0 881 }
michael@0 882 break;
michael@0 883 case ET:
michael@0 884 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
michael@0 885 while(next<aLimit && DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
michael@0 886 if(++next<aLimit) {
michael@0 887 nextDirProp=dirProps[next];
michael@0 888 } else {
michael@0 889 nextDirProp=aEOR;
michael@0 890 break;
michael@0 891 }
michael@0 892 }
michael@0 893
michael@0 894 if( historyOfEN&PREV_EN_AFTER_W4 || /* previous was EN before (W5) */
michael@0 895 (nextDirProp==EN && lastStrong!=AL) /* next is EN and (W2) won't make it AN */
michael@0 896 ) {
michael@0 897 /* (W5) */
michael@0 898 if(lastStrong!=L) {
michael@0 899 dirProp=EN;
michael@0 900 } else {
michael@0 901 /* (W7) */
michael@0 902 dirProp=L;
michael@0 903 }
michael@0 904 } else {
michael@0 905 /* (W6) */
michael@0 906 dirProp=O_N;
michael@0 907 }
michael@0 908
michael@0 909 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
michael@0 910 break;
michael@0 911 case NSM:
michael@0 912 /* (W1) */
michael@0 913 dirProp=prevDirProp;
michael@0 914 /* set historyOfEN back to prevDirProp's historyOfEN */
michael@0 915 historyOfEN>>=EN_SHIFT;
michael@0 916 /*
michael@0 917 * Technically, this should be done before the switch() in the form
michael@0 918 * if(nextDirProp==NSM) {
michael@0 919 * dirProps[next]=nextDirProp=dirProp;
michael@0 920 * }
michael@0 921 *
michael@0 922 * - effectively one iteration ahead.
michael@0 923 * However, whether the next dirProp is NSM or is equal to the current dirProp
michael@0 924 * does not change the outcome of any condition in (W2)..(W7).
michael@0 925 */
michael@0 926 break;
michael@0 927 default:
michael@0 928 break;
michael@0 929 }
michael@0 930
michael@0 931 /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
michael@0 932
michael@0 933 /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
michael@0 934 /* this is one iteration late for the neutrals */
michael@0 935 if(DIRPROP_FLAG(dirProp)&MASK_N) {
michael@0 936 if(neutralStart<0) {
michael@0 937 /* start of a sequence of neutrals */
michael@0 938 neutralStart=i;
michael@0 939 beforeNeutral=prevDirProp;
michael@0 940 }
michael@0 941 } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
michael@0 942 /*
michael@0 943 * Note that all levels[] values are still the same at this
michael@0 944 * point because this function is called for an entire
michael@0 945 * same-level run.
michael@0 946 * Therefore, we need to read only one actual level.
michael@0 947 */
michael@0 948 nsBidiLevel level=levels[i];
michael@0 949
michael@0 950 if(neutralStart>=0) {
michael@0 951 nsBidiLevel final;
michael@0 952 /* end of a sequence of neutrals (dirProp is "afterNeutral") */
michael@0 953 if(beforeNeutral==L) {
michael@0 954 if(dirProp==L) {
michael@0 955 final=0; /* make all neutrals L (N1) */
michael@0 956 } else {
michael@0 957 final=level; /* make all neutrals "e" (N2) */
michael@0 958 }
michael@0 959 } else /* beforeNeutral is one of { R, EN, AN } */ {
michael@0 960 if(dirProp==L) {
michael@0 961 final=level; /* make all neutrals "e" (N2) */
michael@0 962 } else {
michael@0 963 final=1; /* make all neutrals R (N1) */
michael@0 964 }
michael@0 965 }
michael@0 966 /* perform (In) on the sequence of neutrals */
michael@0 967 if((level^final)&1) {
michael@0 968 /* do something only if we need to _change_ the level */
michael@0 969 do {
michael@0 970 ++levels[neutralStart];
michael@0 971 } while(++neutralStart<i);
michael@0 972 }
michael@0 973 neutralStart=-1;
michael@0 974 }
michael@0 975
michael@0 976 /* perform (In) on the non-neutral character */
michael@0 977 /*
michael@0 978 * in the cases of (W5), processing a sequence of ET,
michael@0 979 * and of (X9), skipping BN,
michael@0 980 * there may be multiple characters from i to <next
michael@0 981 * that all get (virtually) the same dirProp and (really) the same level
michael@0 982 */
michael@0 983 if(dirProp==L) {
michael@0 984 if(level&1) {
michael@0 985 ++level;
michael@0 986 } else {
michael@0 987 i=next; /* we keep the levels */
michael@0 988 }
michael@0 989 } else if(dirProp==R) {
michael@0 990 if(!(level&1)) {
michael@0 991 ++level;
michael@0 992 } else {
michael@0 993 i=next; /* we keep the levels */
michael@0 994 }
michael@0 995 } else /* EN or AN */ {
michael@0 996 level=(level+2)&~1; /* least greater even level */
michael@0 997 }
michael@0 998
michael@0 999 /* apply the new level to the sequence, if necessary */
michael@0 1000 while(i<next) {
michael@0 1001 levels[i++]=level;
michael@0 1002 }
michael@0 1003 }
michael@0 1004 }
michael@0 1005
michael@0 1006 /* perform (Nn) - here,
michael@0 1007 the character after the neutrals is aEOR, which is either L or R */
michael@0 1008 /* this is one iteration late for the neutrals */
michael@0 1009 if(neutralStart>=0) {
michael@0 1010 /*
michael@0 1011 * Note that all levels[] values are still the same at this
michael@0 1012 * point because this function is called for an entire
michael@0 1013 * same-level run.
michael@0 1014 * Therefore, we need to read only one actual level.
michael@0 1015 */
michael@0 1016 nsBidiLevel level=levels[neutralStart], final;
michael@0 1017
michael@0 1018 /* end of a sequence of neutrals (aEOR is "afterNeutral") */
michael@0 1019 if(beforeNeutral==L) {
michael@0 1020 if(aEOR==L) {
michael@0 1021 final=0; /* make all neutrals L (N1) */
michael@0 1022 } else {
michael@0 1023 final=level; /* make all neutrals "e" (N2) */
michael@0 1024 }
michael@0 1025 } else /* beforeNeutral is one of { R, EN, AN } */ {
michael@0 1026 if(aEOR==L) {
michael@0 1027 final=level; /* make all neutrals "e" (N2) */
michael@0 1028 } else {
michael@0 1029 final=1; /* make all neutrals R (N1) */
michael@0 1030 }
michael@0 1031 }
michael@0 1032 /* perform (In) on the sequence of neutrals */
michael@0 1033 if((level^final)&1) {
michael@0 1034 /* do something only if we need to _change_ the level */
michael@0 1035 do {
michael@0 1036 ++levels[neutralStart];
michael@0 1037 } while(++neutralStart<aLimit);
michael@0 1038 }
michael@0 1039 }
michael@0 1040 }
michael@0 1041
michael@0 1042 /* perform (L1) and (X9) ---------------------------------------------------- */
michael@0 1043
michael@0 1044 /*
michael@0 1045 * Reset the embedding levels for some non-graphic characters (L1).
michael@0 1046 * This function also sets appropriate levels for BN, and
michael@0 1047 * explicit embedding types that are supposed to have been removed
michael@0 1048 * from the paragraph in (X9).
michael@0 1049 */
michael@0 1050 void nsBidi::AdjustWSLevels()
michael@0 1051 {
michael@0 1052 const DirProp *dirProps=mDirProps;
michael@0 1053 nsBidiLevel *levels=mLevels;
michael@0 1054 int32_t i;
michael@0 1055
michael@0 1056 if(mFlags&MASK_WS) {
michael@0 1057 nsBidiLevel paraLevel=mParaLevel;
michael@0 1058 Flags flag;
michael@0 1059
michael@0 1060 i=mTrailingWSStart;
michael@0 1061 while(i>0) {
michael@0 1062 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
michael@0 1063 while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) {
michael@0 1064 levels[i]=paraLevel;
michael@0 1065 }
michael@0 1066
michael@0 1067 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
michael@0 1068 /* here, i+1 is guaranteed to be <length */
michael@0 1069 while(i>0) {
michael@0 1070 flag=DIRPROP_FLAG(dirProps[--i]);
michael@0 1071 if(flag&MASK_BN_EXPLICIT) {
michael@0 1072 levels[i]=levels[i+1];
michael@0 1073 } else if(flag&MASK_B_S) {
michael@0 1074 levels[i]=paraLevel;
michael@0 1075 break;
michael@0 1076 }
michael@0 1077 }
michael@0 1078 }
michael@0 1079 }
michael@0 1080
michael@0 1081 /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */
michael@0 1082 /* (a separate loop can be optimized more easily by a compiler) */
michael@0 1083 if(mFlags&MASK_OVERRIDE) {
michael@0 1084 for(i=mTrailingWSStart; i>0;) {
michael@0 1085 levels[--i]&=~NSBIDI_LEVEL_OVERRIDE;
michael@0 1086 }
michael@0 1087 }
michael@0 1088 }
michael@0 1089
michael@0 1090 nsresult nsBidi::GetDirection(nsBidiDirection* aDirection)
michael@0 1091 {
michael@0 1092 *aDirection = mDirection;
michael@0 1093 return NS_OK;
michael@0 1094 }
michael@0 1095
michael@0 1096 nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel)
michael@0 1097 {
michael@0 1098 *aParaLevel = mParaLevel;
michael@0 1099 return NS_OK;
michael@0 1100 }
michael@0 1101 #ifdef FULL_BIDI_ENGINE
michael@0 1102
michael@0 1103 /* -------------------------------------------------------------------------- */
michael@0 1104
michael@0 1105 nsresult nsBidi::GetLength(int32_t* aLength)
michael@0 1106 {
michael@0 1107 *aLength = mLength;
michael@0 1108 return NS_OK;
michael@0 1109 }
michael@0 1110
michael@0 1111 /*
michael@0 1112 * General remarks about the functions in this section:
michael@0 1113 *
michael@0 1114 * These functions deal with the aspects of potentially mixed-directional
michael@0 1115 * text in a single paragraph or in a line of a single paragraph
michael@0 1116 * which has already been processed according to
michael@0 1117 * the Unicode 3.0 Bidi algorithm as defined in
michael@0 1118 * http://www.unicode.org/unicode/reports/tr9/ , version 5,
michael@0 1119 * also described in The Unicode Standard, Version 3.0 .
michael@0 1120 *
michael@0 1121 * This means that there is a nsBidi object with a levels
michael@0 1122 * and a dirProps array.
michael@0 1123 * paraLevel and direction are also set.
michael@0 1124 * Only if the length of the text is zero, then levels==dirProps==nullptr.
michael@0 1125 *
michael@0 1126 * The overall directionality of the paragraph
michael@0 1127 * or line is used to bypass the reordering steps if possible.
michael@0 1128 * Even purely RTL text does not need reordering there because
michael@0 1129 * the getLogical/VisualIndex() functions can compute the
michael@0 1130 * index on the fly in such a case.
michael@0 1131 *
michael@0 1132 * The implementation of the access to same-level-runs and of the reordering
michael@0 1133 * do attempt to provide better performance and less memory usage compared to
michael@0 1134 * a direct implementation of especially rule (L2) with an array of
michael@0 1135 * one (32-bit) integer per text character.
michael@0 1136 *
michael@0 1137 * Here, the levels array is scanned as soon as necessary, and a vector of
michael@0 1138 * same-level-runs is created. Reordering then is done on this vector.
michael@0 1139 * For each run of text positions that were resolved to the same level,
michael@0 1140 * only 8 bytes are stored: the first text position of the run and the visual
michael@0 1141 * position behind the run after reordering.
michael@0 1142 * One sign bit is used to hold the directionality of the run.
michael@0 1143 * This is inefficient if there are many very short runs. If the average run
michael@0 1144 * length is <2, then this uses more memory.
michael@0 1145 *
michael@0 1146 * In a further attempt to save memory, the levels array is never changed
michael@0 1147 * after all the resolution rules (Xn, Wn, Nn, In).
michael@0 1148 * Many functions have to consider the field trailingWSStart:
michael@0 1149 * if it is less than length, then there is an implicit trailing run
michael@0 1150 * at the paraLevel,
michael@0 1151 * which is not reflected in the levels array.
michael@0 1152 * This allows a line nsBidi object to use the same levels array as
michael@0 1153 * its paragraph parent object.
michael@0 1154 *
michael@0 1155 * When a nsBidi object is created for a line of a paragraph, then the
michael@0 1156 * paragraph's levels and dirProps arrays are reused by way of setting
michael@0 1157 * a pointer into them, not by copying. This again saves memory and forbids to
michael@0 1158 * change the now shared levels for (L1).
michael@0 1159 */
michael@0 1160 nsresult nsBidi::SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit)
michael@0 1161 {
michael@0 1162 nsBidi* pParent = (nsBidi*)aParaBidi;
michael@0 1163 int32_t length;
michael@0 1164
michael@0 1165 /* check the argument values */
michael@0 1166 if(pParent==nullptr) {
michael@0 1167 return NS_ERROR_INVALID_POINTER;
michael@0 1168 } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) {
michael@0 1169 return NS_ERROR_INVALID_ARG;
michael@0 1170 }
michael@0 1171
michael@0 1172 /* set members from our aParaBidi parent */
michael@0 1173 length=mLength=aLimit-aStart;
michael@0 1174 mParaLevel=pParent->mParaLevel;
michael@0 1175
michael@0 1176 mRuns=nullptr;
michael@0 1177 mFlags=0;
michael@0 1178
michael@0 1179 if(length>0) {
michael@0 1180 mDirProps=pParent->mDirProps+aStart;
michael@0 1181 mLevels=pParent->mLevels+aStart;
michael@0 1182 mRunCount=-1;
michael@0 1183
michael@0 1184 if(pParent->mDirection!=NSBIDI_MIXED) {
michael@0 1185 /* the parent is already trivial */
michael@0 1186 mDirection=pParent->mDirection;
michael@0 1187
michael@0 1188 /*
michael@0 1189 * The parent's levels are all either
michael@0 1190 * implicitly or explicitly ==paraLevel;
michael@0 1191 * do the same here.
michael@0 1192 */
michael@0 1193 if(pParent->mTrailingWSStart<=aStart) {
michael@0 1194 mTrailingWSStart=0;
michael@0 1195 } else if(pParent->mTrailingWSStart<aLimit) {
michael@0 1196 mTrailingWSStart=pParent->mTrailingWSStart-aStart;
michael@0 1197 } else {
michael@0 1198 mTrailingWSStart=length;
michael@0 1199 }
michael@0 1200 } else {
michael@0 1201 const nsBidiLevel *levels=mLevels;
michael@0 1202 int32_t i, trailingWSStart;
michael@0 1203 nsBidiLevel level;
michael@0 1204 Flags flags=0;
michael@0 1205
michael@0 1206 SetTrailingWSStart();
michael@0 1207 trailingWSStart=mTrailingWSStart;
michael@0 1208
michael@0 1209 /* recalculate pLineBidi->direction */
michael@0 1210 if(trailingWSStart==0) {
michael@0 1211 /* all levels are at paraLevel */
michael@0 1212 mDirection=(nsBidiDirection)(mParaLevel&1);
michael@0 1213 } else {
michael@0 1214 /* get the level of the first character */
michael@0 1215 level=levels[0]&1;
michael@0 1216
michael@0 1217 /* if there is anything of a different level, then the line is mixed */
michael@0 1218 if(trailingWSStart<length && (mParaLevel&1)!=level) {
michael@0 1219 /* the trailing WS is at paraLevel, which differs from levels[0] */
michael@0 1220 mDirection=NSBIDI_MIXED;
michael@0 1221 } else {
michael@0 1222 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
michael@0 1223 i=1;
michael@0 1224 for(;;) {
michael@0 1225 if(i==trailingWSStart) {
michael@0 1226 /* the direction values match those in level */
michael@0 1227 mDirection=(nsBidiDirection)level;
michael@0 1228 break;
michael@0 1229 } else if((levels[i]&1)!=level) {
michael@0 1230 mDirection=NSBIDI_MIXED;
michael@0 1231 break;
michael@0 1232 }
michael@0 1233 ++i;
michael@0 1234 }
michael@0 1235 }
michael@0 1236 }
michael@0 1237
michael@0 1238 switch(mDirection) {
michael@0 1239 case NSBIDI_LTR:
michael@0 1240 /* make sure paraLevel is even */
michael@0 1241 mParaLevel=(mParaLevel+1)&~1;
michael@0 1242
michael@0 1243 /* all levels are implicitly at paraLevel (important for GetLevels()) */
michael@0 1244 mTrailingWSStart=0;
michael@0 1245 break;
michael@0 1246 case NSBIDI_RTL:
michael@0 1247 /* make sure paraLevel is odd */
michael@0 1248 mParaLevel|=1;
michael@0 1249
michael@0 1250 /* all levels are implicitly at paraLevel (important for GetLevels()) */
michael@0 1251 mTrailingWSStart=0;
michael@0 1252 break;
michael@0 1253 default:
michael@0 1254 break;
michael@0 1255 }
michael@0 1256 }
michael@0 1257 } else {
michael@0 1258 /* create an object for a zero-length line */
michael@0 1259 mDirection=mParaLevel&1 ? NSBIDI_RTL : NSBIDI_LTR;
michael@0 1260 mTrailingWSStart=mRunCount=0;
michael@0 1261
michael@0 1262 mDirProps=nullptr;
michael@0 1263 mLevels=nullptr;
michael@0 1264 }
michael@0 1265 return NS_OK;
michael@0 1266 }
michael@0 1267
michael@0 1268 /* handle trailing WS (L1) -------------------------------------------------- */
michael@0 1269
michael@0 1270 /*
michael@0 1271 * SetTrailingWSStart() sets the start index for a trailing
michael@0 1272 * run of WS in the line. This is necessary because we do not modify
michael@0 1273 * the paragraph's levels array that we just point into.
michael@0 1274 * Using trailingWSStart is another form of performing (L1).
michael@0 1275 *
michael@0 1276 * To make subsequent operations easier, we also include the run
michael@0 1277 * before the WS if it is at the paraLevel - we merge the two here.
michael@0 1278 */
michael@0 1279 void nsBidi::SetTrailingWSStart() {
michael@0 1280 /* mDirection!=NSBIDI_MIXED */
michael@0 1281
michael@0 1282 const DirProp *dirProps=mDirProps;
michael@0 1283 nsBidiLevel *levels=mLevels;
michael@0 1284 int32_t start=mLength;
michael@0 1285 nsBidiLevel paraLevel=mParaLevel;
michael@0 1286
michael@0 1287 /* go backwards across all WS, BN, explicit codes */
michael@0 1288 while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
michael@0 1289 --start;
michael@0 1290 }
michael@0 1291
michael@0 1292 /* if the WS run can be merged with the previous run then do so here */
michael@0 1293 while(start>0 && levels[start-1]==paraLevel) {
michael@0 1294 --start;
michael@0 1295 }
michael@0 1296
michael@0 1297 mTrailingWSStart=start;
michael@0 1298 }
michael@0 1299
michael@0 1300 nsresult nsBidi::GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel)
michael@0 1301 {
michael@0 1302 /* return paraLevel if in the trailing WS run, otherwise the real level */
michael@0 1303 if(aCharIndex<0 || mLength<=aCharIndex) {
michael@0 1304 *aLevel = 0;
michael@0 1305 } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) {
michael@0 1306 *aLevel = mParaLevel;
michael@0 1307 } else {
michael@0 1308 *aLevel = mLevels[aCharIndex];
michael@0 1309 }
michael@0 1310 return NS_OK;
michael@0 1311 }
michael@0 1312
michael@0 1313 nsresult nsBidi::GetLevels(nsBidiLevel** aLevels)
michael@0 1314 {
michael@0 1315 int32_t start, length;
michael@0 1316
michael@0 1317 length = mLength;
michael@0 1318 if(length<=0) {
michael@0 1319 *aLevels = nullptr;
michael@0 1320 return NS_ERROR_INVALID_ARG;
michael@0 1321 }
michael@0 1322
michael@0 1323 start = mTrailingWSStart;
michael@0 1324 if(start==length) {
michael@0 1325 /* the current levels array reflects the WS run */
michael@0 1326 *aLevels = mLevels;
michael@0 1327 return NS_OK;
michael@0 1328 }
michael@0 1329
michael@0 1330 /*
michael@0 1331 * After the previous if(), we know that the levels array
michael@0 1332 * has an implicit trailing WS run and therefore does not fully
michael@0 1333 * reflect itself all the levels.
michael@0 1334 * This must be a nsBidi object for a line, and
michael@0 1335 * we need to create a new levels array.
michael@0 1336 */
michael@0 1337
michael@0 1338 if(GETLEVELSMEMORY(length)) {
michael@0 1339 nsBidiLevel *levels=mLevelsMemory;
michael@0 1340
michael@0 1341 if(start>0 && levels!=mLevels) {
michael@0 1342 memcpy(levels, mLevels, start);
michael@0 1343 }
michael@0 1344 memset(levels+start, mParaLevel, length-start);
michael@0 1345
michael@0 1346 /* this new levels array is set for the line and reflects the WS run */
michael@0 1347 mTrailingWSStart=length;
michael@0 1348 *aLevels=mLevels=levels;
michael@0 1349 return NS_OK;
michael@0 1350 } else {
michael@0 1351 /* out of memory */
michael@0 1352 *aLevels = nullptr;
michael@0 1353 return NS_ERROR_OUT_OF_MEMORY;
michael@0 1354 }
michael@0 1355 }
michael@0 1356 #endif // FULL_BIDI_ENGINE
michael@0 1357
michael@0 1358 nsresult nsBidi::GetCharTypeAt(int32_t aCharIndex, nsCharType* pType)
michael@0 1359 {
michael@0 1360 if(aCharIndex<0 || mLength<=aCharIndex) {
michael@0 1361 return NS_ERROR_INVALID_ARG;
michael@0 1362 }
michael@0 1363 *pType = (nsCharType)mDirProps[aCharIndex];
michael@0 1364 return NS_OK;
michael@0 1365 }
michael@0 1366
michael@0 1367 nsresult nsBidi::GetLogicalRun(int32_t aLogicalStart, int32_t *aLogicalLimit, nsBidiLevel *aLevel)
michael@0 1368 {
michael@0 1369 int32_t length = mLength;
michael@0 1370
michael@0 1371 if(aLogicalStart<0 || length<=aLogicalStart) {
michael@0 1372 return NS_ERROR_INVALID_ARG;
michael@0 1373 }
michael@0 1374
michael@0 1375 if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) {
michael@0 1376 if(aLogicalLimit!=nullptr) {
michael@0 1377 *aLogicalLimit=length;
michael@0 1378 }
michael@0 1379 if(aLevel!=nullptr) {
michael@0 1380 *aLevel=mParaLevel;
michael@0 1381 }
michael@0 1382 } else {
michael@0 1383 nsBidiLevel *levels=mLevels;
michael@0 1384 nsBidiLevel level=levels[aLogicalStart];
michael@0 1385
michael@0 1386 /* search for the end of the run */
michael@0 1387 length=mTrailingWSStart;
michael@0 1388 while(++aLogicalStart<length && level==levels[aLogicalStart]) {}
michael@0 1389
michael@0 1390 if(aLogicalLimit!=nullptr) {
michael@0 1391 *aLogicalLimit=aLogicalStart;
michael@0 1392 }
michael@0 1393 if(aLevel!=nullptr) {
michael@0 1394 *aLevel=level;
michael@0 1395 }
michael@0 1396 }
michael@0 1397 return NS_OK;
michael@0 1398 }
michael@0 1399
michael@0 1400 /* runs API functions ------------------------------------------------------- */
michael@0 1401
michael@0 1402 nsresult nsBidi::CountRuns(int32_t* aRunCount)
michael@0 1403 {
michael@0 1404 if(mRunCount<0 && !GetRuns()) {
michael@0 1405 return NS_ERROR_OUT_OF_MEMORY;
michael@0 1406 } else {
michael@0 1407 if (aRunCount)
michael@0 1408 *aRunCount = mRunCount;
michael@0 1409 return NS_OK;
michael@0 1410 }
michael@0 1411 }
michael@0 1412
michael@0 1413 nsresult nsBidi::GetVisualRun(int32_t aRunIndex, int32_t *aLogicalStart, int32_t *aLength, nsBidiDirection *aDirection)
michael@0 1414 {
michael@0 1415 if( aRunIndex<0 ||
michael@0 1416 (mRunCount==-1 && !GetRuns()) ||
michael@0 1417 aRunIndex>=mRunCount
michael@0 1418 ) {
michael@0 1419 *aDirection = NSBIDI_LTR;
michael@0 1420 return NS_OK;
michael@0 1421 } else {
michael@0 1422 int32_t start=mRuns[aRunIndex].logicalStart;
michael@0 1423 if(aLogicalStart!=nullptr) {
michael@0 1424 *aLogicalStart=GET_INDEX(start);
michael@0 1425 }
michael@0 1426 if(aLength!=nullptr) {
michael@0 1427 if(aRunIndex>0) {
michael@0 1428 *aLength=mRuns[aRunIndex].visualLimit-
michael@0 1429 mRuns[aRunIndex-1].visualLimit;
michael@0 1430 } else {
michael@0 1431 *aLength=mRuns[0].visualLimit;
michael@0 1432 }
michael@0 1433 }
michael@0 1434 *aDirection = (nsBidiDirection)GET_ODD_BIT(start);
michael@0 1435 return NS_OK;
michael@0 1436 }
michael@0 1437 }
michael@0 1438
michael@0 1439 /* compute the runs array --------------------------------------------------- */
michael@0 1440
michael@0 1441 /*
michael@0 1442 * Compute the runs array from the levels array.
michael@0 1443 * After GetRuns() returns true, runCount is guaranteed to be >0
michael@0 1444 * and the runs are reordered.
michael@0 1445 * Odd-level runs have visualStart on their visual right edge and
michael@0 1446 * they progress visually to the left.
michael@0 1447 */
michael@0 1448 bool nsBidi::GetRuns()
michael@0 1449 {
michael@0 1450 if(mDirection!=NSBIDI_MIXED) {
michael@0 1451 /* simple, single-run case - this covers length==0 */
michael@0 1452 GetSingleRun(mParaLevel);
michael@0 1453 } else /* NSBIDI_MIXED, length>0 */ {
michael@0 1454 /* mixed directionality */
michael@0 1455 int32_t length=mLength, limit=length;
michael@0 1456
michael@0 1457 /*
michael@0 1458 * If there are WS characters at the end of the line
michael@0 1459 * and the run preceding them has a level different from
michael@0 1460 * paraLevel, then they will form their own run at paraLevel (L1).
michael@0 1461 * Count them separately.
michael@0 1462 * We need some special treatment for this in order to not
michael@0 1463 * modify the levels array which a line nsBidi object shares
michael@0 1464 * with its paragraph parent and its other line siblings.
michael@0 1465 * In other words, for the trailing WS, it may be
michael@0 1466 * levels[]!=paraLevel but we have to treat it like it were so.
michael@0 1467 */
michael@0 1468 limit=mTrailingWSStart;
michael@0 1469 if(limit==0) {
michael@0 1470 /* there is only WS on this line */
michael@0 1471 GetSingleRun(mParaLevel);
michael@0 1472 } else {
michael@0 1473 nsBidiLevel *levels=mLevels;
michael@0 1474 int32_t i, runCount;
michael@0 1475 nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */
michael@0 1476
michael@0 1477 /* count the runs, there is at least one non-WS run, and limit>0 */
michael@0 1478 runCount=0;
michael@0 1479 for(i=0; i<limit; ++i) {
michael@0 1480 /* increment runCount at the start of each run */
michael@0 1481 if(levels[i]!=level) {
michael@0 1482 ++runCount;
michael@0 1483 level=levels[i];
michael@0 1484 }
michael@0 1485 }
michael@0 1486
michael@0 1487 /*
michael@0 1488 * We don't need to see if the last run can be merged with a trailing
michael@0 1489 * WS run because SetTrailingWSStart() would have done that.
michael@0 1490 */
michael@0 1491 if(runCount==1 && limit==length) {
michael@0 1492 /* There is only one non-WS run and no trailing WS-run. */
michael@0 1493 GetSingleRun(levels[0]);
michael@0 1494 } else /* runCount>1 || limit<length */ {
michael@0 1495 /* allocate and set the runs */
michael@0 1496 Run *runs;
michael@0 1497 int32_t runIndex, start;
michael@0 1498 nsBidiLevel minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
michael@0 1499
michael@0 1500 /* now, count a (non-mergable) WS run */
michael@0 1501 if(limit<length) {
michael@0 1502 ++runCount;
michael@0 1503 }
michael@0 1504
michael@0 1505 /* runCount>1 */
michael@0 1506 if(GETRUNSMEMORY(runCount)) {
michael@0 1507 runs=mRunsMemory;
michael@0 1508 } else {
michael@0 1509 return false;
michael@0 1510 }
michael@0 1511
michael@0 1512 /* set the runs */
michael@0 1513 /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
michael@0 1514 /* however, that would take longer and make other functions more complicated */
michael@0 1515 runIndex=0;
michael@0 1516
michael@0 1517 /* search for the run ends */
michael@0 1518 start=0;
michael@0 1519 level=levels[0];
michael@0 1520 if(level<minLevel) {
michael@0 1521 minLevel=level;
michael@0 1522 }
michael@0 1523 if(level>maxLevel) {
michael@0 1524 maxLevel=level;
michael@0 1525 }
michael@0 1526
michael@0 1527 /* initialize visualLimit values with the run lengths */
michael@0 1528 for(i=1; i<limit; ++i) {
michael@0 1529 if(levels[i]!=level) {
michael@0 1530 /* i is another run limit */
michael@0 1531 runs[runIndex].logicalStart=start;
michael@0 1532 runs[runIndex].visualLimit=i-start;
michael@0 1533 start=i;
michael@0 1534
michael@0 1535 level=levels[i];
michael@0 1536 if(level<minLevel) {
michael@0 1537 minLevel=level;
michael@0 1538 }
michael@0 1539 if(level>maxLevel) {
michael@0 1540 maxLevel=level;
michael@0 1541 }
michael@0 1542 ++runIndex;
michael@0 1543 }
michael@0 1544 }
michael@0 1545
michael@0 1546 /* finish the last run at i==limit */
michael@0 1547 runs[runIndex].logicalStart=start;
michael@0 1548 runs[runIndex].visualLimit=limit-start;
michael@0 1549 ++runIndex;
michael@0 1550
michael@0 1551 if(limit<length) {
michael@0 1552 /* there is a separate WS run */
michael@0 1553 runs[runIndex].logicalStart=limit;
michael@0 1554 runs[runIndex].visualLimit=length-limit;
michael@0 1555 if(mParaLevel<minLevel) {
michael@0 1556 minLevel=mParaLevel;
michael@0 1557 }
michael@0 1558 }
michael@0 1559
michael@0 1560 /* set the object fields */
michael@0 1561 mRuns=runs;
michael@0 1562 mRunCount=runCount;
michael@0 1563
michael@0 1564 ReorderLine(minLevel, maxLevel);
michael@0 1565
michael@0 1566 /* now add the direction flags and adjust the visualLimit's to be just that */
michael@0 1567 ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]);
michael@0 1568 limit=runs[0].visualLimit;
michael@0 1569 for(i=1; i<runIndex; ++i) {
michael@0 1570 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
michael@0 1571 limit=runs[i].visualLimit+=limit;
michael@0 1572 }
michael@0 1573
michael@0 1574 /* same for the trailing WS run */
michael@0 1575 if(runIndex<runCount) {
michael@0 1576 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, mParaLevel);
michael@0 1577 runs[runIndex].visualLimit+=limit;
michael@0 1578 }
michael@0 1579 }
michael@0 1580 }
michael@0 1581 }
michael@0 1582 return true;
michael@0 1583 }
michael@0 1584
michael@0 1585 /* in trivial cases there is only one trivial run; called by GetRuns() */
michael@0 1586 void nsBidi::GetSingleRun(nsBidiLevel aLevel)
michael@0 1587 {
michael@0 1588 /* simple, single-run case */
michael@0 1589 mRuns=mSimpleRuns;
michael@0 1590 mRunCount=1;
michael@0 1591
michael@0 1592 /* fill and reorder the single run */
michael@0 1593 mRuns[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, aLevel);
michael@0 1594 mRuns[0].visualLimit=mLength;
michael@0 1595 }
michael@0 1596
michael@0 1597 /* reorder the runs array (L2) ---------------------------------------------- */
michael@0 1598
michael@0 1599 /*
michael@0 1600 * Reorder the same-level runs in the runs array.
michael@0 1601 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
michael@0 1602 * All the visualStart fields=logical start before reordering.
michael@0 1603 * The "odd" bits are not set yet.
michael@0 1604 *
michael@0 1605 * Reordering with this data structure lends itself to some handy shortcuts:
michael@0 1606 *
michael@0 1607 * Since each run is moved but not modified, and since at the initial maxLevel
michael@0 1608 * each sequence of same-level runs consists of only one run each, we
michael@0 1609 * don't need to do anything there and can predecrement maxLevel.
michael@0 1610 * In many simple cases, the reordering is thus done entirely in the
michael@0 1611 * index mapping.
michael@0 1612 * Also, reordering occurs only down to the lowest odd level that occurs,
michael@0 1613 * which is minLevel|1. However, if the lowest level itself is odd, then
michael@0 1614 * in the last reordering the sequence of the runs at this level or higher
michael@0 1615 * will be all runs, and we don't need the elaborate loop to search for them.
michael@0 1616 * This is covered by ++minLevel instead of minLevel|=1 followed
michael@0 1617 * by an extra reorder-all after the reorder-some loop.
michael@0 1618 * About a trailing WS run:
michael@0 1619 * Such a run would need special treatment because its level is not
michael@0 1620 * reflected in levels[] if this is not a paragraph object.
michael@0 1621 * Instead, all characters from trailingWSStart on are implicitly at
michael@0 1622 * paraLevel.
michael@0 1623 * However, for all maxLevel>paraLevel, this run will never be reordered
michael@0 1624 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
michael@0 1625 * if minLevel==paraLevel is odd, which is done in the extra segment.
michael@0 1626 * This means that for the main reordering loop we don't need to consider
michael@0 1627 * this run and can --runCount. If it is later part of the all-runs
michael@0 1628 * reordering, then runCount is adjusted accordingly.
michael@0 1629 */
michael@0 1630 void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel)
michael@0 1631 {
michael@0 1632 Run *runs;
michael@0 1633 nsBidiLevel *levels;
michael@0 1634 int32_t firstRun, endRun, limitRun, runCount, temp;
michael@0 1635
michael@0 1636 /* nothing to do? */
michael@0 1637 if(aMaxLevel<=(aMinLevel|1)) {
michael@0 1638 return;
michael@0 1639 }
michael@0 1640
michael@0 1641 /*
michael@0 1642 * Reorder only down to the lowest odd level
michael@0 1643 * and reorder at an odd aMinLevel in a separate, simpler loop.
michael@0 1644 * See comments above for why aMinLevel is always incremented.
michael@0 1645 */
michael@0 1646 ++aMinLevel;
michael@0 1647
michael@0 1648 runs=mRuns;
michael@0 1649 levels=mLevels;
michael@0 1650 runCount=mRunCount;
michael@0 1651
michael@0 1652 /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
michael@0 1653 if(mTrailingWSStart<mLength) {
michael@0 1654 --runCount;
michael@0 1655 }
michael@0 1656
michael@0 1657 while(--aMaxLevel>=aMinLevel) {
michael@0 1658 firstRun=0;
michael@0 1659
michael@0 1660 /* loop for all sequences of runs */
michael@0 1661 for(;;) {
michael@0 1662 /* look for a sequence of runs that are all at >=aMaxLevel */
michael@0 1663 /* look for the first run of such a sequence */
michael@0 1664 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<aMaxLevel) {
michael@0 1665 ++firstRun;
michael@0 1666 }
michael@0 1667 if(firstRun>=runCount) {
michael@0 1668 break; /* no more such runs */
michael@0 1669 }
michael@0 1670
michael@0 1671 /* look for the limit run of such a sequence (the run behind it) */
michael@0 1672 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=aMaxLevel;) {}
michael@0 1673
michael@0 1674 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
michael@0 1675 endRun=limitRun-1;
michael@0 1676 while(firstRun<endRun) {
michael@0 1677 temp=runs[firstRun].logicalStart;
michael@0 1678 runs[firstRun].logicalStart=runs[endRun].logicalStart;
michael@0 1679 runs[endRun].logicalStart=temp;
michael@0 1680
michael@0 1681 temp=runs[firstRun].visualLimit;
michael@0 1682 runs[firstRun].visualLimit=runs[endRun].visualLimit;
michael@0 1683 runs[endRun].visualLimit=temp;
michael@0 1684
michael@0 1685 ++firstRun;
michael@0 1686 --endRun;
michael@0 1687 }
michael@0 1688
michael@0 1689 if(limitRun==runCount) {
michael@0 1690 break; /* no more such runs */
michael@0 1691 } else {
michael@0 1692 firstRun=limitRun+1;
michael@0 1693 }
michael@0 1694 }
michael@0 1695 }
michael@0 1696
michael@0 1697 /* now do aMaxLevel==old aMinLevel (==odd!), see above */
michael@0 1698 if(!(aMinLevel&1)) {
michael@0 1699 firstRun=0;
michael@0 1700
michael@0 1701 /* include the trailing WS run in this complete reordering */
michael@0 1702 if(mTrailingWSStart==mLength) {
michael@0 1703 --runCount;
michael@0 1704 }
michael@0 1705
michael@0 1706 /* Swap the entire sequence of all runs. (endRun==runCount) */
michael@0 1707 while(firstRun<runCount) {
michael@0 1708 temp=runs[firstRun].logicalStart;
michael@0 1709 runs[firstRun].logicalStart=runs[runCount].logicalStart;
michael@0 1710 runs[runCount].logicalStart=temp;
michael@0 1711
michael@0 1712 temp=runs[firstRun].visualLimit;
michael@0 1713 runs[firstRun].visualLimit=runs[runCount].visualLimit;
michael@0 1714 runs[runCount].visualLimit=temp;
michael@0 1715
michael@0 1716 ++firstRun;
michael@0 1717 --runCount;
michael@0 1718 }
michael@0 1719 }
michael@0 1720 }
michael@0 1721
michael@0 1722 nsresult nsBidi::ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap)
michael@0 1723 {
michael@0 1724 int32_t start, end, limit, temp;
michael@0 1725 nsBidiLevel minLevel, maxLevel;
michael@0 1726
michael@0 1727 if(aIndexMap==nullptr ||
michael@0 1728 !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
michael@0 1729 return NS_OK;
michael@0 1730 }
michael@0 1731
michael@0 1732 /* nothing to do? */
michael@0 1733 if(minLevel==maxLevel && (minLevel&1)==0) {
michael@0 1734 return NS_OK;
michael@0 1735 }
michael@0 1736
michael@0 1737 /* reorder only down to the lowest odd level */
michael@0 1738 minLevel|=1;
michael@0 1739
michael@0 1740 /* loop maxLevel..minLevel */
michael@0 1741 do {
michael@0 1742 start=0;
michael@0 1743
michael@0 1744 /* loop for all sequences of levels to reorder at the current maxLevel */
michael@0 1745 for(;;) {
michael@0 1746 /* look for a sequence of levels that are all at >=maxLevel */
michael@0 1747 /* look for the first index of such a sequence */
michael@0 1748 while(start<aLength && aLevels[start]<maxLevel) {
michael@0 1749 ++start;
michael@0 1750 }
michael@0 1751 if(start>=aLength) {
michael@0 1752 break; /* no more such runs */
michael@0 1753 }
michael@0 1754
michael@0 1755 /* look for the limit of such a sequence (the index behind it) */
michael@0 1756 for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
michael@0 1757
michael@0 1758 /*
michael@0 1759 * Swap the entire interval of indexes from start to limit-1.
michael@0 1760 * We don't need to swap the levels for the purpose of this
michael@0 1761 * algorithm: the sequence of levels that we look at does not
michael@0 1762 * move anyway.
michael@0 1763 */
michael@0 1764 end=limit-1;
michael@0 1765 while(start<end) {
michael@0 1766 temp=aIndexMap[start];
michael@0 1767 aIndexMap[start]=aIndexMap[end];
michael@0 1768 aIndexMap[end]=temp;
michael@0 1769
michael@0 1770 ++start;
michael@0 1771 --end;
michael@0 1772 }
michael@0 1773
michael@0 1774 if(limit==aLength) {
michael@0 1775 break; /* no more such sequences */
michael@0 1776 } else {
michael@0 1777 start=limit+1;
michael@0 1778 }
michael@0 1779 }
michael@0 1780 } while(--maxLevel>=minLevel);
michael@0 1781
michael@0 1782 return NS_OK;
michael@0 1783 }
michael@0 1784
michael@0 1785 bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength,
michael@0 1786 int32_t *aIndexMap,
michael@0 1787 nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel)
michael@0 1788 {
michael@0 1789 int32_t start;
michael@0 1790 nsBidiLevel level, minLevel, maxLevel;
michael@0 1791
michael@0 1792 if(aLevels==nullptr || aLength<=0) {
michael@0 1793 return false;
michael@0 1794 }
michael@0 1795
michael@0 1796 /* determine minLevel and maxLevel */
michael@0 1797 minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1;
michael@0 1798 maxLevel=0;
michael@0 1799 for(start=aLength; start>0;) {
michael@0 1800 level=aLevels[--start];
michael@0 1801 if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) {
michael@0 1802 return false;
michael@0 1803 }
michael@0 1804 if(level<minLevel) {
michael@0 1805 minLevel=level;
michael@0 1806 }
michael@0 1807 if(level>maxLevel) {
michael@0 1808 maxLevel=level;
michael@0 1809 }
michael@0 1810 }
michael@0 1811 *aMinLevel=minLevel;
michael@0 1812 *aMaxLevel=maxLevel;
michael@0 1813
michael@0 1814 /* initialize the index map */
michael@0 1815 for(start=aLength; start>0;) {
michael@0 1816 --start;
michael@0 1817 aIndexMap[start]=start;
michael@0 1818 }
michael@0 1819
michael@0 1820 return true;
michael@0 1821 }
michael@0 1822
michael@0 1823 #ifdef FULL_BIDI_ENGINE
michael@0 1824 /* API functions for logical<->visual mapping ------------------------------- */
michael@0 1825
michael@0 1826 nsresult nsBidi::GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex) {
michael@0 1827 if(aLogicalIndex<0 || mLength<=aLogicalIndex) {
michael@0 1828 return NS_ERROR_INVALID_ARG;
michael@0 1829 } else {
michael@0 1830 /* we can do the trivial cases without the runs array */
michael@0 1831 switch(mDirection) {
michael@0 1832 case NSBIDI_LTR:
michael@0 1833 *aVisualIndex = aLogicalIndex;
michael@0 1834 return NS_OK;
michael@0 1835 case NSBIDI_RTL:
michael@0 1836 *aVisualIndex = mLength-aLogicalIndex-1;
michael@0 1837 return NS_OK;
michael@0 1838 default:
michael@0 1839 if(mRunCount<0 && !GetRuns()) {
michael@0 1840 return NS_ERROR_OUT_OF_MEMORY;
michael@0 1841 } else {
michael@0 1842 Run *runs=mRuns;
michael@0 1843 int32_t i, visualStart=0, offset, length;
michael@0 1844
michael@0 1845 /* linear search for the run, search on the visual runs */
michael@0 1846 for(i=0;; ++i) {
michael@0 1847 length=runs[i].visualLimit-visualStart;
michael@0 1848 offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart);
michael@0 1849 if(offset>=0 && offset<length) {
michael@0 1850 if(IS_EVEN_RUN(runs[i].logicalStart)) {
michael@0 1851 /* LTR */
michael@0 1852 *aVisualIndex = visualStart+offset;
michael@0 1853 return NS_OK;
michael@0 1854 } else {
michael@0 1855 /* RTL */
michael@0 1856 *aVisualIndex = visualStart+length-offset-1;
michael@0 1857 return NS_OK;
michael@0 1858 }
michael@0 1859 }
michael@0 1860 visualStart+=length;
michael@0 1861 }
michael@0 1862 }
michael@0 1863 }
michael@0 1864 }
michael@0 1865 }
michael@0 1866
michael@0 1867 nsresult nsBidi::GetLogicalIndex(int32_t aVisualIndex, int32_t *aLogicalIndex)
michael@0 1868 {
michael@0 1869 if(aVisualIndex<0 || mLength<=aVisualIndex) {
michael@0 1870 return NS_ERROR_INVALID_ARG;
michael@0 1871 } else {
michael@0 1872 /* we can do the trivial cases without the runs array */
michael@0 1873 switch(mDirection) {
michael@0 1874 case NSBIDI_LTR:
michael@0 1875 *aLogicalIndex = aVisualIndex;
michael@0 1876 return NS_OK;
michael@0 1877 case NSBIDI_RTL:
michael@0 1878 *aLogicalIndex = mLength-aVisualIndex-1;
michael@0 1879 return NS_OK;
michael@0 1880 default:
michael@0 1881 if(mRunCount<0 && !GetRuns()) {
michael@0 1882 return NS_ERROR_OUT_OF_MEMORY;
michael@0 1883 } else {
michael@0 1884 Run *runs=mRuns;
michael@0 1885 int32_t i, runCount=mRunCount, start;
michael@0 1886
michael@0 1887 if(runCount<=10) {
michael@0 1888 /* linear search for the run */
michael@0 1889 for(i=0; aVisualIndex>=runs[i].visualLimit; ++i) {}
michael@0 1890 } else {
michael@0 1891 /* binary search for the run */
michael@0 1892 int32_t start=0, limit=runCount;
michael@0 1893
michael@0 1894 /* the middle if() will guaranteed find the run, we don't need a loop limit */
michael@0 1895 for(;;) {
michael@0 1896 i=(start+limit)/2;
michael@0 1897 if(aVisualIndex>=runs[i].visualLimit) {
michael@0 1898 start=i+1;
michael@0 1899 } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) {
michael@0 1900 break;
michael@0 1901 } else {
michael@0 1902 limit=i;
michael@0 1903 }
michael@0 1904 }
michael@0 1905 }
michael@0 1906
michael@0 1907 start=runs[i].logicalStart;
michael@0 1908 if(IS_EVEN_RUN(start)) {
michael@0 1909 /* LTR */
michael@0 1910 /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
michael@0 1911 if(i>0) {
michael@0 1912 aVisualIndex-=runs[i-1].visualLimit;
michael@0 1913 }
michael@0 1914 *aLogicalIndex = GET_INDEX(start)+aVisualIndex;
michael@0 1915 return NS_OK;
michael@0 1916 } else {
michael@0 1917 /* RTL */
michael@0 1918 *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1;
michael@0 1919 return NS_OK;
michael@0 1920 }
michael@0 1921 }
michael@0 1922 }
michael@0 1923 }
michael@0 1924 }
michael@0 1925
michael@0 1926 nsresult nsBidi::GetLogicalMap(int32_t *aIndexMap)
michael@0 1927 {
michael@0 1928 nsBidiLevel *levels;
michael@0 1929 nsresult rv;
michael@0 1930
michael@0 1931 /* GetLevels() checks all of its and our arguments */
michael@0 1932 rv = GetLevels(&levels);
michael@0 1933 if(NS_FAILED(rv)) {
michael@0 1934 return rv;
michael@0 1935 } else if(aIndexMap==nullptr) {
michael@0 1936 return NS_ERROR_INVALID_ARG;
michael@0 1937 } else {
michael@0 1938 return ReorderLogical(levels, mLength, aIndexMap);
michael@0 1939 }
michael@0 1940 }
michael@0 1941
michael@0 1942 nsresult nsBidi::GetVisualMap(int32_t *aIndexMap)
michael@0 1943 {
michael@0 1944 int32_t* runCount=nullptr;
michael@0 1945 nsresult rv;
michael@0 1946
michael@0 1947 /* CountRuns() checks all of its and our arguments */
michael@0 1948 rv = CountRuns(runCount);
michael@0 1949 if(NS_FAILED(rv)) {
michael@0 1950 return rv;
michael@0 1951 } else if(aIndexMap==nullptr) {
michael@0 1952 return NS_ERROR_INVALID_ARG;
michael@0 1953 } else {
michael@0 1954 /* fill a visual-to-logical index map using the runs[] */
michael@0 1955 Run *runs=mRuns, *runsLimit=runs+mRunCount;
michael@0 1956 int32_t logicalStart, visualStart, visualLimit;
michael@0 1957
michael@0 1958 visualStart=0;
michael@0 1959 for(; runs<runsLimit; ++runs) {
michael@0 1960 logicalStart=runs->logicalStart;
michael@0 1961 visualLimit=runs->visualLimit;
michael@0 1962 if(IS_EVEN_RUN(logicalStart)) {
michael@0 1963 do { /* LTR */
michael@0 1964 *aIndexMap++ = logicalStart++;
michael@0 1965 } while(++visualStart<visualLimit);
michael@0 1966 } else {
michael@0 1967 REMOVE_ODD_BIT(logicalStart);
michael@0 1968 logicalStart+=visualLimit-visualStart; /* logicalLimit */
michael@0 1969 do { /* RTL */
michael@0 1970 *aIndexMap++ = --logicalStart;
michael@0 1971 } while(++visualStart<visualLimit);
michael@0 1972 }
michael@0 1973 /* visualStart==visualLimit; */
michael@0 1974 }
michael@0 1975 return NS_OK;
michael@0 1976 }
michael@0 1977 }
michael@0 1978
michael@0 1979 /* reorder a line based on a levels array (L2) ------------------------------ */
michael@0 1980
michael@0 1981 nsresult nsBidi::ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap)
michael@0 1982 {
michael@0 1983 int32_t start, limit, sumOfSosEos;
michael@0 1984 nsBidiLevel minLevel, maxLevel;
michael@0 1985
michael@0 1986 if(aIndexMap==nullptr ||
michael@0 1987 !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
michael@0 1988 return NS_OK;
michael@0 1989 }
michael@0 1990
michael@0 1991 /* nothing to do? */
michael@0 1992 if(minLevel==maxLevel && (minLevel&1)==0) {
michael@0 1993 return NS_OK;
michael@0 1994 }
michael@0 1995
michael@0 1996 /* reorder only down to the lowest odd level */
michael@0 1997 minLevel|=1;
michael@0 1998
michael@0 1999 /* loop maxLevel..minLevel */
michael@0 2000 do {
michael@0 2001 start=0;
michael@0 2002
michael@0 2003 /* loop for all sequences of levels to reorder at the current maxLevel */
michael@0 2004 for(;;) {
michael@0 2005 /* look for a sequence of levels that are all at >=maxLevel */
michael@0 2006 /* look for the first index of such a sequence */
michael@0 2007 while(start<aLength && aLevels[start]<maxLevel) {
michael@0 2008 ++start;
michael@0 2009 }
michael@0 2010 if(start>=aLength) {
michael@0 2011 break; /* no more such sequences */
michael@0 2012 }
michael@0 2013
michael@0 2014 /* look for the limit of such a sequence (the index behind it) */
michael@0 2015 for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
michael@0 2016
michael@0 2017 /*
michael@0 2018 * sos=start of sequence, eos=end of sequence
michael@0 2019 *
michael@0 2020 * The closed (inclusive) interval from sos to eos includes all the logical
michael@0 2021 * and visual indexes within this sequence. They are logically and
michael@0 2022 * visually contiguous and in the same range.
michael@0 2023 *
michael@0 2024 * For each run, the new visual index=sos+eos-old visual index;
michael@0 2025 * we pre-add sos+eos into sumOfSosEos ->
michael@0 2026 * new visual index=sumOfSosEos-old visual index;
michael@0 2027 */
michael@0 2028 sumOfSosEos=start+limit-1;
michael@0 2029
michael@0 2030 /* reorder each index in the sequence */
michael@0 2031 do {
michael@0 2032 aIndexMap[start]=sumOfSosEos-aIndexMap[start];
michael@0 2033 } while(++start<limit);
michael@0 2034
michael@0 2035 /* start==limit */
michael@0 2036 if(limit==aLength) {
michael@0 2037 break; /* no more such sequences */
michael@0 2038 } else {
michael@0 2039 start=limit+1;
michael@0 2040 }
michael@0 2041 }
michael@0 2042 } while(--maxLevel>=minLevel);
michael@0 2043
michael@0 2044 return NS_OK;
michael@0 2045 }
michael@0 2046
michael@0 2047 nsresult nsBidi::InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength)
michael@0 2048 {
michael@0 2049 if(aSrcMap!=nullptr && aDestMap!=nullptr) {
michael@0 2050 aSrcMap+=aLength;
michael@0 2051 while(aLength>0) {
michael@0 2052 aDestMap[*--aSrcMap]=--aLength;
michael@0 2053 }
michael@0 2054 }
michael@0 2055 return NS_OK;
michael@0 2056 }
michael@0 2057
michael@0 2058 int32_t nsBidi::doWriteReverse(const char16_t *src, int32_t srcLength,
michael@0 2059 char16_t *dest, uint16_t options) {
michael@0 2060 /*
michael@0 2061 * RTL run -
michael@0 2062 *
michael@0 2063 * RTL runs need to be copied to the destination in reverse order
michael@0 2064 * of code points, not code units, to keep Unicode characters intact.
michael@0 2065 *
michael@0 2066 * The general strategy for this is to read the source text
michael@0 2067 * in backward order, collect all code units for a code point
michael@0 2068 * (and optionally following combining characters, see below),
michael@0 2069 * and copy all these code units in ascending order
michael@0 2070 * to the destination for this run.
michael@0 2071 *
michael@0 2072 * Several options request whether combining characters
michael@0 2073 * should be kept after their base characters,
michael@0 2074 * whether Bidi control characters should be removed, and
michael@0 2075 * whether characters should be replaced by their mirror-image
michael@0 2076 * equivalent Unicode characters.
michael@0 2077 */
michael@0 2078 int32_t i, j, destSize;
michael@0 2079 uint32_t c;
michael@0 2080
michael@0 2081 /* optimize for several combinations of options */
michael@0 2082 switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) {
michael@0 2083 case 0:
michael@0 2084 /*
michael@0 2085 * With none of the "complicated" options set, the destination
michael@0 2086 * run will have the same length as the source run,
michael@0 2087 * and there is no mirroring and no keeping combining characters
michael@0 2088 * with their base characters.
michael@0 2089 */
michael@0 2090 destSize=srcLength;
michael@0 2091
michael@0 2092 /* preserve character integrity */
michael@0 2093 do {
michael@0 2094 /* i is always after the last code unit known to need to be kept in this segment */
michael@0 2095 i=srcLength;
michael@0 2096
michael@0 2097 /* collect code units for one base character */
michael@0 2098 UTF_BACK_1(src, 0, srcLength);
michael@0 2099
michael@0 2100 /* copy this base character */
michael@0 2101 j=srcLength;
michael@0 2102 do {
michael@0 2103 *dest++=src[j++];
michael@0 2104 } while(j<i);
michael@0 2105 } while(srcLength>0);
michael@0 2106 break;
michael@0 2107 case NSBIDI_KEEP_BASE_COMBINING:
michael@0 2108 /*
michael@0 2109 * Here, too, the destination
michael@0 2110 * run will have the same length as the source run,
michael@0 2111 * and there is no mirroring.
michael@0 2112 * We do need to keep combining characters with their base characters.
michael@0 2113 */
michael@0 2114 destSize=srcLength;
michael@0 2115
michael@0 2116 /* preserve character integrity */
michael@0 2117 do {
michael@0 2118 /* i is always after the last code unit known to need to be kept in this segment */
michael@0 2119 i=srcLength;
michael@0 2120
michael@0 2121 /* collect code units and modifier letters for one base character */
michael@0 2122 do {
michael@0 2123 UTF_PREV_CHAR(src, 0, srcLength, c);
michael@0 2124 } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM));
michael@0 2125
michael@0 2126 /* copy this "user character" */
michael@0 2127 j=srcLength;
michael@0 2128 do {
michael@0 2129 *dest++=src[j++];
michael@0 2130 } while(j<i);
michael@0 2131 } while(srcLength>0);
michael@0 2132 break;
michael@0 2133 default:
michael@0 2134 /*
michael@0 2135 * With several "complicated" options set, this is the most
michael@0 2136 * general and the slowest copying of an RTL run.
michael@0 2137 * We will do mirroring, remove Bidi controls, and
michael@0 2138 * keep combining characters with their base characters
michael@0 2139 * as requested.
michael@0 2140 */
michael@0 2141 if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) {
michael@0 2142 i=srcLength;
michael@0 2143 } else {
michael@0 2144 /* we need to find out the destination length of the run,
michael@0 2145 which will not include the Bidi control characters */
michael@0 2146 int32_t length=srcLength;
michael@0 2147 char16_t ch;
michael@0 2148
michael@0 2149 i=0;
michael@0 2150 do {
michael@0 2151 ch=*src++;
michael@0 2152 if (!IsBidiControl((uint32_t)ch)) {
michael@0 2153 ++i;
michael@0 2154 }
michael@0 2155 } while(--length>0);
michael@0 2156 src-=srcLength;
michael@0 2157 }
michael@0 2158 destSize=i;
michael@0 2159
michael@0 2160 /* preserve character integrity */
michael@0 2161 do {
michael@0 2162 /* i is always after the last code unit known to need to be kept in this segment */
michael@0 2163 i=srcLength;
michael@0 2164
michael@0 2165 /* collect code units for one base character */
michael@0 2166 UTF_PREV_CHAR(src, 0, srcLength, c);
michael@0 2167 if(options&NSBIDI_KEEP_BASE_COMBINING) {
michael@0 2168 /* collect modifier letters for this base character */
michael@0 2169 while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) {
michael@0 2170 UTF_PREV_CHAR(src, 0, srcLength, c);
michael@0 2171 }
michael@0 2172 }
michael@0 2173
michael@0 2174 if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) {
michael@0 2175 /* do not copy this Bidi control character */
michael@0 2176 continue;
michael@0 2177 }
michael@0 2178
michael@0 2179 /* copy this "user character" */
michael@0 2180 j=srcLength;
michael@0 2181 if(options&NSBIDI_DO_MIRRORING) {
michael@0 2182 /* mirror only the base character */
michael@0 2183 c = SymmSwap(c);
michael@0 2184
michael@0 2185 int32_t k=0;
michael@0 2186 UTF_APPEND_CHAR_UNSAFE(dest, k, c);
michael@0 2187 dest+=k;
michael@0 2188 j+=k;
michael@0 2189 }
michael@0 2190 while(j<i) {
michael@0 2191 *dest++=src[j++];
michael@0 2192 }
michael@0 2193 } while(srcLength>0);
michael@0 2194 break;
michael@0 2195 } /* end of switch */
michael@0 2196 return destSize;
michael@0 2197 }
michael@0 2198
michael@0 2199 nsresult nsBidi::WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize)
michael@0 2200 {
michael@0 2201 if( aSrc==nullptr || aSrcLength<0 ||
michael@0 2202 aDest==nullptr
michael@0 2203 ) {
michael@0 2204 return NS_ERROR_INVALID_ARG;
michael@0 2205 }
michael@0 2206
michael@0 2207 /* do input and output overlap? */
michael@0 2208 if( aSrc>=aDest && aSrc<aDest+aSrcLength ||
michael@0 2209 aDest>=aSrc && aDest<aSrc+aSrcLength
michael@0 2210 ) {
michael@0 2211 return NS_ERROR_INVALID_ARG;
michael@0 2212 }
michael@0 2213
michael@0 2214 if(aSrcLength>0) {
michael@0 2215 *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions);
michael@0 2216 }
michael@0 2217 return NS_OK;
michael@0 2218 }
michael@0 2219 #endif // FULL_BIDI_ENGINE

mercurial