diff -r 000000000000 -r 6474c204b198 layout/base/nsBidi.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/layout/base/nsBidi.cpp Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,2219 @@ +/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsBidi.h" +#include "nsUnicodeProperties.h" +#include "nsCRTGlue.h" + +using namespace mozilla::unicode; + +// These are #defined in under Solaris 10 x86 +#undef CS +#undef ES + +/* Comparing the description of the Bidi algorithm with this implementation + is easier with the same names for the Bidi types in the code as there. +*/ +enum { + L = eCharType_LeftToRight, + R = eCharType_RightToLeft, + EN = eCharType_EuropeanNumber, + ES = eCharType_EuropeanNumberSeparator, + ET = eCharType_EuropeanNumberTerminator, + AN = eCharType_ArabicNumber, + CS = eCharType_CommonNumberSeparator, + B = eCharType_BlockSeparator, + S = eCharType_SegmentSeparator, + WS = eCharType_WhiteSpaceNeutral, + O_N = eCharType_OtherNeutral, + LRE = eCharType_LeftToRightEmbedding, + LRO = eCharType_LeftToRightOverride, + AL = eCharType_RightToLeftArabic, + RLE = eCharType_RightToLeftEmbedding, + RLO = eCharType_RightToLeftOverride, + PDF = eCharType_PopDirectionalFormat, + NSM = eCharType_DirNonSpacingMark, + BN = eCharType_BoundaryNeutral, + dirPropCount +}; + +/* to avoid some conditional statements, use tiny constant arrays */ +static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; +static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; +static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; + +#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] +#define DIRPROP_FLAG_E(level) flagE[(level)&1] +#define DIRPROP_FLAG_O(level) flagO[(level)&1] + +/* + * General implementation notes: + * + * Throughout the implementation, there are comments like (W2) that refer to + * rules of the Bidi algorithm in its version 5, in this example to the second + * rule of the resolution of weak types. + * + * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) + * character according to UTF-16, the second UChar gets the directional property of + * the entire character assigned, while the first one gets a BN, a boundary + * neutral, type, which is ignored by most of the algorithm according to + * rule (X9) and the implementation suggestions of the Bidi algorithm. + * + * Later, AdjustWSLevels() will set the level for each BN to that of the + * following character (UChar), which results in surrogate pairs getting the + * same level on each of their surrogates. + * + * In a UTF-8 implementation, the same thing could be done: the last byte of + * a multi-byte sequence would get the "real" property, while all previous + * bytes of that sequence would get BN. + * + * It is not possible to assign all those parts of a character the same real + * property because this would fail in the resolution of weak types with rules + * that look at immediately surrounding types. + * + * As a related topic, this implementation does not remove Boundary Neutral + * types from the input, but ignores them whenever this is relevant. + * For example, the loop for the resolution of the weak types reads + * types until it finds a non-BN. + * Also, explicit embedding codes are neither changed into BN nor removed. + * They are only treated the same way real BNs are. + * As stated before, AdjustWSLevels() takes care of them at the end. + * For the purpose of conformance, the levels of all these codes + * do not matter. + * + * Note that this implementation never modifies the dirProps + * after the initial setup. + * + * + * In this implementation, the resolution of weak types (Wn), + * neutrals (Nn), and the assignment of the resolved level (In) + * are all done in one single loop, in ResolveImplicitLevels(). + * Changes of dirProp values are done on the fly, without writing + * them back to the dirProps array. + * + * + * This implementation contains code that allows to bypass steps of the + * algorithm that are not needed on the specific paragraph + * in order to speed up the most common cases considerably, + * like text that is entirely LTR, or RTL text without numbers. + * + * Most of this is done by setting a bit for each directional property + * in a flags variable and later checking for whether there are + * any LTR characters or any RTL characters, or both, whether + * there are any explicit embedding codes, etc. + * + * If the (Xn) steps are performed, then the flags are re-evaluated, + * because they will then not contain the embedding codes any more + * and will be adjusted for override codes, so that subsequently + * more bypassing may be possible than what the initial flags suggested. + * + * If the text is not mixed-directional, then the + * algorithm steps for the weak type resolution are not performed, + * and all levels are set to the paragraph level. + * + * If there are no explicit embedding codes, then the (Xn) steps + * are not performed. + * + * If embedding levels are supplied as a parameter, then all + * explicit embedding codes are ignored, and the (Xn) steps + * are not performed. + * + * White Space types could get the level of the run they belong to, + * and are checked with a test of (flags&MASK_EMBEDDING) to + * consider if the paragraph direction should be considered in + * the flags variable. + * + * If there are no White Space types in the paragraph, then + * (L1) is not necessary in AdjustWSLevels(). + */ +nsBidi::nsBidi() +{ + Init(); + + mMayAllocateText=true; + mMayAllocateRuns=true; +} + +nsBidi::~nsBidi() +{ + Free(); +} + +void nsBidi::Init() +{ + /* reset the object, all pointers nullptr, all flags false, all sizes 0 */ + mLength = 0; + mParaLevel = 0; + mFlags = 0; + mDirection = NSBIDI_LTR; + mTrailingWSStart = 0; + + mDirPropsSize = 0; + mLevelsSize = 0; + mRunsSize = 0; + mRunCount = -1; + + mDirProps=nullptr; + mLevels=nullptr; + mRuns=nullptr; + + mDirPropsMemory=nullptr; + mLevelsMemory=nullptr; + mRunsMemory=nullptr; + + mMayAllocateText=false; + mMayAllocateRuns=false; + +} + +/* + * We are allowed to allocate memory if aMemory==nullptr or + * aMayAllocate==true for each array that we need. + * We also try to grow and shrink memory as needed if we + * allocate it. + * + * Assume aSizeNeeded>0. + * If *aMemory!=nullptr, then assume *aSize>0. + * + * ### this realloc() may unnecessarily copy the old data, + * which we know we don't need any more; + * is this the best way to do this?? + */ +bool nsBidi::GetMemory(void **aMemory, size_t *aSize, bool aMayAllocate, size_t aSizeNeeded) +{ + /* check for existing memory */ + if(*aMemory==nullptr) { + /* we need to allocate memory */ + if(!aMayAllocate) { + return false; + } else { + *aMemory=moz_malloc(aSizeNeeded); + if (*aMemory!=nullptr) { + *aSize=aSizeNeeded; + return true; + } else { + *aSize=0; + return false; + } + } + } else { + /* there is some memory, is it enough or too much? */ + if(aSizeNeeded>*aSize && !aMayAllocate) { + /* not enough memory, and we must not allocate */ + return false; + } else if(aSizeNeeded!=*aSize && aMayAllocate) { + /* we may try to grow or shrink */ + void *memory=moz_realloc(*aMemory, aSizeNeeded); + + if(memory!=nullptr) { + *aMemory=memory; + *aSize=aSizeNeeded; + return true; + } else { + /* we failed to grow */ + return false; + } + } else { + /* we have at least enough memory and must not allocate */ + return true; + } + } +} + +void nsBidi::Free() +{ + moz_free(mDirPropsMemory); + mDirPropsMemory = nullptr; + moz_free(mLevelsMemory); + mLevelsMemory = nullptr; + moz_free(mRunsMemory); + mRunsMemory = nullptr; +} + +/* SetPara ------------------------------------------------------------ */ + +nsresult nsBidi::SetPara(const char16_t *aText, int32_t aLength, + nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels) +{ + nsBidiDirection direction; + + /* check the argument values */ + if(aText==nullptr || + ((NSBIDI_MAX_EXPLICIT_LEVEL=NSBIDI_MAX_EXPLICIT_LEVEL */ + uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ + + /* recalculate the flags */ + flags=0; + + /* since we assume that this is a single paragraph, we ignore (X8) */ + for(i=0; i0) { + --countOver61; + } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) { + /* handle LRx overflows from level 60 */ + --countOver60; + } else if(stackTop>0) { + /* this is the pop operation; it also pops level 61 while countOver60>0 */ + --stackTop; + embeddingLevel=stack[stackTop]; + /* } else { (underflow) */ + } + flags|=DIRPROP_FLAG(BN); + break; + case B: + /* + * We do not really expect to see a paragraph separator (B), + * but we should do something reasonable with it, + * especially at the end of the text. + */ + stackTop=0; + countOver60=countOver61=0; + embeddingLevel=level=mParaLevel; + flags|=DIRPROP_FLAG(B); + break; + case BN: + /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ + /* they will get their levels set correctly in AdjustWSLevels() */ + flags|=DIRPROP_FLAG(BN); + break; + default: + /* all other types get the "real" level */ + if(level!=embeddingLevel) { + level=embeddingLevel; + if(level&NSBIDI_LEVEL_OVERRIDE) { + flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; + } else { + flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; + } + } + if(!(level&NSBIDI_LEVEL_OVERRIDE)) { + flags|=DIRPROP_FLAG(dirProp); + } + break; + } + + /* + * We need to set reasonable levels even on BN codes and + * explicit codes because we will later look at same-level runs (X10). + */ + levels[i]=level; + } + if(flags&MASK_EMBEDDING) { + flags|=DIRPROP_FLAG_LR(mParaLevel); + } + + /* subsequently, ignore the explicit codes and BN (X9) */ + + /* again, determine if the text is mixed-directional or single-directional */ + mFlags=flags; + direction=DirectionFromFlags(flags); + } + return direction; +} + +/* + * Use a pre-specified embedding levels array: + * + * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + * ignore all explicit codes (X9), + * and check all the preset levels. + * + * Recalculate the flags to have them reflect the real properties + * after taking the explicit embeddings into account. + */ +nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection) +{ + const DirProp *dirProps=mDirProps; + nsBidiLevel *levels=mLevels; + + int32_t i, length=mLength; + Flags flags=0; /* collect all directionalities in the text */ + nsBidiLevel level, paraLevel=mParaLevel; + + for(i=0; i>=EN_SHIFT; + /* + * Technically, this should be done before the switch() in the form + * if(nextDirProp==NSM) { + * dirProps[next]=nextDirProp=dirProp; + * } + * + * - effectively one iteration ahead. + * However, whether the next dirProp is NSM or is equal to the current dirProp + * does not change the outcome of any condition in (W2)..(W7). + */ + break; + default: + break; + } + + /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */ + + /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */ + /* this is one iteration late for the neutrals */ + if(DIRPROP_FLAG(dirProp)&MASK_N) { + if(neutralStart<0) { + /* start of a sequence of neutrals */ + neutralStart=i; + beforeNeutral=prevDirProp; + } + } else /* not a neutral, can be only one of { L, R, EN, AN } */ { + /* + * Note that all levels[] values are still the same at this + * point because this function is called for an entire + * same-level run. + * Therefore, we need to read only one actual level. + */ + nsBidiLevel level=levels[i]; + + if(neutralStart>=0) { + nsBidiLevel final; + /* end of a sequence of neutrals (dirProp is "afterNeutral") */ + if(beforeNeutral==L) { + if(dirProp==L) { + final=0; /* make all neutrals L (N1) */ + } else { + final=level; /* make all neutrals "e" (N2) */ + } + } else /* beforeNeutral is one of { R, EN, AN } */ { + if(dirProp==L) { + final=level; /* make all neutrals "e" (N2) */ + } else { + final=1; /* make all neutrals R (N1) */ + } + } + /* perform (In) on the sequence of neutrals */ + if((level^final)&1) { + /* do something only if we need to _change_ the level */ + do { + ++levels[neutralStart]; + } while(++neutralStart=0) { + /* + * Note that all levels[] values are still the same at this + * point because this function is called for an entire + * same-level run. + * Therefore, we need to read only one actual level. + */ + nsBidiLevel level=levels[neutralStart], final; + + /* end of a sequence of neutrals (aEOR is "afterNeutral") */ + if(beforeNeutral==L) { + if(aEOR==L) { + final=0; /* make all neutrals L (N1) */ + } else { + final=level; /* make all neutrals "e" (N2) */ + } + } else /* beforeNeutral is one of { R, EN, AN } */ { + if(aEOR==L) { + final=level; /* make all neutrals "e" (N2) */ + } else { + final=1; /* make all neutrals R (N1) */ + } + } + /* perform (In) on the sequence of neutrals */ + if((level^final)&1) { + /* do something only if we need to _change_ the level */ + do { + ++levels[neutralStart]; + } while(++neutralStart0) { + /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ + while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) { + levels[i]=paraLevel; + } + + /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ + /* here, i+1 is guaranteed to be 0) { + flag=DIRPROP_FLAG(dirProps[--i]); + if(flag&MASK_BN_EXPLICIT) { + levels[i]=levels[i+1]; + } else if(flag&MASK_B_S) { + levels[i]=paraLevel; + break; + } + } + } + } + + /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */ + /* (a separate loop can be optimized more easily by a compiler) */ + if(mFlags&MASK_OVERRIDE) { + for(i=mTrailingWSStart; i>0;) { + levels[--i]&=~NSBIDI_LEVEL_OVERRIDE; + } + } +} + +nsresult nsBidi::GetDirection(nsBidiDirection* aDirection) +{ + *aDirection = mDirection; + return NS_OK; +} + +nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel) +{ + *aParaLevel = mParaLevel; + return NS_OK; +} +#ifdef FULL_BIDI_ENGINE + +/* -------------------------------------------------------------------------- */ + +nsresult nsBidi::GetLength(int32_t* aLength) +{ + *aLength = mLength; + return NS_OK; +} + +/* + * General remarks about the functions in this section: + * + * These functions deal with the aspects of potentially mixed-directional + * text in a single paragraph or in a line of a single paragraph + * which has already been processed according to + * the Unicode 3.0 Bidi algorithm as defined in + * http://www.unicode.org/unicode/reports/tr9/ , version 5, + * also described in The Unicode Standard, Version 3.0 . + * + * This means that there is a nsBidi object with a levels + * and a dirProps array. + * paraLevel and direction are also set. + * Only if the length of the text is zero, then levels==dirProps==nullptr. + * + * The overall directionality of the paragraph + * or line is used to bypass the reordering steps if possible. + * Even purely RTL text does not need reordering there because + * the getLogical/VisualIndex() functions can compute the + * index on the fly in such a case. + * + * The implementation of the access to same-level-runs and of the reordering + * do attempt to provide better performance and less memory usage compared to + * a direct implementation of especially rule (L2) with an array of + * one (32-bit) integer per text character. + * + * Here, the levels array is scanned as soon as necessary, and a vector of + * same-level-runs is created. Reordering then is done on this vector. + * For each run of text positions that were resolved to the same level, + * only 8 bytes are stored: the first text position of the run and the visual + * position behind the run after reordering. + * One sign bit is used to hold the directionality of the run. + * This is inefficient if there are many very short runs. If the average run + * length is <2, then this uses more memory. + * + * In a further attempt to save memory, the levels array is never changed + * after all the resolution rules (Xn, Wn, Nn, In). + * Many functions have to consider the field trailingWSStart: + * if it is less than length, then there is an implicit trailing run + * at the paraLevel, + * which is not reflected in the levels array. + * This allows a line nsBidi object to use the same levels array as + * its paragraph parent object. + * + * When a nsBidi object is created for a line of a paragraph, then the + * paragraph's levels and dirProps arrays are reused by way of setting + * a pointer into them, not by copying. This again saves memory and forbids to + * change the now shared levels for (L1). + */ +nsresult nsBidi::SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit) +{ + nsBidi* pParent = (nsBidi*)aParaBidi; + int32_t length; + + /* check the argument values */ + if(pParent==nullptr) { + return NS_ERROR_INVALID_POINTER; + } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) { + return NS_ERROR_INVALID_ARG; + } + + /* set members from our aParaBidi parent */ + length=mLength=aLimit-aStart; + mParaLevel=pParent->mParaLevel; + + mRuns=nullptr; + mFlags=0; + + if(length>0) { + mDirProps=pParent->mDirProps+aStart; + mLevels=pParent->mLevels+aStart; + mRunCount=-1; + + if(pParent->mDirection!=NSBIDI_MIXED) { + /* the parent is already trivial */ + mDirection=pParent->mDirection; + + /* + * The parent's levels are all either + * implicitly or explicitly ==paraLevel; + * do the same here. + */ + if(pParent->mTrailingWSStart<=aStart) { + mTrailingWSStart=0; + } else if(pParent->mTrailingWSStartmTrailingWSStart-aStart; + } else { + mTrailingWSStart=length; + } + } else { + const nsBidiLevel *levels=mLevels; + int32_t i, trailingWSStart; + nsBidiLevel level; + Flags flags=0; + + SetTrailingWSStart(); + trailingWSStart=mTrailingWSStart; + + /* recalculate pLineBidi->direction */ + if(trailingWSStart==0) { + /* all levels are at paraLevel */ + mDirection=(nsBidiDirection)(mParaLevel&1); + } else { + /* get the level of the first character */ + level=levels[0]&1; + + /* if there is anything of a different level, then the line is mixed */ + if(trailingWSStart0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { + --start; + } + + /* if the WS run can be merged with the previous run then do so here */ + while(start>0 && levels[start-1]==paraLevel) { + --start; + } + + mTrailingWSStart=start; +} + +nsresult nsBidi::GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel) +{ + /* return paraLevel if in the trailing WS run, otherwise the real level */ + if(aCharIndex<0 || mLength<=aCharIndex) { + *aLevel = 0; + } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) { + *aLevel = mParaLevel; + } else { + *aLevel = mLevels[aCharIndex]; + } + return NS_OK; +} + +nsresult nsBidi::GetLevels(nsBidiLevel** aLevels) +{ + int32_t start, length; + + length = mLength; + if(length<=0) { + *aLevels = nullptr; + return NS_ERROR_INVALID_ARG; + } + + start = mTrailingWSStart; + if(start==length) { + /* the current levels array reflects the WS run */ + *aLevels = mLevels; + return NS_OK; + } + + /* + * After the previous if(), we know that the levels array + * has an implicit trailing WS run and therefore does not fully + * reflect itself all the levels. + * This must be a nsBidi object for a line, and + * we need to create a new levels array. + */ + + if(GETLEVELSMEMORY(length)) { + nsBidiLevel *levels=mLevelsMemory; + + if(start>0 && levels!=mLevels) { + memcpy(levels, mLevels, start); + } + memset(levels+start, mParaLevel, length-start); + + /* this new levels array is set for the line and reflects the WS run */ + mTrailingWSStart=length; + *aLevels=mLevels=levels; + return NS_OK; + } else { + /* out of memory */ + *aLevels = nullptr; + return NS_ERROR_OUT_OF_MEMORY; + } +} +#endif // FULL_BIDI_ENGINE + +nsresult nsBidi::GetCharTypeAt(int32_t aCharIndex, nsCharType* pType) +{ + if(aCharIndex<0 || mLength<=aCharIndex) { + return NS_ERROR_INVALID_ARG; + } + *pType = (nsCharType)mDirProps[aCharIndex]; + return NS_OK; +} + +nsresult nsBidi::GetLogicalRun(int32_t aLogicalStart, int32_t *aLogicalLimit, nsBidiLevel *aLevel) +{ + int32_t length = mLength; + + if(aLogicalStart<0 || length<=aLogicalStart) { + return NS_ERROR_INVALID_ARG; + } + + if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) { + if(aLogicalLimit!=nullptr) { + *aLogicalLimit=length; + } + if(aLevel!=nullptr) { + *aLevel=mParaLevel; + } + } else { + nsBidiLevel *levels=mLevels; + nsBidiLevel level=levels[aLogicalStart]; + + /* search for the end of the run */ + length=mTrailingWSStart; + while(++aLogicalStart=mRunCount + ) { + *aDirection = NSBIDI_LTR; + return NS_OK; + } else { + int32_t start=mRuns[aRunIndex].logicalStart; + if(aLogicalStart!=nullptr) { + *aLogicalStart=GET_INDEX(start); + } + if(aLength!=nullptr) { + if(aRunIndex>0) { + *aLength=mRuns[aRunIndex].visualLimit- + mRuns[aRunIndex-1].visualLimit; + } else { + *aLength=mRuns[0].visualLimit; + } + } + *aDirection = (nsBidiDirection)GET_ODD_BIT(start); + return NS_OK; + } +} + +/* compute the runs array --------------------------------------------------- */ + +/* + * Compute the runs array from the levels array. + * After GetRuns() returns true, runCount is guaranteed to be >0 + * and the runs are reordered. + * Odd-level runs have visualStart on their visual right edge and + * they progress visually to the left. + */ +bool nsBidi::GetRuns() +{ + if(mDirection!=NSBIDI_MIXED) { + /* simple, single-run case - this covers length==0 */ + GetSingleRun(mParaLevel); + } else /* NSBIDI_MIXED, length>0 */ { + /* mixed directionality */ + int32_t length=mLength, limit=length; + + /* + * If there are WS characters at the end of the line + * and the run preceding them has a level different from + * paraLevel, then they will form their own run at paraLevel (L1). + * Count them separately. + * We need some special treatment for this in order to not + * modify the levels array which a line nsBidi object shares + * with its paragraph parent and its other line siblings. + * In other words, for the trailing WS, it may be + * levels[]!=paraLevel but we have to treat it like it were so. + */ + limit=mTrailingWSStart; + if(limit==0) { + /* there is only WS on this line */ + GetSingleRun(mParaLevel); + } else { + nsBidiLevel *levels=mLevels; + int32_t i, runCount; + nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */ + + /* count the runs, there is at least one non-WS run, and limit>0 */ + runCount=0; + for(i=0; i1 || limit1 */ + if(GETRUNSMEMORY(runCount)) { + runs=mRunsMemory; + } else { + return false; + } + + /* set the runs */ + /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ + /* however, that would take longer and make other functions more complicated */ + runIndex=0; + + /* search for the run ends */ + start=0; + level=levels[0]; + if(levelmaxLevel) { + maxLevel=level; + } + + /* initialize visualLimit values with the run lengths */ + for(i=1; imaxLevel) { + maxLevel=level; + } + ++runIndex; + } + } + + /* finish the last run at i==limit */ + runs[runIndex].logicalStart=start; + runs[runIndex].visualLimit=limit-start; + ++runIndex; + + if(limit1 and maxLevel>=minLevel>=paraLevel. + * All the visualStart fields=logical start before reordering. + * The "odd" bits are not set yet. + * + * Reordering with this data structure lends itself to some handy shortcuts: + * + * Since each run is moved but not modified, and since at the initial maxLevel + * each sequence of same-level runs consists of only one run each, we + * don't need to do anything there and can predecrement maxLevel. + * In many simple cases, the reordering is thus done entirely in the + * index mapping. + * Also, reordering occurs only down to the lowest odd level that occurs, + * which is minLevel|1. However, if the lowest level itself is odd, then + * in the last reordering the sequence of the runs at this level or higher + * will be all runs, and we don't need the elaborate loop to search for them. + * This is covered by ++minLevel instead of minLevel|=1 followed + * by an extra reorder-all after the reorder-some loop. + * About a trailing WS run: + * Such a run would need special treatment because its level is not + * reflected in levels[] if this is not a paragraph object. + * Instead, all characters from trailingWSStart on are implicitly at + * paraLevel. + * However, for all maxLevel>paraLevel, this run will never be reordered + * and does not need to be taken into account. maxLevel==paraLevel is only reordered + * if minLevel==paraLevel is odd, which is done in the extra segment. + * This means that for the main reordering loop we don't need to consider + * this run and can --runCount. If it is later part of the all-runs + * reordering, then runCount is adjusted accordingly. + */ +void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel) +{ + Run *runs; + nsBidiLevel *levels; + int32_t firstRun, endRun, limitRun, runCount, temp; + + /* nothing to do? */ + if(aMaxLevel<=(aMinLevel|1)) { + return; + } + + /* + * Reorder only down to the lowest odd level + * and reorder at an odd aMinLevel in a separate, simpler loop. + * See comments above for why aMinLevel is always incremented. + */ + ++aMinLevel; + + runs=mRuns; + levels=mLevels; + runCount=mRunCount; + + /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */ + if(mTrailingWSStart=aMinLevel) { + firstRun=0; + + /* loop for all sequences of runs */ + for(;;) { + /* look for a sequence of runs that are all at >=aMaxLevel */ + /* look for the first run of such a sequence */ + while(firstRun=runCount) { + break; /* no more such runs */ + } + + /* look for the limit run of such a sequence (the run behind it) */ + for(limitRun=firstRun; ++limitRun=aMaxLevel;) {} + + /* Swap the entire sequence of runs from firstRun to limitRun-1. */ + endRun=limitRun-1; + while(firstRun=maxLevel */ + /* look for the first index of such a sequence */ + while(start=aLength) { + break; /* no more such runs */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for(limit=start; ++limit=maxLevel;) {} + + /* + * Swap the entire interval of indexes from start to limit-1. + * We don't need to swap the levels for the purpose of this + * algorithm: the sequence of levels that we look at does not + * move anyway. + */ + end=limit-1; + while(start=minLevel); + + return NS_OK; +} + +bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, + int32_t *aIndexMap, + nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel) +{ + int32_t start; + nsBidiLevel level, minLevel, maxLevel; + + if(aLevels==nullptr || aLength<=0) { + return false; + } + + /* determine minLevel and maxLevel */ + minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1; + maxLevel=0; + for(start=aLength; start>0;) { + level=aLevels[--start]; + if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) { + return false; + } + if(levelmaxLevel) { + maxLevel=level; + } + } + *aMinLevel=minLevel; + *aMaxLevel=maxLevel; + + /* initialize the index map */ + for(start=aLength; start>0;) { + --start; + aIndexMap[start]=start; + } + + return true; +} + +#ifdef FULL_BIDI_ENGINE +/* API functions for logical<->visual mapping ------------------------------- */ + +nsresult nsBidi::GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex) { + if(aLogicalIndex<0 || mLength<=aLogicalIndex) { + return NS_ERROR_INVALID_ARG; + } else { + /* we can do the trivial cases without the runs array */ + switch(mDirection) { + case NSBIDI_LTR: + *aVisualIndex = aLogicalIndex; + return NS_OK; + case NSBIDI_RTL: + *aVisualIndex = mLength-aLogicalIndex-1; + return NS_OK; + default: + if(mRunCount<0 && !GetRuns()) { + return NS_ERROR_OUT_OF_MEMORY; + } else { + Run *runs=mRuns; + int32_t i, visualStart=0, offset, length; + + /* linear search for the run, search on the visual runs */ + for(i=0;; ++i) { + length=runs[i].visualLimit-visualStart; + offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart); + if(offset>=0 && offset=runs[i].visualLimit; ++i) {} + } else { + /* binary search for the run */ + int32_t start=0, limit=runCount; + + /* the middle if() will guaranteed find the run, we don't need a loop limit */ + for(;;) { + i=(start+limit)/2; + if(aVisualIndex>=runs[i].visualLimit) { + start=i+1; + } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) { + break; + } else { + limit=i; + } + } + } + + start=runs[i].logicalStart; + if(IS_EVEN_RUN(start)) { + /* LTR */ + /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */ + if(i>0) { + aVisualIndex-=runs[i-1].visualLimit; + } + *aLogicalIndex = GET_INDEX(start)+aVisualIndex; + return NS_OK; + } else { + /* RTL */ + *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1; + return NS_OK; + } + } + } + } +} + +nsresult nsBidi::GetLogicalMap(int32_t *aIndexMap) +{ + nsBidiLevel *levels; + nsresult rv; + + /* GetLevels() checks all of its and our arguments */ + rv = GetLevels(&levels); + if(NS_FAILED(rv)) { + return rv; + } else if(aIndexMap==nullptr) { + return NS_ERROR_INVALID_ARG; + } else { + return ReorderLogical(levels, mLength, aIndexMap); + } +} + +nsresult nsBidi::GetVisualMap(int32_t *aIndexMap) +{ + int32_t* runCount=nullptr; + nsresult rv; + + /* CountRuns() checks all of its and our arguments */ + rv = CountRuns(runCount); + if(NS_FAILED(rv)) { + return rv; + } else if(aIndexMap==nullptr) { + return NS_ERROR_INVALID_ARG; + } else { + /* fill a visual-to-logical index map using the runs[] */ + Run *runs=mRuns, *runsLimit=runs+mRunCount; + int32_t logicalStart, visualStart, visualLimit; + + visualStart=0; + for(; runslogicalStart; + visualLimit=runs->visualLimit; + if(IS_EVEN_RUN(logicalStart)) { + do { /* LTR */ + *aIndexMap++ = logicalStart++; + } while(++visualStart=maxLevel */ + /* look for the first index of such a sequence */ + while(start=aLength) { + break; /* no more such sequences */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for(limit=start; ++limit=maxLevel;) {} + + /* + * sos=start of sequence, eos=end of sequence + * + * The closed (inclusive) interval from sos to eos includes all the logical + * and visual indexes within this sequence. They are logically and + * visually contiguous and in the same range. + * + * For each run, the new visual index=sos+eos-old visual index; + * we pre-add sos+eos into sumOfSosEos -> + * new visual index=sumOfSosEos-old visual index; + */ + sumOfSosEos=start+limit-1; + + /* reorder each index in the sequence */ + do { + aIndexMap[start]=sumOfSosEos-aIndexMap[start]; + } while(++start=minLevel); + + return NS_OK; +} + +nsresult nsBidi::InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength) +{ + if(aSrcMap!=nullptr && aDestMap!=nullptr) { + aSrcMap+=aLength; + while(aLength>0) { + aDestMap[*--aSrcMap]=--aLength; + } + } + return NS_OK; +} + +int32_t nsBidi::doWriteReverse(const char16_t *src, int32_t srcLength, + char16_t *dest, uint16_t options) { + /* + * RTL run - + * + * RTL runs need to be copied to the destination in reverse order + * of code points, not code units, to keep Unicode characters intact. + * + * The general strategy for this is to read the source text + * in backward order, collect all code units for a code point + * (and optionally following combining characters, see below), + * and copy all these code units in ascending order + * to the destination for this run. + * + * Several options request whether combining characters + * should be kept after their base characters, + * whether Bidi control characters should be removed, and + * whether characters should be replaced by their mirror-image + * equivalent Unicode characters. + */ + int32_t i, j, destSize; + uint32_t c; + + /* optimize for several combinations of options */ + switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) { + case 0: + /* + * With none of the "complicated" options set, the destination + * run will have the same length as the source run, + * and there is no mirroring and no keeping combining characters + * with their base characters. + */ + destSize=srcLength; + + /* preserve character integrity */ + do { + /* i is always after the last code unit known to need to be kept in this segment */ + i=srcLength; + + /* collect code units for one base character */ + UTF_BACK_1(src, 0, srcLength); + + /* copy this base character */ + j=srcLength; + do { + *dest++=src[j++]; + } while(j0); + break; + case NSBIDI_KEEP_BASE_COMBINING: + /* + * Here, too, the destination + * run will have the same length as the source run, + * and there is no mirroring. + * We do need to keep combining characters with their base characters. + */ + destSize=srcLength; + + /* preserve character integrity */ + do { + /* i is always after the last code unit known to need to be kept in this segment */ + i=srcLength; + + /* collect code units and modifier letters for one base character */ + do { + UTF_PREV_CHAR(src, 0, srcLength, c); + } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)); + + /* copy this "user character" */ + j=srcLength; + do { + *dest++=src[j++]; + } while(j0); + break; + default: + /* + * With several "complicated" options set, this is the most + * general and the slowest copying of an RTL run. + * We will do mirroring, remove Bidi controls, and + * keep combining characters with their base characters + * as requested. + */ + if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) { + i=srcLength; + } else { + /* we need to find out the destination length of the run, + which will not include the Bidi control characters */ + int32_t length=srcLength; + char16_t ch; + + i=0; + do { + ch=*src++; + if (!IsBidiControl((uint32_t)ch)) { + ++i; + } + } while(--length>0); + src-=srcLength; + } + destSize=i; + + /* preserve character integrity */ + do { + /* i is always after the last code unit known to need to be kept in this segment */ + i=srcLength; + + /* collect code units for one base character */ + UTF_PREV_CHAR(src, 0, srcLength, c); + if(options&NSBIDI_KEEP_BASE_COMBINING) { + /* collect modifier letters for this base character */ + while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) { + UTF_PREV_CHAR(src, 0, srcLength, c); + } + } + + if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) { + /* do not copy this Bidi control character */ + continue; + } + + /* copy this "user character" */ + j=srcLength; + if(options&NSBIDI_DO_MIRRORING) { + /* mirror only the base character */ + c = SymmSwap(c); + + int32_t k=0; + UTF_APPEND_CHAR_UNSAFE(dest, k, c); + dest+=k; + j+=k; + } + while(j0); + break; + } /* end of switch */ + return destSize; +} + +nsresult nsBidi::WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize) +{ + if( aSrc==nullptr || aSrcLength<0 || + aDest==nullptr + ) { + return NS_ERROR_INVALID_ARG; + } + + /* do input and output overlap? */ + if( aSrc>=aDest && aSrc=aSrc && aDest0) { + *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions); + } + return NS_OK; +} +#endif // FULL_BIDI_ENGINE