michael@0: /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- michael@0: * michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "nsBidi.h" michael@0: #include "nsUnicodeProperties.h" michael@0: #include "nsCRTGlue.h" michael@0: michael@0: using namespace mozilla::unicode; michael@0: michael@0: // These are #defined in under Solaris 10 x86 michael@0: #undef CS michael@0: #undef ES michael@0: michael@0: /* Comparing the description of the Bidi algorithm with this implementation michael@0: is easier with the same names for the Bidi types in the code as there. michael@0: */ michael@0: enum { michael@0: L = eCharType_LeftToRight, michael@0: R = eCharType_RightToLeft, michael@0: EN = eCharType_EuropeanNumber, michael@0: ES = eCharType_EuropeanNumberSeparator, michael@0: ET = eCharType_EuropeanNumberTerminator, michael@0: AN = eCharType_ArabicNumber, michael@0: CS = eCharType_CommonNumberSeparator, michael@0: B = eCharType_BlockSeparator, michael@0: S = eCharType_SegmentSeparator, michael@0: WS = eCharType_WhiteSpaceNeutral, michael@0: O_N = eCharType_OtherNeutral, michael@0: LRE = eCharType_LeftToRightEmbedding, michael@0: LRO = eCharType_LeftToRightOverride, michael@0: AL = eCharType_RightToLeftArabic, michael@0: RLE = eCharType_RightToLeftEmbedding, michael@0: RLO = eCharType_RightToLeftOverride, michael@0: PDF = eCharType_PopDirectionalFormat, michael@0: NSM = eCharType_DirNonSpacingMark, michael@0: BN = eCharType_BoundaryNeutral, michael@0: dirPropCount michael@0: }; michael@0: michael@0: /* to avoid some conditional statements, use tiny constant arrays */ michael@0: static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; michael@0: static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; michael@0: static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; michael@0: michael@0: #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] michael@0: #define DIRPROP_FLAG_E(level) flagE[(level)&1] michael@0: #define DIRPROP_FLAG_O(level) flagO[(level)&1] michael@0: michael@0: /* michael@0: * General implementation notes: michael@0: * michael@0: * Throughout the implementation, there are comments like (W2) that refer to michael@0: * rules of the Bidi algorithm in its version 5, in this example to the second michael@0: * rule of the resolution of weak types. michael@0: * michael@0: * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) michael@0: * character according to UTF-16, the second UChar gets the directional property of michael@0: * the entire character assigned, while the first one gets a BN, a boundary michael@0: * neutral, type, which is ignored by most of the algorithm according to michael@0: * rule (X9) and the implementation suggestions of the Bidi algorithm. michael@0: * michael@0: * Later, AdjustWSLevels() will set the level for each BN to that of the michael@0: * following character (UChar), which results in surrogate pairs getting the michael@0: * same level on each of their surrogates. michael@0: * michael@0: * In a UTF-8 implementation, the same thing could be done: the last byte of michael@0: * a multi-byte sequence would get the "real" property, while all previous michael@0: * bytes of that sequence would get BN. michael@0: * michael@0: * It is not possible to assign all those parts of a character the same real michael@0: * property because this would fail in the resolution of weak types with rules michael@0: * that look at immediately surrounding types. michael@0: * michael@0: * As a related topic, this implementation does not remove Boundary Neutral michael@0: * types from the input, but ignores them whenever this is relevant. michael@0: * For example, the loop for the resolution of the weak types reads michael@0: * types until it finds a non-BN. michael@0: * Also, explicit embedding codes are neither changed into BN nor removed. michael@0: * They are only treated the same way real BNs are. michael@0: * As stated before, AdjustWSLevels() takes care of them at the end. michael@0: * For the purpose of conformance, the levels of all these codes michael@0: * do not matter. michael@0: * michael@0: * Note that this implementation never modifies the dirProps michael@0: * after the initial setup. michael@0: * michael@0: * michael@0: * In this implementation, the resolution of weak types (Wn), michael@0: * neutrals (Nn), and the assignment of the resolved level (In) michael@0: * are all done in one single loop, in ResolveImplicitLevels(). michael@0: * Changes of dirProp values are done on the fly, without writing michael@0: * them back to the dirProps array. michael@0: * michael@0: * michael@0: * This implementation contains code that allows to bypass steps of the michael@0: * algorithm that are not needed on the specific paragraph michael@0: * in order to speed up the most common cases considerably, michael@0: * like text that is entirely LTR, or RTL text without numbers. michael@0: * michael@0: * Most of this is done by setting a bit for each directional property michael@0: * in a flags variable and later checking for whether there are michael@0: * any LTR characters or any RTL characters, or both, whether michael@0: * there are any explicit embedding codes, etc. michael@0: * michael@0: * If the (Xn) steps are performed, then the flags are re-evaluated, michael@0: * because they will then not contain the embedding codes any more michael@0: * and will be adjusted for override codes, so that subsequently michael@0: * more bypassing may be possible than what the initial flags suggested. michael@0: * michael@0: * If the text is not mixed-directional, then the michael@0: * algorithm steps for the weak type resolution are not performed, michael@0: * and all levels are set to the paragraph level. michael@0: * michael@0: * If there are no explicit embedding codes, then the (Xn) steps michael@0: * are not performed. michael@0: * michael@0: * If embedding levels are supplied as a parameter, then all michael@0: * explicit embedding codes are ignored, and the (Xn) steps michael@0: * are not performed. michael@0: * michael@0: * White Space types could get the level of the run they belong to, michael@0: * and are checked with a test of (flags&MASK_EMBEDDING) to michael@0: * consider if the paragraph direction should be considered in michael@0: * the flags variable. michael@0: * michael@0: * If there are no White Space types in the paragraph, then michael@0: * (L1) is not necessary in AdjustWSLevels(). michael@0: */ michael@0: nsBidi::nsBidi() michael@0: { michael@0: Init(); michael@0: michael@0: mMayAllocateText=true; michael@0: mMayAllocateRuns=true; michael@0: } michael@0: michael@0: nsBidi::~nsBidi() michael@0: { michael@0: Free(); michael@0: } michael@0: michael@0: void nsBidi::Init() michael@0: { michael@0: /* reset the object, all pointers nullptr, all flags false, all sizes 0 */ michael@0: mLength = 0; michael@0: mParaLevel = 0; michael@0: mFlags = 0; michael@0: mDirection = NSBIDI_LTR; michael@0: mTrailingWSStart = 0; michael@0: michael@0: mDirPropsSize = 0; michael@0: mLevelsSize = 0; michael@0: mRunsSize = 0; michael@0: mRunCount = -1; michael@0: michael@0: mDirProps=nullptr; michael@0: mLevels=nullptr; michael@0: mRuns=nullptr; michael@0: michael@0: mDirPropsMemory=nullptr; michael@0: mLevelsMemory=nullptr; michael@0: mRunsMemory=nullptr; michael@0: michael@0: mMayAllocateText=false; michael@0: mMayAllocateRuns=false; michael@0: michael@0: } michael@0: michael@0: /* michael@0: * We are allowed to allocate memory if aMemory==nullptr or michael@0: * aMayAllocate==true for each array that we need. michael@0: * We also try to grow and shrink memory as needed if we michael@0: * allocate it. michael@0: * michael@0: * Assume aSizeNeeded>0. michael@0: * If *aMemory!=nullptr, then assume *aSize>0. michael@0: * michael@0: * ### this realloc() may unnecessarily copy the old data, michael@0: * which we know we don't need any more; michael@0: * is this the best way to do this?? michael@0: */ michael@0: bool nsBidi::GetMemory(void **aMemory, size_t *aSize, bool aMayAllocate, size_t aSizeNeeded) michael@0: { michael@0: /* check for existing memory */ michael@0: if(*aMemory==nullptr) { michael@0: /* we need to allocate memory */ michael@0: if(!aMayAllocate) { michael@0: return false; michael@0: } else { michael@0: *aMemory=moz_malloc(aSizeNeeded); michael@0: if (*aMemory!=nullptr) { michael@0: *aSize=aSizeNeeded; michael@0: return true; michael@0: } else { michael@0: *aSize=0; michael@0: return false; michael@0: } michael@0: } michael@0: } else { michael@0: /* there is some memory, is it enough or too much? */ michael@0: if(aSizeNeeded>*aSize && !aMayAllocate) { michael@0: /* not enough memory, and we must not allocate */ michael@0: return false; michael@0: } else if(aSizeNeeded!=*aSize && aMayAllocate) { michael@0: /* we may try to grow or shrink */ michael@0: void *memory=moz_realloc(*aMemory, aSizeNeeded); michael@0: michael@0: if(memory!=nullptr) { michael@0: *aMemory=memory; michael@0: *aSize=aSizeNeeded; michael@0: return true; michael@0: } else { michael@0: /* we failed to grow */ michael@0: return false; michael@0: } michael@0: } else { michael@0: /* we have at least enough memory and must not allocate */ michael@0: return true; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void nsBidi::Free() michael@0: { michael@0: moz_free(mDirPropsMemory); michael@0: mDirPropsMemory = nullptr; michael@0: moz_free(mLevelsMemory); michael@0: mLevelsMemory = nullptr; michael@0: moz_free(mRunsMemory); michael@0: mRunsMemory = nullptr; michael@0: } michael@0: michael@0: /* SetPara ------------------------------------------------------------ */ michael@0: michael@0: nsresult nsBidi::SetPara(const char16_t *aText, int32_t aLength, michael@0: nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels) michael@0: { michael@0: nsBidiDirection direction; michael@0: michael@0: /* check the argument values */ michael@0: if(aText==nullptr || michael@0: ((NSBIDI_MAX_EXPLICIT_LEVEL=NSBIDI_MAX_EXPLICIT_LEVEL */ michael@0: uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ michael@0: michael@0: /* recalculate the flags */ michael@0: flags=0; michael@0: michael@0: /* since we assume that this is a single paragraph, we ignore (X8) */ michael@0: for(i=0; i0) { michael@0: --countOver61; michael@0: } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) { michael@0: /* handle LRx overflows from level 60 */ michael@0: --countOver60; michael@0: } else if(stackTop>0) { michael@0: /* this is the pop operation; it also pops level 61 while countOver60>0 */ michael@0: --stackTop; michael@0: embeddingLevel=stack[stackTop]; michael@0: /* } else { (underflow) */ michael@0: } michael@0: flags|=DIRPROP_FLAG(BN); michael@0: break; michael@0: case B: michael@0: /* michael@0: * We do not really expect to see a paragraph separator (B), michael@0: * but we should do something reasonable with it, michael@0: * especially at the end of the text. michael@0: */ michael@0: stackTop=0; michael@0: countOver60=countOver61=0; michael@0: embeddingLevel=level=mParaLevel; michael@0: flags|=DIRPROP_FLAG(B); michael@0: break; michael@0: case BN: michael@0: /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ michael@0: /* they will get their levels set correctly in AdjustWSLevels() */ michael@0: flags|=DIRPROP_FLAG(BN); michael@0: break; michael@0: default: michael@0: /* all other types get the "real" level */ michael@0: if(level!=embeddingLevel) { michael@0: level=embeddingLevel; michael@0: if(level&NSBIDI_LEVEL_OVERRIDE) { michael@0: flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; michael@0: } else { michael@0: flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; michael@0: } michael@0: } michael@0: if(!(level&NSBIDI_LEVEL_OVERRIDE)) { michael@0: flags|=DIRPROP_FLAG(dirProp); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: /* michael@0: * We need to set reasonable levels even on BN codes and michael@0: * explicit codes because we will later look at same-level runs (X10). michael@0: */ michael@0: levels[i]=level; michael@0: } michael@0: if(flags&MASK_EMBEDDING) { michael@0: flags|=DIRPROP_FLAG_LR(mParaLevel); michael@0: } michael@0: michael@0: /* subsequently, ignore the explicit codes and BN (X9) */ michael@0: michael@0: /* again, determine if the text is mixed-directional or single-directional */ michael@0: mFlags=flags; michael@0: direction=DirectionFromFlags(flags); michael@0: } michael@0: return direction; michael@0: } michael@0: michael@0: /* michael@0: * Use a pre-specified embedding levels array: michael@0: * michael@0: * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), michael@0: * ignore all explicit codes (X9), michael@0: * and check all the preset levels. michael@0: * michael@0: * Recalculate the flags to have them reflect the real properties michael@0: * after taking the explicit embeddings into account. michael@0: */ michael@0: nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection) michael@0: { michael@0: const DirProp *dirProps=mDirProps; michael@0: nsBidiLevel *levels=mLevels; michael@0: michael@0: int32_t i, length=mLength; michael@0: Flags flags=0; /* collect all directionalities in the text */ michael@0: nsBidiLevel level, paraLevel=mParaLevel; michael@0: michael@0: for(i=0; i>=EN_SHIFT; michael@0: /* michael@0: * Technically, this should be done before the switch() in the form michael@0: * if(nextDirProp==NSM) { michael@0: * dirProps[next]=nextDirProp=dirProp; michael@0: * } michael@0: * michael@0: * - effectively one iteration ahead. michael@0: * However, whether the next dirProp is NSM or is equal to the current dirProp michael@0: * does not change the outcome of any condition in (W2)..(W7). michael@0: */ michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: michael@0: /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */ michael@0: michael@0: /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */ michael@0: /* this is one iteration late for the neutrals */ michael@0: if(DIRPROP_FLAG(dirProp)&MASK_N) { michael@0: if(neutralStart<0) { michael@0: /* start of a sequence of neutrals */ michael@0: neutralStart=i; michael@0: beforeNeutral=prevDirProp; michael@0: } michael@0: } else /* not a neutral, can be only one of { L, R, EN, AN } */ { michael@0: /* michael@0: * Note that all levels[] values are still the same at this michael@0: * point because this function is called for an entire michael@0: * same-level run. michael@0: * Therefore, we need to read only one actual level. michael@0: */ michael@0: nsBidiLevel level=levels[i]; michael@0: michael@0: if(neutralStart>=0) { michael@0: nsBidiLevel final; michael@0: /* end of a sequence of neutrals (dirProp is "afterNeutral") */ michael@0: if(beforeNeutral==L) { michael@0: if(dirProp==L) { michael@0: final=0; /* make all neutrals L (N1) */ michael@0: } else { michael@0: final=level; /* make all neutrals "e" (N2) */ michael@0: } michael@0: } else /* beforeNeutral is one of { R, EN, AN } */ { michael@0: if(dirProp==L) { michael@0: final=level; /* make all neutrals "e" (N2) */ michael@0: } else { michael@0: final=1; /* make all neutrals R (N1) */ michael@0: } michael@0: } michael@0: /* perform (In) on the sequence of neutrals */ michael@0: if((level^final)&1) { michael@0: /* do something only if we need to _change_ the level */ michael@0: do { michael@0: ++levels[neutralStart]; michael@0: } while(++neutralStart=0) { michael@0: /* michael@0: * Note that all levels[] values are still the same at this michael@0: * point because this function is called for an entire michael@0: * same-level run. michael@0: * Therefore, we need to read only one actual level. michael@0: */ michael@0: nsBidiLevel level=levels[neutralStart], final; michael@0: michael@0: /* end of a sequence of neutrals (aEOR is "afterNeutral") */ michael@0: if(beforeNeutral==L) { michael@0: if(aEOR==L) { michael@0: final=0; /* make all neutrals L (N1) */ michael@0: } else { michael@0: final=level; /* make all neutrals "e" (N2) */ michael@0: } michael@0: } else /* beforeNeutral is one of { R, EN, AN } */ { michael@0: if(aEOR==L) { michael@0: final=level; /* make all neutrals "e" (N2) */ michael@0: } else { michael@0: final=1; /* make all neutrals R (N1) */ michael@0: } michael@0: } michael@0: /* perform (In) on the sequence of neutrals */ michael@0: if((level^final)&1) { michael@0: /* do something only if we need to _change_ the level */ michael@0: do { michael@0: ++levels[neutralStart]; michael@0: } while(++neutralStart0) { michael@0: /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ michael@0: while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) { michael@0: levels[i]=paraLevel; michael@0: } michael@0: michael@0: /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ michael@0: /* here, i+1 is guaranteed to be 0) { michael@0: flag=DIRPROP_FLAG(dirProps[--i]); michael@0: if(flag&MASK_BN_EXPLICIT) { michael@0: levels[i]=levels[i+1]; michael@0: } else if(flag&MASK_B_S) { michael@0: levels[i]=paraLevel; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */ michael@0: /* (a separate loop can be optimized more easily by a compiler) */ michael@0: if(mFlags&MASK_OVERRIDE) { michael@0: for(i=mTrailingWSStart; i>0;) { michael@0: levels[--i]&=~NSBIDI_LEVEL_OVERRIDE; michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult nsBidi::GetDirection(nsBidiDirection* aDirection) michael@0: { michael@0: *aDirection = mDirection; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel) michael@0: { michael@0: *aParaLevel = mParaLevel; michael@0: return NS_OK; michael@0: } michael@0: #ifdef FULL_BIDI_ENGINE michael@0: michael@0: /* -------------------------------------------------------------------------- */ michael@0: michael@0: nsresult nsBidi::GetLength(int32_t* aLength) michael@0: { michael@0: *aLength = mLength; michael@0: return NS_OK; michael@0: } michael@0: michael@0: /* michael@0: * General remarks about the functions in this section: michael@0: * michael@0: * These functions deal with the aspects of potentially mixed-directional michael@0: * text in a single paragraph or in a line of a single paragraph michael@0: * which has already been processed according to michael@0: * the Unicode 3.0 Bidi algorithm as defined in michael@0: * http://www.unicode.org/unicode/reports/tr9/ , version 5, michael@0: * also described in The Unicode Standard, Version 3.0 . michael@0: * michael@0: * This means that there is a nsBidi object with a levels michael@0: * and a dirProps array. michael@0: * paraLevel and direction are also set. michael@0: * Only if the length of the text is zero, then levels==dirProps==nullptr. michael@0: * michael@0: * The overall directionality of the paragraph michael@0: * or line is used to bypass the reordering steps if possible. michael@0: * Even purely RTL text does not need reordering there because michael@0: * the getLogical/VisualIndex() functions can compute the michael@0: * index on the fly in such a case. michael@0: * michael@0: * The implementation of the access to same-level-runs and of the reordering michael@0: * do attempt to provide better performance and less memory usage compared to michael@0: * a direct implementation of especially rule (L2) with an array of michael@0: * one (32-bit) integer per text character. michael@0: * michael@0: * Here, the levels array is scanned as soon as necessary, and a vector of michael@0: * same-level-runs is created. Reordering then is done on this vector. michael@0: * For each run of text positions that were resolved to the same level, michael@0: * only 8 bytes are stored: the first text position of the run and the visual michael@0: * position behind the run after reordering. michael@0: * One sign bit is used to hold the directionality of the run. michael@0: * This is inefficient if there are many very short runs. If the average run michael@0: * length is <2, then this uses more memory. michael@0: * michael@0: * In a further attempt to save memory, the levels array is never changed michael@0: * after all the resolution rules (Xn, Wn, Nn, In). michael@0: * Many functions have to consider the field trailingWSStart: michael@0: * if it is less than length, then there is an implicit trailing run michael@0: * at the paraLevel, michael@0: * which is not reflected in the levels array. michael@0: * This allows a line nsBidi object to use the same levels array as michael@0: * its paragraph parent object. michael@0: * michael@0: * When a nsBidi object is created for a line of a paragraph, then the michael@0: * paragraph's levels and dirProps arrays are reused by way of setting michael@0: * a pointer into them, not by copying. This again saves memory and forbids to michael@0: * change the now shared levels for (L1). michael@0: */ michael@0: nsresult nsBidi::SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit) michael@0: { michael@0: nsBidi* pParent = (nsBidi*)aParaBidi; michael@0: int32_t length; michael@0: michael@0: /* check the argument values */ michael@0: if(pParent==nullptr) { michael@0: return NS_ERROR_INVALID_POINTER; michael@0: } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: /* set members from our aParaBidi parent */ michael@0: length=mLength=aLimit-aStart; michael@0: mParaLevel=pParent->mParaLevel; michael@0: michael@0: mRuns=nullptr; michael@0: mFlags=0; michael@0: michael@0: if(length>0) { michael@0: mDirProps=pParent->mDirProps+aStart; michael@0: mLevels=pParent->mLevels+aStart; michael@0: mRunCount=-1; michael@0: michael@0: if(pParent->mDirection!=NSBIDI_MIXED) { michael@0: /* the parent is already trivial */ michael@0: mDirection=pParent->mDirection; michael@0: michael@0: /* michael@0: * The parent's levels are all either michael@0: * implicitly or explicitly ==paraLevel; michael@0: * do the same here. michael@0: */ michael@0: if(pParent->mTrailingWSStart<=aStart) { michael@0: mTrailingWSStart=0; michael@0: } else if(pParent->mTrailingWSStartmTrailingWSStart-aStart; michael@0: } else { michael@0: mTrailingWSStart=length; michael@0: } michael@0: } else { michael@0: const nsBidiLevel *levels=mLevels; michael@0: int32_t i, trailingWSStart; michael@0: nsBidiLevel level; michael@0: Flags flags=0; michael@0: michael@0: SetTrailingWSStart(); michael@0: trailingWSStart=mTrailingWSStart; michael@0: michael@0: /* recalculate pLineBidi->direction */ michael@0: if(trailingWSStart==0) { michael@0: /* all levels are at paraLevel */ michael@0: mDirection=(nsBidiDirection)(mParaLevel&1); michael@0: } else { michael@0: /* get the level of the first character */ michael@0: level=levels[0]&1; michael@0: michael@0: /* if there is anything of a different level, then the line is mixed */ michael@0: if(trailingWSStart0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { michael@0: --start; michael@0: } michael@0: michael@0: /* if the WS run can be merged with the previous run then do so here */ michael@0: while(start>0 && levels[start-1]==paraLevel) { michael@0: --start; michael@0: } michael@0: michael@0: mTrailingWSStart=start; michael@0: } michael@0: michael@0: nsresult nsBidi::GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel) michael@0: { michael@0: /* return paraLevel if in the trailing WS run, otherwise the real level */ michael@0: if(aCharIndex<0 || mLength<=aCharIndex) { michael@0: *aLevel = 0; michael@0: } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) { michael@0: *aLevel = mParaLevel; michael@0: } else { michael@0: *aLevel = mLevels[aCharIndex]; michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult nsBidi::GetLevels(nsBidiLevel** aLevels) michael@0: { michael@0: int32_t start, length; michael@0: michael@0: length = mLength; michael@0: if(length<=0) { michael@0: *aLevels = nullptr; michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: start = mTrailingWSStart; michael@0: if(start==length) { michael@0: /* the current levels array reflects the WS run */ michael@0: *aLevels = mLevels; michael@0: return NS_OK; michael@0: } michael@0: michael@0: /* michael@0: * After the previous if(), we know that the levels array michael@0: * has an implicit trailing WS run and therefore does not fully michael@0: * reflect itself all the levels. michael@0: * This must be a nsBidi object for a line, and michael@0: * we need to create a new levels array. michael@0: */ michael@0: michael@0: if(GETLEVELSMEMORY(length)) { michael@0: nsBidiLevel *levels=mLevelsMemory; michael@0: michael@0: if(start>0 && levels!=mLevels) { michael@0: memcpy(levels, mLevels, start); michael@0: } michael@0: memset(levels+start, mParaLevel, length-start); michael@0: michael@0: /* this new levels array is set for the line and reflects the WS run */ michael@0: mTrailingWSStart=length; michael@0: *aLevels=mLevels=levels; michael@0: return NS_OK; michael@0: } else { michael@0: /* out of memory */ michael@0: *aLevels = nullptr; michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: } michael@0: #endif // FULL_BIDI_ENGINE michael@0: michael@0: nsresult nsBidi::GetCharTypeAt(int32_t aCharIndex, nsCharType* pType) michael@0: { michael@0: if(aCharIndex<0 || mLength<=aCharIndex) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: *pType = (nsCharType)mDirProps[aCharIndex]; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult nsBidi::GetLogicalRun(int32_t aLogicalStart, int32_t *aLogicalLimit, nsBidiLevel *aLevel) michael@0: { michael@0: int32_t length = mLength; michael@0: michael@0: if(aLogicalStart<0 || length<=aLogicalStart) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) { michael@0: if(aLogicalLimit!=nullptr) { michael@0: *aLogicalLimit=length; michael@0: } michael@0: if(aLevel!=nullptr) { michael@0: *aLevel=mParaLevel; michael@0: } michael@0: } else { michael@0: nsBidiLevel *levels=mLevels; michael@0: nsBidiLevel level=levels[aLogicalStart]; michael@0: michael@0: /* search for the end of the run */ michael@0: length=mTrailingWSStart; michael@0: while(++aLogicalStart=mRunCount michael@0: ) { michael@0: *aDirection = NSBIDI_LTR; michael@0: return NS_OK; michael@0: } else { michael@0: int32_t start=mRuns[aRunIndex].logicalStart; michael@0: if(aLogicalStart!=nullptr) { michael@0: *aLogicalStart=GET_INDEX(start); michael@0: } michael@0: if(aLength!=nullptr) { michael@0: if(aRunIndex>0) { michael@0: *aLength=mRuns[aRunIndex].visualLimit- michael@0: mRuns[aRunIndex-1].visualLimit; michael@0: } else { michael@0: *aLength=mRuns[0].visualLimit; michael@0: } michael@0: } michael@0: *aDirection = (nsBidiDirection)GET_ODD_BIT(start); michael@0: return NS_OK; michael@0: } michael@0: } michael@0: michael@0: /* compute the runs array --------------------------------------------------- */ michael@0: michael@0: /* michael@0: * Compute the runs array from the levels array. michael@0: * After GetRuns() returns true, runCount is guaranteed to be >0 michael@0: * and the runs are reordered. michael@0: * Odd-level runs have visualStart on their visual right edge and michael@0: * they progress visually to the left. michael@0: */ michael@0: bool nsBidi::GetRuns() michael@0: { michael@0: if(mDirection!=NSBIDI_MIXED) { michael@0: /* simple, single-run case - this covers length==0 */ michael@0: GetSingleRun(mParaLevel); michael@0: } else /* NSBIDI_MIXED, length>0 */ { michael@0: /* mixed directionality */ michael@0: int32_t length=mLength, limit=length; michael@0: michael@0: /* michael@0: * If there are WS characters at the end of the line michael@0: * and the run preceding them has a level different from michael@0: * paraLevel, then they will form their own run at paraLevel (L1). michael@0: * Count them separately. michael@0: * We need some special treatment for this in order to not michael@0: * modify the levels array which a line nsBidi object shares michael@0: * with its paragraph parent and its other line siblings. michael@0: * In other words, for the trailing WS, it may be michael@0: * levels[]!=paraLevel but we have to treat it like it were so. michael@0: */ michael@0: limit=mTrailingWSStart; michael@0: if(limit==0) { michael@0: /* there is only WS on this line */ michael@0: GetSingleRun(mParaLevel); michael@0: } else { michael@0: nsBidiLevel *levels=mLevels; michael@0: int32_t i, runCount; michael@0: nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */ michael@0: michael@0: /* count the runs, there is at least one non-WS run, and limit>0 */ michael@0: runCount=0; michael@0: for(i=0; i1 || limit1 */ michael@0: if(GETRUNSMEMORY(runCount)) { michael@0: runs=mRunsMemory; michael@0: } else { michael@0: return false; michael@0: } michael@0: michael@0: /* set the runs */ michael@0: /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ michael@0: /* however, that would take longer and make other functions more complicated */ michael@0: runIndex=0; michael@0: michael@0: /* search for the run ends */ michael@0: start=0; michael@0: level=levels[0]; michael@0: if(levelmaxLevel) { michael@0: maxLevel=level; michael@0: } michael@0: michael@0: /* initialize visualLimit values with the run lengths */ michael@0: for(i=1; imaxLevel) { michael@0: maxLevel=level; michael@0: } michael@0: ++runIndex; michael@0: } michael@0: } michael@0: michael@0: /* finish the last run at i==limit */ michael@0: runs[runIndex].logicalStart=start; michael@0: runs[runIndex].visualLimit=limit-start; michael@0: ++runIndex; michael@0: michael@0: if(limit1 and maxLevel>=minLevel>=paraLevel. michael@0: * All the visualStart fields=logical start before reordering. michael@0: * The "odd" bits are not set yet. michael@0: * michael@0: * Reordering with this data structure lends itself to some handy shortcuts: michael@0: * michael@0: * Since each run is moved but not modified, and since at the initial maxLevel michael@0: * each sequence of same-level runs consists of only one run each, we michael@0: * don't need to do anything there and can predecrement maxLevel. michael@0: * In many simple cases, the reordering is thus done entirely in the michael@0: * index mapping. michael@0: * Also, reordering occurs only down to the lowest odd level that occurs, michael@0: * which is minLevel|1. However, if the lowest level itself is odd, then michael@0: * in the last reordering the sequence of the runs at this level or higher michael@0: * will be all runs, and we don't need the elaborate loop to search for them. michael@0: * This is covered by ++minLevel instead of minLevel|=1 followed michael@0: * by an extra reorder-all after the reorder-some loop. michael@0: * About a trailing WS run: michael@0: * Such a run would need special treatment because its level is not michael@0: * reflected in levels[] if this is not a paragraph object. michael@0: * Instead, all characters from trailingWSStart on are implicitly at michael@0: * paraLevel. michael@0: * However, for all maxLevel>paraLevel, this run will never be reordered michael@0: * and does not need to be taken into account. maxLevel==paraLevel is only reordered michael@0: * if minLevel==paraLevel is odd, which is done in the extra segment. michael@0: * This means that for the main reordering loop we don't need to consider michael@0: * this run and can --runCount. If it is later part of the all-runs michael@0: * reordering, then runCount is adjusted accordingly. michael@0: */ michael@0: void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel) michael@0: { michael@0: Run *runs; michael@0: nsBidiLevel *levels; michael@0: int32_t firstRun, endRun, limitRun, runCount, temp; michael@0: michael@0: /* nothing to do? */ michael@0: if(aMaxLevel<=(aMinLevel|1)) { michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Reorder only down to the lowest odd level michael@0: * and reorder at an odd aMinLevel in a separate, simpler loop. michael@0: * See comments above for why aMinLevel is always incremented. michael@0: */ michael@0: ++aMinLevel; michael@0: michael@0: runs=mRuns; michael@0: levels=mLevels; michael@0: runCount=mRunCount; michael@0: michael@0: /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */ michael@0: if(mTrailingWSStart=aMinLevel) { michael@0: firstRun=0; michael@0: michael@0: /* loop for all sequences of runs */ michael@0: for(;;) { michael@0: /* look for a sequence of runs that are all at >=aMaxLevel */ michael@0: /* look for the first run of such a sequence */ michael@0: while(firstRun=runCount) { michael@0: break; /* no more such runs */ michael@0: } michael@0: michael@0: /* look for the limit run of such a sequence (the run behind it) */ michael@0: for(limitRun=firstRun; ++limitRun=aMaxLevel;) {} michael@0: michael@0: /* Swap the entire sequence of runs from firstRun to limitRun-1. */ michael@0: endRun=limitRun-1; michael@0: while(firstRun=maxLevel */ michael@0: /* look for the first index of such a sequence */ michael@0: while(start=aLength) { michael@0: break; /* no more such runs */ michael@0: } michael@0: michael@0: /* look for the limit of such a sequence (the index behind it) */ michael@0: for(limit=start; ++limit=maxLevel;) {} michael@0: michael@0: /* michael@0: * Swap the entire interval of indexes from start to limit-1. michael@0: * We don't need to swap the levels for the purpose of this michael@0: * algorithm: the sequence of levels that we look at does not michael@0: * move anyway. michael@0: */ michael@0: end=limit-1; michael@0: while(start=minLevel); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, michael@0: int32_t *aIndexMap, michael@0: nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel) michael@0: { michael@0: int32_t start; michael@0: nsBidiLevel level, minLevel, maxLevel; michael@0: michael@0: if(aLevels==nullptr || aLength<=0) { michael@0: return false; michael@0: } michael@0: michael@0: /* determine minLevel and maxLevel */ michael@0: minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1; michael@0: maxLevel=0; michael@0: for(start=aLength; start>0;) { michael@0: level=aLevels[--start]; michael@0: if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) { michael@0: return false; michael@0: } michael@0: if(levelmaxLevel) { michael@0: maxLevel=level; michael@0: } michael@0: } michael@0: *aMinLevel=minLevel; michael@0: *aMaxLevel=maxLevel; michael@0: michael@0: /* initialize the index map */ michael@0: for(start=aLength; start>0;) { michael@0: --start; michael@0: aIndexMap[start]=start; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: #ifdef FULL_BIDI_ENGINE michael@0: /* API functions for logical<->visual mapping ------------------------------- */ michael@0: michael@0: nsresult nsBidi::GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex) { michael@0: if(aLogicalIndex<0 || mLength<=aLogicalIndex) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } else { michael@0: /* we can do the trivial cases without the runs array */ michael@0: switch(mDirection) { michael@0: case NSBIDI_LTR: michael@0: *aVisualIndex = aLogicalIndex; michael@0: return NS_OK; michael@0: case NSBIDI_RTL: michael@0: *aVisualIndex = mLength-aLogicalIndex-1; michael@0: return NS_OK; michael@0: default: michael@0: if(mRunCount<0 && !GetRuns()) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } else { michael@0: Run *runs=mRuns; michael@0: int32_t i, visualStart=0, offset, length; michael@0: michael@0: /* linear search for the run, search on the visual runs */ michael@0: for(i=0;; ++i) { michael@0: length=runs[i].visualLimit-visualStart; michael@0: offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart); michael@0: if(offset>=0 && offset=runs[i].visualLimit; ++i) {} michael@0: } else { michael@0: /* binary search for the run */ michael@0: int32_t start=0, limit=runCount; michael@0: michael@0: /* the middle if() will guaranteed find the run, we don't need a loop limit */ michael@0: for(;;) { michael@0: i=(start+limit)/2; michael@0: if(aVisualIndex>=runs[i].visualLimit) { michael@0: start=i+1; michael@0: } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) { michael@0: break; michael@0: } else { michael@0: limit=i; michael@0: } michael@0: } michael@0: } michael@0: michael@0: start=runs[i].logicalStart; michael@0: if(IS_EVEN_RUN(start)) { michael@0: /* LTR */ michael@0: /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */ michael@0: if(i>0) { michael@0: aVisualIndex-=runs[i-1].visualLimit; michael@0: } michael@0: *aLogicalIndex = GET_INDEX(start)+aVisualIndex; michael@0: return NS_OK; michael@0: } else { michael@0: /* RTL */ michael@0: *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1; michael@0: return NS_OK; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult nsBidi::GetLogicalMap(int32_t *aIndexMap) michael@0: { michael@0: nsBidiLevel *levels; michael@0: nsresult rv; michael@0: michael@0: /* GetLevels() checks all of its and our arguments */ michael@0: rv = GetLevels(&levels); michael@0: if(NS_FAILED(rv)) { michael@0: return rv; michael@0: } else if(aIndexMap==nullptr) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } else { michael@0: return ReorderLogical(levels, mLength, aIndexMap); michael@0: } michael@0: } michael@0: michael@0: nsresult nsBidi::GetVisualMap(int32_t *aIndexMap) michael@0: { michael@0: int32_t* runCount=nullptr; michael@0: nsresult rv; michael@0: michael@0: /* CountRuns() checks all of its and our arguments */ michael@0: rv = CountRuns(runCount); michael@0: if(NS_FAILED(rv)) { michael@0: return rv; michael@0: } else if(aIndexMap==nullptr) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } else { michael@0: /* fill a visual-to-logical index map using the runs[] */ michael@0: Run *runs=mRuns, *runsLimit=runs+mRunCount; michael@0: int32_t logicalStart, visualStart, visualLimit; michael@0: michael@0: visualStart=0; michael@0: for(; runslogicalStart; michael@0: visualLimit=runs->visualLimit; michael@0: if(IS_EVEN_RUN(logicalStart)) { michael@0: do { /* LTR */ michael@0: *aIndexMap++ = logicalStart++; michael@0: } while(++visualStart=maxLevel */ michael@0: /* look for the first index of such a sequence */ michael@0: while(start=aLength) { michael@0: break; /* no more such sequences */ michael@0: } michael@0: michael@0: /* look for the limit of such a sequence (the index behind it) */ michael@0: for(limit=start; ++limit=maxLevel;) {} michael@0: michael@0: /* michael@0: * sos=start of sequence, eos=end of sequence michael@0: * michael@0: * The closed (inclusive) interval from sos to eos includes all the logical michael@0: * and visual indexes within this sequence. They are logically and michael@0: * visually contiguous and in the same range. michael@0: * michael@0: * For each run, the new visual index=sos+eos-old visual index; michael@0: * we pre-add sos+eos into sumOfSosEos -> michael@0: * new visual index=sumOfSosEos-old visual index; michael@0: */ michael@0: sumOfSosEos=start+limit-1; michael@0: michael@0: /* reorder each index in the sequence */ michael@0: do { michael@0: aIndexMap[start]=sumOfSosEos-aIndexMap[start]; michael@0: } while(++start=minLevel); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult nsBidi::InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength) michael@0: { michael@0: if(aSrcMap!=nullptr && aDestMap!=nullptr) { michael@0: aSrcMap+=aLength; michael@0: while(aLength>0) { michael@0: aDestMap[*--aSrcMap]=--aLength; michael@0: } michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: int32_t nsBidi::doWriteReverse(const char16_t *src, int32_t srcLength, michael@0: char16_t *dest, uint16_t options) { michael@0: /* michael@0: * RTL run - michael@0: * michael@0: * RTL runs need to be copied to the destination in reverse order michael@0: * of code points, not code units, to keep Unicode characters intact. michael@0: * michael@0: * The general strategy for this is to read the source text michael@0: * in backward order, collect all code units for a code point michael@0: * (and optionally following combining characters, see below), michael@0: * and copy all these code units in ascending order michael@0: * to the destination for this run. michael@0: * michael@0: * Several options request whether combining characters michael@0: * should be kept after their base characters, michael@0: * whether Bidi control characters should be removed, and michael@0: * whether characters should be replaced by their mirror-image michael@0: * equivalent Unicode characters. michael@0: */ michael@0: int32_t i, j, destSize; michael@0: uint32_t c; michael@0: michael@0: /* optimize for several combinations of options */ michael@0: switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) { michael@0: case 0: michael@0: /* michael@0: * With none of the "complicated" options set, the destination michael@0: * run will have the same length as the source run, michael@0: * and there is no mirroring and no keeping combining characters michael@0: * with their base characters. michael@0: */ michael@0: destSize=srcLength; michael@0: michael@0: /* preserve character integrity */ michael@0: do { michael@0: /* i is always after the last code unit known to need to be kept in this segment */ michael@0: i=srcLength; michael@0: michael@0: /* collect code units for one base character */ michael@0: UTF_BACK_1(src, 0, srcLength); michael@0: michael@0: /* copy this base character */ michael@0: j=srcLength; michael@0: do { michael@0: *dest++=src[j++]; michael@0: } while(j0); michael@0: break; michael@0: case NSBIDI_KEEP_BASE_COMBINING: michael@0: /* michael@0: * Here, too, the destination michael@0: * run will have the same length as the source run, michael@0: * and there is no mirroring. michael@0: * We do need to keep combining characters with their base characters. michael@0: */ michael@0: destSize=srcLength; michael@0: michael@0: /* preserve character integrity */ michael@0: do { michael@0: /* i is always after the last code unit known to need to be kept in this segment */ michael@0: i=srcLength; michael@0: michael@0: /* collect code units and modifier letters for one base character */ michael@0: do { michael@0: UTF_PREV_CHAR(src, 0, srcLength, c); michael@0: } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)); michael@0: michael@0: /* copy this "user character" */ michael@0: j=srcLength; michael@0: do { michael@0: *dest++=src[j++]; michael@0: } while(j0); michael@0: break; michael@0: default: michael@0: /* michael@0: * With several "complicated" options set, this is the most michael@0: * general and the slowest copying of an RTL run. michael@0: * We will do mirroring, remove Bidi controls, and michael@0: * keep combining characters with their base characters michael@0: * as requested. michael@0: */ michael@0: if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) { michael@0: i=srcLength; michael@0: } else { michael@0: /* we need to find out the destination length of the run, michael@0: which will not include the Bidi control characters */ michael@0: int32_t length=srcLength; michael@0: char16_t ch; michael@0: michael@0: i=0; michael@0: do { michael@0: ch=*src++; michael@0: if (!IsBidiControl((uint32_t)ch)) { michael@0: ++i; michael@0: } michael@0: } while(--length>0); michael@0: src-=srcLength; michael@0: } michael@0: destSize=i; michael@0: michael@0: /* preserve character integrity */ michael@0: do { michael@0: /* i is always after the last code unit known to need to be kept in this segment */ michael@0: i=srcLength; michael@0: michael@0: /* collect code units for one base character */ michael@0: UTF_PREV_CHAR(src, 0, srcLength, c); michael@0: if(options&NSBIDI_KEEP_BASE_COMBINING) { michael@0: /* collect modifier letters for this base character */ michael@0: while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) { michael@0: UTF_PREV_CHAR(src, 0, srcLength, c); michael@0: } michael@0: } michael@0: michael@0: if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) { michael@0: /* do not copy this Bidi control character */ michael@0: continue; michael@0: } michael@0: michael@0: /* copy this "user character" */ michael@0: j=srcLength; michael@0: if(options&NSBIDI_DO_MIRRORING) { michael@0: /* mirror only the base character */ michael@0: c = SymmSwap(c); michael@0: michael@0: int32_t k=0; michael@0: UTF_APPEND_CHAR_UNSAFE(dest, k, c); michael@0: dest+=k; michael@0: j+=k; michael@0: } michael@0: while(j0); michael@0: break; michael@0: } /* end of switch */ michael@0: return destSize; michael@0: } michael@0: michael@0: nsresult nsBidi::WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize) michael@0: { michael@0: if( aSrc==nullptr || aSrcLength<0 || michael@0: aDest==nullptr michael@0: ) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: /* do input and output overlap? */ michael@0: if( aSrc>=aDest && aSrc=aSrc && aDest0) { michael@0: *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions); michael@0: } michael@0: return NS_OK; michael@0: } michael@0: #endif // FULL_BIDI_ENGINE