|
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
|
2 * |
|
3 * This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "nsBidi.h" |
|
8 #include "nsUnicodeProperties.h" |
|
9 #include "nsCRTGlue.h" |
|
10 |
|
11 using namespace mozilla::unicode; |
|
12 |
|
13 // These are #defined in <sys/regset.h> under Solaris 10 x86 |
|
14 #undef CS |
|
15 #undef ES |
|
16 |
|
17 /* Comparing the description of the Bidi algorithm with this implementation |
|
18 is easier with the same names for the Bidi types in the code as there. |
|
19 */ |
|
20 enum { |
|
21 L = eCharType_LeftToRight, |
|
22 R = eCharType_RightToLeft, |
|
23 EN = eCharType_EuropeanNumber, |
|
24 ES = eCharType_EuropeanNumberSeparator, |
|
25 ET = eCharType_EuropeanNumberTerminator, |
|
26 AN = eCharType_ArabicNumber, |
|
27 CS = eCharType_CommonNumberSeparator, |
|
28 B = eCharType_BlockSeparator, |
|
29 S = eCharType_SegmentSeparator, |
|
30 WS = eCharType_WhiteSpaceNeutral, |
|
31 O_N = eCharType_OtherNeutral, |
|
32 LRE = eCharType_LeftToRightEmbedding, |
|
33 LRO = eCharType_LeftToRightOverride, |
|
34 AL = eCharType_RightToLeftArabic, |
|
35 RLE = eCharType_RightToLeftEmbedding, |
|
36 RLO = eCharType_RightToLeftOverride, |
|
37 PDF = eCharType_PopDirectionalFormat, |
|
38 NSM = eCharType_DirNonSpacingMark, |
|
39 BN = eCharType_BoundaryNeutral, |
|
40 dirPropCount |
|
41 }; |
|
42 |
|
43 /* to avoid some conditional statements, use tiny constant arrays */ |
|
44 static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; |
|
45 static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; |
|
46 static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; |
|
47 |
|
48 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] |
|
49 #define DIRPROP_FLAG_E(level) flagE[(level)&1] |
|
50 #define DIRPROP_FLAG_O(level) flagO[(level)&1] |
|
51 |
|
52 /* |
|
53 * General implementation notes: |
|
54 * |
|
55 * Throughout the implementation, there are comments like (W2) that refer to |
|
56 * rules of the Bidi algorithm in its version 5, in this example to the second |
|
57 * rule of the resolution of weak types. |
|
58 * |
|
59 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) |
|
60 * character according to UTF-16, the second UChar gets the directional property of |
|
61 * the entire character assigned, while the first one gets a BN, a boundary |
|
62 * neutral, type, which is ignored by most of the algorithm according to |
|
63 * rule (X9) and the implementation suggestions of the Bidi algorithm. |
|
64 * |
|
65 * Later, AdjustWSLevels() will set the level for each BN to that of the |
|
66 * following character (UChar), which results in surrogate pairs getting the |
|
67 * same level on each of their surrogates. |
|
68 * |
|
69 * In a UTF-8 implementation, the same thing could be done: the last byte of |
|
70 * a multi-byte sequence would get the "real" property, while all previous |
|
71 * bytes of that sequence would get BN. |
|
72 * |
|
73 * It is not possible to assign all those parts of a character the same real |
|
74 * property because this would fail in the resolution of weak types with rules |
|
75 * that look at immediately surrounding types. |
|
76 * |
|
77 * As a related topic, this implementation does not remove Boundary Neutral |
|
78 * types from the input, but ignores them whenever this is relevant. |
|
79 * For example, the loop for the resolution of the weak types reads |
|
80 * types until it finds a non-BN. |
|
81 * Also, explicit embedding codes are neither changed into BN nor removed. |
|
82 * They are only treated the same way real BNs are. |
|
83 * As stated before, AdjustWSLevels() takes care of them at the end. |
|
84 * For the purpose of conformance, the levels of all these codes |
|
85 * do not matter. |
|
86 * |
|
87 * Note that this implementation never modifies the dirProps |
|
88 * after the initial setup. |
|
89 * |
|
90 * |
|
91 * In this implementation, the resolution of weak types (Wn), |
|
92 * neutrals (Nn), and the assignment of the resolved level (In) |
|
93 * are all done in one single loop, in ResolveImplicitLevels(). |
|
94 * Changes of dirProp values are done on the fly, without writing |
|
95 * them back to the dirProps array. |
|
96 * |
|
97 * |
|
98 * This implementation contains code that allows to bypass steps of the |
|
99 * algorithm that are not needed on the specific paragraph |
|
100 * in order to speed up the most common cases considerably, |
|
101 * like text that is entirely LTR, or RTL text without numbers. |
|
102 * |
|
103 * Most of this is done by setting a bit for each directional property |
|
104 * in a flags variable and later checking for whether there are |
|
105 * any LTR characters or any RTL characters, or both, whether |
|
106 * there are any explicit embedding codes, etc. |
|
107 * |
|
108 * If the (Xn) steps are performed, then the flags are re-evaluated, |
|
109 * because they will then not contain the embedding codes any more |
|
110 * and will be adjusted for override codes, so that subsequently |
|
111 * more bypassing may be possible than what the initial flags suggested. |
|
112 * |
|
113 * If the text is not mixed-directional, then the |
|
114 * algorithm steps for the weak type resolution are not performed, |
|
115 * and all levels are set to the paragraph level. |
|
116 * |
|
117 * If there are no explicit embedding codes, then the (Xn) steps |
|
118 * are not performed. |
|
119 * |
|
120 * If embedding levels are supplied as a parameter, then all |
|
121 * explicit embedding codes are ignored, and the (Xn) steps |
|
122 * are not performed. |
|
123 * |
|
124 * White Space types could get the level of the run they belong to, |
|
125 * and are checked with a test of (flags&MASK_EMBEDDING) to |
|
126 * consider if the paragraph direction should be considered in |
|
127 * the flags variable. |
|
128 * |
|
129 * If there are no White Space types in the paragraph, then |
|
130 * (L1) is not necessary in AdjustWSLevels(). |
|
131 */ |
|
132 nsBidi::nsBidi() |
|
133 { |
|
134 Init(); |
|
135 |
|
136 mMayAllocateText=true; |
|
137 mMayAllocateRuns=true; |
|
138 } |
|
139 |
|
140 nsBidi::~nsBidi() |
|
141 { |
|
142 Free(); |
|
143 } |
|
144 |
|
145 void nsBidi::Init() |
|
146 { |
|
147 /* reset the object, all pointers nullptr, all flags false, all sizes 0 */ |
|
148 mLength = 0; |
|
149 mParaLevel = 0; |
|
150 mFlags = 0; |
|
151 mDirection = NSBIDI_LTR; |
|
152 mTrailingWSStart = 0; |
|
153 |
|
154 mDirPropsSize = 0; |
|
155 mLevelsSize = 0; |
|
156 mRunsSize = 0; |
|
157 mRunCount = -1; |
|
158 |
|
159 mDirProps=nullptr; |
|
160 mLevels=nullptr; |
|
161 mRuns=nullptr; |
|
162 |
|
163 mDirPropsMemory=nullptr; |
|
164 mLevelsMemory=nullptr; |
|
165 mRunsMemory=nullptr; |
|
166 |
|
167 mMayAllocateText=false; |
|
168 mMayAllocateRuns=false; |
|
169 |
|
170 } |
|
171 |
|
172 /* |
|
173 * We are allowed to allocate memory if aMemory==nullptr or |
|
174 * aMayAllocate==true for each array that we need. |
|
175 * We also try to grow and shrink memory as needed if we |
|
176 * allocate it. |
|
177 * |
|
178 * Assume aSizeNeeded>0. |
|
179 * If *aMemory!=nullptr, then assume *aSize>0. |
|
180 * |
|
181 * ### this realloc() may unnecessarily copy the old data, |
|
182 * which we know we don't need any more; |
|
183 * is this the best way to do this?? |
|
184 */ |
|
185 bool nsBidi::GetMemory(void **aMemory, size_t *aSize, bool aMayAllocate, size_t aSizeNeeded) |
|
186 { |
|
187 /* check for existing memory */ |
|
188 if(*aMemory==nullptr) { |
|
189 /* we need to allocate memory */ |
|
190 if(!aMayAllocate) { |
|
191 return false; |
|
192 } else { |
|
193 *aMemory=moz_malloc(aSizeNeeded); |
|
194 if (*aMemory!=nullptr) { |
|
195 *aSize=aSizeNeeded; |
|
196 return true; |
|
197 } else { |
|
198 *aSize=0; |
|
199 return false; |
|
200 } |
|
201 } |
|
202 } else { |
|
203 /* there is some memory, is it enough or too much? */ |
|
204 if(aSizeNeeded>*aSize && !aMayAllocate) { |
|
205 /* not enough memory, and we must not allocate */ |
|
206 return false; |
|
207 } else if(aSizeNeeded!=*aSize && aMayAllocate) { |
|
208 /* we may try to grow or shrink */ |
|
209 void *memory=moz_realloc(*aMemory, aSizeNeeded); |
|
210 |
|
211 if(memory!=nullptr) { |
|
212 *aMemory=memory; |
|
213 *aSize=aSizeNeeded; |
|
214 return true; |
|
215 } else { |
|
216 /* we failed to grow */ |
|
217 return false; |
|
218 } |
|
219 } else { |
|
220 /* we have at least enough memory and must not allocate */ |
|
221 return true; |
|
222 } |
|
223 } |
|
224 } |
|
225 |
|
226 void nsBidi::Free() |
|
227 { |
|
228 moz_free(mDirPropsMemory); |
|
229 mDirPropsMemory = nullptr; |
|
230 moz_free(mLevelsMemory); |
|
231 mLevelsMemory = nullptr; |
|
232 moz_free(mRunsMemory); |
|
233 mRunsMemory = nullptr; |
|
234 } |
|
235 |
|
236 /* SetPara ------------------------------------------------------------ */ |
|
237 |
|
238 nsresult nsBidi::SetPara(const char16_t *aText, int32_t aLength, |
|
239 nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels) |
|
240 { |
|
241 nsBidiDirection direction; |
|
242 |
|
243 /* check the argument values */ |
|
244 if(aText==nullptr || |
|
245 ((NSBIDI_MAX_EXPLICIT_LEVEL<aParaLevel) && !IS_DEFAULT_LEVEL(aParaLevel)) || |
|
246 aLength<-1 |
|
247 ) { |
|
248 return NS_ERROR_INVALID_ARG; |
|
249 } |
|
250 |
|
251 if(aLength==-1) { |
|
252 aLength = NS_strlen(aText); |
|
253 } |
|
254 |
|
255 /* initialize member data */ |
|
256 mLength=aLength; |
|
257 mParaLevel=aParaLevel; |
|
258 mDirection=NSBIDI_LTR; |
|
259 mTrailingWSStart=aLength; /* the levels[] will reflect the WS run */ |
|
260 |
|
261 mDirProps=nullptr; |
|
262 mLevels=nullptr; |
|
263 mRuns=nullptr; |
|
264 |
|
265 if(aLength==0) { |
|
266 /* |
|
267 * For an empty paragraph, create an nsBidi object with the aParaLevel and |
|
268 * the flags and the direction set but without allocating zero-length arrays. |
|
269 * There is nothing more to do. |
|
270 */ |
|
271 if(IS_DEFAULT_LEVEL(aParaLevel)) { |
|
272 mParaLevel&=1; |
|
273 } |
|
274 if(aParaLevel&1) { |
|
275 mFlags=DIRPROP_FLAG(R); |
|
276 mDirection=NSBIDI_RTL; |
|
277 } else { |
|
278 mFlags=DIRPROP_FLAG(L); |
|
279 mDirection=NSBIDI_LTR; |
|
280 } |
|
281 |
|
282 mRunCount=0; |
|
283 return NS_OK; |
|
284 } |
|
285 |
|
286 mRunCount=-1; |
|
287 |
|
288 /* |
|
289 * Get the directional properties, |
|
290 * the flags bit-set, and |
|
291 * determine the partagraph level if necessary. |
|
292 */ |
|
293 if(GETDIRPROPSMEMORY(aLength)) { |
|
294 mDirProps=mDirPropsMemory; |
|
295 GetDirProps(aText); |
|
296 } else { |
|
297 return NS_ERROR_OUT_OF_MEMORY; |
|
298 } |
|
299 |
|
300 /* are explicit levels specified? */ |
|
301 if(aEmbeddingLevels==nullptr) { |
|
302 /* no: determine explicit levels according to the (Xn) rules */\ |
|
303 if(GETLEVELSMEMORY(aLength)) { |
|
304 mLevels=mLevelsMemory; |
|
305 direction=ResolveExplicitLevels(); |
|
306 } else { |
|
307 return NS_ERROR_OUT_OF_MEMORY; |
|
308 } |
|
309 } else { |
|
310 /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */ |
|
311 mLevels=aEmbeddingLevels; |
|
312 nsresult rv = CheckExplicitLevels(&direction); |
|
313 if(NS_FAILED(rv)) { |
|
314 return rv; |
|
315 } |
|
316 } |
|
317 |
|
318 /* |
|
319 * The steps after (X9) in the Bidi algorithm are performed only if |
|
320 * the paragraph text has mixed directionality! |
|
321 */ |
|
322 switch(direction) { |
|
323 case NSBIDI_LTR: |
|
324 /* make sure paraLevel is even */ |
|
325 mParaLevel=(mParaLevel+1)&~1; |
|
326 |
|
327 /* all levels are implicitly at paraLevel (important for GetLevels()) */ |
|
328 mTrailingWSStart=0; |
|
329 break; |
|
330 case NSBIDI_RTL: |
|
331 /* make sure paraLevel is odd */ |
|
332 mParaLevel|=1; |
|
333 |
|
334 /* all levels are implicitly at paraLevel (important for GetLevels()) */ |
|
335 mTrailingWSStart=0; |
|
336 break; |
|
337 default: |
|
338 /* |
|
339 * If there are no external levels specified and there |
|
340 * are no significant explicit level codes in the text, |
|
341 * then we can treat the entire paragraph as one run. |
|
342 * Otherwise, we need to perform the following rules on runs of |
|
343 * the text with the same embedding levels. (X10) |
|
344 * "Significant" explicit level codes are ones that actually |
|
345 * affect non-BN characters. |
|
346 * Examples for "insignificant" ones are empty embeddings |
|
347 * LRE-PDF, LRE-RLE-PDF-PDF, etc. |
|
348 */ |
|
349 if(aEmbeddingLevels==nullptr && !(mFlags&DIRPROP_FLAG_MULTI_RUNS)) { |
|
350 ResolveImplicitLevels(0, aLength, |
|
351 GET_LR_FROM_LEVEL(mParaLevel), |
|
352 GET_LR_FROM_LEVEL(mParaLevel)); |
|
353 } else { |
|
354 /* sor, eor: start and end types of same-level-run */ |
|
355 nsBidiLevel *levels=mLevels; |
|
356 int32_t start, limit=0; |
|
357 nsBidiLevel level, nextLevel; |
|
358 DirProp sor, eor; |
|
359 |
|
360 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ |
|
361 level=mParaLevel; |
|
362 nextLevel=levels[0]; |
|
363 if(level<nextLevel) { |
|
364 eor=GET_LR_FROM_LEVEL(nextLevel); |
|
365 } else { |
|
366 eor=GET_LR_FROM_LEVEL(level); |
|
367 } |
|
368 |
|
369 do { |
|
370 /* determine start and limit of the run (end points just behind the run) */ |
|
371 |
|
372 /* the values for this run's start are the same as for the previous run's end */ |
|
373 sor=eor; |
|
374 start=limit; |
|
375 level=nextLevel; |
|
376 |
|
377 /* search for the limit of this run */ |
|
378 while(++limit<aLength && levels[limit]==level) {} |
|
379 |
|
380 /* get the correct level of the next run */ |
|
381 if(limit<aLength) { |
|
382 nextLevel=levels[limit]; |
|
383 } else { |
|
384 nextLevel=mParaLevel; |
|
385 } |
|
386 |
|
387 /* determine eor from max(level, nextLevel); sor is last run's eor */ |
|
388 if((level&~NSBIDI_LEVEL_OVERRIDE)<(nextLevel&~NSBIDI_LEVEL_OVERRIDE)) { |
|
389 eor=GET_LR_FROM_LEVEL(nextLevel); |
|
390 } else { |
|
391 eor=GET_LR_FROM_LEVEL(level); |
|
392 } |
|
393 |
|
394 /* if the run consists of overridden directional types, then there |
|
395 are no implicit types to be resolved */ |
|
396 if(!(level&NSBIDI_LEVEL_OVERRIDE)) { |
|
397 ResolveImplicitLevels(start, limit, sor, eor); |
|
398 } |
|
399 } while(limit<aLength); |
|
400 } |
|
401 |
|
402 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ |
|
403 AdjustWSLevels(); |
|
404 break; |
|
405 } |
|
406 |
|
407 mDirection=direction; |
|
408 return NS_OK; |
|
409 } |
|
410 |
|
411 /* perform (P2)..(P3) ------------------------------------------------------- */ |
|
412 |
|
413 /* |
|
414 * Get the directional properties for the text, |
|
415 * calculate the flags bit-set, and |
|
416 * determine the partagraph level if necessary. |
|
417 */ |
|
418 void nsBidi::GetDirProps(const char16_t *aText) |
|
419 { |
|
420 DirProp *dirProps=mDirPropsMemory; /* mDirProps is const */ |
|
421 |
|
422 int32_t i=0, length=mLength; |
|
423 Flags flags=0; /* collect all directionalities in the text */ |
|
424 char16_t uchar; |
|
425 DirProp dirProp; |
|
426 |
|
427 if(IS_DEFAULT_LEVEL(mParaLevel)) { |
|
428 /* determine the paragraph level (P2..P3) */ |
|
429 for(;;) { |
|
430 uchar=aText[i]; |
|
431 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) { |
|
432 /* not a surrogate pair */ |
|
433 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetBidiCat((uint32_t)uchar)); |
|
434 } else { |
|
435 /* a surrogate pair */ |
|
436 dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */ |
|
437 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetBidiCat(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN); |
|
438 } |
|
439 ++i; |
|
440 if(dirProp==L) { |
|
441 mParaLevel=0; |
|
442 break; |
|
443 } else if(dirProp==R || dirProp==AL) { |
|
444 mParaLevel=1; |
|
445 break; |
|
446 } else if(i==length) { |
|
447 /* |
|
448 * see comment in nsIBidi.h: |
|
449 * the DEFAULT_XXX values are designed so that |
|
450 * their bit 0 alone yields the intended default |
|
451 */ |
|
452 mParaLevel&=1; |
|
453 break; |
|
454 } |
|
455 } |
|
456 } |
|
457 |
|
458 /* get the rest of the directional properties and the flags bits */ |
|
459 while(i<length) { |
|
460 uchar=aText[i]; |
|
461 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) { |
|
462 /* not a surrogate pair */ |
|
463 flags|=DIRPROP_FLAG(dirProps[i]=GetBidiCat((uint32_t)uchar)); |
|
464 } else { |
|
465 /* a surrogate pair */ |
|
466 dirProps[i++]=BN; /* second surrogate in the pair gets the BN type */ |
|
467 flags|=DIRPROP_FLAG(dirProps[i]=GetBidiCat(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN); |
|
468 } |
|
469 ++i; |
|
470 } |
|
471 if(flags&MASK_EMBEDDING) { |
|
472 flags|=DIRPROP_FLAG_LR(mParaLevel); |
|
473 } |
|
474 mFlags=flags; |
|
475 } |
|
476 |
|
477 /* perform (X1)..(X9) ------------------------------------------------------- */ |
|
478 |
|
479 /* |
|
480 * Resolve the explicit levels as specified by explicit embedding codes. |
|
481 * Recalculate the flags to have them reflect the real properties |
|
482 * after taking the explicit embeddings into account. |
|
483 * |
|
484 * The Bidi algorithm is designed to result in the same behavior whether embedding |
|
485 * levels are externally specified (from "styled text", supposedly the preferred |
|
486 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. |
|
487 * That is why (X9) instructs to remove all explicit codes (and BN). |
|
488 * However, in a real implementation, this removal of these codes and their index |
|
489 * positions in the plain text is undesirable since it would result in |
|
490 * reallocated, reindexed text. |
|
491 * Instead, this implementation leaves the codes in there and just ignores them |
|
492 * in the subsequent processing. |
|
493 * In order to get the same reordering behavior, positions with a BN or an |
|
494 * explicit embedding code just get the same level assigned as the last "real" |
|
495 * character. |
|
496 * |
|
497 * Some implementations, not this one, then overwrite some of these |
|
498 * directionality properties at "real" same-level-run boundaries by |
|
499 * L or R codes so that the resolution of weak types can be performed on the |
|
500 * entire paragraph at once instead of having to parse it once more and |
|
501 * perform that resolution on same-level-runs. |
|
502 * This limits the scope of the implicit rules in effectively |
|
503 * the same way as the run limits. |
|
504 * |
|
505 * Instead, this implementation does not modify these codes. |
|
506 * On one hand, the paragraph has to be scanned for same-level-runs, but |
|
507 * on the other hand, this saves another loop to reset these codes, |
|
508 * or saves making and modifying a copy of dirProps[]. |
|
509 * |
|
510 * |
|
511 * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm. |
|
512 * |
|
513 * |
|
514 * Handling the stack of explicit levels (Xn): |
|
515 * |
|
516 * With the Bidi stack of explicit levels, |
|
517 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, |
|
518 * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL==61. |
|
519 * |
|
520 * In order to have a correct push-pop semantics even in the case of overflows, |
|
521 * there are two overflow counters: |
|
522 * - countOver60 is incremented with each LRx at level 60 |
|
523 * - from level 60, one RLx increases the level to 61 |
|
524 * - countOver61 is incremented with each LRx and RLx at level 61 |
|
525 * |
|
526 * Popping levels with PDF must work in the opposite order so that level 61 |
|
527 * is correct at the correct point. Underflows (too many PDFs) must be checked. |
|
528 * |
|
529 * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd. |
|
530 */ |
|
531 |
|
532 nsBidiDirection nsBidi::ResolveExplicitLevels() |
|
533 { |
|
534 const DirProp *dirProps=mDirProps; |
|
535 nsBidiLevel *levels=mLevels; |
|
536 |
|
537 int32_t i=0, length=mLength; |
|
538 Flags flags=mFlags; /* collect all directionalities in the text */ |
|
539 DirProp dirProp; |
|
540 nsBidiLevel level=mParaLevel; |
|
541 |
|
542 nsBidiDirection direction; |
|
543 |
|
544 /* determine if the text is mixed-directional or single-directional */ |
|
545 direction=DirectionFromFlags(flags); |
|
546 |
|
547 /* we may not need to resolve any explicit levels */ |
|
548 if(direction!=NSBIDI_MIXED) { |
|
549 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ |
|
550 } else if(!(flags&MASK_EXPLICIT)) { |
|
551 /* mixed, but all characters are at the same embedding level */ |
|
552 /* set all levels to the paragraph level */ |
|
553 for(i=0; i<length; ++i) { |
|
554 levels[i]=level; |
|
555 } |
|
556 } else { |
|
557 /* continue to perform (Xn) */ |
|
558 |
|
559 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ |
|
560 /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */ |
|
561 nsBidiLevel embeddingLevel=level, newLevel, stackTop=0; |
|
562 |
|
563 nsBidiLevel stack[NSBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL */ |
|
564 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ |
|
565 |
|
566 /* recalculate the flags */ |
|
567 flags=0; |
|
568 |
|
569 /* since we assume that this is a single paragraph, we ignore (X8) */ |
|
570 for(i=0; i<length; ++i) { |
|
571 dirProp=dirProps[i]; |
|
572 switch(dirProp) { |
|
573 case LRE: |
|
574 case LRO: |
|
575 /* (X3, X5) */ |
|
576 newLevel=(embeddingLevel+2)&~(NSBIDI_LEVEL_OVERRIDE|1); /* least greater even level */ |
|
577 if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) { |
|
578 stack[stackTop]=embeddingLevel; |
|
579 ++stackTop; |
|
580 embeddingLevel=newLevel; |
|
581 if(dirProp==LRO) { |
|
582 embeddingLevel|=NSBIDI_LEVEL_OVERRIDE; |
|
583 } else { |
|
584 embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE; |
|
585 } |
|
586 } else if((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL) { |
|
587 ++countOver61; |
|
588 } else /* (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL-1 */ { |
|
589 ++countOver60; |
|
590 } |
|
591 flags|=DIRPROP_FLAG(BN); |
|
592 break; |
|
593 case RLE: |
|
594 case RLO: |
|
595 /* (X2, X4) */ |
|
596 newLevel=((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)+1)|1; /* least greater odd level */ |
|
597 if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) { |
|
598 stack[stackTop]=embeddingLevel; |
|
599 ++stackTop; |
|
600 embeddingLevel=newLevel; |
|
601 if(dirProp==RLO) { |
|
602 embeddingLevel|=NSBIDI_LEVEL_OVERRIDE; |
|
603 } else { |
|
604 embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE; |
|
605 } |
|
606 } else { |
|
607 ++countOver61; |
|
608 } |
|
609 flags|=DIRPROP_FLAG(BN); |
|
610 break; |
|
611 case PDF: |
|
612 /* (X7) */ |
|
613 /* handle all the overflow cases first */ |
|
614 if(countOver61>0) { |
|
615 --countOver61; |
|
616 } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) { |
|
617 /* handle LRx overflows from level 60 */ |
|
618 --countOver60; |
|
619 } else if(stackTop>0) { |
|
620 /* this is the pop operation; it also pops level 61 while countOver60>0 */ |
|
621 --stackTop; |
|
622 embeddingLevel=stack[stackTop]; |
|
623 /* } else { (underflow) */ |
|
624 } |
|
625 flags|=DIRPROP_FLAG(BN); |
|
626 break; |
|
627 case B: |
|
628 /* |
|
629 * We do not really expect to see a paragraph separator (B), |
|
630 * but we should do something reasonable with it, |
|
631 * especially at the end of the text. |
|
632 */ |
|
633 stackTop=0; |
|
634 countOver60=countOver61=0; |
|
635 embeddingLevel=level=mParaLevel; |
|
636 flags|=DIRPROP_FLAG(B); |
|
637 break; |
|
638 case BN: |
|
639 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ |
|
640 /* they will get their levels set correctly in AdjustWSLevels() */ |
|
641 flags|=DIRPROP_FLAG(BN); |
|
642 break; |
|
643 default: |
|
644 /* all other types get the "real" level */ |
|
645 if(level!=embeddingLevel) { |
|
646 level=embeddingLevel; |
|
647 if(level&NSBIDI_LEVEL_OVERRIDE) { |
|
648 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; |
|
649 } else { |
|
650 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; |
|
651 } |
|
652 } |
|
653 if(!(level&NSBIDI_LEVEL_OVERRIDE)) { |
|
654 flags|=DIRPROP_FLAG(dirProp); |
|
655 } |
|
656 break; |
|
657 } |
|
658 |
|
659 /* |
|
660 * We need to set reasonable levels even on BN codes and |
|
661 * explicit codes because we will later look at same-level runs (X10). |
|
662 */ |
|
663 levels[i]=level; |
|
664 } |
|
665 if(flags&MASK_EMBEDDING) { |
|
666 flags|=DIRPROP_FLAG_LR(mParaLevel); |
|
667 } |
|
668 |
|
669 /* subsequently, ignore the explicit codes and BN (X9) */ |
|
670 |
|
671 /* again, determine if the text is mixed-directional or single-directional */ |
|
672 mFlags=flags; |
|
673 direction=DirectionFromFlags(flags); |
|
674 } |
|
675 return direction; |
|
676 } |
|
677 |
|
678 /* |
|
679 * Use a pre-specified embedding levels array: |
|
680 * |
|
681 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), |
|
682 * ignore all explicit codes (X9), |
|
683 * and check all the preset levels. |
|
684 * |
|
685 * Recalculate the flags to have them reflect the real properties |
|
686 * after taking the explicit embeddings into account. |
|
687 */ |
|
688 nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection) |
|
689 { |
|
690 const DirProp *dirProps=mDirProps; |
|
691 nsBidiLevel *levels=mLevels; |
|
692 |
|
693 int32_t i, length=mLength; |
|
694 Flags flags=0; /* collect all directionalities in the text */ |
|
695 nsBidiLevel level, paraLevel=mParaLevel; |
|
696 |
|
697 for(i=0; i<length; ++i) { |
|
698 level=levels[i]; |
|
699 if(level&NSBIDI_LEVEL_OVERRIDE) { |
|
700 /* keep the override flag in levels[i] but adjust the flags */ |
|
701 level&=~NSBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ |
|
702 flags|=DIRPROP_FLAG_O(level); |
|
703 } else { |
|
704 /* set the flags */ |
|
705 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]); |
|
706 } |
|
707 if(level<paraLevel || NSBIDI_MAX_EXPLICIT_LEVEL<level) { |
|
708 /* level out of bounds */ |
|
709 *aDirection = NSBIDI_LTR; |
|
710 return NS_ERROR_INVALID_ARG; |
|
711 } |
|
712 } |
|
713 if(flags&MASK_EMBEDDING) { |
|
714 flags|=DIRPROP_FLAG_LR(mParaLevel); |
|
715 } |
|
716 |
|
717 /* determine if the text is mixed-directional or single-directional */ |
|
718 mFlags=flags; |
|
719 *aDirection = DirectionFromFlags(flags); |
|
720 return NS_OK; |
|
721 } |
|
722 |
|
723 /* determine if the text is mixed-directional or single-directional */ |
|
724 nsBidiDirection nsBidi::DirectionFromFlags(Flags aFlags) |
|
725 { |
|
726 /* if the text contains AN and neutrals, then some neutrals may become RTL */ |
|
727 if(!(aFlags&MASK_RTL || (aFlags&DIRPROP_FLAG(AN) && aFlags&MASK_POSSIBLE_N))) { |
|
728 return NSBIDI_LTR; |
|
729 } else if(!(aFlags&MASK_LTR)) { |
|
730 return NSBIDI_RTL; |
|
731 } else { |
|
732 return NSBIDI_MIXED; |
|
733 } |
|
734 } |
|
735 |
|
736 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ |
|
737 |
|
738 /* |
|
739 * This implementation of the (Wn) rules applies all rules in one pass. |
|
740 * In order to do so, it needs a look-ahead of typically 1 character |
|
741 * (except for W5: sequences of ET) and keeps track of changes |
|
742 * in a rule Wp that affect a later Wq (p<q). |
|
743 * |
|
744 * historyOfEN is a variable-saver: it contains 4 boolean states; |
|
745 * a bit in it set to 1 means: |
|
746 * bit 0: the current code is an EN after W2 |
|
747 * bit 1: the current code is an EN after W4 |
|
748 * bit 2: the previous code was an EN after W2 |
|
749 * bit 3: the previous code was an EN after W4 |
|
750 * In other words, b0..1 have transitions of EN in the current iteration, |
|
751 * while b2..3 have the transitions of EN in the previous iteration. |
|
752 * A simple historyOfEN<<=2 suffices for the propagation. |
|
753 * |
|
754 * The (Nn) and (In) rules are also performed in that same single loop, |
|
755 * but effectively one iteration behind for white space. |
|
756 * |
|
757 * Since all implicit rules are performed in one step, it is not necessary |
|
758 * to actually store the intermediate directional properties in dirProps[]. |
|
759 */ |
|
760 |
|
761 #define EN_SHIFT 2 |
|
762 #define EN_AFTER_W2 1 |
|
763 #define EN_AFTER_W4 2 |
|
764 #define EN_ALL 3 |
|
765 #define PREV_EN_AFTER_W2 4 |
|
766 #define PREV_EN_AFTER_W4 8 |
|
767 |
|
768 void nsBidi::ResolveImplicitLevels(int32_t aStart, int32_t aLimit, |
|
769 DirProp aSOR, DirProp aEOR) |
|
770 { |
|
771 const DirProp *dirProps=mDirProps; |
|
772 nsBidiLevel *levels=mLevels; |
|
773 |
|
774 int32_t i, next, neutralStart=-1; |
|
775 DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral; |
|
776 uint8_t historyOfEN; |
|
777 |
|
778 /* initialize: current at aSOR, next at aStart (it is aStart<aLimit) */ |
|
779 next=aStart; |
|
780 beforeNeutral=dirProp=lastStrong=aSOR; |
|
781 nextDirProp=dirProps[next]; |
|
782 historyOfEN=0; |
|
783 |
|
784 /* |
|
785 * In all steps of this implementation, BN and explicit embedding codes |
|
786 * must be treated as if they didn't exist (X9). |
|
787 * They will get levels set before a non-neutral character, and remain |
|
788 * undefined before a neutral one, but AdjustWSLevels() will take care |
|
789 * of all of them. |
|
790 */ |
|
791 while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) { |
|
792 if(++next<aLimit) { |
|
793 nextDirProp=dirProps[next]; |
|
794 } else { |
|
795 nextDirProp=aEOR; |
|
796 break; |
|
797 } |
|
798 } |
|
799 |
|
800 /* loop for entire run */ |
|
801 while(next<aLimit) { |
|
802 /* advance */ |
|
803 prevDirProp=dirProp; |
|
804 dirProp=nextDirProp; |
|
805 i=next; |
|
806 do { |
|
807 if(++next<aLimit) { |
|
808 nextDirProp=dirProps[next]; |
|
809 } else { |
|
810 nextDirProp=aEOR; |
|
811 break; |
|
812 } |
|
813 } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT); |
|
814 historyOfEN<<=EN_SHIFT; |
|
815 |
|
816 /* (W1..W7) */ |
|
817 switch(dirProp) { |
|
818 case L: |
|
819 lastStrong=L; |
|
820 break; |
|
821 case R: |
|
822 lastStrong=R; |
|
823 break; |
|
824 case AL: |
|
825 /* (W3) */ |
|
826 lastStrong=AL; |
|
827 dirProp=R; |
|
828 break; |
|
829 case EN: |
|
830 /* we have to set historyOfEN correctly */ |
|
831 if(lastStrong==AL) { |
|
832 /* (W2) */ |
|
833 dirProp=AN; |
|
834 } else { |
|
835 if(lastStrong==L) { |
|
836 /* (W7) */ |
|
837 dirProp=L; |
|
838 } |
|
839 /* this EN stays after (W2) and (W4) - at least before (W7) */ |
|
840 historyOfEN|=EN_ALL; |
|
841 } |
|
842 break; |
|
843 case ES: |
|
844 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */ |
|
845 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */ |
|
846 ) { |
|
847 /* (W4) */ |
|
848 if(lastStrong!=L) { |
|
849 dirProp=EN; |
|
850 } else { |
|
851 /* (W7) */ |
|
852 dirProp=L; |
|
853 } |
|
854 historyOfEN|=EN_AFTER_W4; |
|
855 } else { |
|
856 /* (W6) */ |
|
857 dirProp=O_N; |
|
858 } |
|
859 break; |
|
860 case CS: |
|
861 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */ |
|
862 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */ |
|
863 ) { |
|
864 /* (W4) */ |
|
865 if(lastStrong!=L) { |
|
866 dirProp=EN; |
|
867 } else { |
|
868 /* (W7) */ |
|
869 dirProp=L; |
|
870 } |
|
871 historyOfEN|=EN_AFTER_W4; |
|
872 } else if(prevDirProp==AN && /* previous was AN */ |
|
873 (nextDirProp==AN || /* next is AN */ |
|
874 (nextDirProp==EN && lastStrong==AL)) /* or (W2) will make it one */ |
|
875 ) { |
|
876 /* (W4) */ |
|
877 dirProp=AN; |
|
878 } else { |
|
879 /* (W6) */ |
|
880 dirProp=O_N; |
|
881 } |
|
882 break; |
|
883 case ET: |
|
884 /* get sequence of ET; advance only next, not current, previous or historyOfEN */ |
|
885 while(next<aLimit && DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) { |
|
886 if(++next<aLimit) { |
|
887 nextDirProp=dirProps[next]; |
|
888 } else { |
|
889 nextDirProp=aEOR; |
|
890 break; |
|
891 } |
|
892 } |
|
893 |
|
894 if( historyOfEN&PREV_EN_AFTER_W4 || /* previous was EN before (W5) */ |
|
895 (nextDirProp==EN && lastStrong!=AL) /* next is EN and (W2) won't make it AN */ |
|
896 ) { |
|
897 /* (W5) */ |
|
898 if(lastStrong!=L) { |
|
899 dirProp=EN; |
|
900 } else { |
|
901 /* (W7) */ |
|
902 dirProp=L; |
|
903 } |
|
904 } else { |
|
905 /* (W6) */ |
|
906 dirProp=O_N; |
|
907 } |
|
908 |
|
909 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */ |
|
910 break; |
|
911 case NSM: |
|
912 /* (W1) */ |
|
913 dirProp=prevDirProp; |
|
914 /* set historyOfEN back to prevDirProp's historyOfEN */ |
|
915 historyOfEN>>=EN_SHIFT; |
|
916 /* |
|
917 * Technically, this should be done before the switch() in the form |
|
918 * if(nextDirProp==NSM) { |
|
919 * dirProps[next]=nextDirProp=dirProp; |
|
920 * } |
|
921 * |
|
922 * - effectively one iteration ahead. |
|
923 * However, whether the next dirProp is NSM or is equal to the current dirProp |
|
924 * does not change the outcome of any condition in (W2)..(W7). |
|
925 */ |
|
926 break; |
|
927 default: |
|
928 break; |
|
929 } |
|
930 |
|
931 /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */ |
|
932 |
|
933 /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */ |
|
934 /* this is one iteration late for the neutrals */ |
|
935 if(DIRPROP_FLAG(dirProp)&MASK_N) { |
|
936 if(neutralStart<0) { |
|
937 /* start of a sequence of neutrals */ |
|
938 neutralStart=i; |
|
939 beforeNeutral=prevDirProp; |
|
940 } |
|
941 } else /* not a neutral, can be only one of { L, R, EN, AN } */ { |
|
942 /* |
|
943 * Note that all levels[] values are still the same at this |
|
944 * point because this function is called for an entire |
|
945 * same-level run. |
|
946 * Therefore, we need to read only one actual level. |
|
947 */ |
|
948 nsBidiLevel level=levels[i]; |
|
949 |
|
950 if(neutralStart>=0) { |
|
951 nsBidiLevel final; |
|
952 /* end of a sequence of neutrals (dirProp is "afterNeutral") */ |
|
953 if(beforeNeutral==L) { |
|
954 if(dirProp==L) { |
|
955 final=0; /* make all neutrals L (N1) */ |
|
956 } else { |
|
957 final=level; /* make all neutrals "e" (N2) */ |
|
958 } |
|
959 } else /* beforeNeutral is one of { R, EN, AN } */ { |
|
960 if(dirProp==L) { |
|
961 final=level; /* make all neutrals "e" (N2) */ |
|
962 } else { |
|
963 final=1; /* make all neutrals R (N1) */ |
|
964 } |
|
965 } |
|
966 /* perform (In) on the sequence of neutrals */ |
|
967 if((level^final)&1) { |
|
968 /* do something only if we need to _change_ the level */ |
|
969 do { |
|
970 ++levels[neutralStart]; |
|
971 } while(++neutralStart<i); |
|
972 } |
|
973 neutralStart=-1; |
|
974 } |
|
975 |
|
976 /* perform (In) on the non-neutral character */ |
|
977 /* |
|
978 * in the cases of (W5), processing a sequence of ET, |
|
979 * and of (X9), skipping BN, |
|
980 * there may be multiple characters from i to <next |
|
981 * that all get (virtually) the same dirProp and (really) the same level |
|
982 */ |
|
983 if(dirProp==L) { |
|
984 if(level&1) { |
|
985 ++level; |
|
986 } else { |
|
987 i=next; /* we keep the levels */ |
|
988 } |
|
989 } else if(dirProp==R) { |
|
990 if(!(level&1)) { |
|
991 ++level; |
|
992 } else { |
|
993 i=next; /* we keep the levels */ |
|
994 } |
|
995 } else /* EN or AN */ { |
|
996 level=(level+2)&~1; /* least greater even level */ |
|
997 } |
|
998 |
|
999 /* apply the new level to the sequence, if necessary */ |
|
1000 while(i<next) { |
|
1001 levels[i++]=level; |
|
1002 } |
|
1003 } |
|
1004 } |
|
1005 |
|
1006 /* perform (Nn) - here, |
|
1007 the character after the neutrals is aEOR, which is either L or R */ |
|
1008 /* this is one iteration late for the neutrals */ |
|
1009 if(neutralStart>=0) { |
|
1010 /* |
|
1011 * Note that all levels[] values are still the same at this |
|
1012 * point because this function is called for an entire |
|
1013 * same-level run. |
|
1014 * Therefore, we need to read only one actual level. |
|
1015 */ |
|
1016 nsBidiLevel level=levels[neutralStart], final; |
|
1017 |
|
1018 /* end of a sequence of neutrals (aEOR is "afterNeutral") */ |
|
1019 if(beforeNeutral==L) { |
|
1020 if(aEOR==L) { |
|
1021 final=0; /* make all neutrals L (N1) */ |
|
1022 } else { |
|
1023 final=level; /* make all neutrals "e" (N2) */ |
|
1024 } |
|
1025 } else /* beforeNeutral is one of { R, EN, AN } */ { |
|
1026 if(aEOR==L) { |
|
1027 final=level; /* make all neutrals "e" (N2) */ |
|
1028 } else { |
|
1029 final=1; /* make all neutrals R (N1) */ |
|
1030 } |
|
1031 } |
|
1032 /* perform (In) on the sequence of neutrals */ |
|
1033 if((level^final)&1) { |
|
1034 /* do something only if we need to _change_ the level */ |
|
1035 do { |
|
1036 ++levels[neutralStart]; |
|
1037 } while(++neutralStart<aLimit); |
|
1038 } |
|
1039 } |
|
1040 } |
|
1041 |
|
1042 /* perform (L1) and (X9) ---------------------------------------------------- */ |
|
1043 |
|
1044 /* |
|
1045 * Reset the embedding levels for some non-graphic characters (L1). |
|
1046 * This function also sets appropriate levels for BN, and |
|
1047 * explicit embedding types that are supposed to have been removed |
|
1048 * from the paragraph in (X9). |
|
1049 */ |
|
1050 void nsBidi::AdjustWSLevels() |
|
1051 { |
|
1052 const DirProp *dirProps=mDirProps; |
|
1053 nsBidiLevel *levels=mLevels; |
|
1054 int32_t i; |
|
1055 |
|
1056 if(mFlags&MASK_WS) { |
|
1057 nsBidiLevel paraLevel=mParaLevel; |
|
1058 Flags flag; |
|
1059 |
|
1060 i=mTrailingWSStart; |
|
1061 while(i>0) { |
|
1062 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ |
|
1063 while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) { |
|
1064 levels[i]=paraLevel; |
|
1065 } |
|
1066 |
|
1067 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ |
|
1068 /* here, i+1 is guaranteed to be <length */ |
|
1069 while(i>0) { |
|
1070 flag=DIRPROP_FLAG(dirProps[--i]); |
|
1071 if(flag&MASK_BN_EXPLICIT) { |
|
1072 levels[i]=levels[i+1]; |
|
1073 } else if(flag&MASK_B_S) { |
|
1074 levels[i]=paraLevel; |
|
1075 break; |
|
1076 } |
|
1077 } |
|
1078 } |
|
1079 } |
|
1080 |
|
1081 /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */ |
|
1082 /* (a separate loop can be optimized more easily by a compiler) */ |
|
1083 if(mFlags&MASK_OVERRIDE) { |
|
1084 for(i=mTrailingWSStart; i>0;) { |
|
1085 levels[--i]&=~NSBIDI_LEVEL_OVERRIDE; |
|
1086 } |
|
1087 } |
|
1088 } |
|
1089 |
|
1090 nsresult nsBidi::GetDirection(nsBidiDirection* aDirection) |
|
1091 { |
|
1092 *aDirection = mDirection; |
|
1093 return NS_OK; |
|
1094 } |
|
1095 |
|
1096 nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel) |
|
1097 { |
|
1098 *aParaLevel = mParaLevel; |
|
1099 return NS_OK; |
|
1100 } |
|
1101 #ifdef FULL_BIDI_ENGINE |
|
1102 |
|
1103 /* -------------------------------------------------------------------------- */ |
|
1104 |
|
1105 nsresult nsBidi::GetLength(int32_t* aLength) |
|
1106 { |
|
1107 *aLength = mLength; |
|
1108 return NS_OK; |
|
1109 } |
|
1110 |
|
1111 /* |
|
1112 * General remarks about the functions in this section: |
|
1113 * |
|
1114 * These functions deal with the aspects of potentially mixed-directional |
|
1115 * text in a single paragraph or in a line of a single paragraph |
|
1116 * which has already been processed according to |
|
1117 * the Unicode 3.0 Bidi algorithm as defined in |
|
1118 * http://www.unicode.org/unicode/reports/tr9/ , version 5, |
|
1119 * also described in The Unicode Standard, Version 3.0 . |
|
1120 * |
|
1121 * This means that there is a nsBidi object with a levels |
|
1122 * and a dirProps array. |
|
1123 * paraLevel and direction are also set. |
|
1124 * Only if the length of the text is zero, then levels==dirProps==nullptr. |
|
1125 * |
|
1126 * The overall directionality of the paragraph |
|
1127 * or line is used to bypass the reordering steps if possible. |
|
1128 * Even purely RTL text does not need reordering there because |
|
1129 * the getLogical/VisualIndex() functions can compute the |
|
1130 * index on the fly in such a case. |
|
1131 * |
|
1132 * The implementation of the access to same-level-runs and of the reordering |
|
1133 * do attempt to provide better performance and less memory usage compared to |
|
1134 * a direct implementation of especially rule (L2) with an array of |
|
1135 * one (32-bit) integer per text character. |
|
1136 * |
|
1137 * Here, the levels array is scanned as soon as necessary, and a vector of |
|
1138 * same-level-runs is created. Reordering then is done on this vector. |
|
1139 * For each run of text positions that were resolved to the same level, |
|
1140 * only 8 bytes are stored: the first text position of the run and the visual |
|
1141 * position behind the run after reordering. |
|
1142 * One sign bit is used to hold the directionality of the run. |
|
1143 * This is inefficient if there are many very short runs. If the average run |
|
1144 * length is <2, then this uses more memory. |
|
1145 * |
|
1146 * In a further attempt to save memory, the levels array is never changed |
|
1147 * after all the resolution rules (Xn, Wn, Nn, In). |
|
1148 * Many functions have to consider the field trailingWSStart: |
|
1149 * if it is less than length, then there is an implicit trailing run |
|
1150 * at the paraLevel, |
|
1151 * which is not reflected in the levels array. |
|
1152 * This allows a line nsBidi object to use the same levels array as |
|
1153 * its paragraph parent object. |
|
1154 * |
|
1155 * When a nsBidi object is created for a line of a paragraph, then the |
|
1156 * paragraph's levels and dirProps arrays are reused by way of setting |
|
1157 * a pointer into them, not by copying. This again saves memory and forbids to |
|
1158 * change the now shared levels for (L1). |
|
1159 */ |
|
1160 nsresult nsBidi::SetLine(nsIBidi* aParaBidi, int32_t aStart, int32_t aLimit) |
|
1161 { |
|
1162 nsBidi* pParent = (nsBidi*)aParaBidi; |
|
1163 int32_t length; |
|
1164 |
|
1165 /* check the argument values */ |
|
1166 if(pParent==nullptr) { |
|
1167 return NS_ERROR_INVALID_POINTER; |
|
1168 } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) { |
|
1169 return NS_ERROR_INVALID_ARG; |
|
1170 } |
|
1171 |
|
1172 /* set members from our aParaBidi parent */ |
|
1173 length=mLength=aLimit-aStart; |
|
1174 mParaLevel=pParent->mParaLevel; |
|
1175 |
|
1176 mRuns=nullptr; |
|
1177 mFlags=0; |
|
1178 |
|
1179 if(length>0) { |
|
1180 mDirProps=pParent->mDirProps+aStart; |
|
1181 mLevels=pParent->mLevels+aStart; |
|
1182 mRunCount=-1; |
|
1183 |
|
1184 if(pParent->mDirection!=NSBIDI_MIXED) { |
|
1185 /* the parent is already trivial */ |
|
1186 mDirection=pParent->mDirection; |
|
1187 |
|
1188 /* |
|
1189 * The parent's levels are all either |
|
1190 * implicitly or explicitly ==paraLevel; |
|
1191 * do the same here. |
|
1192 */ |
|
1193 if(pParent->mTrailingWSStart<=aStart) { |
|
1194 mTrailingWSStart=0; |
|
1195 } else if(pParent->mTrailingWSStart<aLimit) { |
|
1196 mTrailingWSStart=pParent->mTrailingWSStart-aStart; |
|
1197 } else { |
|
1198 mTrailingWSStart=length; |
|
1199 } |
|
1200 } else { |
|
1201 const nsBidiLevel *levels=mLevels; |
|
1202 int32_t i, trailingWSStart; |
|
1203 nsBidiLevel level; |
|
1204 Flags flags=0; |
|
1205 |
|
1206 SetTrailingWSStart(); |
|
1207 trailingWSStart=mTrailingWSStart; |
|
1208 |
|
1209 /* recalculate pLineBidi->direction */ |
|
1210 if(trailingWSStart==0) { |
|
1211 /* all levels are at paraLevel */ |
|
1212 mDirection=(nsBidiDirection)(mParaLevel&1); |
|
1213 } else { |
|
1214 /* get the level of the first character */ |
|
1215 level=levels[0]&1; |
|
1216 |
|
1217 /* if there is anything of a different level, then the line is mixed */ |
|
1218 if(trailingWSStart<length && (mParaLevel&1)!=level) { |
|
1219 /* the trailing WS is at paraLevel, which differs from levels[0] */ |
|
1220 mDirection=NSBIDI_MIXED; |
|
1221 } else { |
|
1222 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ |
|
1223 i=1; |
|
1224 for(;;) { |
|
1225 if(i==trailingWSStart) { |
|
1226 /* the direction values match those in level */ |
|
1227 mDirection=(nsBidiDirection)level; |
|
1228 break; |
|
1229 } else if((levels[i]&1)!=level) { |
|
1230 mDirection=NSBIDI_MIXED; |
|
1231 break; |
|
1232 } |
|
1233 ++i; |
|
1234 } |
|
1235 } |
|
1236 } |
|
1237 |
|
1238 switch(mDirection) { |
|
1239 case NSBIDI_LTR: |
|
1240 /* make sure paraLevel is even */ |
|
1241 mParaLevel=(mParaLevel+1)&~1; |
|
1242 |
|
1243 /* all levels are implicitly at paraLevel (important for GetLevels()) */ |
|
1244 mTrailingWSStart=0; |
|
1245 break; |
|
1246 case NSBIDI_RTL: |
|
1247 /* make sure paraLevel is odd */ |
|
1248 mParaLevel|=1; |
|
1249 |
|
1250 /* all levels are implicitly at paraLevel (important for GetLevels()) */ |
|
1251 mTrailingWSStart=0; |
|
1252 break; |
|
1253 default: |
|
1254 break; |
|
1255 } |
|
1256 } |
|
1257 } else { |
|
1258 /* create an object for a zero-length line */ |
|
1259 mDirection=mParaLevel&1 ? NSBIDI_RTL : NSBIDI_LTR; |
|
1260 mTrailingWSStart=mRunCount=0; |
|
1261 |
|
1262 mDirProps=nullptr; |
|
1263 mLevels=nullptr; |
|
1264 } |
|
1265 return NS_OK; |
|
1266 } |
|
1267 |
|
1268 /* handle trailing WS (L1) -------------------------------------------------- */ |
|
1269 |
|
1270 /* |
|
1271 * SetTrailingWSStart() sets the start index for a trailing |
|
1272 * run of WS in the line. This is necessary because we do not modify |
|
1273 * the paragraph's levels array that we just point into. |
|
1274 * Using trailingWSStart is another form of performing (L1). |
|
1275 * |
|
1276 * To make subsequent operations easier, we also include the run |
|
1277 * before the WS if it is at the paraLevel - we merge the two here. |
|
1278 */ |
|
1279 void nsBidi::SetTrailingWSStart() { |
|
1280 /* mDirection!=NSBIDI_MIXED */ |
|
1281 |
|
1282 const DirProp *dirProps=mDirProps; |
|
1283 nsBidiLevel *levels=mLevels; |
|
1284 int32_t start=mLength; |
|
1285 nsBidiLevel paraLevel=mParaLevel; |
|
1286 |
|
1287 /* go backwards across all WS, BN, explicit codes */ |
|
1288 while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { |
|
1289 --start; |
|
1290 } |
|
1291 |
|
1292 /* if the WS run can be merged with the previous run then do so here */ |
|
1293 while(start>0 && levels[start-1]==paraLevel) { |
|
1294 --start; |
|
1295 } |
|
1296 |
|
1297 mTrailingWSStart=start; |
|
1298 } |
|
1299 |
|
1300 nsresult nsBidi::GetLevelAt(int32_t aCharIndex, nsBidiLevel* aLevel) |
|
1301 { |
|
1302 /* return paraLevel if in the trailing WS run, otherwise the real level */ |
|
1303 if(aCharIndex<0 || mLength<=aCharIndex) { |
|
1304 *aLevel = 0; |
|
1305 } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) { |
|
1306 *aLevel = mParaLevel; |
|
1307 } else { |
|
1308 *aLevel = mLevels[aCharIndex]; |
|
1309 } |
|
1310 return NS_OK; |
|
1311 } |
|
1312 |
|
1313 nsresult nsBidi::GetLevels(nsBidiLevel** aLevels) |
|
1314 { |
|
1315 int32_t start, length; |
|
1316 |
|
1317 length = mLength; |
|
1318 if(length<=0) { |
|
1319 *aLevels = nullptr; |
|
1320 return NS_ERROR_INVALID_ARG; |
|
1321 } |
|
1322 |
|
1323 start = mTrailingWSStart; |
|
1324 if(start==length) { |
|
1325 /* the current levels array reflects the WS run */ |
|
1326 *aLevels = mLevels; |
|
1327 return NS_OK; |
|
1328 } |
|
1329 |
|
1330 /* |
|
1331 * After the previous if(), we know that the levels array |
|
1332 * has an implicit trailing WS run and therefore does not fully |
|
1333 * reflect itself all the levels. |
|
1334 * This must be a nsBidi object for a line, and |
|
1335 * we need to create a new levels array. |
|
1336 */ |
|
1337 |
|
1338 if(GETLEVELSMEMORY(length)) { |
|
1339 nsBidiLevel *levels=mLevelsMemory; |
|
1340 |
|
1341 if(start>0 && levels!=mLevels) { |
|
1342 memcpy(levels, mLevels, start); |
|
1343 } |
|
1344 memset(levels+start, mParaLevel, length-start); |
|
1345 |
|
1346 /* this new levels array is set for the line and reflects the WS run */ |
|
1347 mTrailingWSStart=length; |
|
1348 *aLevels=mLevels=levels; |
|
1349 return NS_OK; |
|
1350 } else { |
|
1351 /* out of memory */ |
|
1352 *aLevels = nullptr; |
|
1353 return NS_ERROR_OUT_OF_MEMORY; |
|
1354 } |
|
1355 } |
|
1356 #endif // FULL_BIDI_ENGINE |
|
1357 |
|
1358 nsresult nsBidi::GetCharTypeAt(int32_t aCharIndex, nsCharType* pType) |
|
1359 { |
|
1360 if(aCharIndex<0 || mLength<=aCharIndex) { |
|
1361 return NS_ERROR_INVALID_ARG; |
|
1362 } |
|
1363 *pType = (nsCharType)mDirProps[aCharIndex]; |
|
1364 return NS_OK; |
|
1365 } |
|
1366 |
|
1367 nsresult nsBidi::GetLogicalRun(int32_t aLogicalStart, int32_t *aLogicalLimit, nsBidiLevel *aLevel) |
|
1368 { |
|
1369 int32_t length = mLength; |
|
1370 |
|
1371 if(aLogicalStart<0 || length<=aLogicalStart) { |
|
1372 return NS_ERROR_INVALID_ARG; |
|
1373 } |
|
1374 |
|
1375 if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) { |
|
1376 if(aLogicalLimit!=nullptr) { |
|
1377 *aLogicalLimit=length; |
|
1378 } |
|
1379 if(aLevel!=nullptr) { |
|
1380 *aLevel=mParaLevel; |
|
1381 } |
|
1382 } else { |
|
1383 nsBidiLevel *levels=mLevels; |
|
1384 nsBidiLevel level=levels[aLogicalStart]; |
|
1385 |
|
1386 /* search for the end of the run */ |
|
1387 length=mTrailingWSStart; |
|
1388 while(++aLogicalStart<length && level==levels[aLogicalStart]) {} |
|
1389 |
|
1390 if(aLogicalLimit!=nullptr) { |
|
1391 *aLogicalLimit=aLogicalStart; |
|
1392 } |
|
1393 if(aLevel!=nullptr) { |
|
1394 *aLevel=level; |
|
1395 } |
|
1396 } |
|
1397 return NS_OK; |
|
1398 } |
|
1399 |
|
1400 /* runs API functions ------------------------------------------------------- */ |
|
1401 |
|
1402 nsresult nsBidi::CountRuns(int32_t* aRunCount) |
|
1403 { |
|
1404 if(mRunCount<0 && !GetRuns()) { |
|
1405 return NS_ERROR_OUT_OF_MEMORY; |
|
1406 } else { |
|
1407 if (aRunCount) |
|
1408 *aRunCount = mRunCount; |
|
1409 return NS_OK; |
|
1410 } |
|
1411 } |
|
1412 |
|
1413 nsresult nsBidi::GetVisualRun(int32_t aRunIndex, int32_t *aLogicalStart, int32_t *aLength, nsBidiDirection *aDirection) |
|
1414 { |
|
1415 if( aRunIndex<0 || |
|
1416 (mRunCount==-1 && !GetRuns()) || |
|
1417 aRunIndex>=mRunCount |
|
1418 ) { |
|
1419 *aDirection = NSBIDI_LTR; |
|
1420 return NS_OK; |
|
1421 } else { |
|
1422 int32_t start=mRuns[aRunIndex].logicalStart; |
|
1423 if(aLogicalStart!=nullptr) { |
|
1424 *aLogicalStart=GET_INDEX(start); |
|
1425 } |
|
1426 if(aLength!=nullptr) { |
|
1427 if(aRunIndex>0) { |
|
1428 *aLength=mRuns[aRunIndex].visualLimit- |
|
1429 mRuns[aRunIndex-1].visualLimit; |
|
1430 } else { |
|
1431 *aLength=mRuns[0].visualLimit; |
|
1432 } |
|
1433 } |
|
1434 *aDirection = (nsBidiDirection)GET_ODD_BIT(start); |
|
1435 return NS_OK; |
|
1436 } |
|
1437 } |
|
1438 |
|
1439 /* compute the runs array --------------------------------------------------- */ |
|
1440 |
|
1441 /* |
|
1442 * Compute the runs array from the levels array. |
|
1443 * After GetRuns() returns true, runCount is guaranteed to be >0 |
|
1444 * and the runs are reordered. |
|
1445 * Odd-level runs have visualStart on their visual right edge and |
|
1446 * they progress visually to the left. |
|
1447 */ |
|
1448 bool nsBidi::GetRuns() |
|
1449 { |
|
1450 if(mDirection!=NSBIDI_MIXED) { |
|
1451 /* simple, single-run case - this covers length==0 */ |
|
1452 GetSingleRun(mParaLevel); |
|
1453 } else /* NSBIDI_MIXED, length>0 */ { |
|
1454 /* mixed directionality */ |
|
1455 int32_t length=mLength, limit=length; |
|
1456 |
|
1457 /* |
|
1458 * If there are WS characters at the end of the line |
|
1459 * and the run preceding them has a level different from |
|
1460 * paraLevel, then they will form their own run at paraLevel (L1). |
|
1461 * Count them separately. |
|
1462 * We need some special treatment for this in order to not |
|
1463 * modify the levels array which a line nsBidi object shares |
|
1464 * with its paragraph parent and its other line siblings. |
|
1465 * In other words, for the trailing WS, it may be |
|
1466 * levels[]!=paraLevel but we have to treat it like it were so. |
|
1467 */ |
|
1468 limit=mTrailingWSStart; |
|
1469 if(limit==0) { |
|
1470 /* there is only WS on this line */ |
|
1471 GetSingleRun(mParaLevel); |
|
1472 } else { |
|
1473 nsBidiLevel *levels=mLevels; |
|
1474 int32_t i, runCount; |
|
1475 nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */ |
|
1476 |
|
1477 /* count the runs, there is at least one non-WS run, and limit>0 */ |
|
1478 runCount=0; |
|
1479 for(i=0; i<limit; ++i) { |
|
1480 /* increment runCount at the start of each run */ |
|
1481 if(levels[i]!=level) { |
|
1482 ++runCount; |
|
1483 level=levels[i]; |
|
1484 } |
|
1485 } |
|
1486 |
|
1487 /* |
|
1488 * We don't need to see if the last run can be merged with a trailing |
|
1489 * WS run because SetTrailingWSStart() would have done that. |
|
1490 */ |
|
1491 if(runCount==1 && limit==length) { |
|
1492 /* There is only one non-WS run and no trailing WS-run. */ |
|
1493 GetSingleRun(levels[0]); |
|
1494 } else /* runCount>1 || limit<length */ { |
|
1495 /* allocate and set the runs */ |
|
1496 Run *runs; |
|
1497 int32_t runIndex, start; |
|
1498 nsBidiLevel minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; |
|
1499 |
|
1500 /* now, count a (non-mergable) WS run */ |
|
1501 if(limit<length) { |
|
1502 ++runCount; |
|
1503 } |
|
1504 |
|
1505 /* runCount>1 */ |
|
1506 if(GETRUNSMEMORY(runCount)) { |
|
1507 runs=mRunsMemory; |
|
1508 } else { |
|
1509 return false; |
|
1510 } |
|
1511 |
|
1512 /* set the runs */ |
|
1513 /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ |
|
1514 /* however, that would take longer and make other functions more complicated */ |
|
1515 runIndex=0; |
|
1516 |
|
1517 /* search for the run ends */ |
|
1518 start=0; |
|
1519 level=levels[0]; |
|
1520 if(level<minLevel) { |
|
1521 minLevel=level; |
|
1522 } |
|
1523 if(level>maxLevel) { |
|
1524 maxLevel=level; |
|
1525 } |
|
1526 |
|
1527 /* initialize visualLimit values with the run lengths */ |
|
1528 for(i=1; i<limit; ++i) { |
|
1529 if(levels[i]!=level) { |
|
1530 /* i is another run limit */ |
|
1531 runs[runIndex].logicalStart=start; |
|
1532 runs[runIndex].visualLimit=i-start; |
|
1533 start=i; |
|
1534 |
|
1535 level=levels[i]; |
|
1536 if(level<minLevel) { |
|
1537 minLevel=level; |
|
1538 } |
|
1539 if(level>maxLevel) { |
|
1540 maxLevel=level; |
|
1541 } |
|
1542 ++runIndex; |
|
1543 } |
|
1544 } |
|
1545 |
|
1546 /* finish the last run at i==limit */ |
|
1547 runs[runIndex].logicalStart=start; |
|
1548 runs[runIndex].visualLimit=limit-start; |
|
1549 ++runIndex; |
|
1550 |
|
1551 if(limit<length) { |
|
1552 /* there is a separate WS run */ |
|
1553 runs[runIndex].logicalStart=limit; |
|
1554 runs[runIndex].visualLimit=length-limit; |
|
1555 if(mParaLevel<minLevel) { |
|
1556 minLevel=mParaLevel; |
|
1557 } |
|
1558 } |
|
1559 |
|
1560 /* set the object fields */ |
|
1561 mRuns=runs; |
|
1562 mRunCount=runCount; |
|
1563 |
|
1564 ReorderLine(minLevel, maxLevel); |
|
1565 |
|
1566 /* now add the direction flags and adjust the visualLimit's to be just that */ |
|
1567 ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]); |
|
1568 limit=runs[0].visualLimit; |
|
1569 for(i=1; i<runIndex; ++i) { |
|
1570 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]); |
|
1571 limit=runs[i].visualLimit+=limit; |
|
1572 } |
|
1573 |
|
1574 /* same for the trailing WS run */ |
|
1575 if(runIndex<runCount) { |
|
1576 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, mParaLevel); |
|
1577 runs[runIndex].visualLimit+=limit; |
|
1578 } |
|
1579 } |
|
1580 } |
|
1581 } |
|
1582 return true; |
|
1583 } |
|
1584 |
|
1585 /* in trivial cases there is only one trivial run; called by GetRuns() */ |
|
1586 void nsBidi::GetSingleRun(nsBidiLevel aLevel) |
|
1587 { |
|
1588 /* simple, single-run case */ |
|
1589 mRuns=mSimpleRuns; |
|
1590 mRunCount=1; |
|
1591 |
|
1592 /* fill and reorder the single run */ |
|
1593 mRuns[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, aLevel); |
|
1594 mRuns[0].visualLimit=mLength; |
|
1595 } |
|
1596 |
|
1597 /* reorder the runs array (L2) ---------------------------------------------- */ |
|
1598 |
|
1599 /* |
|
1600 * Reorder the same-level runs in the runs array. |
|
1601 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. |
|
1602 * All the visualStart fields=logical start before reordering. |
|
1603 * The "odd" bits are not set yet. |
|
1604 * |
|
1605 * Reordering with this data structure lends itself to some handy shortcuts: |
|
1606 * |
|
1607 * Since each run is moved but not modified, and since at the initial maxLevel |
|
1608 * each sequence of same-level runs consists of only one run each, we |
|
1609 * don't need to do anything there and can predecrement maxLevel. |
|
1610 * In many simple cases, the reordering is thus done entirely in the |
|
1611 * index mapping. |
|
1612 * Also, reordering occurs only down to the lowest odd level that occurs, |
|
1613 * which is minLevel|1. However, if the lowest level itself is odd, then |
|
1614 * in the last reordering the sequence of the runs at this level or higher |
|
1615 * will be all runs, and we don't need the elaborate loop to search for them. |
|
1616 * This is covered by ++minLevel instead of minLevel|=1 followed |
|
1617 * by an extra reorder-all after the reorder-some loop. |
|
1618 * About a trailing WS run: |
|
1619 * Such a run would need special treatment because its level is not |
|
1620 * reflected in levels[] if this is not a paragraph object. |
|
1621 * Instead, all characters from trailingWSStart on are implicitly at |
|
1622 * paraLevel. |
|
1623 * However, for all maxLevel>paraLevel, this run will never be reordered |
|
1624 * and does not need to be taken into account. maxLevel==paraLevel is only reordered |
|
1625 * if minLevel==paraLevel is odd, which is done in the extra segment. |
|
1626 * This means that for the main reordering loop we don't need to consider |
|
1627 * this run and can --runCount. If it is later part of the all-runs |
|
1628 * reordering, then runCount is adjusted accordingly. |
|
1629 */ |
|
1630 void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel) |
|
1631 { |
|
1632 Run *runs; |
|
1633 nsBidiLevel *levels; |
|
1634 int32_t firstRun, endRun, limitRun, runCount, temp; |
|
1635 |
|
1636 /* nothing to do? */ |
|
1637 if(aMaxLevel<=(aMinLevel|1)) { |
|
1638 return; |
|
1639 } |
|
1640 |
|
1641 /* |
|
1642 * Reorder only down to the lowest odd level |
|
1643 * and reorder at an odd aMinLevel in a separate, simpler loop. |
|
1644 * See comments above for why aMinLevel is always incremented. |
|
1645 */ |
|
1646 ++aMinLevel; |
|
1647 |
|
1648 runs=mRuns; |
|
1649 levels=mLevels; |
|
1650 runCount=mRunCount; |
|
1651 |
|
1652 /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */ |
|
1653 if(mTrailingWSStart<mLength) { |
|
1654 --runCount; |
|
1655 } |
|
1656 |
|
1657 while(--aMaxLevel>=aMinLevel) { |
|
1658 firstRun=0; |
|
1659 |
|
1660 /* loop for all sequences of runs */ |
|
1661 for(;;) { |
|
1662 /* look for a sequence of runs that are all at >=aMaxLevel */ |
|
1663 /* look for the first run of such a sequence */ |
|
1664 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<aMaxLevel) { |
|
1665 ++firstRun; |
|
1666 } |
|
1667 if(firstRun>=runCount) { |
|
1668 break; /* no more such runs */ |
|
1669 } |
|
1670 |
|
1671 /* look for the limit run of such a sequence (the run behind it) */ |
|
1672 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=aMaxLevel;) {} |
|
1673 |
|
1674 /* Swap the entire sequence of runs from firstRun to limitRun-1. */ |
|
1675 endRun=limitRun-1; |
|
1676 while(firstRun<endRun) { |
|
1677 temp=runs[firstRun].logicalStart; |
|
1678 runs[firstRun].logicalStart=runs[endRun].logicalStart; |
|
1679 runs[endRun].logicalStart=temp; |
|
1680 |
|
1681 temp=runs[firstRun].visualLimit; |
|
1682 runs[firstRun].visualLimit=runs[endRun].visualLimit; |
|
1683 runs[endRun].visualLimit=temp; |
|
1684 |
|
1685 ++firstRun; |
|
1686 --endRun; |
|
1687 } |
|
1688 |
|
1689 if(limitRun==runCount) { |
|
1690 break; /* no more such runs */ |
|
1691 } else { |
|
1692 firstRun=limitRun+1; |
|
1693 } |
|
1694 } |
|
1695 } |
|
1696 |
|
1697 /* now do aMaxLevel==old aMinLevel (==odd!), see above */ |
|
1698 if(!(aMinLevel&1)) { |
|
1699 firstRun=0; |
|
1700 |
|
1701 /* include the trailing WS run in this complete reordering */ |
|
1702 if(mTrailingWSStart==mLength) { |
|
1703 --runCount; |
|
1704 } |
|
1705 |
|
1706 /* Swap the entire sequence of all runs. (endRun==runCount) */ |
|
1707 while(firstRun<runCount) { |
|
1708 temp=runs[firstRun].logicalStart; |
|
1709 runs[firstRun].logicalStart=runs[runCount].logicalStart; |
|
1710 runs[runCount].logicalStart=temp; |
|
1711 |
|
1712 temp=runs[firstRun].visualLimit; |
|
1713 runs[firstRun].visualLimit=runs[runCount].visualLimit; |
|
1714 runs[runCount].visualLimit=temp; |
|
1715 |
|
1716 ++firstRun; |
|
1717 --runCount; |
|
1718 } |
|
1719 } |
|
1720 } |
|
1721 |
|
1722 nsresult nsBidi::ReorderVisual(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap) |
|
1723 { |
|
1724 int32_t start, end, limit, temp; |
|
1725 nsBidiLevel minLevel, maxLevel; |
|
1726 |
|
1727 if(aIndexMap==nullptr || |
|
1728 !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) { |
|
1729 return NS_OK; |
|
1730 } |
|
1731 |
|
1732 /* nothing to do? */ |
|
1733 if(minLevel==maxLevel && (minLevel&1)==0) { |
|
1734 return NS_OK; |
|
1735 } |
|
1736 |
|
1737 /* reorder only down to the lowest odd level */ |
|
1738 minLevel|=1; |
|
1739 |
|
1740 /* loop maxLevel..minLevel */ |
|
1741 do { |
|
1742 start=0; |
|
1743 |
|
1744 /* loop for all sequences of levels to reorder at the current maxLevel */ |
|
1745 for(;;) { |
|
1746 /* look for a sequence of levels that are all at >=maxLevel */ |
|
1747 /* look for the first index of such a sequence */ |
|
1748 while(start<aLength && aLevels[start]<maxLevel) { |
|
1749 ++start; |
|
1750 } |
|
1751 if(start>=aLength) { |
|
1752 break; /* no more such runs */ |
|
1753 } |
|
1754 |
|
1755 /* look for the limit of such a sequence (the index behind it) */ |
|
1756 for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {} |
|
1757 |
|
1758 /* |
|
1759 * Swap the entire interval of indexes from start to limit-1. |
|
1760 * We don't need to swap the levels for the purpose of this |
|
1761 * algorithm: the sequence of levels that we look at does not |
|
1762 * move anyway. |
|
1763 */ |
|
1764 end=limit-1; |
|
1765 while(start<end) { |
|
1766 temp=aIndexMap[start]; |
|
1767 aIndexMap[start]=aIndexMap[end]; |
|
1768 aIndexMap[end]=temp; |
|
1769 |
|
1770 ++start; |
|
1771 --end; |
|
1772 } |
|
1773 |
|
1774 if(limit==aLength) { |
|
1775 break; /* no more such sequences */ |
|
1776 } else { |
|
1777 start=limit+1; |
|
1778 } |
|
1779 } |
|
1780 } while(--maxLevel>=minLevel); |
|
1781 |
|
1782 return NS_OK; |
|
1783 } |
|
1784 |
|
1785 bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, int32_t aLength, |
|
1786 int32_t *aIndexMap, |
|
1787 nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel) |
|
1788 { |
|
1789 int32_t start; |
|
1790 nsBidiLevel level, minLevel, maxLevel; |
|
1791 |
|
1792 if(aLevels==nullptr || aLength<=0) { |
|
1793 return false; |
|
1794 } |
|
1795 |
|
1796 /* determine minLevel and maxLevel */ |
|
1797 minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1; |
|
1798 maxLevel=0; |
|
1799 for(start=aLength; start>0;) { |
|
1800 level=aLevels[--start]; |
|
1801 if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) { |
|
1802 return false; |
|
1803 } |
|
1804 if(level<minLevel) { |
|
1805 minLevel=level; |
|
1806 } |
|
1807 if(level>maxLevel) { |
|
1808 maxLevel=level; |
|
1809 } |
|
1810 } |
|
1811 *aMinLevel=minLevel; |
|
1812 *aMaxLevel=maxLevel; |
|
1813 |
|
1814 /* initialize the index map */ |
|
1815 for(start=aLength; start>0;) { |
|
1816 --start; |
|
1817 aIndexMap[start]=start; |
|
1818 } |
|
1819 |
|
1820 return true; |
|
1821 } |
|
1822 |
|
1823 #ifdef FULL_BIDI_ENGINE |
|
1824 /* API functions for logical<->visual mapping ------------------------------- */ |
|
1825 |
|
1826 nsresult nsBidi::GetVisualIndex(int32_t aLogicalIndex, int32_t* aVisualIndex) { |
|
1827 if(aLogicalIndex<0 || mLength<=aLogicalIndex) { |
|
1828 return NS_ERROR_INVALID_ARG; |
|
1829 } else { |
|
1830 /* we can do the trivial cases without the runs array */ |
|
1831 switch(mDirection) { |
|
1832 case NSBIDI_LTR: |
|
1833 *aVisualIndex = aLogicalIndex; |
|
1834 return NS_OK; |
|
1835 case NSBIDI_RTL: |
|
1836 *aVisualIndex = mLength-aLogicalIndex-1; |
|
1837 return NS_OK; |
|
1838 default: |
|
1839 if(mRunCount<0 && !GetRuns()) { |
|
1840 return NS_ERROR_OUT_OF_MEMORY; |
|
1841 } else { |
|
1842 Run *runs=mRuns; |
|
1843 int32_t i, visualStart=0, offset, length; |
|
1844 |
|
1845 /* linear search for the run, search on the visual runs */ |
|
1846 for(i=0;; ++i) { |
|
1847 length=runs[i].visualLimit-visualStart; |
|
1848 offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart); |
|
1849 if(offset>=0 && offset<length) { |
|
1850 if(IS_EVEN_RUN(runs[i].logicalStart)) { |
|
1851 /* LTR */ |
|
1852 *aVisualIndex = visualStart+offset; |
|
1853 return NS_OK; |
|
1854 } else { |
|
1855 /* RTL */ |
|
1856 *aVisualIndex = visualStart+length-offset-1; |
|
1857 return NS_OK; |
|
1858 } |
|
1859 } |
|
1860 visualStart+=length; |
|
1861 } |
|
1862 } |
|
1863 } |
|
1864 } |
|
1865 } |
|
1866 |
|
1867 nsresult nsBidi::GetLogicalIndex(int32_t aVisualIndex, int32_t *aLogicalIndex) |
|
1868 { |
|
1869 if(aVisualIndex<0 || mLength<=aVisualIndex) { |
|
1870 return NS_ERROR_INVALID_ARG; |
|
1871 } else { |
|
1872 /* we can do the trivial cases without the runs array */ |
|
1873 switch(mDirection) { |
|
1874 case NSBIDI_LTR: |
|
1875 *aLogicalIndex = aVisualIndex; |
|
1876 return NS_OK; |
|
1877 case NSBIDI_RTL: |
|
1878 *aLogicalIndex = mLength-aVisualIndex-1; |
|
1879 return NS_OK; |
|
1880 default: |
|
1881 if(mRunCount<0 && !GetRuns()) { |
|
1882 return NS_ERROR_OUT_OF_MEMORY; |
|
1883 } else { |
|
1884 Run *runs=mRuns; |
|
1885 int32_t i, runCount=mRunCount, start; |
|
1886 |
|
1887 if(runCount<=10) { |
|
1888 /* linear search for the run */ |
|
1889 for(i=0; aVisualIndex>=runs[i].visualLimit; ++i) {} |
|
1890 } else { |
|
1891 /* binary search for the run */ |
|
1892 int32_t start=0, limit=runCount; |
|
1893 |
|
1894 /* the middle if() will guaranteed find the run, we don't need a loop limit */ |
|
1895 for(;;) { |
|
1896 i=(start+limit)/2; |
|
1897 if(aVisualIndex>=runs[i].visualLimit) { |
|
1898 start=i+1; |
|
1899 } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) { |
|
1900 break; |
|
1901 } else { |
|
1902 limit=i; |
|
1903 } |
|
1904 } |
|
1905 } |
|
1906 |
|
1907 start=runs[i].logicalStart; |
|
1908 if(IS_EVEN_RUN(start)) { |
|
1909 /* LTR */ |
|
1910 /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */ |
|
1911 if(i>0) { |
|
1912 aVisualIndex-=runs[i-1].visualLimit; |
|
1913 } |
|
1914 *aLogicalIndex = GET_INDEX(start)+aVisualIndex; |
|
1915 return NS_OK; |
|
1916 } else { |
|
1917 /* RTL */ |
|
1918 *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1; |
|
1919 return NS_OK; |
|
1920 } |
|
1921 } |
|
1922 } |
|
1923 } |
|
1924 } |
|
1925 |
|
1926 nsresult nsBidi::GetLogicalMap(int32_t *aIndexMap) |
|
1927 { |
|
1928 nsBidiLevel *levels; |
|
1929 nsresult rv; |
|
1930 |
|
1931 /* GetLevels() checks all of its and our arguments */ |
|
1932 rv = GetLevels(&levels); |
|
1933 if(NS_FAILED(rv)) { |
|
1934 return rv; |
|
1935 } else if(aIndexMap==nullptr) { |
|
1936 return NS_ERROR_INVALID_ARG; |
|
1937 } else { |
|
1938 return ReorderLogical(levels, mLength, aIndexMap); |
|
1939 } |
|
1940 } |
|
1941 |
|
1942 nsresult nsBidi::GetVisualMap(int32_t *aIndexMap) |
|
1943 { |
|
1944 int32_t* runCount=nullptr; |
|
1945 nsresult rv; |
|
1946 |
|
1947 /* CountRuns() checks all of its and our arguments */ |
|
1948 rv = CountRuns(runCount); |
|
1949 if(NS_FAILED(rv)) { |
|
1950 return rv; |
|
1951 } else if(aIndexMap==nullptr) { |
|
1952 return NS_ERROR_INVALID_ARG; |
|
1953 } else { |
|
1954 /* fill a visual-to-logical index map using the runs[] */ |
|
1955 Run *runs=mRuns, *runsLimit=runs+mRunCount; |
|
1956 int32_t logicalStart, visualStart, visualLimit; |
|
1957 |
|
1958 visualStart=0; |
|
1959 for(; runs<runsLimit; ++runs) { |
|
1960 logicalStart=runs->logicalStart; |
|
1961 visualLimit=runs->visualLimit; |
|
1962 if(IS_EVEN_RUN(logicalStart)) { |
|
1963 do { /* LTR */ |
|
1964 *aIndexMap++ = logicalStart++; |
|
1965 } while(++visualStart<visualLimit); |
|
1966 } else { |
|
1967 REMOVE_ODD_BIT(logicalStart); |
|
1968 logicalStart+=visualLimit-visualStart; /* logicalLimit */ |
|
1969 do { /* RTL */ |
|
1970 *aIndexMap++ = --logicalStart; |
|
1971 } while(++visualStart<visualLimit); |
|
1972 } |
|
1973 /* visualStart==visualLimit; */ |
|
1974 } |
|
1975 return NS_OK; |
|
1976 } |
|
1977 } |
|
1978 |
|
1979 /* reorder a line based on a levels array (L2) ------------------------------ */ |
|
1980 |
|
1981 nsresult nsBidi::ReorderLogical(const nsBidiLevel *aLevels, int32_t aLength, int32_t *aIndexMap) |
|
1982 { |
|
1983 int32_t start, limit, sumOfSosEos; |
|
1984 nsBidiLevel minLevel, maxLevel; |
|
1985 |
|
1986 if(aIndexMap==nullptr || |
|
1987 !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) { |
|
1988 return NS_OK; |
|
1989 } |
|
1990 |
|
1991 /* nothing to do? */ |
|
1992 if(minLevel==maxLevel && (minLevel&1)==0) { |
|
1993 return NS_OK; |
|
1994 } |
|
1995 |
|
1996 /* reorder only down to the lowest odd level */ |
|
1997 minLevel|=1; |
|
1998 |
|
1999 /* loop maxLevel..minLevel */ |
|
2000 do { |
|
2001 start=0; |
|
2002 |
|
2003 /* loop for all sequences of levels to reorder at the current maxLevel */ |
|
2004 for(;;) { |
|
2005 /* look for a sequence of levels that are all at >=maxLevel */ |
|
2006 /* look for the first index of such a sequence */ |
|
2007 while(start<aLength && aLevels[start]<maxLevel) { |
|
2008 ++start; |
|
2009 } |
|
2010 if(start>=aLength) { |
|
2011 break; /* no more such sequences */ |
|
2012 } |
|
2013 |
|
2014 /* look for the limit of such a sequence (the index behind it) */ |
|
2015 for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {} |
|
2016 |
|
2017 /* |
|
2018 * sos=start of sequence, eos=end of sequence |
|
2019 * |
|
2020 * The closed (inclusive) interval from sos to eos includes all the logical |
|
2021 * and visual indexes within this sequence. They are logically and |
|
2022 * visually contiguous and in the same range. |
|
2023 * |
|
2024 * For each run, the new visual index=sos+eos-old visual index; |
|
2025 * we pre-add sos+eos into sumOfSosEos -> |
|
2026 * new visual index=sumOfSosEos-old visual index; |
|
2027 */ |
|
2028 sumOfSosEos=start+limit-1; |
|
2029 |
|
2030 /* reorder each index in the sequence */ |
|
2031 do { |
|
2032 aIndexMap[start]=sumOfSosEos-aIndexMap[start]; |
|
2033 } while(++start<limit); |
|
2034 |
|
2035 /* start==limit */ |
|
2036 if(limit==aLength) { |
|
2037 break; /* no more such sequences */ |
|
2038 } else { |
|
2039 start=limit+1; |
|
2040 } |
|
2041 } |
|
2042 } while(--maxLevel>=minLevel); |
|
2043 |
|
2044 return NS_OK; |
|
2045 } |
|
2046 |
|
2047 nsresult nsBidi::InvertMap(const int32_t *aSrcMap, int32_t *aDestMap, int32_t aLength) |
|
2048 { |
|
2049 if(aSrcMap!=nullptr && aDestMap!=nullptr) { |
|
2050 aSrcMap+=aLength; |
|
2051 while(aLength>0) { |
|
2052 aDestMap[*--aSrcMap]=--aLength; |
|
2053 } |
|
2054 } |
|
2055 return NS_OK; |
|
2056 } |
|
2057 |
|
2058 int32_t nsBidi::doWriteReverse(const char16_t *src, int32_t srcLength, |
|
2059 char16_t *dest, uint16_t options) { |
|
2060 /* |
|
2061 * RTL run - |
|
2062 * |
|
2063 * RTL runs need to be copied to the destination in reverse order |
|
2064 * of code points, not code units, to keep Unicode characters intact. |
|
2065 * |
|
2066 * The general strategy for this is to read the source text |
|
2067 * in backward order, collect all code units for a code point |
|
2068 * (and optionally following combining characters, see below), |
|
2069 * and copy all these code units in ascending order |
|
2070 * to the destination for this run. |
|
2071 * |
|
2072 * Several options request whether combining characters |
|
2073 * should be kept after their base characters, |
|
2074 * whether Bidi control characters should be removed, and |
|
2075 * whether characters should be replaced by their mirror-image |
|
2076 * equivalent Unicode characters. |
|
2077 */ |
|
2078 int32_t i, j, destSize; |
|
2079 uint32_t c; |
|
2080 |
|
2081 /* optimize for several combinations of options */ |
|
2082 switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) { |
|
2083 case 0: |
|
2084 /* |
|
2085 * With none of the "complicated" options set, the destination |
|
2086 * run will have the same length as the source run, |
|
2087 * and there is no mirroring and no keeping combining characters |
|
2088 * with their base characters. |
|
2089 */ |
|
2090 destSize=srcLength; |
|
2091 |
|
2092 /* preserve character integrity */ |
|
2093 do { |
|
2094 /* i is always after the last code unit known to need to be kept in this segment */ |
|
2095 i=srcLength; |
|
2096 |
|
2097 /* collect code units for one base character */ |
|
2098 UTF_BACK_1(src, 0, srcLength); |
|
2099 |
|
2100 /* copy this base character */ |
|
2101 j=srcLength; |
|
2102 do { |
|
2103 *dest++=src[j++]; |
|
2104 } while(j<i); |
|
2105 } while(srcLength>0); |
|
2106 break; |
|
2107 case NSBIDI_KEEP_BASE_COMBINING: |
|
2108 /* |
|
2109 * Here, too, the destination |
|
2110 * run will have the same length as the source run, |
|
2111 * and there is no mirroring. |
|
2112 * We do need to keep combining characters with their base characters. |
|
2113 */ |
|
2114 destSize=srcLength; |
|
2115 |
|
2116 /* preserve character integrity */ |
|
2117 do { |
|
2118 /* i is always after the last code unit known to need to be kept in this segment */ |
|
2119 i=srcLength; |
|
2120 |
|
2121 /* collect code units and modifier letters for one base character */ |
|
2122 do { |
|
2123 UTF_PREV_CHAR(src, 0, srcLength, c); |
|
2124 } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)); |
|
2125 |
|
2126 /* copy this "user character" */ |
|
2127 j=srcLength; |
|
2128 do { |
|
2129 *dest++=src[j++]; |
|
2130 } while(j<i); |
|
2131 } while(srcLength>0); |
|
2132 break; |
|
2133 default: |
|
2134 /* |
|
2135 * With several "complicated" options set, this is the most |
|
2136 * general and the slowest copying of an RTL run. |
|
2137 * We will do mirroring, remove Bidi controls, and |
|
2138 * keep combining characters with their base characters |
|
2139 * as requested. |
|
2140 */ |
|
2141 if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) { |
|
2142 i=srcLength; |
|
2143 } else { |
|
2144 /* we need to find out the destination length of the run, |
|
2145 which will not include the Bidi control characters */ |
|
2146 int32_t length=srcLength; |
|
2147 char16_t ch; |
|
2148 |
|
2149 i=0; |
|
2150 do { |
|
2151 ch=*src++; |
|
2152 if (!IsBidiControl((uint32_t)ch)) { |
|
2153 ++i; |
|
2154 } |
|
2155 } while(--length>0); |
|
2156 src-=srcLength; |
|
2157 } |
|
2158 destSize=i; |
|
2159 |
|
2160 /* preserve character integrity */ |
|
2161 do { |
|
2162 /* i is always after the last code unit known to need to be kept in this segment */ |
|
2163 i=srcLength; |
|
2164 |
|
2165 /* collect code units for one base character */ |
|
2166 UTF_PREV_CHAR(src, 0, srcLength, c); |
|
2167 if(options&NSBIDI_KEEP_BASE_COMBINING) { |
|
2168 /* collect modifier letters for this base character */ |
|
2169 while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) { |
|
2170 UTF_PREV_CHAR(src, 0, srcLength, c); |
|
2171 } |
|
2172 } |
|
2173 |
|
2174 if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) { |
|
2175 /* do not copy this Bidi control character */ |
|
2176 continue; |
|
2177 } |
|
2178 |
|
2179 /* copy this "user character" */ |
|
2180 j=srcLength; |
|
2181 if(options&NSBIDI_DO_MIRRORING) { |
|
2182 /* mirror only the base character */ |
|
2183 c = SymmSwap(c); |
|
2184 |
|
2185 int32_t k=0; |
|
2186 UTF_APPEND_CHAR_UNSAFE(dest, k, c); |
|
2187 dest+=k; |
|
2188 j+=k; |
|
2189 } |
|
2190 while(j<i) { |
|
2191 *dest++=src[j++]; |
|
2192 } |
|
2193 } while(srcLength>0); |
|
2194 break; |
|
2195 } /* end of switch */ |
|
2196 return destSize; |
|
2197 } |
|
2198 |
|
2199 nsresult nsBidi::WriteReverse(const char16_t *aSrc, int32_t aSrcLength, char16_t *aDest, uint16_t aOptions, int32_t *aDestSize) |
|
2200 { |
|
2201 if( aSrc==nullptr || aSrcLength<0 || |
|
2202 aDest==nullptr |
|
2203 ) { |
|
2204 return NS_ERROR_INVALID_ARG; |
|
2205 } |
|
2206 |
|
2207 /* do input and output overlap? */ |
|
2208 if( aSrc>=aDest && aSrc<aDest+aSrcLength || |
|
2209 aDest>=aSrc && aDest<aSrc+aSrcLength |
|
2210 ) { |
|
2211 return NS_ERROR_INVALID_ARG; |
|
2212 } |
|
2213 |
|
2214 if(aSrcLength>0) { |
|
2215 *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions); |
|
2216 } |
|
2217 return NS_OK; |
|
2218 } |
|
2219 #endif // FULL_BIDI_ENGINE |