|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1999-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * file name: ubidiln.c |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 1999aug06 |
|
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche |
|
15 */ |
|
16 |
|
17 #include "cmemory.h" |
|
18 #include "unicode/utypes.h" |
|
19 #include "unicode/ustring.h" |
|
20 #include "unicode/uchar.h" |
|
21 #include "unicode/ubidi.h" |
|
22 #include "ubidiimp.h" |
|
23 #include "uassert.h" |
|
24 |
|
25 #ifndef U_COMMON_IMPLEMENTATION |
|
26 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu |
|
27 #endif |
|
28 |
|
29 /* |
|
30 * General remarks about the functions in this file: |
|
31 * |
|
32 * These functions deal with the aspects of potentially mixed-directional |
|
33 * text in a single paragraph or in a line of a single paragraph |
|
34 * which has already been processed according to |
|
35 * the Unicode 6.3 BiDi algorithm as defined in |
|
36 * http://www.unicode.org/unicode/reports/tr9/ , version 28, |
|
37 * also described in The Unicode Standard, Version 6.3.0 . |
|
38 * |
|
39 * This means that there is a UBiDi object with a levels |
|
40 * and a dirProps array. |
|
41 * paraLevel and direction are also set. |
|
42 * Only if the length of the text is zero, then levels==dirProps==NULL. |
|
43 * |
|
44 * The overall directionality of the paragraph |
|
45 * or line is used to bypass the reordering steps if possible. |
|
46 * Even purely RTL text does not need reordering there because |
|
47 * the ubidi_getLogical/VisualIndex() functions can compute the |
|
48 * index on the fly in such a case. |
|
49 * |
|
50 * The implementation of the access to same-level-runs and of the reordering |
|
51 * do attempt to provide better performance and less memory usage compared to |
|
52 * a direct implementation of especially rule (L2) with an array of |
|
53 * one (32-bit) integer per text character. |
|
54 * |
|
55 * Here, the levels array is scanned as soon as necessary, and a vector of |
|
56 * same-level-runs is created. Reordering then is done on this vector. |
|
57 * For each run of text positions that were resolved to the same level, |
|
58 * only 8 bytes are stored: the first text position of the run and the visual |
|
59 * position behind the run after reordering. |
|
60 * One sign bit is used to hold the directionality of the run. |
|
61 * This is inefficient if there are many very short runs. If the average run |
|
62 * length is <2, then this uses more memory. |
|
63 * |
|
64 * In a further attempt to save memory, the levels array is never changed |
|
65 * after all the resolution rules (Xn, Wn, Nn, In). |
|
66 * Many functions have to consider the field trailingWSStart: |
|
67 * if it is less than length, then there is an implicit trailing run |
|
68 * at the paraLevel, |
|
69 * which is not reflected in the levels array. |
|
70 * This allows a line UBiDi object to use the same levels array as |
|
71 * its paragraph parent object. |
|
72 * |
|
73 * When a UBiDi object is created for a line of a paragraph, then the |
|
74 * paragraph's levels and dirProps arrays are reused by way of setting |
|
75 * a pointer into them, not by copying. This again saves memory and forbids to |
|
76 * change the now shared levels for (L1). |
|
77 */ |
|
78 |
|
79 /* handle trailing WS (L1) -------------------------------------------------- */ |
|
80 |
|
81 /* |
|
82 * setTrailingWSStart() sets the start index for a trailing |
|
83 * run of WS in the line. This is necessary because we do not modify |
|
84 * the paragraph's levels array that we just point into. |
|
85 * Using trailingWSStart is another form of performing (L1). |
|
86 * |
|
87 * To make subsequent operations easier, we also include the run |
|
88 * before the WS if it is at the paraLevel - we merge the two here. |
|
89 * |
|
90 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is |
|
91 * set correctly for the line even when contextual multiple paragraphs. |
|
92 */ |
|
93 static void |
|
94 setTrailingWSStart(UBiDi *pBiDi) { |
|
95 /* pBiDi->direction!=UBIDI_MIXED */ |
|
96 |
|
97 const DirProp *dirProps=pBiDi->dirProps; |
|
98 UBiDiLevel *levels=pBiDi->levels; |
|
99 int32_t start=pBiDi->length; |
|
100 UBiDiLevel paraLevel=pBiDi->paraLevel; |
|
101 |
|
102 /* If the line is terminated by a block separator, all preceding WS etc... |
|
103 are already set to paragraph level. |
|
104 Setting trailingWSStart to pBidi->length will avoid changing the |
|
105 level of B chars from 0 to paraLevel in ubidi_getLevels when |
|
106 orderParagraphsLTR==TRUE. |
|
107 */ |
|
108 if(dirProps[start-1]==B) { |
|
109 pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ |
|
110 return; |
|
111 } |
|
112 /* go backwards across all WS, BN, explicit codes */ |
|
113 while(start>0 && DIRPROP_FLAG(PURE_DIRPROP(dirProps[start-1]))&MASK_WS) { |
|
114 --start; |
|
115 } |
|
116 |
|
117 /* if the WS run can be merged with the previous run then do so here */ |
|
118 while(start>0 && levels[start-1]==paraLevel) { |
|
119 --start; |
|
120 } |
|
121 |
|
122 pBiDi->trailingWSStart=start; |
|
123 } |
|
124 |
|
125 /* ubidi_setLine ------------------------------------------------------------ */ |
|
126 |
|
127 U_CAPI void U_EXPORT2 |
|
128 ubidi_setLine(const UBiDi *pParaBiDi, |
|
129 int32_t start, int32_t limit, |
|
130 UBiDi *pLineBiDi, |
|
131 UErrorCode *pErrorCode) { |
|
132 int32_t length; |
|
133 |
|
134 /* check the argument values */ |
|
135 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
136 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode); |
|
137 RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); |
|
138 RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); |
|
139 if(pLineBiDi==NULL) { |
|
140 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
141 return; |
|
142 } |
|
143 if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != |
|
144 ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { |
|
145 /* the line crosses a paragraph boundary */ |
|
146 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
147 return; |
|
148 } |
|
149 |
|
150 /* set the values in pLineBiDi from its pParaBiDi parent */ |
|
151 pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ |
|
152 pLineBiDi->text=pParaBiDi->text+start; |
|
153 length=pLineBiDi->length=limit-start; |
|
154 pLineBiDi->resultLength=pLineBiDi->originalLength=length; |
|
155 pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); |
|
156 pLineBiDi->paraCount=pParaBiDi->paraCount; |
|
157 pLineBiDi->runs=NULL; |
|
158 pLineBiDi->flags=0; |
|
159 pLineBiDi->reorderingMode=pParaBiDi->reorderingMode; |
|
160 pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions; |
|
161 pLineBiDi->controlCount=0; |
|
162 if(pParaBiDi->controlCount>0) { |
|
163 int32_t j; |
|
164 for(j=start; j<limit; j++) { |
|
165 if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) { |
|
166 pLineBiDi->controlCount++; |
|
167 } |
|
168 } |
|
169 pLineBiDi->resultLength-=pLineBiDi->controlCount; |
|
170 } |
|
171 |
|
172 pLineBiDi->dirProps=pParaBiDi->dirProps+start; |
|
173 pLineBiDi->levels=pParaBiDi->levels+start; |
|
174 pLineBiDi->runCount=-1; |
|
175 |
|
176 if(pParaBiDi->direction!=UBIDI_MIXED) { |
|
177 /* the parent is already trivial */ |
|
178 pLineBiDi->direction=pParaBiDi->direction; |
|
179 |
|
180 /* |
|
181 * The parent's levels are all either |
|
182 * implicitly or explicitly ==paraLevel; |
|
183 * do the same here. |
|
184 */ |
|
185 if(pParaBiDi->trailingWSStart<=start) { |
|
186 pLineBiDi->trailingWSStart=0; |
|
187 } else if(pParaBiDi->trailingWSStart<limit) { |
|
188 pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start; |
|
189 } else { |
|
190 pLineBiDi->trailingWSStart=length; |
|
191 } |
|
192 } else { |
|
193 const UBiDiLevel *levels=pLineBiDi->levels; |
|
194 int32_t i, trailingWSStart; |
|
195 UBiDiLevel level; |
|
196 |
|
197 setTrailingWSStart(pLineBiDi); |
|
198 trailingWSStart=pLineBiDi->trailingWSStart; |
|
199 |
|
200 /* recalculate pLineBiDi->direction */ |
|
201 if(trailingWSStart==0) { |
|
202 /* all levels are at paraLevel */ |
|
203 pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); |
|
204 } else { |
|
205 /* get the level of the first character */ |
|
206 level=(UBiDiLevel)(levels[0]&1); |
|
207 |
|
208 /* if there is anything of a different level, then the line is mixed */ |
|
209 if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) { |
|
210 /* the trailing WS is at paraLevel, which differs from levels[0] */ |
|
211 pLineBiDi->direction=UBIDI_MIXED; |
|
212 } else { |
|
213 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ |
|
214 i=1; |
|
215 for(;;) { |
|
216 if(i==trailingWSStart) { |
|
217 /* the direction values match those in level */ |
|
218 pLineBiDi->direction=(UBiDiDirection)level; |
|
219 break; |
|
220 } else if((levels[i]&1)!=level) { |
|
221 pLineBiDi->direction=UBIDI_MIXED; |
|
222 break; |
|
223 } |
|
224 ++i; |
|
225 } |
|
226 } |
|
227 } |
|
228 |
|
229 switch(pLineBiDi->direction) { |
|
230 case UBIDI_LTR: |
|
231 /* make sure paraLevel is even */ |
|
232 pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); |
|
233 |
|
234 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ |
|
235 pLineBiDi->trailingWSStart=0; |
|
236 break; |
|
237 case UBIDI_RTL: |
|
238 /* make sure paraLevel is odd */ |
|
239 pLineBiDi->paraLevel|=1; |
|
240 |
|
241 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ |
|
242 pLineBiDi->trailingWSStart=0; |
|
243 break; |
|
244 default: |
|
245 break; |
|
246 } |
|
247 } |
|
248 pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ |
|
249 return; |
|
250 } |
|
251 |
|
252 U_CAPI UBiDiLevel U_EXPORT2 |
|
253 ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { |
|
254 /* return paraLevel if in the trailing WS run, otherwise the real level */ |
|
255 if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { |
|
256 return 0; |
|
257 } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { |
|
258 return GET_PARALEVEL(pBiDi, charIndex); |
|
259 } else { |
|
260 return pBiDi->levels[charIndex]; |
|
261 } |
|
262 } |
|
263 |
|
264 U_CAPI const UBiDiLevel * U_EXPORT2 |
|
265 ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
|
266 int32_t start, length; |
|
267 |
|
268 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); |
|
269 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); |
|
270 if((length=pBiDi->length)<=0) { |
|
271 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
272 return NULL; |
|
273 } |
|
274 if((start=pBiDi->trailingWSStart)==length) { |
|
275 /* the current levels array reflects the WS run */ |
|
276 return pBiDi->levels; |
|
277 } |
|
278 |
|
279 /* |
|
280 * After the previous if(), we know that the levels array |
|
281 * has an implicit trailing WS run and therefore does not fully |
|
282 * reflect itself all the levels. |
|
283 * This must be a UBiDi object for a line, and |
|
284 * we need to create a new levels array. |
|
285 */ |
|
286 if(getLevelsMemory(pBiDi, length)) { |
|
287 UBiDiLevel *levels=pBiDi->levelsMemory; |
|
288 |
|
289 if(start>0 && levels!=pBiDi->levels) { |
|
290 uprv_memcpy(levels, pBiDi->levels, start); |
|
291 } |
|
292 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, |
|
293 since pBidi is a line object */ |
|
294 uprv_memset(levels+start, pBiDi->paraLevel, length-start); |
|
295 |
|
296 /* this new levels array is set for the line and reflects the WS run */ |
|
297 pBiDi->trailingWSStart=length; |
|
298 return pBiDi->levels=levels; |
|
299 } else { |
|
300 /* out of memory */ |
|
301 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
302 return NULL; |
|
303 } |
|
304 } |
|
305 |
|
306 U_CAPI void U_EXPORT2 |
|
307 ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, |
|
308 int32_t *pLogicalLimit, UBiDiLevel *pLevel) { |
|
309 UErrorCode errorCode; |
|
310 int32_t runCount, visualStart, logicalLimit, logicalFirst, i; |
|
311 Run iRun; |
|
312 |
|
313 errorCode=U_ZERO_ERROR; |
|
314 RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode); |
|
315 /* ubidi_countRuns will check VALID_PARA_OR_LINE */ |
|
316 runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); |
|
317 if(U_FAILURE(errorCode)) { |
|
318 return; |
|
319 } |
|
320 /* this is done based on runs rather than on levels since levels have |
|
321 a special interpretation when UBIDI_REORDER_RUNS_ONLY |
|
322 */ |
|
323 visualStart=logicalLimit=0; |
|
324 iRun=pBiDi->runs[0]; |
|
325 |
|
326 for(i=0; i<runCount; i++) { |
|
327 iRun = pBiDi->runs[i]; |
|
328 logicalFirst=GET_INDEX(iRun.logicalStart); |
|
329 logicalLimit=logicalFirst+iRun.visualLimit-visualStart; |
|
330 if((logicalPosition>=logicalFirst) && |
|
331 (logicalPosition<logicalLimit)) { |
|
332 break; |
|
333 } |
|
334 visualStart = iRun.visualLimit; |
|
335 } |
|
336 if(pLogicalLimit) { |
|
337 *pLogicalLimit=logicalLimit; |
|
338 } |
|
339 if(pLevel) { |
|
340 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { |
|
341 *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart); |
|
342 } |
|
343 else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) { |
|
344 *pLevel=GET_PARALEVEL(pBiDi, logicalPosition); |
|
345 } else { |
|
346 *pLevel=pBiDi->levels[logicalPosition]; |
|
347 } |
|
348 } |
|
349 } |
|
350 |
|
351 /* runs API functions ------------------------------------------------------- */ |
|
352 |
|
353 U_CAPI int32_t U_EXPORT2 |
|
354 ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
|
355 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); |
|
356 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); |
|
357 ubidi_getRuns(pBiDi, pErrorCode); |
|
358 if(U_FAILURE(*pErrorCode)) { |
|
359 return -1; |
|
360 } |
|
361 return pBiDi->runCount; |
|
362 } |
|
363 |
|
364 U_CAPI UBiDiDirection U_EXPORT2 |
|
365 ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, |
|
366 int32_t *pLogicalStart, int32_t *pLength) |
|
367 { |
|
368 int32_t start; |
|
369 UErrorCode errorCode = U_ZERO_ERROR; |
|
370 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); |
|
371 ubidi_getRuns(pBiDi, &errorCode); |
|
372 if(U_FAILURE(errorCode)) { |
|
373 return UBIDI_LTR; |
|
374 } |
|
375 RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); |
|
376 |
|
377 start=pBiDi->runs[runIndex].logicalStart; |
|
378 if(pLogicalStart!=NULL) { |
|
379 *pLogicalStart=GET_INDEX(start); |
|
380 } |
|
381 if(pLength!=NULL) { |
|
382 if(runIndex>0) { |
|
383 *pLength=pBiDi->runs[runIndex].visualLimit- |
|
384 pBiDi->runs[runIndex-1].visualLimit; |
|
385 } else { |
|
386 *pLength=pBiDi->runs[0].visualLimit; |
|
387 } |
|
388 } |
|
389 return (UBiDiDirection)GET_ODD_BIT(start); |
|
390 } |
|
391 |
|
392 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ |
|
393 static void |
|
394 getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { |
|
395 /* simple, single-run case */ |
|
396 pBiDi->runs=pBiDi->simpleRuns; |
|
397 pBiDi->runCount=1; |
|
398 |
|
399 /* fill and reorder the single run */ |
|
400 pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); |
|
401 pBiDi->runs[0].visualLimit=pBiDi->length; |
|
402 pBiDi->runs[0].insertRemove=0; |
|
403 } |
|
404 |
|
405 /* reorder the runs array (L2) ---------------------------------------------- */ |
|
406 |
|
407 /* |
|
408 * Reorder the same-level runs in the runs array. |
|
409 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. |
|
410 * All the visualStart fields=logical start before reordering. |
|
411 * The "odd" bits are not set yet. |
|
412 * |
|
413 * Reordering with this data structure lends itself to some handy shortcuts: |
|
414 * |
|
415 * Since each run is moved but not modified, and since at the initial maxLevel |
|
416 * each sequence of same-level runs consists of only one run each, we |
|
417 * don't need to do anything there and can predecrement maxLevel. |
|
418 * In many simple cases, the reordering is thus done entirely in the |
|
419 * index mapping. |
|
420 * Also, reordering occurs only down to the lowest odd level that occurs, |
|
421 * which is minLevel|1. However, if the lowest level itself is odd, then |
|
422 * in the last reordering the sequence of the runs at this level or higher |
|
423 * will be all runs, and we don't need the elaborate loop to search for them. |
|
424 * This is covered by ++minLevel instead of minLevel|=1 followed |
|
425 * by an extra reorder-all after the reorder-some loop. |
|
426 * About a trailing WS run: |
|
427 * Such a run would need special treatment because its level is not |
|
428 * reflected in levels[] if this is not a paragraph object. |
|
429 * Instead, all characters from trailingWSStart on are implicitly at |
|
430 * paraLevel. |
|
431 * However, for all maxLevel>paraLevel, this run will never be reordered |
|
432 * and does not need to be taken into account. maxLevel==paraLevel is only reordered |
|
433 * if minLevel==paraLevel is odd, which is done in the extra segment. |
|
434 * This means that for the main reordering loop we don't need to consider |
|
435 * this run and can --runCount. If it is later part of the all-runs |
|
436 * reordering, then runCount is adjusted accordingly. |
|
437 */ |
|
438 static void |
|
439 reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { |
|
440 Run *runs, tempRun; |
|
441 UBiDiLevel *levels; |
|
442 int32_t firstRun, endRun, limitRun, runCount; |
|
443 |
|
444 /* nothing to do? */ |
|
445 if(maxLevel<=(minLevel|1)) { |
|
446 return; |
|
447 } |
|
448 |
|
449 /* |
|
450 * Reorder only down to the lowest odd level |
|
451 * and reorder at an odd minLevel in a separate, simpler loop. |
|
452 * See comments above for why minLevel is always incremented. |
|
453 */ |
|
454 ++minLevel; |
|
455 |
|
456 runs=pBiDi->runs; |
|
457 levels=pBiDi->levels; |
|
458 runCount=pBiDi->runCount; |
|
459 |
|
460 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ |
|
461 if(pBiDi->trailingWSStart<pBiDi->length) { |
|
462 --runCount; |
|
463 } |
|
464 |
|
465 while(--maxLevel>=minLevel) { |
|
466 firstRun=0; |
|
467 |
|
468 /* loop for all sequences of runs */ |
|
469 for(;;) { |
|
470 /* look for a sequence of runs that are all at >=maxLevel */ |
|
471 /* look for the first run of such a sequence */ |
|
472 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) { |
|
473 ++firstRun; |
|
474 } |
|
475 if(firstRun>=runCount) { |
|
476 break; /* no more such runs */ |
|
477 } |
|
478 |
|
479 /* look for the limit run of such a sequence (the run behind it) */ |
|
480 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {} |
|
481 |
|
482 /* Swap the entire sequence of runs from firstRun to limitRun-1. */ |
|
483 endRun=limitRun-1; |
|
484 while(firstRun<endRun) { |
|
485 tempRun = runs[firstRun]; |
|
486 runs[firstRun]=runs[endRun]; |
|
487 runs[endRun]=tempRun; |
|
488 ++firstRun; |
|
489 --endRun; |
|
490 } |
|
491 |
|
492 if(limitRun==runCount) { |
|
493 break; /* no more such runs */ |
|
494 } else { |
|
495 firstRun=limitRun+1; |
|
496 } |
|
497 } |
|
498 } |
|
499 |
|
500 /* now do maxLevel==old minLevel (==odd!), see above */ |
|
501 if(!(minLevel&1)) { |
|
502 firstRun=0; |
|
503 |
|
504 /* include the trailing WS run in this complete reordering */ |
|
505 if(pBiDi->trailingWSStart==pBiDi->length) { |
|
506 --runCount; |
|
507 } |
|
508 |
|
509 /* Swap the entire sequence of all runs. (endRun==runCount) */ |
|
510 while(firstRun<runCount) { |
|
511 tempRun=runs[firstRun]; |
|
512 runs[firstRun]=runs[runCount]; |
|
513 runs[runCount]=tempRun; |
|
514 ++firstRun; |
|
515 --runCount; |
|
516 } |
|
517 } |
|
518 } |
|
519 |
|
520 /* compute the runs array --------------------------------------------------- */ |
|
521 |
|
522 static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { |
|
523 Run *runs=pBiDi->runs; |
|
524 int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; |
|
525 |
|
526 for(i=0; i<runCount; i++) { |
|
527 length=runs[i].visualLimit-visualStart; |
|
528 logicalStart=GET_INDEX(runs[i].logicalStart); |
|
529 if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) { |
|
530 return i; |
|
531 } |
|
532 visualStart+=length; |
|
533 } |
|
534 /* we should never get here */ |
|
535 U_ASSERT(FALSE); |
|
536 *pErrorCode = U_INVALID_STATE_ERROR; |
|
537 return 0; |
|
538 } |
|
539 |
|
540 /* |
|
541 * Compute the runs array from the levels array. |
|
542 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 |
|
543 * and the runs are reordered. |
|
544 * Odd-level runs have visualStart on their visual right edge and |
|
545 * they progress visually to the left. |
|
546 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the |
|
547 * sum of appropriate LRM/RLM_BEFORE/AFTER flags. |
|
548 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the |
|
549 * negative number of BiDi control characters within this run. |
|
550 */ |
|
551 U_CFUNC UBool |
|
552 ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
|
553 /* |
|
554 * This method returns immediately if the runs are already set. This |
|
555 * includes the case of length==0 (handled in setPara).. |
|
556 */ |
|
557 if (pBiDi->runCount>=0) { |
|
558 return TRUE; |
|
559 } |
|
560 |
|
561 if(pBiDi->direction!=UBIDI_MIXED) { |
|
562 /* simple, single-run case - this covers length==0 */ |
|
563 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ |
|
564 getSingleRun(pBiDi, pBiDi->paraLevel); |
|
565 } else /* UBIDI_MIXED, length>0 */ { |
|
566 /* mixed directionality */ |
|
567 int32_t length=pBiDi->length, limit; |
|
568 UBiDiLevel *levels=pBiDi->levels; |
|
569 int32_t i, runCount; |
|
570 UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ |
|
571 /* |
|
572 * If there are WS characters at the end of the line |
|
573 * and the run preceding them has a level different from |
|
574 * paraLevel, then they will form their own run at paraLevel (L1). |
|
575 * Count them separately. |
|
576 * We need some special treatment for this in order to not |
|
577 * modify the levels array which a line UBiDi object shares |
|
578 * with its paragraph parent and its other line siblings. |
|
579 * In other words, for the trailing WS, it may be |
|
580 * levels[]!=paraLevel but we have to treat it like it were so. |
|
581 */ |
|
582 limit=pBiDi->trailingWSStart; |
|
583 /* count the runs, there is at least one non-WS run, and limit>0 */ |
|
584 runCount=0; |
|
585 for(i=0; i<limit; ++i) { |
|
586 /* increment runCount at the start of each run */ |
|
587 if(levels[i]!=level) { |
|
588 ++runCount; |
|
589 level=levels[i]; |
|
590 } |
|
591 } |
|
592 |
|
593 /* |
|
594 * We don't need to see if the last run can be merged with a trailing |
|
595 * WS run because setTrailingWSStart() would have done that. |
|
596 */ |
|
597 if(runCount==1 && limit==length) { |
|
598 /* There is only one non-WS run and no trailing WS-run. */ |
|
599 getSingleRun(pBiDi, levels[0]); |
|
600 } else /* runCount>1 || limit<length */ { |
|
601 /* allocate and set the runs */ |
|
602 Run *runs; |
|
603 int32_t runIndex, start; |
|
604 UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; |
|
605 |
|
606 /* now, count a (non-mergeable) WS run */ |
|
607 if(limit<length) { |
|
608 ++runCount; |
|
609 } |
|
610 |
|
611 /* runCount>1 */ |
|
612 if(getRunsMemory(pBiDi, runCount)) { |
|
613 runs=pBiDi->runsMemory; |
|
614 } else { |
|
615 return FALSE; |
|
616 } |
|
617 |
|
618 /* set the runs */ |
|
619 /* FOOD FOR THOUGHT: this could be optimized, e.g.: |
|
620 * 464->444, 484->444, 575->555, 595->555 |
|
621 * However, that would take longer. Check also how it would |
|
622 * interact with BiDi control removal and inserting Marks. |
|
623 */ |
|
624 runIndex=0; |
|
625 |
|
626 /* search for the run limits and initialize visualLimit values with the run lengths */ |
|
627 i=0; |
|
628 do { |
|
629 /* prepare this run */ |
|
630 start=i; |
|
631 level=levels[i]; |
|
632 if(level<minLevel) { |
|
633 minLevel=level; |
|
634 } |
|
635 if(level>maxLevel) { |
|
636 maxLevel=level; |
|
637 } |
|
638 |
|
639 /* look for the run limit */ |
|
640 while(++i<limit && levels[i]==level) {} |
|
641 |
|
642 /* i is another run limit */ |
|
643 runs[runIndex].logicalStart=start; |
|
644 runs[runIndex].visualLimit=i-start; |
|
645 runs[runIndex].insertRemove=0; |
|
646 ++runIndex; |
|
647 } while(i<limit); |
|
648 |
|
649 if(limit<length) { |
|
650 /* there is a separate WS run */ |
|
651 runs[runIndex].logicalStart=limit; |
|
652 runs[runIndex].visualLimit=length-limit; |
|
653 /* For the trailing WS run, pBiDi->paraLevel is ok even |
|
654 if contextual multiple paragraphs. */ |
|
655 if(pBiDi->paraLevel<minLevel) { |
|
656 minLevel=pBiDi->paraLevel; |
|
657 } |
|
658 } |
|
659 |
|
660 /* set the object fields */ |
|
661 pBiDi->runs=runs; |
|
662 pBiDi->runCount=runCount; |
|
663 |
|
664 reorderLine(pBiDi, minLevel, maxLevel); |
|
665 |
|
666 /* now add the direction flags and adjust the visualLimit's to be just that */ |
|
667 /* this loop will also handle the trailing WS run */ |
|
668 limit=0; |
|
669 for(i=0; i<runCount; ++i) { |
|
670 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]); |
|
671 limit+=runs[i].visualLimit; |
|
672 runs[i].visualLimit=limit; |
|
673 } |
|
674 |
|
675 /* Set the "odd" bit for the trailing WS run. */ |
|
676 /* For a RTL paragraph, it will be the *first* run in visual order. */ |
|
677 /* For the trailing WS run, pBiDi->paraLevel is ok even if |
|
678 contextual multiple paragraphs. */ |
|
679 if(runIndex<runCount) { |
|
680 int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex; |
|
681 |
|
682 ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); |
|
683 } |
|
684 } |
|
685 } |
|
686 |
|
687 /* handle insert LRM/RLM BEFORE/AFTER run */ |
|
688 if(pBiDi->insertPoints.size>0) { |
|
689 Point *point, *start=pBiDi->insertPoints.points, |
|
690 *limit=start+pBiDi->insertPoints.size; |
|
691 int32_t runIndex; |
|
692 for(point=start; point<limit; point++) { |
|
693 runIndex=getRunFromLogicalIndex(pBiDi, point->pos, pErrorCode); |
|
694 pBiDi->runs[runIndex].insertRemove|=point->flag; |
|
695 } |
|
696 } |
|
697 |
|
698 /* handle remove BiDi control characters */ |
|
699 if(pBiDi->controlCount>0) { |
|
700 int32_t runIndex; |
|
701 const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu; |
|
702 for(pu=start; pu<limit; pu++) { |
|
703 if(IS_BIDI_CONTROL_CHAR(*pu)) { |
|
704 runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start), pErrorCode); |
|
705 pBiDi->runs[runIndex].insertRemove--; |
|
706 } |
|
707 } |
|
708 } |
|
709 |
|
710 return TRUE; |
|
711 } |
|
712 |
|
713 static UBool |
|
714 prepareReorder(const UBiDiLevel *levels, int32_t length, |
|
715 int32_t *indexMap, |
|
716 UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { |
|
717 int32_t start; |
|
718 UBiDiLevel level, minLevel, maxLevel; |
|
719 |
|
720 if(levels==NULL || length<=0) { |
|
721 return FALSE; |
|
722 } |
|
723 |
|
724 /* determine minLevel and maxLevel */ |
|
725 minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; |
|
726 maxLevel=0; |
|
727 for(start=length; start>0;) { |
|
728 level=levels[--start]; |
|
729 if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { |
|
730 return FALSE; |
|
731 } |
|
732 if(level<minLevel) { |
|
733 minLevel=level; |
|
734 } |
|
735 if(level>maxLevel) { |
|
736 maxLevel=level; |
|
737 } |
|
738 } |
|
739 *pMinLevel=minLevel; |
|
740 *pMaxLevel=maxLevel; |
|
741 |
|
742 /* initialize the index map */ |
|
743 for(start=length; start>0;) { |
|
744 --start; |
|
745 indexMap[start]=start; |
|
746 } |
|
747 |
|
748 return TRUE; |
|
749 } |
|
750 |
|
751 /* reorder a line based on a levels array (L2) ------------------------------ */ |
|
752 |
|
753 U_CAPI void U_EXPORT2 |
|
754 ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { |
|
755 int32_t start, limit, sumOfSosEos; |
|
756 UBiDiLevel minLevel = 0, maxLevel = 0; |
|
757 |
|
758 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { |
|
759 return; |
|
760 } |
|
761 |
|
762 /* nothing to do? */ |
|
763 if(minLevel==maxLevel && (minLevel&1)==0) { |
|
764 return; |
|
765 } |
|
766 |
|
767 /* reorder only down to the lowest odd level */ |
|
768 minLevel|=1; |
|
769 |
|
770 /* loop maxLevel..minLevel */ |
|
771 do { |
|
772 start=0; |
|
773 |
|
774 /* loop for all sequences of levels to reorder at the current maxLevel */ |
|
775 for(;;) { |
|
776 /* look for a sequence of levels that are all at >=maxLevel */ |
|
777 /* look for the first index of such a sequence */ |
|
778 while(start<length && levels[start]<maxLevel) { |
|
779 ++start; |
|
780 } |
|
781 if(start>=length) { |
|
782 break; /* no more such sequences */ |
|
783 } |
|
784 |
|
785 /* look for the limit of such a sequence (the index behind it) */ |
|
786 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} |
|
787 |
|
788 /* |
|
789 * sos=start of sequence, eos=end of sequence |
|
790 * |
|
791 * The closed (inclusive) interval from sos to eos includes all the logical |
|
792 * and visual indexes within this sequence. They are logically and |
|
793 * visually contiguous and in the same range. |
|
794 * |
|
795 * For each run, the new visual index=sos+eos-old visual index; |
|
796 * we pre-add sos+eos into sumOfSosEos -> |
|
797 * new visual index=sumOfSosEos-old visual index; |
|
798 */ |
|
799 sumOfSosEos=start+limit-1; |
|
800 |
|
801 /* reorder each index in the sequence */ |
|
802 do { |
|
803 indexMap[start]=sumOfSosEos-indexMap[start]; |
|
804 } while(++start<limit); |
|
805 |
|
806 /* start==limit */ |
|
807 if(limit==length) { |
|
808 break; /* no more such sequences */ |
|
809 } else { |
|
810 start=limit+1; |
|
811 } |
|
812 } |
|
813 } while(--maxLevel>=minLevel); |
|
814 } |
|
815 |
|
816 U_CAPI void U_EXPORT2 |
|
817 ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { |
|
818 int32_t start, end, limit, temp; |
|
819 UBiDiLevel minLevel = 0, maxLevel = 0; |
|
820 |
|
821 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { |
|
822 return; |
|
823 } |
|
824 |
|
825 /* nothing to do? */ |
|
826 if(minLevel==maxLevel && (minLevel&1)==0) { |
|
827 return; |
|
828 } |
|
829 |
|
830 /* reorder only down to the lowest odd level */ |
|
831 minLevel|=1; |
|
832 |
|
833 /* loop maxLevel..minLevel */ |
|
834 do { |
|
835 start=0; |
|
836 |
|
837 /* loop for all sequences of levels to reorder at the current maxLevel */ |
|
838 for(;;) { |
|
839 /* look for a sequence of levels that are all at >=maxLevel */ |
|
840 /* look for the first index of such a sequence */ |
|
841 while(start<length && levels[start]<maxLevel) { |
|
842 ++start; |
|
843 } |
|
844 if(start>=length) { |
|
845 break; /* no more such runs */ |
|
846 } |
|
847 |
|
848 /* look for the limit of such a sequence (the index behind it) */ |
|
849 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} |
|
850 |
|
851 /* |
|
852 * Swap the entire interval of indexes from start to limit-1. |
|
853 * We don't need to swap the levels for the purpose of this |
|
854 * algorithm: the sequence of levels that we look at does not |
|
855 * move anyway. |
|
856 */ |
|
857 end=limit-1; |
|
858 while(start<end) { |
|
859 temp=indexMap[start]; |
|
860 indexMap[start]=indexMap[end]; |
|
861 indexMap[end]=temp; |
|
862 |
|
863 ++start; |
|
864 --end; |
|
865 } |
|
866 |
|
867 if(limit==length) { |
|
868 break; /* no more such sequences */ |
|
869 } else { |
|
870 start=limit+1; |
|
871 } |
|
872 } |
|
873 } while(--maxLevel>=minLevel); |
|
874 } |
|
875 |
|
876 /* API functions for logical<->visual mapping ------------------------------- */ |
|
877 |
|
878 U_CAPI int32_t U_EXPORT2 |
|
879 ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { |
|
880 int32_t visualIndex=UBIDI_MAP_NOWHERE; |
|
881 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); |
|
882 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); |
|
883 RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); |
|
884 |
|
885 /* we can do the trivial cases without the runs array */ |
|
886 switch(pBiDi->direction) { |
|
887 case UBIDI_LTR: |
|
888 visualIndex=logicalIndex; |
|
889 break; |
|
890 case UBIDI_RTL: |
|
891 visualIndex=pBiDi->length-logicalIndex-1; |
|
892 break; |
|
893 default: |
|
894 if(!ubidi_getRuns(pBiDi, pErrorCode)) { |
|
895 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
896 return -1; |
|
897 } else { |
|
898 Run *runs=pBiDi->runs; |
|
899 int32_t i, visualStart=0, offset, length; |
|
900 |
|
901 /* linear search for the run, search on the visual runs */ |
|
902 for(i=0; i<pBiDi->runCount; ++i) { |
|
903 length=runs[i].visualLimit-visualStart; |
|
904 offset=logicalIndex-GET_INDEX(runs[i].logicalStart); |
|
905 if(offset>=0 && offset<length) { |
|
906 if(IS_EVEN_RUN(runs[i].logicalStart)) { |
|
907 /* LTR */ |
|
908 visualIndex=visualStart+offset; |
|
909 } else { |
|
910 /* RTL */ |
|
911 visualIndex=visualStart+length-offset-1; |
|
912 } |
|
913 break; /* exit for loop */ |
|
914 } |
|
915 visualStart+=length; |
|
916 } |
|
917 if(i>=pBiDi->runCount) { |
|
918 return UBIDI_MAP_NOWHERE; |
|
919 } |
|
920 } |
|
921 } |
|
922 |
|
923 if(pBiDi->insertPoints.size>0) { |
|
924 /* add the number of added marks until the calculated visual index */ |
|
925 Run *runs=pBiDi->runs; |
|
926 int32_t i, length, insertRemove; |
|
927 int32_t visualStart=0, markFound=0; |
|
928 for(i=0; ; i++, visualStart+=length) { |
|
929 length=runs[i].visualLimit-visualStart; |
|
930 insertRemove=runs[i].insertRemove; |
|
931 if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) { |
|
932 markFound++; |
|
933 } |
|
934 /* is it the run containing the visual index? */ |
|
935 if(visualIndex<runs[i].visualLimit) { |
|
936 return visualIndex+markFound; |
|
937 } |
|
938 if(insertRemove & (LRM_AFTER|RLM_AFTER)) { |
|
939 markFound++; |
|
940 } |
|
941 } |
|
942 } |
|
943 else if(pBiDi->controlCount>0) { |
|
944 /* subtract the number of controls until the calculated visual index */ |
|
945 Run *runs=pBiDi->runs; |
|
946 int32_t i, j, start, limit, length, insertRemove; |
|
947 int32_t visualStart=0, controlFound=0; |
|
948 UChar uchar=pBiDi->text[logicalIndex]; |
|
949 /* is the logical index pointing to a control ? */ |
|
950 if(IS_BIDI_CONTROL_CHAR(uchar)) { |
|
951 return UBIDI_MAP_NOWHERE; |
|
952 } |
|
953 /* loop on runs */ |
|
954 for(i=0; ; i++, visualStart+=length) { |
|
955 length=runs[i].visualLimit-visualStart; |
|
956 insertRemove=runs[i].insertRemove; |
|
957 /* calculated visual index is beyond this run? */ |
|
958 if(visualIndex>=runs[i].visualLimit) { |
|
959 controlFound-=insertRemove; |
|
960 continue; |
|
961 } |
|
962 /* calculated visual index must be within current run */ |
|
963 if(insertRemove==0) { |
|
964 return visualIndex-controlFound; |
|
965 } |
|
966 if(IS_EVEN_RUN(runs[i].logicalStart)) { |
|
967 /* LTR: check from run start to logical index */ |
|
968 start=runs[i].logicalStart; |
|
969 limit=logicalIndex; |
|
970 } else { |
|
971 /* RTL: check from logical index to run end */ |
|
972 start=logicalIndex+1; |
|
973 limit=GET_INDEX(runs[i].logicalStart)+length; |
|
974 } |
|
975 for(j=start; j<limit; j++) { |
|
976 uchar=pBiDi->text[j]; |
|
977 if(IS_BIDI_CONTROL_CHAR(uchar)) { |
|
978 controlFound++; |
|
979 } |
|
980 } |
|
981 return visualIndex-controlFound; |
|
982 } |
|
983 } |
|
984 |
|
985 return visualIndex; |
|
986 } |
|
987 |
|
988 U_CAPI int32_t U_EXPORT2 |
|
989 ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { |
|
990 Run *runs; |
|
991 int32_t i, runCount, start; |
|
992 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); |
|
993 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); |
|
994 RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); |
|
995 /* we can do the trivial cases without the runs array */ |
|
996 if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { |
|
997 if(pBiDi->direction==UBIDI_LTR) { |
|
998 return visualIndex; |
|
999 } |
|
1000 else if(pBiDi->direction==UBIDI_RTL) { |
|
1001 return pBiDi->length-visualIndex-1; |
|
1002 } |
|
1003 } |
|
1004 if(!ubidi_getRuns(pBiDi, pErrorCode)) { |
|
1005 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
1006 return -1; |
|
1007 } |
|
1008 |
|
1009 runs=pBiDi->runs; |
|
1010 runCount=pBiDi->runCount; |
|
1011 if(pBiDi->insertPoints.size>0) { |
|
1012 /* handle inserted LRM/RLM */ |
|
1013 int32_t markFound=0, insertRemove; |
|
1014 int32_t visualStart=0, length; |
|
1015 runs=pBiDi->runs; |
|
1016 /* subtract number of marks until visual index */ |
|
1017 for(i=0; ; i++, visualStart+=length) { |
|
1018 length=runs[i].visualLimit-visualStart; |
|
1019 insertRemove=runs[i].insertRemove; |
|
1020 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { |
|
1021 if(visualIndex<=(visualStart+markFound)) { |
|
1022 return UBIDI_MAP_NOWHERE; |
|
1023 } |
|
1024 markFound++; |
|
1025 } |
|
1026 /* is adjusted visual index within this run? */ |
|
1027 if(visualIndex<(runs[i].visualLimit+markFound)) { |
|
1028 visualIndex-=markFound; |
|
1029 break; |
|
1030 } |
|
1031 if(insertRemove&(LRM_AFTER|RLM_AFTER)) { |
|
1032 if(visualIndex==(visualStart+length+markFound)) { |
|
1033 return UBIDI_MAP_NOWHERE; |
|
1034 } |
|
1035 markFound++; |
|
1036 } |
|
1037 } |
|
1038 } |
|
1039 else if(pBiDi->controlCount>0) { |
|
1040 /* handle removed BiDi control characters */ |
|
1041 int32_t controlFound=0, insertRemove, length; |
|
1042 int32_t logicalStart, logicalEnd, visualStart=0, j, k; |
|
1043 UChar uchar; |
|
1044 UBool evenRun; |
|
1045 /* add number of controls until visual index */ |
|
1046 for(i=0; ; i++, visualStart+=length) { |
|
1047 length=runs[i].visualLimit-visualStart; |
|
1048 insertRemove=runs[i].insertRemove; |
|
1049 /* is adjusted visual index beyond current run? */ |
|
1050 if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) { |
|
1051 controlFound-=insertRemove; |
|
1052 continue; |
|
1053 } |
|
1054 /* adjusted visual index is within current run */ |
|
1055 if(insertRemove==0) { |
|
1056 visualIndex+=controlFound; |
|
1057 break; |
|
1058 } |
|
1059 /* count non-control chars until visualIndex */ |
|
1060 logicalStart=runs[i].logicalStart; |
|
1061 evenRun=IS_EVEN_RUN(logicalStart); |
|
1062 REMOVE_ODD_BIT(logicalStart); |
|
1063 logicalEnd=logicalStart+length-1; |
|
1064 for(j=0; j<length; j++) { |
|
1065 k= evenRun ? logicalStart+j : logicalEnd-j; |
|
1066 uchar=pBiDi->text[k]; |
|
1067 if(IS_BIDI_CONTROL_CHAR(uchar)) { |
|
1068 controlFound++; |
|
1069 } |
|
1070 if((visualIndex+controlFound)==(visualStart+j)) { |
|
1071 break; |
|
1072 } |
|
1073 } |
|
1074 visualIndex+=controlFound; |
|
1075 break; |
|
1076 } |
|
1077 } |
|
1078 /* handle all cases */ |
|
1079 if(runCount<=10) { |
|
1080 /* linear search for the run */ |
|
1081 for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} |
|
1082 } else { |
|
1083 /* binary search for the run */ |
|
1084 int32_t begin=0, limit=runCount; |
|
1085 |
|
1086 /* the middle if() is guaranteed to find the run, we don't need a loop limit */ |
|
1087 for(;;) { |
|
1088 i=(begin+limit)/2; |
|
1089 if(visualIndex>=runs[i].visualLimit) { |
|
1090 begin=i+1; |
|
1091 } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { |
|
1092 break; |
|
1093 } else { |
|
1094 limit=i; |
|
1095 } |
|
1096 } |
|
1097 } |
|
1098 |
|
1099 start=runs[i].logicalStart; |
|
1100 if(IS_EVEN_RUN(start)) { |
|
1101 /* LTR */ |
|
1102 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ |
|
1103 if(i>0) { |
|
1104 visualIndex-=runs[i-1].visualLimit; |
|
1105 } |
|
1106 return start+visualIndex; |
|
1107 } else { |
|
1108 /* RTL */ |
|
1109 return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; |
|
1110 } |
|
1111 } |
|
1112 |
|
1113 U_CAPI void U_EXPORT2 |
|
1114 ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { |
|
1115 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
1116 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ |
|
1117 ubidi_countRuns(pBiDi, pErrorCode); |
|
1118 if(U_FAILURE(*pErrorCode)) { |
|
1119 /* no op */ |
|
1120 } else if(indexMap==NULL) { |
|
1121 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
1122 } else { |
|
1123 /* fill a logical-to-visual index map using the runs[] */ |
|
1124 int32_t visualStart, visualLimit, i, j, k; |
|
1125 int32_t logicalStart, logicalLimit; |
|
1126 Run *runs=pBiDi->runs; |
|
1127 if (pBiDi->length<=0) { |
|
1128 return; |
|
1129 } |
|
1130 if (pBiDi->length>pBiDi->resultLength) { |
|
1131 uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); |
|
1132 } |
|
1133 |
|
1134 visualStart=0; |
|
1135 for(j=0; j<pBiDi->runCount; ++j) { |
|
1136 logicalStart=GET_INDEX(runs[j].logicalStart); |
|
1137 visualLimit=runs[j].visualLimit; |
|
1138 if(IS_EVEN_RUN(runs[j].logicalStart)) { |
|
1139 do { /* LTR */ |
|
1140 indexMap[logicalStart++]=visualStart++; |
|
1141 } while(visualStart<visualLimit); |
|
1142 } else { |
|
1143 logicalStart+=visualLimit-visualStart; /* logicalLimit */ |
|
1144 do { /* RTL */ |
|
1145 indexMap[--logicalStart]=visualStart++; |
|
1146 } while(visualStart<visualLimit); |
|
1147 } |
|
1148 /* visualStart==visualLimit; */ |
|
1149 } |
|
1150 |
|
1151 if(pBiDi->insertPoints.size>0) { |
|
1152 int32_t markFound=0, runCount=pBiDi->runCount; |
|
1153 int32_t length, insertRemove; |
|
1154 visualStart=0; |
|
1155 /* add number of marks found until each index */ |
|
1156 for(i=0; i<runCount; i++, visualStart+=length) { |
|
1157 length=runs[i].visualLimit-visualStart; |
|
1158 insertRemove=runs[i].insertRemove; |
|
1159 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { |
|
1160 markFound++; |
|
1161 } |
|
1162 if(markFound>0) { |
|
1163 logicalStart=GET_INDEX(runs[i].logicalStart); |
|
1164 logicalLimit=logicalStart+length; |
|
1165 for(j=logicalStart; j<logicalLimit; j++) { |
|
1166 indexMap[j]+=markFound; |
|
1167 } |
|
1168 } |
|
1169 if(insertRemove&(LRM_AFTER|RLM_AFTER)) { |
|
1170 markFound++; |
|
1171 } |
|
1172 } |
|
1173 } |
|
1174 else if(pBiDi->controlCount>0) { |
|
1175 int32_t controlFound=0, runCount=pBiDi->runCount; |
|
1176 int32_t length, insertRemove; |
|
1177 UBool evenRun; |
|
1178 UChar uchar; |
|
1179 visualStart=0; |
|
1180 /* subtract number of controls found until each index */ |
|
1181 for(i=0; i<runCount; i++, visualStart+=length) { |
|
1182 length=runs[i].visualLimit-visualStart; |
|
1183 insertRemove=runs[i].insertRemove; |
|
1184 /* no control found within previous runs nor within this run */ |
|
1185 if((controlFound-insertRemove)==0) { |
|
1186 continue; |
|
1187 } |
|
1188 logicalStart=runs[i].logicalStart; |
|
1189 evenRun=IS_EVEN_RUN(logicalStart); |
|
1190 REMOVE_ODD_BIT(logicalStart); |
|
1191 logicalLimit=logicalStart+length; |
|
1192 /* if no control within this run */ |
|
1193 if(insertRemove==0) { |
|
1194 for(j=logicalStart; j<logicalLimit; j++) { |
|
1195 indexMap[j]-=controlFound; |
|
1196 } |
|
1197 continue; |
|
1198 } |
|
1199 for(j=0; j<length; j++) { |
|
1200 k= evenRun ? logicalStart+j : logicalLimit-j-1; |
|
1201 uchar=pBiDi->text[k]; |
|
1202 if(IS_BIDI_CONTROL_CHAR(uchar)) { |
|
1203 controlFound++; |
|
1204 indexMap[k]=UBIDI_MAP_NOWHERE; |
|
1205 continue; |
|
1206 } |
|
1207 indexMap[k]-=controlFound; |
|
1208 } |
|
1209 } |
|
1210 } |
|
1211 } |
|
1212 } |
|
1213 |
|
1214 U_CAPI void U_EXPORT2 |
|
1215 ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { |
|
1216 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
1217 if(indexMap==NULL) { |
|
1218 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
1219 return; |
|
1220 } |
|
1221 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ |
|
1222 ubidi_countRuns(pBiDi, pErrorCode); |
|
1223 if(U_SUCCESS(*pErrorCode)) { |
|
1224 /* fill a visual-to-logical index map using the runs[] */ |
|
1225 Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; |
|
1226 int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; |
|
1227 |
|
1228 if (pBiDi->resultLength<=0) { |
|
1229 return; |
|
1230 } |
|
1231 visualStart=0; |
|
1232 for(; runs<runsLimit; ++runs) { |
|
1233 logicalStart=runs->logicalStart; |
|
1234 visualLimit=runs->visualLimit; |
|
1235 if(IS_EVEN_RUN(logicalStart)) { |
|
1236 do { /* LTR */ |
|
1237 *pi++ = logicalStart++; |
|
1238 } while(++visualStart<visualLimit); |
|
1239 } else { |
|
1240 REMOVE_ODD_BIT(logicalStart); |
|
1241 logicalStart+=visualLimit-visualStart; /* logicalLimit */ |
|
1242 do { /* RTL */ |
|
1243 *pi++ = --logicalStart; |
|
1244 } while(++visualStart<visualLimit); |
|
1245 } |
|
1246 /* visualStart==visualLimit; */ |
|
1247 } |
|
1248 |
|
1249 if(pBiDi->insertPoints.size>0) { |
|
1250 int32_t markFound=0, runCount=pBiDi->runCount; |
|
1251 int32_t insertRemove, i, j, k; |
|
1252 runs=pBiDi->runs; |
|
1253 /* count all inserted marks */ |
|
1254 for(i=0; i<runCount; i++) { |
|
1255 insertRemove=runs[i].insertRemove; |
|
1256 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { |
|
1257 markFound++; |
|
1258 } |
|
1259 if(insertRemove&(LRM_AFTER|RLM_AFTER)) { |
|
1260 markFound++; |
|
1261 } |
|
1262 } |
|
1263 /* move back indexes by number of preceding marks */ |
|
1264 k=pBiDi->resultLength; |
|
1265 for(i=runCount-1; i>=0 && markFound>0; i--) { |
|
1266 insertRemove=runs[i].insertRemove; |
|
1267 if(insertRemove&(LRM_AFTER|RLM_AFTER)) { |
|
1268 indexMap[--k]= UBIDI_MAP_NOWHERE; |
|
1269 markFound--; |
|
1270 } |
|
1271 visualStart= i>0 ? runs[i-1].visualLimit : 0; |
|
1272 for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { |
|
1273 indexMap[--k]=indexMap[j]; |
|
1274 } |
|
1275 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { |
|
1276 indexMap[--k]= UBIDI_MAP_NOWHERE; |
|
1277 markFound--; |
|
1278 } |
|
1279 } |
|
1280 } |
|
1281 else if(pBiDi->controlCount>0) { |
|
1282 int32_t runCount=pBiDi->runCount, logicalEnd; |
|
1283 int32_t insertRemove, length, i, j, k, m; |
|
1284 UChar uchar; |
|
1285 UBool evenRun; |
|
1286 runs=pBiDi->runs; |
|
1287 visualStart=0; |
|
1288 /* move forward indexes by number of preceding controls */ |
|
1289 k=0; |
|
1290 for(i=0; i<runCount; i++, visualStart+=length) { |
|
1291 length=runs[i].visualLimit-visualStart; |
|
1292 insertRemove=runs[i].insertRemove; |
|
1293 /* if no control found yet, nothing to do in this run */ |
|
1294 if((insertRemove==0)&&(k==visualStart)) { |
|
1295 k+=length; |
|
1296 continue; |
|
1297 } |
|
1298 /* if no control in this run */ |
|
1299 if(insertRemove==0) { |
|
1300 visualLimit=runs[i].visualLimit; |
|
1301 for(j=visualStart; j<visualLimit; j++) { |
|
1302 indexMap[k++]=indexMap[j]; |
|
1303 } |
|
1304 continue; |
|
1305 } |
|
1306 logicalStart=runs[i].logicalStart; |
|
1307 evenRun=IS_EVEN_RUN(logicalStart); |
|
1308 REMOVE_ODD_BIT(logicalStart); |
|
1309 logicalEnd=logicalStart+length-1; |
|
1310 for(j=0; j<length; j++) { |
|
1311 m= evenRun ? logicalStart+j : logicalEnd-j; |
|
1312 uchar=pBiDi->text[m]; |
|
1313 if(!IS_BIDI_CONTROL_CHAR(uchar)) { |
|
1314 indexMap[k++]=m; |
|
1315 } |
|
1316 } |
|
1317 } |
|
1318 } |
|
1319 } |
|
1320 } |
|
1321 |
|
1322 U_CAPI void U_EXPORT2 |
|
1323 ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { |
|
1324 if(srcMap!=NULL && destMap!=NULL && length>0) { |
|
1325 const int32_t *pi; |
|
1326 int32_t destLength=-1, count=0; |
|
1327 /* find highest value and count positive indexes in srcMap */ |
|
1328 pi=srcMap+length; |
|
1329 while(pi>srcMap) { |
|
1330 if(*--pi>destLength) { |
|
1331 destLength=*pi; |
|
1332 } |
|
1333 if(*pi>=0) { |
|
1334 count++; |
|
1335 } |
|
1336 } |
|
1337 destLength++; /* add 1 for origin 0 */ |
|
1338 if(count<destLength) { |
|
1339 /* we must fill unmatched destMap entries with -1 */ |
|
1340 uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t)); |
|
1341 } |
|
1342 pi=srcMap+length; |
|
1343 while(length>0) { |
|
1344 if(*--pi>=0) { |
|
1345 destMap[*pi]=--length; |
|
1346 } else { |
|
1347 --length; |
|
1348 } |
|
1349 } |
|
1350 } |
|
1351 } |