|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1999-2013, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * file name: ubidi.c |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 1999jul27 |
|
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche |
|
15 * |
|
16 */ |
|
17 |
|
18 #include "cmemory.h" |
|
19 #include "unicode/utypes.h" |
|
20 #include "unicode/ustring.h" |
|
21 #include "unicode/uchar.h" |
|
22 #include "unicode/ubidi.h" |
|
23 #include "unicode/utf16.h" |
|
24 #include "ubidi_props.h" |
|
25 #include "ubidiimp.h" |
|
26 #include "uassert.h" |
|
27 |
|
28 /* |
|
29 * General implementation notes: |
|
30 * |
|
31 * Throughout the implementation, there are comments like (W2) that refer to |
|
32 * rules of the BiDi algorithm in its version 5, in this example to the second |
|
33 * rule of the resolution of weak types. |
|
34 * |
|
35 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) |
|
36 * character according to UTF-16, the second UChar gets the directional property of |
|
37 * the entire character assigned, while the first one gets a BN, a boundary |
|
38 * neutral, type, which is ignored by most of the algorithm according to |
|
39 * rule (X9) and the implementation suggestions of the BiDi algorithm. |
|
40 * |
|
41 * Later, adjustWSLevels() will set the level for each BN to that of the |
|
42 * following character (UChar), which results in surrogate pairs getting the |
|
43 * same level on each of their surrogates. |
|
44 * |
|
45 * In a UTF-8 implementation, the same thing could be done: the last byte of |
|
46 * a multi-byte sequence would get the "real" property, while all previous |
|
47 * bytes of that sequence would get BN. |
|
48 * |
|
49 * It is not possible to assign all those parts of a character the same real |
|
50 * property because this would fail in the resolution of weak types with rules |
|
51 * that look at immediately surrounding types. |
|
52 * |
|
53 * As a related topic, this implementation does not remove Boundary Neutral |
|
54 * types from the input, but ignores them wherever this is relevant. |
|
55 * For example, the loop for the resolution of the weak types reads |
|
56 * types until it finds a non-BN. |
|
57 * Also, explicit embedding codes are neither changed into BN nor removed. |
|
58 * They are only treated the same way real BNs are. |
|
59 * As stated before, adjustWSLevels() takes care of them at the end. |
|
60 * For the purpose of conformance, the levels of all these codes |
|
61 * do not matter. |
|
62 * |
|
63 * Note that this implementation never modifies the dirProps |
|
64 * after the initial setup, except for FSI which is changed to either |
|
65 * LRI or RLI in getDirProps(), and paired brackets which may be changed |
|
66 * to L or R according to N0. |
|
67 * |
|
68 * |
|
69 * In this implementation, the resolution of weak types (Wn), |
|
70 * neutrals (Nn), and the assignment of the resolved level (In) |
|
71 * are all done in one single loop, in resolveImplicitLevels(). |
|
72 * Changes of dirProp values are done on the fly, without writing |
|
73 * them back to the dirProps array. |
|
74 * |
|
75 * |
|
76 * This implementation contains code that allows to bypass steps of the |
|
77 * algorithm that are not needed on the specific paragraph |
|
78 * in order to speed up the most common cases considerably, |
|
79 * like text that is entirely LTR, or RTL text without numbers. |
|
80 * |
|
81 * Most of this is done by setting a bit for each directional property |
|
82 * in a flags variable and later checking for whether there are |
|
83 * any LTR characters or any RTL characters, or both, whether |
|
84 * there are any explicit embedding codes, etc. |
|
85 * |
|
86 * If the (Xn) steps are performed, then the flags are re-evaluated, |
|
87 * because they will then not contain the embedding codes any more |
|
88 * and will be adjusted for override codes, so that subsequently |
|
89 * more bypassing may be possible than what the initial flags suggested. |
|
90 * |
|
91 * If the text is not mixed-directional, then the |
|
92 * algorithm steps for the weak type resolution are not performed, |
|
93 * and all levels are set to the paragraph level. |
|
94 * |
|
95 * If there are no explicit embedding codes, then the (Xn) steps |
|
96 * are not performed. |
|
97 * |
|
98 * If embedding levels are supplied as a parameter, then all |
|
99 * explicit embedding codes are ignored, and the (Xn) steps |
|
100 * are not performed. |
|
101 * |
|
102 * White Space types could get the level of the run they belong to, |
|
103 * and are checked with a test of (flags&MASK_EMBEDDING) to |
|
104 * consider if the paragraph direction should be considered in |
|
105 * the flags variable. |
|
106 * |
|
107 * If there are no White Space types in the paragraph, then |
|
108 * (L1) is not necessary in adjustWSLevels(). |
|
109 */ |
|
110 |
|
111 /* to avoid some conditional statements, use tiny constant arrays */ |
|
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; |
|
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; |
|
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; |
|
115 |
|
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] |
|
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1] |
|
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1] |
|
119 |
|
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) |
|
121 |
|
122 /* UBiDi object management -------------------------------------------------- */ |
|
123 |
|
124 U_CAPI UBiDi * U_EXPORT2 |
|
125 ubidi_open(void) |
|
126 { |
|
127 UErrorCode errorCode=U_ZERO_ERROR; |
|
128 return ubidi_openSized(0, 0, &errorCode); |
|
129 } |
|
130 |
|
131 U_CAPI UBiDi * U_EXPORT2 |
|
132 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { |
|
133 UBiDi *pBiDi; |
|
134 |
|
135 /* check the argument values */ |
|
136 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
|
137 return NULL; |
|
138 } else if(maxLength<0 || maxRunCount<0) { |
|
139 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
140 return NULL; /* invalid arguments */ |
|
141 } |
|
142 |
|
143 /* allocate memory for the object */ |
|
144 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); |
|
145 if(pBiDi==NULL) { |
|
146 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
147 return NULL; |
|
148 } |
|
149 |
|
150 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ |
|
151 uprv_memset(pBiDi, 0, sizeof(UBiDi)); |
|
152 |
|
153 /* get BiDi properties */ |
|
154 pBiDi->bdp=ubidi_getSingleton(); |
|
155 |
|
156 /* allocate memory for arrays as requested */ |
|
157 if(maxLength>0) { |
|
158 if( !getInitialDirPropsMemory(pBiDi, maxLength) || |
|
159 !getInitialLevelsMemory(pBiDi, maxLength) |
|
160 ) { |
|
161 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
162 } |
|
163 } else { |
|
164 pBiDi->mayAllocateText=TRUE; |
|
165 } |
|
166 |
|
167 if(maxRunCount>0) { |
|
168 if(maxRunCount==1) { |
|
169 /* use simpleRuns[] */ |
|
170 pBiDi->runsSize=sizeof(Run); |
|
171 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { |
|
172 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
173 } |
|
174 } else { |
|
175 pBiDi->mayAllocateRuns=TRUE; |
|
176 } |
|
177 |
|
178 if(U_SUCCESS(*pErrorCode)) { |
|
179 return pBiDi; |
|
180 } else { |
|
181 ubidi_close(pBiDi); |
|
182 return NULL; |
|
183 } |
|
184 } |
|
185 |
|
186 /* |
|
187 * We are allowed to allocate memory if memory==NULL or |
|
188 * mayAllocate==TRUE for each array that we need. |
|
189 * We also try to grow memory as needed if we |
|
190 * allocate it. |
|
191 * |
|
192 * Assume sizeNeeded>0. |
|
193 * If *pMemory!=NULL, then assume *pSize>0. |
|
194 * |
|
195 * ### this realloc() may unnecessarily copy the old data, |
|
196 * which we know we don't need any more; |
|
197 * is this the best way to do this?? |
|
198 */ |
|
199 U_CFUNC UBool |
|
200 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { |
|
201 void **pMemory = (void **)bidiMem; |
|
202 /* check for existing memory */ |
|
203 if(*pMemory==NULL) { |
|
204 /* we need to allocate memory */ |
|
205 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { |
|
206 *pSize=sizeNeeded; |
|
207 return TRUE; |
|
208 } else { |
|
209 return FALSE; |
|
210 } |
|
211 } else { |
|
212 if(sizeNeeded<=*pSize) { |
|
213 /* there is already enough memory */ |
|
214 return TRUE; |
|
215 } |
|
216 else if(!mayAllocate) { |
|
217 /* not enough memory, and we must not allocate */ |
|
218 return FALSE; |
|
219 } else { |
|
220 /* we try to grow */ |
|
221 void *memory; |
|
222 /* in most cases, we do not need the copy-old-data part of |
|
223 * realloc, but it is needed when adding runs using getRunsMemory() |
|
224 * in setParaRunsOnly() |
|
225 */ |
|
226 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { |
|
227 *pMemory=memory; |
|
228 *pSize=sizeNeeded; |
|
229 return TRUE; |
|
230 } else { |
|
231 /* we failed to grow */ |
|
232 return FALSE; |
|
233 } |
|
234 } |
|
235 } |
|
236 } |
|
237 |
|
238 U_CAPI void U_EXPORT2 |
|
239 ubidi_close(UBiDi *pBiDi) { |
|
240 if(pBiDi!=NULL) { |
|
241 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ |
|
242 if(pBiDi->dirPropsMemory!=NULL) { |
|
243 uprv_free(pBiDi->dirPropsMemory); |
|
244 } |
|
245 if(pBiDi->levelsMemory!=NULL) { |
|
246 uprv_free(pBiDi->levelsMemory); |
|
247 } |
|
248 if(pBiDi->openingsMemory!=NULL) { |
|
249 uprv_free(pBiDi->openingsMemory); |
|
250 } |
|
251 if(pBiDi->parasMemory!=NULL) { |
|
252 uprv_free(pBiDi->parasMemory); |
|
253 } |
|
254 if(pBiDi->runsMemory!=NULL) { |
|
255 uprv_free(pBiDi->runsMemory); |
|
256 } |
|
257 if(pBiDi->isolatesMemory!=NULL) { |
|
258 uprv_free(pBiDi->isolatesMemory); |
|
259 } |
|
260 if(pBiDi->insertPoints.points!=NULL) { |
|
261 uprv_free(pBiDi->insertPoints.points); |
|
262 } |
|
263 |
|
264 uprv_free(pBiDi); |
|
265 } |
|
266 } |
|
267 |
|
268 /* set to approximate "inverse BiDi" ---------------------------------------- */ |
|
269 |
|
270 U_CAPI void U_EXPORT2 |
|
271 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { |
|
272 if(pBiDi!=NULL) { |
|
273 pBiDi->isInverse=isInverse; |
|
274 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L |
|
275 : UBIDI_REORDER_DEFAULT; |
|
276 } |
|
277 } |
|
278 |
|
279 U_CAPI UBool U_EXPORT2 |
|
280 ubidi_isInverse(UBiDi *pBiDi) { |
|
281 if(pBiDi!=NULL) { |
|
282 return pBiDi->isInverse; |
|
283 } else { |
|
284 return FALSE; |
|
285 } |
|
286 } |
|
287 |
|
288 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of |
|
289 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre |
|
290 * concept of RUNS_ONLY which is a double operation. |
|
291 * It could be advantageous to divide this into 3 concepts: |
|
292 * a) Operation: direct / inverse / RUNS_ONLY |
|
293 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R |
|
294 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL |
|
295 * This would allow combinations not possible today like RUNS_ONLY with |
|
296 * NUMBERS_SPECIAL. |
|
297 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and |
|
298 * REMOVE_CONTROLS for the inverse step. |
|
299 * Not all combinations would be supported, and probably not all do make sense. |
|
300 * This would need to document which ones are supported and what are the |
|
301 * fallbacks for unsupported combinations. |
|
302 */ |
|
303 U_CAPI void U_EXPORT2 |
|
304 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { |
|
305 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) |
|
306 && (reorderingMode < UBIDI_REORDER_COUNT)) { |
|
307 pBiDi->reorderingMode = reorderingMode; |
|
308 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); |
|
309 } |
|
310 } |
|
311 |
|
312 U_CAPI UBiDiReorderingMode U_EXPORT2 |
|
313 ubidi_getReorderingMode(UBiDi *pBiDi) { |
|
314 if (pBiDi!=NULL) { |
|
315 return pBiDi->reorderingMode; |
|
316 } else { |
|
317 return UBIDI_REORDER_DEFAULT; |
|
318 } |
|
319 } |
|
320 |
|
321 U_CAPI void U_EXPORT2 |
|
322 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { |
|
323 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { |
|
324 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; |
|
325 } |
|
326 if (pBiDi!=NULL) { |
|
327 pBiDi->reorderingOptions=reorderingOptions; |
|
328 } |
|
329 } |
|
330 |
|
331 U_CAPI uint32_t U_EXPORT2 |
|
332 ubidi_getReorderingOptions(UBiDi *pBiDi) { |
|
333 if (pBiDi!=NULL) { |
|
334 return pBiDi->reorderingOptions; |
|
335 } else { |
|
336 return 0; |
|
337 } |
|
338 } |
|
339 |
|
340 U_CAPI UBiDiDirection U_EXPORT2 |
|
341 ubidi_getBaseDirection(const UChar *text, |
|
342 int32_t length){ |
|
343 |
|
344 int32_t i; |
|
345 UChar32 uchar; |
|
346 UCharDirection dir; |
|
347 |
|
348 if( text==NULL || length<-1 ){ |
|
349 return UBIDI_NEUTRAL; |
|
350 } |
|
351 |
|
352 if(length==-1) { |
|
353 length=u_strlen(text); |
|
354 } |
|
355 |
|
356 for( i = 0 ; i < length; ) { |
|
357 /* i is incremented by U16_NEXT */ |
|
358 U16_NEXT(text, i, length, uchar); |
|
359 dir = u_charDirection(uchar); |
|
360 if( dir == U_LEFT_TO_RIGHT ) |
|
361 return UBIDI_LTR; |
|
362 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) |
|
363 return UBIDI_RTL; |
|
364 } |
|
365 return UBIDI_NEUTRAL; |
|
366 } |
|
367 |
|
368 /* perform (P2)..(P3) ------------------------------------------------------- */ |
|
369 |
|
370 /** |
|
371 * Returns the directionality of the first strong character |
|
372 * after the last B in prologue, if any. |
|
373 * Requires prologue!=null. |
|
374 */ |
|
375 static DirProp |
|
376 firstL_R_AL(UBiDi *pBiDi) { |
|
377 const UChar *text=pBiDi->prologue; |
|
378 int32_t length=pBiDi->proLength; |
|
379 int32_t i; |
|
380 UChar32 uchar; |
|
381 DirProp dirProp, result=ON; |
|
382 for(i=0; i<length; ) { |
|
383 /* i is incremented by U16_NEXT */ |
|
384 U16_NEXT(text, i, length, uchar); |
|
385 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); |
|
386 if(result==ON) { |
|
387 if(dirProp==L || dirProp==R || dirProp==AL) { |
|
388 result=dirProp; |
|
389 } |
|
390 } else { |
|
391 if(dirProp==B) { |
|
392 result=ON; |
|
393 } |
|
394 } |
|
395 } |
|
396 return result; |
|
397 } |
|
398 |
|
399 /* |
|
400 * Check that there are enough entries in the array pointed to by pBiDi->paras |
|
401 */ |
|
402 static UBool |
|
403 checkParaCount(UBiDi *pBiDi) { |
|
404 int32_t count=pBiDi->paraCount; |
|
405 if(pBiDi->paras==pBiDi->simpleParas) { |
|
406 if(count<=SIMPLE_PARAS_SIZE) |
|
407 return TRUE; |
|
408 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2)) |
|
409 return FALSE; |
|
410 pBiDi->paras=pBiDi->parasMemory; |
|
411 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para)); |
|
412 return TRUE; |
|
413 } |
|
414 if(!getInitialParasMemory(pBiDi, count * 2)) |
|
415 return FALSE; |
|
416 pBiDi->paras=pBiDi->parasMemory; |
|
417 return TRUE; |
|
418 } |
|
419 |
|
420 /* |
|
421 * Get the directional properties for the text, calculate the flags bit-set, and |
|
422 * determine the paragraph level if necessary (in pBiDi->paras[i].level). |
|
423 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. |
|
424 */ |
|
425 static UBool |
|
426 getDirProps(UBiDi *pBiDi) { |
|
427 const UChar *text=pBiDi->text; |
|
428 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ |
|
429 |
|
430 int32_t i=0, originalLength=pBiDi->originalLength; |
|
431 Flags flags=0; /* collect all directionalities in the text */ |
|
432 UChar32 uchar; |
|
433 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ |
|
434 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); |
|
435 /* for inverse BiDi, the default para level is set to RTL if there is a |
|
436 strong R or AL character at either end of the text */ |
|
437 UBool isDefaultLevelInverse=isDefaultLevel && (UBool) |
|
438 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || |
|
439 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); |
|
440 int32_t lastArabicPos=-1; |
|
441 int32_t controlCount=0; |
|
442 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & |
|
443 UBIDI_OPTION_REMOVE_CONTROLS); |
|
444 |
|
445 typedef enum { |
|
446 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ |
|
447 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ |
|
448 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ |
|
449 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ |
|
450 } State; |
|
451 State state; |
|
452 DirProp lastStrong=ON; /* for default level & inverse BiDi */ |
|
453 /* The following stacks are used to manage isolate sequences. Those |
|
454 sequences may be nested, but obviously never more deeply than the |
|
455 maximum explicit embedding level. |
|
456 lastStack is the index of the last used entry in the stack. A value of -1 |
|
457 means that there is no open isolate sequence. |
|
458 lastStack is reset to -1 on paragraph boundaries. */ |
|
459 /* The following stack contains the position of the initiator of |
|
460 each open isolate sequence */ |
|
461 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; |
|
462 /* The following stack contains the last known state before |
|
463 encountering the initiator of an isolate sequence */ |
|
464 int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; |
|
465 int32_t stackLast=-1; |
|
466 |
|
467 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) |
|
468 pBiDi->length=0; |
|
469 defaultParaLevel=pBiDi->paraLevel&1; |
|
470 if(isDefaultLevel) { |
|
471 pBiDi->paras[0].level=defaultParaLevel; |
|
472 lastStrong=defaultParaLevel; |
|
473 if(pBiDi->proLength>0 && /* there is a prologue */ |
|
474 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ |
|
475 if(dirProp==L) |
|
476 pBiDi->paras[0].level=0; /* set the default para level */ |
|
477 else |
|
478 pBiDi->paras[0].level=1; /* set the default para level */ |
|
479 state=NOT_SEEKING_STRONG; |
|
480 } else { |
|
481 state=SEEKING_STRONG_FOR_PARA; |
|
482 } |
|
483 } else { |
|
484 pBiDi->paras[0].level=pBiDi->paraLevel; |
|
485 state=NOT_SEEKING_STRONG; |
|
486 } |
|
487 /* count paragraphs and determine the paragraph level (P2..P3) */ |
|
488 /* |
|
489 * see comment in ubidi.h: |
|
490 * the UBIDI_DEFAULT_XXX values are designed so that |
|
491 * their bit 0 alone yields the intended default |
|
492 */ |
|
493 for( /* i=0 above */ ; i<originalLength; ) { |
|
494 /* i is incremented by U16_NEXT */ |
|
495 U16_NEXT(text, i, originalLength, uchar); |
|
496 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); |
|
497 dirProps[i-1]=dirProp; |
|
498 if(uchar>0xffff) { /* set the lead surrogate's property to BN */ |
|
499 flags|=DIRPROP_FLAG(BN); |
|
500 dirProps[i-2]=BN; |
|
501 } |
|
502 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) |
|
503 controlCount++; |
|
504 if(dirProp==L) { |
|
505 if(state==SEEKING_STRONG_FOR_PARA) { |
|
506 pBiDi->paras[pBiDi->paraCount-1].level=0; |
|
507 state=NOT_SEEKING_STRONG; |
|
508 } |
|
509 else if(state==SEEKING_STRONG_FOR_FSI) { |
|
510 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { |
|
511 dirProps[isolateStartStack[stackLast]]=LRI; |
|
512 flags|=DIRPROP_FLAG(LRI); |
|
513 } |
|
514 state=LOOKING_FOR_PDI; |
|
515 } |
|
516 lastStrong=L; |
|
517 continue; |
|
518 } |
|
519 if(dirProp==R || dirProp==AL) { |
|
520 if(state==SEEKING_STRONG_FOR_PARA) { |
|
521 pBiDi->paras[pBiDi->paraCount-1].level=1; |
|
522 state=NOT_SEEKING_STRONG; |
|
523 } |
|
524 else if(state==SEEKING_STRONG_FOR_FSI) { |
|
525 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { |
|
526 dirProps[isolateStartStack[stackLast]]=RLI; |
|
527 flags|=DIRPROP_FLAG(RLI); |
|
528 } |
|
529 state=LOOKING_FOR_PDI; |
|
530 } |
|
531 lastStrong=R; |
|
532 if(dirProp==AL) |
|
533 lastArabicPos=i-1; |
|
534 continue; |
|
535 } |
|
536 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ |
|
537 stackLast++; |
|
538 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { |
|
539 isolateStartStack[stackLast]=i-1; |
|
540 previousStateStack[stackLast]=state; |
|
541 } |
|
542 if(dirProp==FSI) |
|
543 state=SEEKING_STRONG_FOR_FSI; |
|
544 else |
|
545 state=LOOKING_FOR_PDI; |
|
546 continue; |
|
547 } |
|
548 if(dirProp==PDI) { |
|
549 if(state==SEEKING_STRONG_FOR_FSI) { |
|
550 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { |
|
551 dirProps[isolateStartStack[stackLast]]=LRI; |
|
552 flags|=DIRPROP_FLAG(LRI); |
|
553 } |
|
554 } |
|
555 if(stackLast>=0) { |
|
556 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) |
|
557 state=previousStateStack[stackLast]; |
|
558 stackLast--; |
|
559 } |
|
560 continue; |
|
561 } |
|
562 if(dirProp==B) { |
|
563 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */ |
|
564 continue; |
|
565 pBiDi->paras[pBiDi->paraCount-1].limit=i; |
|
566 if(isDefaultLevelInverse && lastStrong==R) |
|
567 pBiDi->paras[pBiDi->paraCount-1].level=1; |
|
568 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { |
|
569 /* When streaming, we only process whole paragraphs |
|
570 thus some updates are only done on paragraph boundaries */ |
|
571 pBiDi->length=i; /* i is index to next character */ |
|
572 pBiDi->controlCount=controlCount; |
|
573 } |
|
574 if(i<originalLength) { /* B not last char in text */ |
|
575 pBiDi->paraCount++; |
|
576 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ |
|
577 return FALSE; |
|
578 if(isDefaultLevel) { |
|
579 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; |
|
580 state=SEEKING_STRONG_FOR_PARA; |
|
581 lastStrong=defaultParaLevel; |
|
582 } else { |
|
583 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; |
|
584 state=NOT_SEEKING_STRONG; |
|
585 } |
|
586 stackLast=-1; |
|
587 } |
|
588 continue; |
|
589 } |
|
590 } |
|
591 /* Ignore still open isolate sequences with overflow */ |
|
592 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { |
|
593 stackLast=UBIDI_MAX_EXPLICIT_LEVEL; |
|
594 if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI) |
|
595 state=LOOKING_FOR_PDI; |
|
596 } |
|
597 /* Resolve direction of still unresolved open FSI sequences */ |
|
598 while(stackLast>=0) { |
|
599 if(state==SEEKING_STRONG_FOR_FSI) { |
|
600 dirProps[isolateStartStack[stackLast]]=LRI; |
|
601 flags|=DIRPROP_FLAG(LRI); |
|
602 } |
|
603 state=previousStateStack[stackLast]; |
|
604 stackLast--; |
|
605 } |
|
606 /* When streaming, ignore text after the last paragraph separator */ |
|
607 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { |
|
608 if(pBiDi->length<originalLength) |
|
609 pBiDi->paraCount--; |
|
610 } else { |
|
611 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; |
|
612 pBiDi->controlCount=controlCount; |
|
613 } |
|
614 /* For inverse bidi, default para direction is RTL if there is |
|
615 a strong R or AL at either end of the paragraph */ |
|
616 if(isDefaultLevelInverse && lastStrong==R) { |
|
617 pBiDi->paras[pBiDi->paraCount-1].level=1; |
|
618 } |
|
619 if(isDefaultLevel) { |
|
620 pBiDi->paraLevel=pBiDi->paras[0].level; |
|
621 } |
|
622 /* The following is needed to resolve the text direction for default level |
|
623 paragraphs containing no strong character */ |
|
624 for(i=0; i<pBiDi->paraCount; i++) |
|
625 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); |
|
626 |
|
627 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { |
|
628 flags|=DIRPROP_FLAG(L); |
|
629 } |
|
630 pBiDi->flags=flags; |
|
631 pBiDi->lastArabicPos=lastArabicPos; |
|
632 return TRUE; |
|
633 } |
|
634 |
|
635 /* determine the paragraph level at position index */ |
|
636 U_CFUNC UBiDiLevel |
|
637 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { |
|
638 int32_t i; |
|
639 for(i=0; i<pBiDi->paraCount; i++) |
|
640 if(pindex<pBiDi->paras[i].limit) |
|
641 break; |
|
642 if(i>=pBiDi->paraCount) |
|
643 i=pBiDi->paraCount-1; |
|
644 return (UBiDiLevel)(pBiDi->paras[i].level); |
|
645 } |
|
646 |
|
647 /* Functions for handling paired brackets ----------------------------------- */ |
|
648 |
|
649 /* In the isoRuns array, the first entry is used for text outside of any |
|
650 isolate sequence. Higher entries are used for each more deeply nested |
|
651 isolate sequence. isoRunLast is the index of the last used entry. The |
|
652 openings array is used to note the data of opening brackets not yet |
|
653 matched by a closing bracket, or matched but still susceptible to change |
|
654 level. |
|
655 Each isoRun entry contains the index of the first and |
|
656 one-after-last openings entries for pending opening brackets it |
|
657 contains. The next openings entry to use is the one-after-last of the |
|
658 most deeply nested isoRun entry. |
|
659 isoRun entries also contain their current embedding level and the last |
|
660 encountered strong character, since these will be needed to resolve |
|
661 the level of paired brackets. */ |
|
662 |
|
663 static void |
|
664 bracketInit(UBiDi *pBiDi, BracketData *bd) { |
|
665 bd->pBiDi=pBiDi; |
|
666 bd->isoRunLast=0; |
|
667 bd->isoRuns[0].start=0; |
|
668 bd->isoRuns[0].limit=0; |
|
669 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); |
|
670 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1; |
|
671 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; |
|
672 if(pBiDi->openingsMemory) { |
|
673 bd->openings=pBiDi->openingsMemory; |
|
674 bd->openingsSize=pBiDi->openingsSize; |
|
675 } else { |
|
676 bd->openings=bd->simpleOpenings; |
|
677 bd->openingsSize=SIMPLE_OPENINGS_SIZE; |
|
678 } |
|
679 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || |
|
680 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; |
|
681 } |
|
682 |
|
683 /* paragraph boundary */ |
|
684 static void |
|
685 bracketProcessB(BracketData *bd, UBiDiLevel level) { |
|
686 bd->isoRunLast=0; |
|
687 bd->isoRuns[0].limit=0; |
|
688 bd->isoRuns[0].level=level; |
|
689 bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1; |
|
690 bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; |
|
691 } |
|
692 |
|
693 /* LRE, LRO, RLE, RLO, PDF */ |
|
694 static void |
|
695 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, |
|
696 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { |
|
697 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; |
|
698 DirProp *dirProps=bd->pBiDi->dirProps; |
|
699 if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ |
|
700 return; |
|
701 if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)> |
|
702 (contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */ |
|
703 contextLevel=embeddingLevel; |
|
704 pLastIsoRun->limit=pLastIsoRun->start; |
|
705 pLastIsoRun->level=embeddingLevel; |
|
706 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1; |
|
707 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos; |
|
708 } |
|
709 |
|
710 /* LRI or RLI */ |
|
711 static void |
|
712 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { |
|
713 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; |
|
714 int16_t lastLimit; |
|
715 lastLimit=pLastIsoRun->limit; |
|
716 bd->isoRunLast++; |
|
717 pLastIsoRun++; |
|
718 pLastIsoRun->start=pLastIsoRun->limit=lastLimit; |
|
719 pLastIsoRun->level=level; |
|
720 pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1; |
|
721 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0; |
|
722 } |
|
723 |
|
724 /* PDI */ |
|
725 static void |
|
726 bracketProcessPDI(BracketData *bd) { |
|
727 bd->isoRunLast--; |
|
728 } |
|
729 |
|
730 /* newly found opening bracket: create an openings entry */ |
|
731 static UBool /* return TRUE if success */ |
|
732 bracketAddOpening(BracketData *bd, UChar match, int32_t position) { |
|
733 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; |
|
734 Opening *pOpening; |
|
735 if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */ |
|
736 UBiDi *pBiDi=bd->pBiDi; |
|
737 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) |
|
738 return FALSE; |
|
739 if(bd->openings==bd->simpleOpenings) |
|
740 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, |
|
741 SIMPLE_OPENINGS_SIZE * sizeof(Opening)); |
|
742 bd->openings=pBiDi->openingsMemory; /* may have changed */ |
|
743 bd->openingsSize=pBiDi->openingsSize; |
|
744 } |
|
745 pOpening=&bd->openings[pLastIsoRun->limit]; |
|
746 pOpening->position=position; |
|
747 pOpening->match=match; |
|
748 pOpening->contextDir=pLastIsoRun->contextDir; |
|
749 pOpening->contextPos=pLastIsoRun->contextPos; |
|
750 pOpening->flags=0; |
|
751 pLastIsoRun->limit++; |
|
752 return TRUE; |
|
753 } |
|
754 |
|
755 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ |
|
756 static void |
|
757 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { |
|
758 /* This function calls itself recursively */ |
|
759 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; |
|
760 Opening *qOpening; |
|
761 DirProp *dirProps=bd->pBiDi->dirProps; |
|
762 int32_t k, openingPosition, closingPosition; |
|
763 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) { |
|
764 if(qOpening->match>=0) /* not an N0c match */ |
|
765 continue; |
|
766 if(newPropPosition<qOpening->contextPos) |
|
767 break; |
|
768 if(newPropPosition>=qOpening->position) |
|
769 continue; |
|
770 if(newProp==qOpening->contextDir) |
|
771 break; |
|
772 openingPosition=qOpening->position; |
|
773 dirProps[openingPosition]=dirProps[newPropPosition]; |
|
774 closingPosition=-(qOpening->match); |
|
775 dirProps[closingPosition]= newProp; /* can never be AL */ |
|
776 qOpening->match=0; /* prevent further changes */ |
|
777 fixN0c(bd, k, openingPosition, newProp); |
|
778 fixN0c(bd, k, closingPosition, newProp); |
|
779 } |
|
780 } |
|
781 |
|
782 /* handle strong characters, digits and candidates for closing brackets */ |
|
783 static UBool /* return TRUE if success */ |
|
784 bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) { |
|
785 IsoRun *pLastIsoRun; |
|
786 Opening *pOpening, *qOpening; |
|
787 DirProp *dirProps, newProp; |
|
788 UBiDiDirection direction; |
|
789 uint16_t flag; |
|
790 int32_t i, k; |
|
791 UBool stable; |
|
792 UChar c, match; |
|
793 dirProps=bd->pBiDi->dirProps; |
|
794 if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */ |
|
795 pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; |
|
796 /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */ |
|
797 if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL)) |
|
798 dirProp=pLastIsoRun->lastStrong; |
|
799 /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */ |
|
800 if(dirProp==EN) { |
|
801 if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) { |
|
802 dirProp=L; |
|
803 if(!bd->isNumbersSpecial) |
|
804 dirProps[position]=ENL; |
|
805 } |
|
806 else { |
|
807 dirProp=pLastIsoRun->lastStrong; /* may be R or AL */ |
|
808 if(!bd->isNumbersSpecial) |
|
809 dirProps[position]= dirProp==AL ? AN : ENR; |
|
810 } |
|
811 } |
|
812 pLastIsoRun->lastStrong=dirProp; |
|
813 pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp); |
|
814 pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position; |
|
815 if(dirProp==AL || dirProp==AN) |
|
816 dirProp=R; |
|
817 flag=DIRPROP_FLAG(dirProp); |
|
818 /* strong characters found after an unmatched opening bracket |
|
819 must be noted for possibly applying N0b */ |
|
820 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++) |
|
821 bd->openings[i].flags|=flag; |
|
822 return TRUE; |
|
823 } |
|
824 if(dirProp!=ON) |
|
825 return TRUE; |
|
826 /* First see if it is a matching closing bracket. Hopefully, this is more |
|
827 efficient than checking if it is a closing bracket at all */ |
|
828 c=bd->pBiDi->text[position]; |
|
829 pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; |
|
830 for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) { |
|
831 if(bd->openings[i].match!=c) |
|
832 continue; |
|
833 /* We have a match */ |
|
834 pOpening=&bd->openings[i]; |
|
835 direction=pLastIsoRun->level&1; |
|
836 stable=TRUE; /* assume stable until proved otherwise */ |
|
837 |
|
838 /* The stable flag is set when brackets are paired and their |
|
839 level is resolved and cannot be changed by what will be |
|
840 found later in the source string. |
|
841 An unstable match can occur only when applying N0c, where |
|
842 the resolved level depends on the preceding context, and |
|
843 this context may be affected by text occurring later. |
|
844 Example: RTL paragraph containing: abc[(latin) HEBREW] |
|
845 When the closing parenthesis is encountered, it appears |
|
846 that N0c1 must be applied since 'abc' sets an opposite |
|
847 direction context and both parentheses receive level 2. |
|
848 However, when the closing square bracket is processed, |
|
849 N0b applies because of 'HEBREW' being included within the |
|
850 brackets, thus the square brackets are treated like R and |
|
851 receive level 1. However, this changes the preceding |
|
852 context of the opening parenthesis, and it now appears |
|
853 that N0c2 must be applied to the parentheses rather than |
|
854 N0c1. */ |
|
855 |
|
856 if((direction==0 && pOpening->flags&FOUND_L) || |
|
857 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ |
|
858 newProp=direction; |
|
859 } |
|
860 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ |
|
861 if(direction!=pOpening->contextDir) { |
|
862 newProp=pOpening->contextDir; /* N0c1 */ |
|
863 /* it is stable if there is no preceding text or in |
|
864 conditions too complicated and not worth checking */ |
|
865 stable=(i==pLastIsoRun->start); |
|
866 } |
|
867 else |
|
868 newProp=direction; /* N0c2 */ |
|
869 } |
|
870 else { |
|
871 newProp=BN; /* N0d */ |
|
872 } |
|
873 if(newProp!=BN) { |
|
874 dirProps[pOpening->position]=newProp; |
|
875 dirProps[position]=newProp; |
|
876 pLastIsoRun->contextDir=newProp; |
|
877 pLastIsoRun->contextPos=position; |
|
878 } |
|
879 /* Update nested N0c pairs that may be affected */ |
|
880 if(newProp==direction) |
|
881 fixN0c(bd, i, pOpening->position, newProp); |
|
882 if(stable) { |
|
883 pLastIsoRun->limit=i; /* forget any brackets nested within this pair */ |
|
884 /* remove lower located synonyms if any */ |
|
885 while(pLastIsoRun->limit>pLastIsoRun->start && |
|
886 bd->openings[pLastIsoRun->limit-1].position==pOpening->position) |
|
887 pLastIsoRun->limit--; |
|
888 } |
|
889 else { |
|
890 pOpening->match=-position; |
|
891 /* neutralize lower located synonyms if any */ |
|
892 k=i-1; |
|
893 while(k>=pLastIsoRun->start && |
|
894 bd->openings[k].position==pOpening->position) |
|
895 bd->openings[k--].match=0; |
|
896 /* neutralize any unmatched opening between the current pair; |
|
897 this will also neutralize higher located synonyms if any */ |
|
898 for(k=i+1; k<pLastIsoRun->limit; k++) { |
|
899 qOpening=&bd->openings[k]; |
|
900 if(qOpening->position>=position) |
|
901 break; |
|
902 if(qOpening->match>0) |
|
903 qOpening->match=0; |
|
904 } |
|
905 } |
|
906 return TRUE; |
|
907 } |
|
908 /* We get here only if the ON character was not a matching closing bracket */ |
|
909 /* Now see if it is an opening bracket */ |
|
910 match=u_getBidiPairedBracket(c); /* get the matching char */ |
|
911 if(match==c) /* if no matching char */ |
|
912 return TRUE; |
|
913 if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN) |
|
914 return TRUE; /* not an opening bracket */ |
|
915 /* special case: process synonyms |
|
916 create an opening entry for each synonym */ |
|
917 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ |
|
918 if(!bracketAddOpening(bd, 0x3009, position)) |
|
919 return FALSE; |
|
920 } |
|
921 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */ |
|
922 if(!bracketAddOpening(bd, 0x232A, position)) |
|
923 return FALSE; |
|
924 } |
|
925 return bracketAddOpening(bd, match, position); |
|
926 } |
|
927 |
|
928 /* perform (X1)..(X9) ------------------------------------------------------- */ |
|
929 |
|
930 /* determine if the text is mixed-directional or single-directional */ |
|
931 static UBiDiDirection |
|
932 directionFromFlags(UBiDi *pBiDi) { |
|
933 Flags flags=pBiDi->flags; |
|
934 /* if the text contains AN and neutrals, then some neutrals may become RTL */ |
|
935 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { |
|
936 return UBIDI_LTR; |
|
937 } else if(!(flags&MASK_LTR)) { |
|
938 return UBIDI_RTL; |
|
939 } else { |
|
940 return UBIDI_MIXED; |
|
941 } |
|
942 } |
|
943 |
|
944 /* |
|
945 * Resolve the explicit levels as specified by explicit embedding codes. |
|
946 * Recalculate the flags to have them reflect the real properties |
|
947 * after taking the explicit embeddings into account. |
|
948 * |
|
949 * The BiDi algorithm is designed to result in the same behavior whether embedding |
|
950 * levels are externally specified (from "styled text", supposedly the preferred |
|
951 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. |
|
952 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). |
|
953 * However, in a real implementation, the removal of these codes and their index |
|
954 * positions in the plain text is undesirable since it would result in |
|
955 * reallocated, reindexed text. |
|
956 * Instead, this implementation leaves the codes in there and just ignores them |
|
957 * in the subsequent processing. |
|
958 * In order to get the same reordering behavior, positions with a BN or a not-isolate |
|
959 * explicit embedding code just get the same level assigned as the last "real" |
|
960 * character. |
|
961 * |
|
962 * Some implementations, not this one, then overwrite some of these |
|
963 * directionality properties at "real" same-level-run boundaries by |
|
964 * L or R codes so that the resolution of weak types can be performed on the |
|
965 * entire paragraph at once instead of having to parse it once more and |
|
966 * perform that resolution on same-level-runs. |
|
967 * This limits the scope of the implicit rules in effectively |
|
968 * the same way as the run limits. |
|
969 * |
|
970 * Instead, this implementation does not modify these codes, except for |
|
971 * paired brackets whose properties (ON) may be replaced by L or R. |
|
972 * On one hand, the paragraph has to be scanned for same-level-runs, but |
|
973 * on the other hand, this saves another loop to reset these codes, |
|
974 * or saves making and modifying a copy of dirProps[]. |
|
975 * |
|
976 * |
|
977 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. |
|
978 * |
|
979 * |
|
980 * Handling the stack of explicit levels (Xn): |
|
981 * |
|
982 * With the BiDi stack of explicit levels, as pushed with each |
|
983 * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI, |
|
984 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. |
|
985 * |
|
986 * In order to have a correct push-pop semantics even in the case of overflows, |
|
987 * overflow counters and a valid isolate counter are used as described in UAX#9 |
|
988 * section 3.3.2 "Explicit Levels and Directions". |
|
989 * |
|
990 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. |
|
991 */ |
|
992 static UBiDiDirection |
|
993 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
|
994 DirProp *dirProps=pBiDi->dirProps; |
|
995 UBiDiLevel *levels=pBiDi->levels; |
|
996 const UChar *text=pBiDi->text; |
|
997 |
|
998 int32_t i=0, length=pBiDi->length; |
|
999 Flags flags=pBiDi->flags; /* collect all directionalities in the text */ |
|
1000 DirProp dirProp; |
|
1001 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); |
|
1002 UBiDiDirection direction; |
|
1003 pBiDi->isolateCount=0; |
|
1004 |
|
1005 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } |
|
1006 |
|
1007 /* determine if the text is mixed-directional or single-directional */ |
|
1008 direction=directionFromFlags(pBiDi); |
|
1009 |
|
1010 /* we may not need to resolve any explicit levels */ |
|
1011 if((direction!=UBIDI_MIXED)) { |
|
1012 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ |
|
1013 return direction; |
|
1014 } |
|
1015 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { |
|
1016 /* inverse BiDi: mixed, but all characters are at the same embedding level */ |
|
1017 /* set all levels to the paragraph level */ |
|
1018 int32_t paraIndex, start, limit; |
|
1019 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { |
|
1020 if(paraIndex==0) |
|
1021 start=0; |
|
1022 else |
|
1023 start=pBiDi->paras[paraIndex-1].limit; |
|
1024 limit=pBiDi->paras[paraIndex].limit; |
|
1025 level=pBiDi->paras[paraIndex].level; |
|
1026 for(i=start; i<limit; i++) |
|
1027 levels[i]=level; |
|
1028 } |
|
1029 return direction; /* no bracket matching for inverse BiDi */ |
|
1030 } |
|
1031 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) { |
|
1032 /* no embeddings, set all levels to the paragraph level */ |
|
1033 /* we still have to perform bracket matching */ |
|
1034 int32_t paraIndex, start, limit; |
|
1035 BracketData bracketData; |
|
1036 bracketInit(pBiDi, &bracketData); |
|
1037 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { |
|
1038 if(paraIndex==0) |
|
1039 start=0; |
|
1040 else |
|
1041 start=pBiDi->paras[paraIndex-1].limit; |
|
1042 limit=pBiDi->paras[paraIndex].limit; |
|
1043 level=pBiDi->paras[paraIndex].level; |
|
1044 for(i=start; i<limit; i++) { |
|
1045 levels[i]=level; |
|
1046 dirProp=dirProps[i]; |
|
1047 if(dirProp==B) { |
|
1048 if((i+1)<length) { |
|
1049 if(text[i]==CR && text[i+1]==LF) |
|
1050 continue; /* skip CR when followed by LF */ |
|
1051 bracketProcessB(&bracketData, level); |
|
1052 } |
|
1053 continue; |
|
1054 } |
|
1055 if(!bracketProcessChar(&bracketData, i, dirProp)) { |
|
1056 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
1057 return UBIDI_LTR; |
|
1058 } |
|
1059 } |
|
1060 } |
|
1061 return direction; |
|
1062 } |
|
1063 { |
|
1064 /* continue to perform (Xn) */ |
|
1065 |
|
1066 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ |
|
1067 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ |
|
1068 UBiDiLevel embeddingLevel=level, newLevel; |
|
1069 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */ |
|
1070 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */ |
|
1071 |
|
1072 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL |
|
1073 but we need one more entry as base */ |
|
1074 uint32_t stackLast=0; |
|
1075 int32_t overflowIsolateCount=0; |
|
1076 int32_t overflowEmbeddingCount=0; |
|
1077 int32_t validIsolateCount=0; |
|
1078 BracketData bracketData; |
|
1079 bracketInit(pBiDi, &bracketData); |
|
1080 stack[0]=level; /* initialize base entry to para level, no override, no isolate */ |
|
1081 |
|
1082 /* recalculate the flags */ |
|
1083 flags=0; |
|
1084 |
|
1085 for(i=0; i<length; ++i) { |
|
1086 dirProp=dirProps[i]; |
|
1087 switch(dirProp) { |
|
1088 case LRE: |
|
1089 case RLE: |
|
1090 case LRO: |
|
1091 case RLO: |
|
1092 /* (X2, X3, X4, X5) */ |
|
1093 flags|=DIRPROP_FLAG(BN); |
|
1094 if (dirProp==LRE || dirProp==LRO) |
|
1095 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ |
|
1096 else |
|
1097 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ |
|
1098 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && |
|
1099 overflowEmbeddingCount==0) { |
|
1100 lastCcPos=i; |
|
1101 embeddingLevel=newLevel; |
|
1102 if(dirProp==LRO || dirProp==RLO) |
|
1103 embeddingLevel|=UBIDI_LEVEL_OVERRIDE; |
|
1104 stackLast++; |
|
1105 stack[stackLast]=embeddingLevel; |
|
1106 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE |
|
1107 since this has already been done for newLevel which is |
|
1108 the source for embeddingLevel. |
|
1109 */ |
|
1110 } else { |
|
1111 dirProps[i]|=IGNORE_CC; |
|
1112 if(overflowIsolateCount==0) |
|
1113 overflowEmbeddingCount++; |
|
1114 } |
|
1115 break; |
|
1116 case PDF: |
|
1117 /* (X7) */ |
|
1118 flags|=DIRPROP_FLAG(BN); |
|
1119 /* handle all the overflow cases first */ |
|
1120 if(overflowIsolateCount) { |
|
1121 dirProps[i]|=IGNORE_CC; |
|
1122 break; |
|
1123 } |
|
1124 if(overflowEmbeddingCount) { |
|
1125 dirProps[i]|=IGNORE_CC; |
|
1126 overflowEmbeddingCount--; |
|
1127 break; |
|
1128 } |
|
1129 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */ |
|
1130 lastCcPos=i; |
|
1131 stackLast--; |
|
1132 embeddingLevel=(UBiDiLevel)stack[stackLast]; |
|
1133 } else |
|
1134 dirProps[i]|=IGNORE_CC; |
|
1135 break; |
|
1136 case LRI: |
|
1137 case RLI: |
|
1138 if(embeddingLevel!=previousLevel) { |
|
1139 bracketProcessBoundary(&bracketData, lastCcPos, |
|
1140 previousLevel, embeddingLevel); |
|
1141 previousLevel=embeddingLevel; |
|
1142 } |
|
1143 /* (X5a, X5b) */ |
|
1144 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel); |
|
1145 level=embeddingLevel; |
|
1146 if(dirProp==LRI) |
|
1147 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ |
|
1148 else |
|
1149 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ |
|
1150 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && |
|
1151 overflowEmbeddingCount==0) { |
|
1152 lastCcPos=i; |
|
1153 previousLevel=embeddingLevel; |
|
1154 validIsolateCount++; |
|
1155 if(validIsolateCount>pBiDi->isolateCount) |
|
1156 pBiDi->isolateCount=validIsolateCount; |
|
1157 embeddingLevel=newLevel; |
|
1158 stackLast++; |
|
1159 stack[stackLast]=embeddingLevel+ISOLATE; |
|
1160 bracketProcessLRI_RLI(&bracketData, embeddingLevel); |
|
1161 } else { |
|
1162 dirProps[i]|=IGNORE_CC; |
|
1163 overflowIsolateCount++; |
|
1164 } |
|
1165 break; |
|
1166 case PDI: |
|
1167 if(embeddingLevel!=previousLevel) { |
|
1168 bracketProcessBoundary(&bracketData, lastCcPos, |
|
1169 previousLevel, embeddingLevel); |
|
1170 } |
|
1171 /* (X6a) */ |
|
1172 if(overflowIsolateCount) { |
|
1173 dirProps[i]|=IGNORE_CC; |
|
1174 overflowIsolateCount--; |
|
1175 } |
|
1176 else if(validIsolateCount) { |
|
1177 lastCcPos=i; |
|
1178 overflowEmbeddingCount=0; |
|
1179 while(stack[stackLast]<ISOLATE) /* pop embedding entries */ |
|
1180 stackLast--; /* until the last isolate entry */ |
|
1181 stackLast--; /* pop also the last isolate entry */ |
|
1182 validIsolateCount--; |
|
1183 bracketProcessPDI(&bracketData); |
|
1184 } else |
|
1185 dirProps[i]|=IGNORE_CC; |
|
1186 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE; |
|
1187 previousLevel=level=embeddingLevel; |
|
1188 flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel); |
|
1189 break; |
|
1190 case B: |
|
1191 level=GET_PARALEVEL(pBiDi, i); |
|
1192 if((i+1)<length) { |
|
1193 if(text[i]==CR && text[i+1]==LF) |
|
1194 break; /* skip CR when followed by LF */ |
|
1195 overflowEmbeddingCount=overflowIsolateCount=0; |
|
1196 validIsolateCount=0; |
|
1197 stackLast=0; |
|
1198 stack[0]=level; /* initialize base entry to para level, no override, no isolate */ |
|
1199 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1); |
|
1200 bracketProcessB(&bracketData, embeddingLevel); |
|
1201 } |
|
1202 flags|=DIRPROP_FLAG(B); |
|
1203 break; |
|
1204 case BN: |
|
1205 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ |
|
1206 /* they will get their levels set correctly in adjustWSLevels() */ |
|
1207 flags|=DIRPROP_FLAG(BN); |
|
1208 break; |
|
1209 default: |
|
1210 /* all other types get the "real" level */ |
|
1211 level=embeddingLevel; |
|
1212 if(embeddingLevel!=previousLevel) { |
|
1213 bracketProcessBoundary(&bracketData, lastCcPos, |
|
1214 previousLevel, embeddingLevel); |
|
1215 previousLevel=embeddingLevel; |
|
1216 } |
|
1217 if(level&UBIDI_LEVEL_OVERRIDE) |
|
1218 flags|=DIRPROP_FLAG_LR(level); |
|
1219 else |
|
1220 flags|=DIRPROP_FLAG(dirProp); |
|
1221 if(!bracketProcessChar(&bracketData, i, dirProp)) |
|
1222 return -1; |
|
1223 break; |
|
1224 } |
|
1225 |
|
1226 /* |
|
1227 * We need to set reasonable levels even on BN codes and |
|
1228 * explicit codes because we will later look at same-level runs (X10). |
|
1229 */ |
|
1230 levels[i]=level; |
|
1231 if(i>0 && levels[i-1]!=level) { |
|
1232 flags|=DIRPROP_FLAG_MULTI_RUNS; |
|
1233 if(level&UBIDI_LEVEL_OVERRIDE) |
|
1234 flags|=DIRPROP_FLAG_O(level); |
|
1235 else |
|
1236 flags|=DIRPROP_FLAG_E(level); |
|
1237 } |
|
1238 if(DIRPROP_FLAG(dirProp)&MASK_ISO) |
|
1239 level=embeddingLevel; |
|
1240 } |
|
1241 if(flags&MASK_EMBEDDING) { |
|
1242 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); |
|
1243 } |
|
1244 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { |
|
1245 flags|=DIRPROP_FLAG(L); |
|
1246 } |
|
1247 |
|
1248 /* subsequently, ignore the explicit codes and BN (X9) */ |
|
1249 |
|
1250 /* again, determine if the text is mixed-directional or single-directional */ |
|
1251 pBiDi->flags=flags; |
|
1252 direction=directionFromFlags(pBiDi); |
|
1253 } |
|
1254 return direction; |
|
1255 } |
|
1256 |
|
1257 /* |
|
1258 * Use a pre-specified embedding levels array: |
|
1259 * |
|
1260 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), |
|
1261 * ignore all explicit codes (X9), |
|
1262 * and check all the preset levels. |
|
1263 * |
|
1264 * Recalculate the flags to have them reflect the real properties |
|
1265 * after taking the explicit embeddings into account. |
|
1266 */ |
|
1267 static UBiDiDirection |
|
1268 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
|
1269 DirProp *dirProps=pBiDi->dirProps; |
|
1270 DirProp dirProp; |
|
1271 UBiDiLevel *levels=pBiDi->levels; |
|
1272 int32_t isolateCount=0; |
|
1273 |
|
1274 int32_t i, length=pBiDi->length; |
|
1275 Flags flags=0; /* collect all directionalities in the text */ |
|
1276 UBiDiLevel level; |
|
1277 pBiDi->isolateCount=0; |
|
1278 |
|
1279 for(i=0; i<length; ++i) { |
|
1280 level=levels[i]; |
|
1281 dirProp=dirProps[i]; |
|
1282 if(dirProp==LRI || dirProp==RLI) { |
|
1283 isolateCount++; |
|
1284 if(isolateCount>pBiDi->isolateCount) |
|
1285 pBiDi->isolateCount=isolateCount; |
|
1286 } |
|
1287 else if(dirProp==PDI) |
|
1288 isolateCount--; |
|
1289 else if(dirProp==B) |
|
1290 isolateCount=0; |
|
1291 if(level&UBIDI_LEVEL_OVERRIDE) { |
|
1292 /* keep the override flag in levels[i] but adjust the flags */ |
|
1293 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ |
|
1294 flags|=DIRPROP_FLAG_O(level); |
|
1295 } else { |
|
1296 /* set the flags */ |
|
1297 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); |
|
1298 } |
|
1299 if((level<GET_PARALEVEL(pBiDi, i) && |
|
1300 !((0==level)&&(dirProp==B))) || |
|
1301 (UBIDI_MAX_EXPLICIT_LEVEL<level)) { |
|
1302 /* level out of bounds */ |
|
1303 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
1304 return UBIDI_LTR; |
|
1305 } |
|
1306 } |
|
1307 if(flags&MASK_EMBEDDING) { |
|
1308 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); |
|
1309 } |
|
1310 |
|
1311 /* determine if the text is mixed-directional or single-directional */ |
|
1312 pBiDi->flags=flags; |
|
1313 return directionFromFlags(pBiDi); |
|
1314 } |
|
1315 |
|
1316 /****************************************************************** |
|
1317 The Properties state machine table |
|
1318 ******************************************************************* |
|
1319 |
|
1320 All table cells are 8 bits: |
|
1321 bits 0..4: next state |
|
1322 bits 5..7: action to perform (if > 0) |
|
1323 |
|
1324 Cells may be of format "n" where n represents the next state |
|
1325 (except for the rightmost column). |
|
1326 Cells may also be of format "s(x,y)" where x represents an action |
|
1327 to perform and y represents the next state. |
|
1328 |
|
1329 ******************************************************************* |
|
1330 Definitions and type for properties state table |
|
1331 ******************************************************************* |
|
1332 */ |
|
1333 #define IMPTABPROPS_COLUMNS 16 |
|
1334 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) |
|
1335 #define GET_STATEPROPS(cell) ((cell)&0x1f) |
|
1336 #define GET_ACTIONPROPS(cell) ((cell)>>5) |
|
1337 #define s(action, newState) ((uint8_t)(newState+(action<<5))) |
|
1338 |
|
1339 static const uint8_t groupProp[] = /* dirProp regrouped */ |
|
1340 { |
|
1341 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ |
|
1342 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 |
|
1343 }; |
|
1344 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ |
|
1345 |
|
1346 /****************************************************************** |
|
1347 |
|
1348 PROPERTIES STATE TABLE |
|
1349 |
|
1350 In table impTabProps, |
|
1351 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI |
|
1352 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF |
|
1353 - the Res column is the reduced property assigned to a run |
|
1354 |
|
1355 Action 1: process current run1, init new run1 |
|
1356 2: init new run2 |
|
1357 3: process run1, process run2, init new run1 |
|
1358 4: process run1, set run1=run2, init new run2 |
|
1359 |
|
1360 Notes: |
|
1361 1) This table is used in resolveImplicitLevels(). |
|
1362 2) This table triggers actions when there is a change in the Bidi |
|
1363 property of incoming characters (action 1). |
|
1364 3) Most such property sequences are processed immediately (in |
|
1365 fact, passed to processPropertySeq(). |
|
1366 4) However, numbers are assembled as one sequence. This means |
|
1367 that undefined situations (like CS following digits, until |
|
1368 it is known if the next char will be a digit) are held until |
|
1369 following chars define them. |
|
1370 Example: digits followed by CS, then comes another CS or ON; |
|
1371 the digits will be processed, then the CS assigned |
|
1372 as the start of an ON sequence (action 3). |
|
1373 5) There are cases where more than one sequence must be |
|
1374 processed, for instance digits followed by CS followed by L: |
|
1375 the digits must be processed as one sequence, and the CS |
|
1376 must be processed as an ON sequence, all this before starting |
|
1377 assembling chars for the opening L sequence. |
|
1378 |
|
1379 |
|
1380 */ |
|
1381 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = |
|
1382 { |
|
1383 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ |
|
1384 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, |
|
1385 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, |
|
1386 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, |
|
1387 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, |
|
1388 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, |
|
1389 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, |
|
1390 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, |
|
1391 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, |
|
1392 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, |
|
1393 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, |
|
1394 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, |
|
1395 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, |
|
1396 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, |
|
1397 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, |
|
1398 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, |
|
1399 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, |
|
1400 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, |
|
1401 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, |
|
1402 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, |
|
1403 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, |
|
1404 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, |
|
1405 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, |
|
1406 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, |
|
1407 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } |
|
1408 }; |
|
1409 |
|
1410 /* we must undef macro s because the levels table have a different |
|
1411 * structure (4 bits for action and 4 bits for next state. |
|
1412 */ |
|
1413 #undef s |
|
1414 |
|
1415 /****************************************************************** |
|
1416 The levels state machine tables |
|
1417 ******************************************************************* |
|
1418 |
|
1419 All table cells are 8 bits: |
|
1420 bits 0..3: next state |
|
1421 bits 4..7: action to perform (if > 0) |
|
1422 |
|
1423 Cells may be of format "n" where n represents the next state |
|
1424 (except for the rightmost column). |
|
1425 Cells may also be of format "s(x,y)" where x represents an action |
|
1426 to perform and y represents the next state. |
|
1427 |
|
1428 This format limits each table to 16 states each and to 15 actions. |
|
1429 |
|
1430 ******************************************************************* |
|
1431 Definitions and type for levels state tables |
|
1432 ******************************************************************* |
|
1433 */ |
|
1434 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2) |
|
1435 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) |
|
1436 #define GET_STATE(cell) ((cell)&0x0f) |
|
1437 #define GET_ACTION(cell) ((cell)>>4) |
|
1438 #define s(action, newState) ((uint8_t)(newState+(action<<4))) |
|
1439 |
|
1440 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; |
|
1441 typedef uint8_t ImpAct[]; |
|
1442 |
|
1443 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, |
|
1444 * instead of having a pair of ImpTab and a pair of ImpAct. |
|
1445 */ |
|
1446 typedef struct ImpTabPair { |
|
1447 const void * pImpTab[2]; |
|
1448 const void * pImpAct[2]; |
|
1449 } ImpTabPair; |
|
1450 |
|
1451 /****************************************************************** |
|
1452 |
|
1453 LEVELS STATE TABLES |
|
1454 |
|
1455 In all levels state tables, |
|
1456 - state 0 is the initial state |
|
1457 - the Res column is the increment to add to the text level |
|
1458 for this property sequence. |
|
1459 |
|
1460 The impAct arrays for each table of a pair map the local action |
|
1461 numbers of the table to the total list of actions. For instance, |
|
1462 action 2 in a given table corresponds to the action number which |
|
1463 appears in entry [2] of the impAct array for that table. |
|
1464 The first entry of all impAct arrays must be 0. |
|
1465 |
|
1466 Action 1: init conditional sequence |
|
1467 2: prepend conditional sequence to current sequence |
|
1468 3: set ON sequence to new level - 1 |
|
1469 4: init EN/AN/ON sequence |
|
1470 5: fix EN/AN/ON sequence followed by R |
|
1471 6: set previous level sequence to level 2 |
|
1472 |
|
1473 Notes: |
|
1474 1) These tables are used in processPropertySeq(). The input |
|
1475 is property sequences as determined by resolveImplicitLevels. |
|
1476 2) Most such property sequences are processed immediately |
|
1477 (levels are assigned). |
|
1478 3) However, some sequences cannot be assigned a final level till |
|
1479 one or more following sequences are received. For instance, |
|
1480 ON following an R sequence within an even-level paragraph. |
|
1481 If the following sequence is R, the ON sequence will be |
|
1482 assigned basic run level+1, and so will the R sequence. |
|
1483 4) S is generally handled like ON, since its level will be fixed |
|
1484 to paragraph level in adjustWSLevels(). |
|
1485 |
|
1486 */ |
|
1487 |
|
1488 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ |
|
1489 /* In this table, conditional sequences receive the higher possible level |
|
1490 until proven otherwise. |
|
1491 */ |
|
1492 { |
|
1493 /* L , R , EN , AN , ON , S , B , Res */ |
|
1494 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, |
|
1495 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, |
|
1496 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, |
|
1497 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, |
|
1498 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 }, |
|
1499 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 } |
|
1500 }; |
|
1501 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ |
|
1502 /* In this table, conditional sequences receive the lower possible level |
|
1503 until proven otherwise. |
|
1504 */ |
|
1505 { |
|
1506 /* L , R , EN , AN , ON , S , B , Res */ |
|
1507 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, |
|
1508 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, |
|
1509 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, |
|
1510 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, |
|
1511 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, |
|
1512 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } |
|
1513 }; |
|
1514 static const ImpAct impAct0 = {0,1,2,3,4,5,6}; |
|
1515 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, |
|
1516 &impTabR_DEFAULT}, |
|
1517 {&impAct0, &impAct0}}; |
|
1518 |
|
1519 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ |
|
1520 /* In this table, conditional sequences receive the higher possible level |
|
1521 until proven otherwise. |
|
1522 */ |
|
1523 { |
|
1524 /* L , R , EN , AN , ON , S , B , Res */ |
|
1525 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 }, |
|
1526 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 }, |
|
1527 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 }, |
|
1528 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 }, |
|
1529 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } |
|
1530 }; |
|
1531 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, |
|
1532 &impTabR_DEFAULT}, |
|
1533 {&impAct0, &impAct0}}; |
|
1534 |
|
1535 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = |
|
1536 /* In this table, EN/AN+ON sequences receive levels as if associated with R |
|
1537 until proven that there is L or sor/eor on both sides. AN is handled like EN. |
|
1538 */ |
|
1539 { |
|
1540 /* L , R , EN , AN , ON , S , B , Res */ |
|
1541 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, |
|
1542 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, |
|
1543 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, |
|
1544 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, |
|
1545 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, |
|
1546 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } |
|
1547 }; |
|
1548 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = |
|
1549 /* In this table, EN/AN+ON sequences receive levels as if associated with R |
|
1550 until proven that there is L on both sides. AN is handled like EN. |
|
1551 */ |
|
1552 { |
|
1553 /* L , R , EN , AN , ON , S , B , Res */ |
|
1554 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, |
|
1555 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, |
|
1556 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, |
|
1557 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, |
|
1558 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } |
|
1559 }; |
|
1560 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { |
|
1561 {&impTabL_GROUP_NUMBERS_WITH_R, |
|
1562 &impTabR_GROUP_NUMBERS_WITH_R}, |
|
1563 {&impAct0, &impAct0}}; |
|
1564 |
|
1565 |
|
1566 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = |
|
1567 /* This table is identical to the Default LTR table except that EN and AN are |
|
1568 handled like L. |
|
1569 */ |
|
1570 { |
|
1571 /* L , R , EN , AN , ON , S , B , Res */ |
|
1572 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, |
|
1573 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, |
|
1574 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, |
|
1575 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, |
|
1576 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, |
|
1577 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } |
|
1578 }; |
|
1579 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = |
|
1580 /* This table is identical to the Default RTL table except that EN and AN are |
|
1581 handled like L. |
|
1582 */ |
|
1583 { |
|
1584 /* L , R , EN , AN , ON , S , B , Res */ |
|
1585 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, |
|
1586 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, |
|
1587 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, |
|
1588 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, |
|
1589 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, |
|
1590 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } |
|
1591 }; |
|
1592 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { |
|
1593 {&impTabL_INVERSE_NUMBERS_AS_L, |
|
1594 &impTabR_INVERSE_NUMBERS_AS_L}, |
|
1595 {&impAct0, &impAct0}}; |
|
1596 |
|
1597 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ |
|
1598 /* In this table, conditional sequences receive the lower possible level |
|
1599 until proven otherwise. |
|
1600 */ |
|
1601 { |
|
1602 /* L , R , EN , AN , ON , S , B , Res */ |
|
1603 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, |
|
1604 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, |
|
1605 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, |
|
1606 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, |
|
1607 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, |
|
1608 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, |
|
1609 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } |
|
1610 }; |
|
1611 static const ImpAct impAct1 = {0,1,11,12}; |
|
1612 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" |
|
1613 */ |
|
1614 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { |
|
1615 {&impTabL_DEFAULT, |
|
1616 &impTabR_INVERSE_LIKE_DIRECT}, |
|
1617 {&impAct0, &impAct1}}; |
|
1618 |
|
1619 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = |
|
1620 /* The case handled in this table is (visually): R EN L |
|
1621 */ |
|
1622 { |
|
1623 /* L , R , EN , AN , ON , S , B , Res */ |
|
1624 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, |
|
1625 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, |
|
1626 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, |
|
1627 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, |
|
1628 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, |
|
1629 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, |
|
1630 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } |
|
1631 }; |
|
1632 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = |
|
1633 /* The cases handled in this table are (visually): R EN L |
|
1634 R L AN L |
|
1635 */ |
|
1636 { |
|
1637 /* L , R , EN , AN , ON , S , B , Res */ |
|
1638 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, |
|
1639 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, |
|
1640 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, |
|
1641 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, |
|
1642 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, |
|
1643 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, |
|
1644 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } |
|
1645 }; |
|
1646 static const ImpAct impAct2 = {0,1,7,8,9,10}; |
|
1647 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { |
|
1648 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, |
|
1649 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, |
|
1650 {&impAct0, &impAct2}}; |
|
1651 |
|
1652 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { |
|
1653 {&impTabL_NUMBERS_SPECIAL, |
|
1654 &impTabR_INVERSE_LIKE_DIRECT}, |
|
1655 {&impAct0, &impAct1}}; |
|
1656 |
|
1657 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = |
|
1658 /* The case handled in this table is (visually): R EN L |
|
1659 */ |
|
1660 { |
|
1661 /* L , R , EN , AN , ON , S , B , Res */ |
|
1662 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, |
|
1663 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, |
|
1664 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, |
|
1665 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, |
|
1666 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } |
|
1667 }; |
|
1668 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { |
|
1669 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, |
|
1670 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, |
|
1671 {&impAct0, &impAct2}}; |
|
1672 |
|
1673 #undef s |
|
1674 |
|
1675 typedef struct { |
|
1676 const ImpTab * pImpTab; /* level table pointer */ |
|
1677 const ImpAct * pImpAct; /* action map array */ |
|
1678 int32_t startON; /* start of ON sequence */ |
|
1679 int32_t startL2EN; /* start of level 2 sequence */ |
|
1680 int32_t lastStrongRTL; /* index of last found R or AL */ |
|
1681 int32_t state; /* current state */ |
|
1682 int32_t runStart; /* start position of the run */ |
|
1683 UBiDiLevel runLevel; /* run level before implicit solving */ |
|
1684 } LevState; |
|
1685 |
|
1686 /*------------------------------------------------------------------------*/ |
|
1687 |
|
1688 static void |
|
1689 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) |
|
1690 /* param pos: position where to insert |
|
1691 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER |
|
1692 */ |
|
1693 { |
|
1694 #define FIRSTALLOC 10 |
|
1695 Point point; |
|
1696 InsertPoints * pInsertPoints=&(pBiDi->insertPoints); |
|
1697 |
|
1698 if (pInsertPoints->capacity == 0) |
|
1699 { |
|
1700 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); |
|
1701 if (pInsertPoints->points == NULL) |
|
1702 { |
|
1703 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
1704 return; |
|
1705 } |
|
1706 pInsertPoints->capacity=FIRSTALLOC; |
|
1707 } |
|
1708 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ |
|
1709 { |
|
1710 void * savePoints=pInsertPoints->points; |
|
1711 pInsertPoints->points=uprv_realloc(pInsertPoints->points, |
|
1712 pInsertPoints->capacity*2*sizeof(Point)); |
|
1713 if (pInsertPoints->points == NULL) |
|
1714 { |
|
1715 pInsertPoints->points=savePoints; |
|
1716 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; |
|
1717 return; |
|
1718 } |
|
1719 else pInsertPoints->capacity*=2; |
|
1720 } |
|
1721 point.pos=pos; |
|
1722 point.flag=flag; |
|
1723 pInsertPoints->points[pInsertPoints->size]=point; |
|
1724 pInsertPoints->size++; |
|
1725 #undef FIRSTALLOC |
|
1726 } |
|
1727 |
|
1728 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ |
|
1729 |
|
1730 /* |
|
1731 * This implementation of the (Wn) rules applies all rules in one pass. |
|
1732 * In order to do so, it needs a look-ahead of typically 1 character |
|
1733 * (except for W5: sequences of ET) and keeps track of changes |
|
1734 * in a rule Wp that affect a later Wq (p<q). |
|
1735 * |
|
1736 * The (Nn) and (In) rules are also performed in that same single loop, |
|
1737 * but effectively one iteration behind for white space. |
|
1738 * |
|
1739 * Since all implicit rules are performed in one step, it is not necessary |
|
1740 * to actually store the intermediate directional properties in dirProps[]. |
|
1741 */ |
|
1742 |
|
1743 static void |
|
1744 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, |
|
1745 int32_t start, int32_t limit) { |
|
1746 uint8_t cell, oldStateSeq, actionSeq; |
|
1747 const ImpTab * pImpTab=pLevState->pImpTab; |
|
1748 const ImpAct * pImpAct=pLevState->pImpAct; |
|
1749 UBiDiLevel * levels=pBiDi->levels; |
|
1750 UBiDiLevel level, addLevel; |
|
1751 InsertPoints * pInsertPoints; |
|
1752 int32_t start0, k; |
|
1753 |
|
1754 start0=start; /* save original start position */ |
|
1755 oldStateSeq=(uint8_t)pLevState->state; |
|
1756 cell=(*pImpTab)[oldStateSeq][_prop]; |
|
1757 pLevState->state=GET_STATE(cell); /* isolate the new state */ |
|
1758 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ |
|
1759 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; |
|
1760 |
|
1761 if(actionSeq) { |
|
1762 switch(actionSeq) { |
|
1763 case 1: /* init ON seq */ |
|
1764 pLevState->startON=start0; |
|
1765 break; |
|
1766 |
|
1767 case 2: /* prepend ON seq to current seq */ |
|
1768 start=pLevState->startON; |
|
1769 break; |
|
1770 |
|
1771 case 3: /* L or S after possible relevant EN/AN */ |
|
1772 /* check if we had EN after R/AL */ |
|
1773 if (pLevState->startL2EN >= 0) { |
|
1774 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); |
|
1775 } |
|
1776 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ |
|
1777 /* check if we had any relevant EN/AN after R/AL */ |
|
1778 pInsertPoints=&(pBiDi->insertPoints); |
|
1779 if ((pInsertPoints->capacity == 0) || |
|
1780 (pInsertPoints->size <= pInsertPoints->confirmed)) |
|
1781 { |
|
1782 /* nothing, just clean up */ |
|
1783 pLevState->lastStrongRTL=-1; |
|
1784 /* check if we have a pending conditional segment */ |
|
1785 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; |
|
1786 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ |
|
1787 start=pLevState->startON; /* reset to basic run level */ |
|
1788 } |
|
1789 if (_prop == DirProp_S) /* add LRM before S */ |
|
1790 { |
|
1791 addPoint(pBiDi, start0, LRM_BEFORE); |
|
1792 pInsertPoints->confirmed=pInsertPoints->size; |
|
1793 } |
|
1794 break; |
|
1795 } |
|
1796 /* reset previous RTL cont to level for LTR text */ |
|
1797 for (k=pLevState->lastStrongRTL+1; k<start0; k++) |
|
1798 { |
|
1799 /* reset odd level, leave runLevel+2 as is */ |
|
1800 levels[k]=(levels[k] - 2) & ~1; |
|
1801 } |
|
1802 /* mark insert points as confirmed */ |
|
1803 pInsertPoints->confirmed=pInsertPoints->size; |
|
1804 pLevState->lastStrongRTL=-1; |
|
1805 if (_prop == DirProp_S) /* add LRM before S */ |
|
1806 { |
|
1807 addPoint(pBiDi, start0, LRM_BEFORE); |
|
1808 pInsertPoints->confirmed=pInsertPoints->size; |
|
1809 } |
|
1810 break; |
|
1811 |
|
1812 case 4: /* R/AL after possible relevant EN/AN */ |
|
1813 /* just clean up */ |
|
1814 pInsertPoints=&(pBiDi->insertPoints); |
|
1815 if (pInsertPoints->capacity > 0) |
|
1816 /* remove all non confirmed insert points */ |
|
1817 pInsertPoints->size=pInsertPoints->confirmed; |
|
1818 pLevState->startON=-1; |
|
1819 pLevState->startL2EN=-1; |
|
1820 pLevState->lastStrongRTL=limit - 1; |
|
1821 break; |
|
1822 |
|
1823 case 5: /* EN/AN after R/AL + possible cont */ |
|
1824 /* check for real AN */ |
|
1825 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && |
|
1826 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) |
|
1827 { |
|
1828 /* real AN */ |
|
1829 if (pLevState->startL2EN == -1) /* if no relevant EN already found */ |
|
1830 { |
|
1831 /* just note the righmost digit as a strong RTL */ |
|
1832 pLevState->lastStrongRTL=limit - 1; |
|
1833 break; |
|
1834 } |
|
1835 if (pLevState->startL2EN >= 0) /* after EN, no AN */ |
|
1836 { |
|
1837 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); |
|
1838 pLevState->startL2EN=-2; |
|
1839 } |
|
1840 /* note AN */ |
|
1841 addPoint(pBiDi, start0, LRM_BEFORE); |
|
1842 break; |
|
1843 } |
|
1844 /* if first EN/AN after R/AL */ |
|
1845 if (pLevState->startL2EN == -1) { |
|
1846 pLevState->startL2EN=start0; |
|
1847 } |
|
1848 break; |
|
1849 |
|
1850 case 6: /* note location of latest R/AL */ |
|
1851 pLevState->lastStrongRTL=limit - 1; |
|
1852 pLevState->startON=-1; |
|
1853 break; |
|
1854 |
|
1855 case 7: /* L after R+ON/EN/AN */ |
|
1856 /* include possible adjacent number on the left */ |
|
1857 for (k=start0-1; k>=0 && !(levels[k]&1); k--); |
|
1858 if(k>=0) { |
|
1859 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ |
|
1860 pInsertPoints=&(pBiDi->insertPoints); |
|
1861 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ |
|
1862 } |
|
1863 pLevState->startON=start0; |
|
1864 break; |
|
1865 |
|
1866 case 8: /* AN after L */ |
|
1867 /* AN numbers between L text on both sides may be trouble. */ |
|
1868 /* tentatively bracket with LRMs; will be confirmed if followed by L */ |
|
1869 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ |
|
1870 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ |
|
1871 break; |
|
1872 |
|
1873 case 9: /* R after L+ON/EN/AN */ |
|
1874 /* false alert, infirm LRMs around previous AN */ |
|
1875 pInsertPoints=&(pBiDi->insertPoints); |
|
1876 pInsertPoints->size=pInsertPoints->confirmed; |
|
1877 if (_prop == DirProp_S) /* add RLM before S */ |
|
1878 { |
|
1879 addPoint(pBiDi, start0, RLM_BEFORE); |
|
1880 pInsertPoints->confirmed=pInsertPoints->size; |
|
1881 } |
|
1882 break; |
|
1883 |
|
1884 case 10: /* L after L+ON/AN */ |
|
1885 level=pLevState->runLevel + addLevel; |
|
1886 for(k=pLevState->startON; k<start0; k++) { |
|
1887 if (levels[k]<level) |
|
1888 levels[k]=level; |
|
1889 } |
|
1890 pInsertPoints=&(pBiDi->insertPoints); |
|
1891 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ |
|
1892 pLevState->startON=start0; |
|
1893 break; |
|
1894 |
|
1895 case 11: /* L after L+ON+EN/AN/ON */ |
|
1896 level=pLevState->runLevel; |
|
1897 for(k=start0-1; k>=pLevState->startON; k--) { |
|
1898 if(levels[k]==level+3) { |
|
1899 while(levels[k]==level+3) { |
|
1900 levels[k--]-=2; |
|
1901 } |
|
1902 while(levels[k]==level) { |
|
1903 k--; |
|
1904 } |
|
1905 } |
|
1906 if(levels[k]==level+2) { |
|
1907 levels[k]=level; |
|
1908 continue; |
|
1909 } |
|
1910 levels[k]=level+1; |
|
1911 } |
|
1912 break; |
|
1913 |
|
1914 case 12: /* R after L+ON+EN/AN/ON */ |
|
1915 level=pLevState->runLevel+1; |
|
1916 for(k=start0-1; k>=pLevState->startON; k--) { |
|
1917 if(levels[k]>level) { |
|
1918 levels[k]-=2; |
|
1919 } |
|
1920 } |
|
1921 break; |
|
1922 |
|
1923 default: /* we should never get here */ |
|
1924 U_ASSERT(FALSE); |
|
1925 break; |
|
1926 } |
|
1927 } |
|
1928 if((addLevel) || (start < start0)) { |
|
1929 level=pLevState->runLevel + addLevel; |
|
1930 if(start>=pLevState->runStart) { |
|
1931 for(k=start; k<limit; k++) { |
|
1932 levels[k]=level; |
|
1933 } |
|
1934 } else { |
|
1935 DirProp *dirProps=pBiDi->dirProps, dirProp; |
|
1936 int32_t isolateCount=0; |
|
1937 for(k=start; k<limit; k++) { |
|
1938 dirProp=dirProps[k]; |
|
1939 if(dirProp==PDI) |
|
1940 isolateCount--; |
|
1941 if(isolateCount==0) |
|
1942 levels[k]=level; |
|
1943 if(dirProp==LRI || dirProp==RLI) |
|
1944 isolateCount++; |
|
1945 } |
|
1946 } |
|
1947 } |
|
1948 } |
|
1949 |
|
1950 /** |
|
1951 * Returns the directionality of the last strong character at the end of the prologue, if any. |
|
1952 * Requires prologue!=null. |
|
1953 */ |
|
1954 static DirProp |
|
1955 lastL_R_AL(UBiDi *pBiDi) { |
|
1956 const UChar *text=pBiDi->prologue; |
|
1957 int32_t length=pBiDi->proLength; |
|
1958 int32_t i; |
|
1959 UChar32 uchar; |
|
1960 DirProp dirProp; |
|
1961 for(i=length; i>0; ) { |
|
1962 /* i is decremented by U16_PREV */ |
|
1963 U16_PREV(text, 0, i, uchar); |
|
1964 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); |
|
1965 if(dirProp==L) { |
|
1966 return DirProp_L; |
|
1967 } |
|
1968 if(dirProp==R || dirProp==AL) { |
|
1969 return DirProp_R; |
|
1970 } |
|
1971 if(dirProp==B) { |
|
1972 return DirProp_ON; |
|
1973 } |
|
1974 } |
|
1975 return DirProp_ON; |
|
1976 } |
|
1977 |
|
1978 /** |
|
1979 * Returns the directionality of the first strong character, or digit, in the epilogue, if any. |
|
1980 * Requires epilogue!=null. |
|
1981 */ |
|
1982 static DirProp |
|
1983 firstL_R_AL_EN_AN(UBiDi *pBiDi) { |
|
1984 const UChar *text=pBiDi->epilogue; |
|
1985 int32_t length=pBiDi->epiLength; |
|
1986 int32_t i; |
|
1987 UChar32 uchar; |
|
1988 DirProp dirProp; |
|
1989 for(i=0; i<length; ) { |
|
1990 /* i is incremented by U16_NEXT */ |
|
1991 U16_NEXT(text, i, length, uchar); |
|
1992 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); |
|
1993 if(dirProp==L) { |
|
1994 return DirProp_L; |
|
1995 } |
|
1996 if(dirProp==R || dirProp==AL) { |
|
1997 return DirProp_R; |
|
1998 } |
|
1999 if(dirProp==EN) { |
|
2000 return DirProp_EN; |
|
2001 } |
|
2002 if(dirProp==AN) { |
|
2003 return DirProp_AN; |
|
2004 } |
|
2005 } |
|
2006 return DirProp_ON; |
|
2007 } |
|
2008 |
|
2009 static void |
|
2010 resolveImplicitLevels(UBiDi *pBiDi, |
|
2011 int32_t start, int32_t limit, |
|
2012 DirProp sor, DirProp eor) { |
|
2013 const DirProp *dirProps=pBiDi->dirProps; |
|
2014 DirProp dirProp; |
|
2015 LevState levState; |
|
2016 int32_t i, start1, start2; |
|
2017 uint16_t oldStateImp, stateImp, actionImp; |
|
2018 uint8_t gprop, resProp, cell; |
|
2019 UBool inverseRTL; |
|
2020 DirProp nextStrongProp=R; |
|
2021 int32_t nextStrongPos=-1; |
|
2022 |
|
2023 /* check for RTL inverse BiDi mode */ |
|
2024 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to |
|
2025 * loop on the text characters from end to start. |
|
2026 * This would need a different properties state table (at least different |
|
2027 * actions) and different levels state tables (maybe very similar to the |
|
2028 * LTR corresponding ones. |
|
2029 */ |
|
2030 inverseRTL=(UBool) |
|
2031 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && |
|
2032 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || |
|
2033 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); |
|
2034 |
|
2035 /* initialize for property and levels state tables */ |
|
2036 levState.startON=-1; |
|
2037 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ |
|
2038 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ |
|
2039 levState.runStart=start; |
|
2040 levState.runLevel=pBiDi->levels[start]; |
|
2041 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; |
|
2042 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; |
|
2043 if(start==0 && pBiDi->proLength>0) { |
|
2044 DirProp lastStrong=lastL_R_AL(pBiDi); |
|
2045 if(lastStrong!=DirProp_ON) { |
|
2046 sor=lastStrong; |
|
2047 } |
|
2048 } |
|
2049 /* The isolates[] entries contain enough information to |
|
2050 resume the bidi algorithm in the same state as it was |
|
2051 when it was interrupted by an isolate sequence. */ |
|
2052 if(dirProps[start]==PDI) { |
|
2053 start1=pBiDi->isolates[pBiDi->isolateCount].start1; |
|
2054 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; |
|
2055 levState.state=pBiDi->isolates[pBiDi->isolateCount].state; |
|
2056 pBiDi->isolateCount--; |
|
2057 } else { |
|
2058 start1=start; |
|
2059 if(dirProps[start]==NSM) |
|
2060 stateImp = 1 + sor; |
|
2061 else |
|
2062 stateImp=0; |
|
2063 levState.state=0; |
|
2064 processPropertySeq(pBiDi, &levState, sor, start, start); |
|
2065 } |
|
2066 start2=start; |
|
2067 |
|
2068 for(i=start; i<=limit; i++) { |
|
2069 if(i>=limit) { |
|
2070 if(limit>start) { |
|
2071 dirProp=pBiDi->dirProps[limit-1]; |
|
2072 if(dirProp==LRI || dirProp==RLI) |
|
2073 break; /* no forced closing for sequence ending with LRI/RLI */ |
|
2074 } |
|
2075 gprop=eor; |
|
2076 } else { |
|
2077 DirProp prop, prop1; |
|
2078 prop=PURE_DIRPROP(dirProps[i]); |
|
2079 if(inverseRTL) { |
|
2080 if(prop==AL) { |
|
2081 /* AL before EN does not make it AN */ |
|
2082 prop=R; |
|
2083 } else if(prop==EN) { |
|
2084 if(nextStrongPos<=i) { |
|
2085 /* look for next strong char (L/R/AL) */ |
|
2086 int32_t j; |
|
2087 nextStrongProp=R; /* set default */ |
|
2088 nextStrongPos=limit; |
|
2089 for(j=i+1; j<limit; j++) { |
|
2090 prop1=dirProps[j]; |
|
2091 if(prop1==L || prop1==R || prop1==AL) { |
|
2092 nextStrongProp=prop1; |
|
2093 nextStrongPos=j; |
|
2094 break; |
|
2095 } |
|
2096 } |
|
2097 } |
|
2098 if(nextStrongProp==AL) { |
|
2099 prop=AN; |
|
2100 } |
|
2101 } |
|
2102 } |
|
2103 gprop=groupProp[prop]; |
|
2104 } |
|
2105 oldStateImp=stateImp; |
|
2106 cell=impTabProps[oldStateImp][gprop]; |
|
2107 stateImp=GET_STATEPROPS(cell); /* isolate the new state */ |
|
2108 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ |
|
2109 if((i==limit) && (actionImp==0)) { |
|
2110 /* there is an unprocessed sequence if its property == eor */ |
|
2111 actionImp=1; /* process the last sequence */ |
|
2112 } |
|
2113 if(actionImp) { |
|
2114 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; |
|
2115 switch(actionImp) { |
|
2116 case 1: /* process current seq1, init new seq1 */ |
|
2117 processPropertySeq(pBiDi, &levState, resProp, start1, i); |
|
2118 start1=i; |
|
2119 break; |
|
2120 case 2: /* init new seq2 */ |
|
2121 start2=i; |
|
2122 break; |
|
2123 case 3: /* process seq1, process seq2, init new seq1 */ |
|
2124 processPropertySeq(pBiDi, &levState, resProp, start1, start2); |
|
2125 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); |
|
2126 start1=i; |
|
2127 break; |
|
2128 case 4: /* process seq1, set seq1=seq2, init new seq2 */ |
|
2129 processPropertySeq(pBiDi, &levState, resProp, start1, start2); |
|
2130 start1=start2; |
|
2131 start2=i; |
|
2132 break; |
|
2133 default: /* we should never get here */ |
|
2134 U_ASSERT(FALSE); |
|
2135 break; |
|
2136 } |
|
2137 } |
|
2138 } |
|
2139 |
|
2140 /* flush possible pending sequence, e.g. ON */ |
|
2141 if(limit==pBiDi->length && pBiDi->epiLength>0) { |
|
2142 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); |
|
2143 if(firstStrong!=DirProp_ON) { |
|
2144 eor=firstStrong; |
|
2145 } |
|
2146 } |
|
2147 |
|
2148 dirProp=dirProps[limit-1]; |
|
2149 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) { |
|
2150 pBiDi->isolateCount++; |
|
2151 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; |
|
2152 pBiDi->isolates[pBiDi->isolateCount].state=levState.state; |
|
2153 pBiDi->isolates[pBiDi->isolateCount].start1=start1; |
|
2154 } |
|
2155 else |
|
2156 processPropertySeq(pBiDi, &levState, eor, limit, limit); |
|
2157 } |
|
2158 |
|
2159 /* perform (L1) and (X9) ---------------------------------------------------- */ |
|
2160 |
|
2161 /* |
|
2162 * Reset the embedding levels for some non-graphic characters (L1). |
|
2163 * This function also sets appropriate levels for BN, and |
|
2164 * explicit embedding types that are supposed to have been removed |
|
2165 * from the paragraph in (X9). |
|
2166 */ |
|
2167 static void |
|
2168 adjustWSLevels(UBiDi *pBiDi) { |
|
2169 const DirProp *dirProps=pBiDi->dirProps; |
|
2170 UBiDiLevel *levels=pBiDi->levels; |
|
2171 int32_t i; |
|
2172 |
|
2173 if(pBiDi->flags&MASK_WS) { |
|
2174 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; |
|
2175 Flags flag; |
|
2176 |
|
2177 i=pBiDi->trailingWSStart; |
|
2178 while(i>0) { |
|
2179 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ |
|
2180 while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) { |
|
2181 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { |
|
2182 levels[i]=0; |
|
2183 } else { |
|
2184 levels[i]=GET_PARALEVEL(pBiDi, i); |
|
2185 } |
|
2186 } |
|
2187 |
|
2188 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ |
|
2189 /* here, i+1 is guaranteed to be <length */ |
|
2190 while(i>0) { |
|
2191 flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])); |
|
2192 if(flag&MASK_BN_EXPLICIT) { |
|
2193 levels[i]=levels[i+1]; |
|
2194 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { |
|
2195 levels[i]=0; |
|
2196 break; |
|
2197 } else if(flag&MASK_B_S) { |
|
2198 levels[i]=GET_PARALEVEL(pBiDi, i); |
|
2199 break; |
|
2200 } |
|
2201 } |
|
2202 } |
|
2203 } |
|
2204 } |
|
2205 |
|
2206 U_CAPI void U_EXPORT2 |
|
2207 ubidi_setContext(UBiDi *pBiDi, |
|
2208 const UChar *prologue, int32_t proLength, |
|
2209 const UChar *epilogue, int32_t epiLength, |
|
2210 UErrorCode *pErrorCode) { |
|
2211 /* check the argument values */ |
|
2212 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
2213 if(pBiDi==NULL || proLength<-1 || epiLength<-1 || |
|
2214 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { |
|
2215 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
2216 return; |
|
2217 } |
|
2218 |
|
2219 if(proLength==-1) { |
|
2220 pBiDi->proLength=u_strlen(prologue); |
|
2221 } else { |
|
2222 pBiDi->proLength=proLength; |
|
2223 } |
|
2224 if(epiLength==-1) { |
|
2225 pBiDi->epiLength=u_strlen(epilogue); |
|
2226 } else { |
|
2227 pBiDi->epiLength=epiLength; |
|
2228 } |
|
2229 pBiDi->prologue=prologue; |
|
2230 pBiDi->epilogue=epilogue; |
|
2231 } |
|
2232 |
|
2233 static void |
|
2234 setParaSuccess(UBiDi *pBiDi) { |
|
2235 pBiDi->proLength=0; /* forget the last context */ |
|
2236 pBiDi->epiLength=0; |
|
2237 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ |
|
2238 } |
|
2239 |
|
2240 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) |
|
2241 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) |
|
2242 |
|
2243 static void |
|
2244 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, |
|
2245 UBiDiLevel paraLevel, UErrorCode *pErrorCode) { |
|
2246 void *runsOnlyMemory; |
|
2247 int32_t *visualMap; |
|
2248 UChar *visualText; |
|
2249 int32_t saveLength, saveTrailingWSStart; |
|
2250 const UBiDiLevel *levels; |
|
2251 UBiDiLevel *saveLevels; |
|
2252 UBiDiDirection saveDirection; |
|
2253 UBool saveMayAllocateText; |
|
2254 Run *runs; |
|
2255 int32_t visualLength, i, j, visualStart, logicalStart, |
|
2256 runCount, runLength, addedRuns, insertRemove, |
|
2257 start, limit, step, indexOddBit, logicalPos, |
|
2258 index0, index1; |
|
2259 uint32_t saveOptions; |
|
2260 |
|
2261 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; |
|
2262 if(length==0) { |
|
2263 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); |
|
2264 goto cleanup3; |
|
2265 } |
|
2266 /* obtain memory for mapping table and visual text */ |
|
2267 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); |
|
2268 if(runsOnlyMemory==NULL) { |
|
2269 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
2270 goto cleanup3; |
|
2271 } |
|
2272 visualMap=runsOnlyMemory; |
|
2273 visualText=(UChar *)&visualMap[length]; |
|
2274 saveLevels=(UBiDiLevel *)&visualText[length]; |
|
2275 saveOptions=pBiDi->reorderingOptions; |
|
2276 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { |
|
2277 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; |
|
2278 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; |
|
2279 } |
|
2280 paraLevel&=1; /* accept only 0 or 1 */ |
|
2281 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); |
|
2282 if(U_FAILURE(*pErrorCode)) { |
|
2283 goto cleanup3; |
|
2284 } |
|
2285 /* we cannot access directly pBiDi->levels since it is not yet set if |
|
2286 * direction is not MIXED |
|
2287 */ |
|
2288 levels=ubidi_getLevels(pBiDi, pErrorCode); |
|
2289 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); |
|
2290 saveTrailingWSStart=pBiDi->trailingWSStart; |
|
2291 saveLength=pBiDi->length; |
|
2292 saveDirection=pBiDi->direction; |
|
2293 |
|
2294 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use |
|
2295 * the visual map and the dirProps array to drive the second call |
|
2296 * to ubidi_setPara (but must make provision for possible removal of |
|
2297 * BiDi controls. Alternatively, only use the dirProps array via |
|
2298 * customized classifier callback. |
|
2299 */ |
|
2300 visualLength=ubidi_writeReordered(pBiDi, visualText, length, |
|
2301 UBIDI_DO_MIRRORING, pErrorCode); |
|
2302 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); |
|
2303 if(U_FAILURE(*pErrorCode)) { |
|
2304 goto cleanup2; |
|
2305 } |
|
2306 pBiDi->reorderingOptions=saveOptions; |
|
2307 |
|
2308 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; |
|
2309 paraLevel^=1; |
|
2310 /* Because what we did with reorderingOptions, visualText may be shorter |
|
2311 * than the original text. But we don't want the levels memory to be |
|
2312 * reallocated shorter than the original length, since we need to restore |
|
2313 * the levels as after the first call to ubidi_setpara() before returning. |
|
2314 * We will force mayAllocateText to FALSE before the second call to |
|
2315 * ubidi_setpara(), and will restore it afterwards. |
|
2316 */ |
|
2317 saveMayAllocateText=pBiDi->mayAllocateText; |
|
2318 pBiDi->mayAllocateText=FALSE; |
|
2319 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); |
|
2320 pBiDi->mayAllocateText=saveMayAllocateText; |
|
2321 ubidi_getRuns(pBiDi, pErrorCode); |
|
2322 if(U_FAILURE(*pErrorCode)) { |
|
2323 goto cleanup1; |
|
2324 } |
|
2325 /* check if some runs must be split, count how many splits */ |
|
2326 addedRuns=0; |
|
2327 runCount=pBiDi->runCount; |
|
2328 runs=pBiDi->runs; |
|
2329 visualStart=0; |
|
2330 for(i=0; i<runCount; i++, visualStart+=runLength) { |
|
2331 runLength=runs[i].visualLimit-visualStart; |
|
2332 if(runLength<2) { |
|
2333 continue; |
|
2334 } |
|
2335 logicalStart=GET_INDEX(runs[i].logicalStart); |
|
2336 for(j=logicalStart+1; j<logicalStart+runLength; j++) { |
|
2337 index0=visualMap[j]; |
|
2338 index1=visualMap[j-1]; |
|
2339 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { |
|
2340 addedRuns++; |
|
2341 } |
|
2342 } |
|
2343 } |
|
2344 if(addedRuns) { |
|
2345 if(getRunsMemory(pBiDi, runCount+addedRuns)) { |
|
2346 if(runCount==1) { |
|
2347 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ |
|
2348 pBiDi->runsMemory[0]=runs[0]; |
|
2349 } |
|
2350 runs=pBiDi->runs=pBiDi->runsMemory; |
|
2351 pBiDi->runCount+=addedRuns; |
|
2352 } else { |
|
2353 goto cleanup1; |
|
2354 } |
|
2355 } |
|
2356 /* split runs which are not consecutive in source text */ |
|
2357 for(i=runCount-1; i>=0; i--) { |
|
2358 runLength= i==0 ? runs[0].visualLimit : |
|
2359 runs[i].visualLimit-runs[i-1].visualLimit; |
|
2360 logicalStart=runs[i].logicalStart; |
|
2361 indexOddBit=GET_ODD_BIT(logicalStart); |
|
2362 logicalStart=GET_INDEX(logicalStart); |
|
2363 if(runLength<2) { |
|
2364 if(addedRuns) { |
|
2365 runs[i+addedRuns]=runs[i]; |
|
2366 } |
|
2367 logicalPos=visualMap[logicalStart]; |
|
2368 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, |
|
2369 saveLevels[logicalPos]^indexOddBit); |
|
2370 continue; |
|
2371 } |
|
2372 if(indexOddBit) { |
|
2373 start=logicalStart; |
|
2374 limit=logicalStart+runLength-1; |
|
2375 step=1; |
|
2376 } else { |
|
2377 start=logicalStart+runLength-1; |
|
2378 limit=logicalStart; |
|
2379 step=-1; |
|
2380 } |
|
2381 for(j=start; j!=limit; j+=step) { |
|
2382 index0=visualMap[j]; |
|
2383 index1=visualMap[j+step]; |
|
2384 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { |
|
2385 logicalPos=BIDI_MIN(visualMap[start], index0); |
|
2386 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, |
|
2387 saveLevels[logicalPos]^indexOddBit); |
|
2388 runs[i+addedRuns].visualLimit=runs[i].visualLimit; |
|
2389 runs[i].visualLimit-=BIDI_ABS(j-start)+1; |
|
2390 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); |
|
2391 runs[i+addedRuns].insertRemove=insertRemove; |
|
2392 runs[i].insertRemove&=~insertRemove; |
|
2393 start=j+step; |
|
2394 addedRuns--; |
|
2395 } |
|
2396 } |
|
2397 if(addedRuns) { |
|
2398 runs[i+addedRuns]=runs[i]; |
|
2399 } |
|
2400 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); |
|
2401 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, |
|
2402 saveLevels[logicalPos]^indexOddBit); |
|
2403 } |
|
2404 |
|
2405 cleanup1: |
|
2406 /* restore initial paraLevel */ |
|
2407 pBiDi->paraLevel^=1; |
|
2408 cleanup2: |
|
2409 /* restore real text */ |
|
2410 pBiDi->text=text; |
|
2411 pBiDi->length=saveLength; |
|
2412 pBiDi->originalLength=length; |
|
2413 pBiDi->direction=saveDirection; |
|
2414 /* the saved levels should never excess levelsSize, but we check anyway */ |
|
2415 if(saveLength>pBiDi->levelsSize) { |
|
2416 saveLength=pBiDi->levelsSize; |
|
2417 } |
|
2418 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); |
|
2419 pBiDi->trailingWSStart=saveTrailingWSStart; |
|
2420 /* free memory for mapping table and visual text */ |
|
2421 uprv_free(runsOnlyMemory); |
|
2422 if(pBiDi->runCount>1) { |
|
2423 pBiDi->direction=UBIDI_MIXED; |
|
2424 } |
|
2425 cleanup3: |
|
2426 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; |
|
2427 } |
|
2428 |
|
2429 /* ubidi_setPara ------------------------------------------------------------ */ |
|
2430 |
|
2431 U_CAPI void U_EXPORT2 |
|
2432 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, |
|
2433 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, |
|
2434 UErrorCode *pErrorCode) { |
|
2435 UBiDiDirection direction; |
|
2436 |
|
2437 /* check the argument values */ |
|
2438 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
2439 if(pBiDi==NULL || text==NULL || length<-1 || |
|
2440 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { |
|
2441 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
2442 return; |
|
2443 } |
|
2444 |
|
2445 if(length==-1) { |
|
2446 length=u_strlen(text); |
|
2447 } |
|
2448 |
|
2449 /* special treatment for RUNS_ONLY mode */ |
|
2450 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { |
|
2451 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); |
|
2452 return; |
|
2453 } |
|
2454 |
|
2455 /* initialize the UBiDi structure */ |
|
2456 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ |
|
2457 pBiDi->text=text; |
|
2458 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; |
|
2459 pBiDi->paraLevel=paraLevel; |
|
2460 pBiDi->direction=paraLevel&1; |
|
2461 pBiDi->paraCount=1; |
|
2462 |
|
2463 pBiDi->dirProps=NULL; |
|
2464 pBiDi->levels=NULL; |
|
2465 pBiDi->runs=NULL; |
|
2466 pBiDi->insertPoints.size=0; /* clean up from last call */ |
|
2467 pBiDi->insertPoints.confirmed=0; /* clean up from last call */ |
|
2468 |
|
2469 /* |
|
2470 * Save the original paraLevel if contextual; otherwise, set to 0. |
|
2471 */ |
|
2472 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); |
|
2473 |
|
2474 if(length==0) { |
|
2475 /* |
|
2476 * For an empty paragraph, create a UBiDi object with the paraLevel and |
|
2477 * the flags and the direction set but without allocating zero-length arrays. |
|
2478 * There is nothing more to do. |
|
2479 */ |
|
2480 if(IS_DEFAULT_LEVEL(paraLevel)) { |
|
2481 pBiDi->paraLevel&=1; |
|
2482 pBiDi->defaultParaLevel=0; |
|
2483 } |
|
2484 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); |
|
2485 pBiDi->runCount=0; |
|
2486 pBiDi->paraCount=0; |
|
2487 setParaSuccess(pBiDi); /* mark successful setPara */ |
|
2488 return; |
|
2489 } |
|
2490 |
|
2491 pBiDi->runCount=-1; |
|
2492 |
|
2493 /* allocate paras memory */ |
|
2494 if(pBiDi->parasMemory) |
|
2495 pBiDi->paras=pBiDi->parasMemory; |
|
2496 else |
|
2497 pBiDi->paras=pBiDi->simpleParas; |
|
2498 |
|
2499 /* |
|
2500 * Get the directional properties, |
|
2501 * the flags bit-set, and |
|
2502 * determine the paragraph level if necessary. |
|
2503 */ |
|
2504 if(getDirPropsMemory(pBiDi, length)) { |
|
2505 pBiDi->dirProps=pBiDi->dirPropsMemory; |
|
2506 if(!getDirProps(pBiDi)) { |
|
2507 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
2508 return; |
|
2509 } |
|
2510 } else { |
|
2511 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
2512 return; |
|
2513 } |
|
2514 /* the processed length may have changed if UBIDI_OPTION_STREAMING */ |
|
2515 length= pBiDi->length; |
|
2516 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ |
|
2517 |
|
2518 /* are explicit levels specified? */ |
|
2519 if(embeddingLevels==NULL) { |
|
2520 /* no: determine explicit levels according to the (Xn) rules */\ |
|
2521 if(getLevelsMemory(pBiDi, length)) { |
|
2522 pBiDi->levels=pBiDi->levelsMemory; |
|
2523 direction=resolveExplicitLevels(pBiDi, pErrorCode); |
|
2524 if(U_FAILURE(*pErrorCode)) { |
|
2525 return; |
|
2526 } |
|
2527 } else { |
|
2528 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
2529 return; |
|
2530 } |
|
2531 } else { |
|
2532 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ |
|
2533 pBiDi->levels=embeddingLevels; |
|
2534 direction=checkExplicitLevels(pBiDi, pErrorCode); |
|
2535 if(U_FAILURE(*pErrorCode)) { |
|
2536 return; |
|
2537 } |
|
2538 } |
|
2539 |
|
2540 /* allocate isolate memory */ |
|
2541 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE) |
|
2542 pBiDi->isolates=pBiDi->simpleIsolates; |
|
2543 else |
|
2544 if(pBiDi->isolateCount<=pBiDi->isolatesSize) |
|
2545 pBiDi->isolates=pBiDi->isolatesMemory; |
|
2546 else { |
|
2547 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { |
|
2548 pBiDi->isolates=pBiDi->isolatesMemory; |
|
2549 } else { |
|
2550 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
|
2551 return; |
|
2552 } |
|
2553 } |
|
2554 pBiDi->isolateCount=-1; /* current isolates stack entry == none */ |
|
2555 |
|
2556 /* |
|
2557 * The steps after (X9) in the UBiDi algorithm are performed only if |
|
2558 * the paragraph text has mixed directionality! |
|
2559 */ |
|
2560 pBiDi->direction=direction; |
|
2561 switch(direction) { |
|
2562 case UBIDI_LTR: |
|
2563 /* make sure paraLevel is even */ |
|
2564 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); |
|
2565 |
|
2566 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ |
|
2567 pBiDi->trailingWSStart=0; |
|
2568 break; |
|
2569 case UBIDI_RTL: |
|
2570 /* make sure paraLevel is odd */ |
|
2571 pBiDi->paraLevel|=1; |
|
2572 |
|
2573 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ |
|
2574 pBiDi->trailingWSStart=0; |
|
2575 break; |
|
2576 default: |
|
2577 /* |
|
2578 * Choose the right implicit state table |
|
2579 */ |
|
2580 switch(pBiDi->reorderingMode) { |
|
2581 case UBIDI_REORDER_DEFAULT: |
|
2582 pBiDi->pImpTabPair=&impTab_DEFAULT; |
|
2583 break; |
|
2584 case UBIDI_REORDER_NUMBERS_SPECIAL: |
|
2585 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; |
|
2586 break; |
|
2587 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: |
|
2588 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; |
|
2589 break; |
|
2590 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: |
|
2591 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; |
|
2592 break; |
|
2593 case UBIDI_REORDER_INVERSE_LIKE_DIRECT: |
|
2594 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { |
|
2595 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; |
|
2596 } else { |
|
2597 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; |
|
2598 } |
|
2599 break; |
|
2600 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: |
|
2601 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { |
|
2602 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; |
|
2603 } else { |
|
2604 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; |
|
2605 } |
|
2606 break; |
|
2607 default: |
|
2608 /* we should never get here */ |
|
2609 U_ASSERT(FALSE); |
|
2610 break; |
|
2611 } |
|
2612 /* |
|
2613 * If there are no external levels specified and there |
|
2614 * are no significant explicit level codes in the text, |
|
2615 * then we can treat the entire paragraph as one run. |
|
2616 * Otherwise, we need to perform the following rules on runs of |
|
2617 * the text with the same embedding levels. (X10) |
|
2618 * "Significant" explicit level codes are ones that actually |
|
2619 * affect non-BN characters. |
|
2620 * Examples for "insignificant" ones are empty embeddings |
|
2621 * LRE-PDF, LRE-RLE-PDF-PDF, etc. |
|
2622 */ |
|
2623 if(embeddingLevels==NULL && pBiDi->paraCount<=1 && |
|
2624 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { |
|
2625 resolveImplicitLevels(pBiDi, 0, length, |
|
2626 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), |
|
2627 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); |
|
2628 } else { |
|
2629 /* sor, eor: start and end types of same-level-run */ |
|
2630 UBiDiLevel *levels=pBiDi->levels; |
|
2631 int32_t start, limit=0; |
|
2632 UBiDiLevel level, nextLevel; |
|
2633 DirProp sor, eor; |
|
2634 |
|
2635 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ |
|
2636 level=GET_PARALEVEL(pBiDi, 0); |
|
2637 nextLevel=levels[0]; |
|
2638 if(level<nextLevel) { |
|
2639 eor=GET_LR_FROM_LEVEL(nextLevel); |
|
2640 } else { |
|
2641 eor=GET_LR_FROM_LEVEL(level); |
|
2642 } |
|
2643 |
|
2644 do { |
|
2645 /* determine start and limit of the run (end points just behind the run) */ |
|
2646 |
|
2647 /* the values for this run's start are the same as for the previous run's end */ |
|
2648 start=limit; |
|
2649 level=nextLevel; |
|
2650 if((start>0) && (pBiDi->dirProps[start-1]==B)) { |
|
2651 /* except if this is a new paragraph, then set sor = para level */ |
|
2652 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); |
|
2653 } else { |
|
2654 sor=eor; |
|
2655 } |
|
2656 |
|
2657 /* search for the limit of this run */ |
|
2658 while(++limit<length && levels[limit]==level) {} |
|
2659 |
|
2660 /* get the correct level of the next run */ |
|
2661 if(limit<length) { |
|
2662 nextLevel=levels[limit]; |
|
2663 } else { |
|
2664 nextLevel=GET_PARALEVEL(pBiDi, length-1); |
|
2665 } |
|
2666 |
|
2667 /* determine eor from max(level, nextLevel); sor is last run's eor */ |
|
2668 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { |
|
2669 eor=GET_LR_FROM_LEVEL(nextLevel); |
|
2670 } else { |
|
2671 eor=GET_LR_FROM_LEVEL(level); |
|
2672 } |
|
2673 |
|
2674 /* if the run consists of overridden directional types, then there |
|
2675 are no implicit types to be resolved */ |
|
2676 if(!(level&UBIDI_LEVEL_OVERRIDE)) { |
|
2677 resolveImplicitLevels(pBiDi, start, limit, sor, eor); |
|
2678 } else { |
|
2679 /* remove the UBIDI_LEVEL_OVERRIDE flags */ |
|
2680 do { |
|
2681 levels[start++]&=~UBIDI_LEVEL_OVERRIDE; |
|
2682 } while(start<limit); |
|
2683 } |
|
2684 } while(limit<length); |
|
2685 } |
|
2686 /* check if we got any memory shortage while adding insert points */ |
|
2687 if (U_FAILURE(pBiDi->insertPoints.errorCode)) |
|
2688 { |
|
2689 *pErrorCode=pBiDi->insertPoints.errorCode; |
|
2690 return; |
|
2691 } |
|
2692 /* reset the embedding levels for some non-graphic characters (L1), (X9) */ |
|
2693 adjustWSLevels(pBiDi); |
|
2694 break; |
|
2695 } |
|
2696 /* add RLM for inverse Bidi with contextual orientation resolving |
|
2697 * to RTL which would not round-trip otherwise |
|
2698 */ |
|
2699 if((pBiDi->defaultParaLevel>0) && |
|
2700 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && |
|
2701 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || |
|
2702 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { |
|
2703 int32_t i, j, start, last; |
|
2704 UBiDiLevel level; |
|
2705 DirProp dirProp; |
|
2706 for(i=0; i<pBiDi->paraCount; i++) { |
|
2707 last=(pBiDi->paras[i].limit)-1; |
|
2708 level=pBiDi->paras[i].level; |
|
2709 if(level==0) |
|
2710 continue; /* LTR paragraph */ |
|
2711 start= i==0 ? 0 : pBiDi->paras[i-1].limit; |
|
2712 for(j=last; j>=start; j--) { |
|
2713 dirProp=pBiDi->dirProps[j]; |
|
2714 if(dirProp==L) { |
|
2715 if(j<last) { |
|
2716 while(pBiDi->dirProps[last]==B) { |
|
2717 last--; |
|
2718 } |
|
2719 } |
|
2720 addPoint(pBiDi, last, RLM_BEFORE); |
|
2721 break; |
|
2722 } |
|
2723 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { |
|
2724 break; |
|
2725 } |
|
2726 } |
|
2727 } |
|
2728 } |
|
2729 |
|
2730 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { |
|
2731 pBiDi->resultLength -= pBiDi->controlCount; |
|
2732 } else { |
|
2733 pBiDi->resultLength += pBiDi->insertPoints.size; |
|
2734 } |
|
2735 setParaSuccess(pBiDi); /* mark successful setPara */ |
|
2736 } |
|
2737 |
|
2738 U_CAPI void U_EXPORT2 |
|
2739 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { |
|
2740 if(pBiDi!=NULL) { |
|
2741 pBiDi->orderParagraphsLTR=orderParagraphsLTR; |
|
2742 } |
|
2743 } |
|
2744 |
|
2745 U_CAPI UBool U_EXPORT2 |
|
2746 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { |
|
2747 if(pBiDi!=NULL) { |
|
2748 return pBiDi->orderParagraphsLTR; |
|
2749 } else { |
|
2750 return FALSE; |
|
2751 } |
|
2752 } |
|
2753 |
|
2754 U_CAPI UBiDiDirection U_EXPORT2 |
|
2755 ubidi_getDirection(const UBiDi *pBiDi) { |
|
2756 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2757 return pBiDi->direction; |
|
2758 } else { |
|
2759 return UBIDI_LTR; |
|
2760 } |
|
2761 } |
|
2762 |
|
2763 U_CAPI const UChar * U_EXPORT2 |
|
2764 ubidi_getText(const UBiDi *pBiDi) { |
|
2765 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2766 return pBiDi->text; |
|
2767 } else { |
|
2768 return NULL; |
|
2769 } |
|
2770 } |
|
2771 |
|
2772 U_CAPI int32_t U_EXPORT2 |
|
2773 ubidi_getLength(const UBiDi *pBiDi) { |
|
2774 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2775 return pBiDi->originalLength; |
|
2776 } else { |
|
2777 return 0; |
|
2778 } |
|
2779 } |
|
2780 |
|
2781 U_CAPI int32_t U_EXPORT2 |
|
2782 ubidi_getProcessedLength(const UBiDi *pBiDi) { |
|
2783 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2784 return pBiDi->length; |
|
2785 } else { |
|
2786 return 0; |
|
2787 } |
|
2788 } |
|
2789 |
|
2790 U_CAPI int32_t U_EXPORT2 |
|
2791 ubidi_getResultLength(const UBiDi *pBiDi) { |
|
2792 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2793 return pBiDi->resultLength; |
|
2794 } else { |
|
2795 return 0; |
|
2796 } |
|
2797 } |
|
2798 |
|
2799 /* paragraphs API functions ------------------------------------------------- */ |
|
2800 |
|
2801 U_CAPI UBiDiLevel U_EXPORT2 |
|
2802 ubidi_getParaLevel(const UBiDi *pBiDi) { |
|
2803 if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2804 return pBiDi->paraLevel; |
|
2805 } else { |
|
2806 return 0; |
|
2807 } |
|
2808 } |
|
2809 |
|
2810 U_CAPI int32_t U_EXPORT2 |
|
2811 ubidi_countParagraphs(UBiDi *pBiDi) { |
|
2812 if(!IS_VALID_PARA_OR_LINE(pBiDi)) { |
|
2813 return 0; |
|
2814 } else { |
|
2815 return pBiDi->paraCount; |
|
2816 } |
|
2817 } |
|
2818 |
|
2819 U_CAPI void U_EXPORT2 |
|
2820 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, |
|
2821 int32_t *pParaStart, int32_t *pParaLimit, |
|
2822 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { |
|
2823 int32_t paraStart; |
|
2824 |
|
2825 /* check the argument values */ |
|
2826 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
2827 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); |
|
2828 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); |
|
2829 |
|
2830 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ |
|
2831 if(paraIndex) { |
|
2832 paraStart=pBiDi->paras[paraIndex-1].limit; |
|
2833 } else { |
|
2834 paraStart=0; |
|
2835 } |
|
2836 if(pParaStart!=NULL) { |
|
2837 *pParaStart=paraStart; |
|
2838 } |
|
2839 if(pParaLimit!=NULL) { |
|
2840 *pParaLimit=pBiDi->paras[paraIndex].limit; |
|
2841 } |
|
2842 if(pParaLevel!=NULL) { |
|
2843 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); |
|
2844 } |
|
2845 } |
|
2846 |
|
2847 U_CAPI int32_t U_EXPORT2 |
|
2848 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, |
|
2849 int32_t *pParaStart, int32_t *pParaLimit, |
|
2850 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { |
|
2851 int32_t paraIndex; |
|
2852 |
|
2853 /* check the argument values */ |
|
2854 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ |
|
2855 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); |
|
2856 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); |
|
2857 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ |
|
2858 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); |
|
2859 |
|
2860 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); |
|
2861 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); |
|
2862 return paraIndex; |
|
2863 } |
|
2864 |
|
2865 U_CAPI void U_EXPORT2 |
|
2866 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, |
|
2867 const void *newContext, UBiDiClassCallback **oldFn, |
|
2868 const void **oldContext, UErrorCode *pErrorCode) |
|
2869 { |
|
2870 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
|
2871 if(pBiDi==NULL) { |
|
2872 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
|
2873 return; |
|
2874 } |
|
2875 if( oldFn ) |
|
2876 { |
|
2877 *oldFn = pBiDi->fnClassCallback; |
|
2878 } |
|
2879 if( oldContext ) |
|
2880 { |
|
2881 *oldContext = pBiDi->coClassCallback; |
|
2882 } |
|
2883 pBiDi->fnClassCallback = newFn; |
|
2884 pBiDi->coClassCallback = newContext; |
|
2885 } |
|
2886 |
|
2887 U_CAPI void U_EXPORT2 |
|
2888 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) |
|
2889 { |
|
2890 if(pBiDi==NULL) { |
|
2891 return; |
|
2892 } |
|
2893 if( fn ) |
|
2894 { |
|
2895 *fn = pBiDi->fnClassCallback; |
|
2896 } |
|
2897 if( context ) |
|
2898 { |
|
2899 *context = pBiDi->coClassCallback; |
|
2900 } |
|
2901 } |
|
2902 |
|
2903 U_CAPI UCharDirection U_EXPORT2 |
|
2904 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) |
|
2905 { |
|
2906 UCharDirection dir; |
|
2907 |
|
2908 if( pBiDi->fnClassCallback == NULL || |
|
2909 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) |
|
2910 { |
|
2911 dir = ubidi_getClass(pBiDi->bdp, c); |
|
2912 } |
|
2913 if(dir >= U_CHAR_DIRECTION_COUNT) { |
|
2914 dir = ON; |
|
2915 } |
|
2916 return dir; |
|
2917 } |