1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ubidi.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2917 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* 1.7 +* Copyright (C) 1999-2013, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +****************************************************************************** 1.11 +* file name: ubidi.c 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 1999jul27 1.17 +* created by: Markus W. Scherer, updated by Matitiahu Allouche 1.18 +* 1.19 +*/ 1.20 + 1.21 +#include "cmemory.h" 1.22 +#include "unicode/utypes.h" 1.23 +#include "unicode/ustring.h" 1.24 +#include "unicode/uchar.h" 1.25 +#include "unicode/ubidi.h" 1.26 +#include "unicode/utf16.h" 1.27 +#include "ubidi_props.h" 1.28 +#include "ubidiimp.h" 1.29 +#include "uassert.h" 1.30 + 1.31 +/* 1.32 + * General implementation notes: 1.33 + * 1.34 + * Throughout the implementation, there are comments like (W2) that refer to 1.35 + * rules of the BiDi algorithm in its version 5, in this example to the second 1.36 + * rule of the resolution of weak types. 1.37 + * 1.38 + * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) 1.39 + * character according to UTF-16, the second UChar gets the directional property of 1.40 + * the entire character assigned, while the first one gets a BN, a boundary 1.41 + * neutral, type, which is ignored by most of the algorithm according to 1.42 + * rule (X9) and the implementation suggestions of the BiDi algorithm. 1.43 + * 1.44 + * Later, adjustWSLevels() will set the level for each BN to that of the 1.45 + * following character (UChar), which results in surrogate pairs getting the 1.46 + * same level on each of their surrogates. 
1.47 + * 1.48 + * In a UTF-8 implementation, the same thing could be done: the last byte of 1.49 + * a multi-byte sequence would get the "real" property, while all previous 1.50 + * bytes of that sequence would get BN. 1.51 + * 1.52 + * It is not possible to assign all those parts of a character the same real 1.53 + * property because this would fail in the resolution of weak types with rules 1.54 + * that look at immediately surrounding types. 1.55 + * 1.56 + * As a related topic, this implementation does not remove Boundary Neutral 1.57 + * types from the input, but ignores them wherever this is relevant. 1.58 + * For example, the loop for the resolution of the weak types reads 1.59 + * types until it finds a non-BN. 1.60 + * Also, explicit embedding codes are neither changed into BN nor removed. 1.61 + * They are only treated the same way real BNs are. 1.62 + * As stated before, adjustWSLevels() takes care of them at the end. 1.63 + * For the purpose of conformance, the levels of all these codes 1.64 + * do not matter. 1.65 + * 1.66 + * Note that this implementation never modifies the dirProps 1.67 + * after the initial setup, except for FSI which is changed to either 1.68 + * LRI or RLI in getDirProps(), and paired brackets which may be changed 1.69 + * to L or R according to N0. 1.70 + * 1.71 + * 1.72 + * In this implementation, the resolution of weak types (Wn), 1.73 + * neutrals (Nn), and the assignment of the resolved level (In) 1.74 + * are all done in one single loop, in resolveImplicitLevels(). 1.75 + * Changes of dirProp values are done on the fly, without writing 1.76 + * them back to the dirProps array. 1.77 + * 1.78 + * 1.79 + * This implementation contains code that allows to bypass steps of the 1.80 + * algorithm that are not needed on the specific paragraph 1.81 + * in order to speed up the most common cases considerably, 1.82 + * like text that is entirely LTR, or RTL text without numbers. 
1.83 + * 1.84 + * Most of this is done by setting a bit for each directional property 1.85 + * in a flags variable and later checking for whether there are 1.86 + * any LTR characters or any RTL characters, or both, whether 1.87 + * there are any explicit embedding codes, etc. 1.88 + * 1.89 + * If the (Xn) steps are performed, then the flags are re-evaluated, 1.90 + * because they will then not contain the embedding codes any more 1.91 + * and will be adjusted for override codes, so that subsequently 1.92 + * more bypassing may be possible than what the initial flags suggested. 1.93 + * 1.94 + * If the text is not mixed-directional, then the 1.95 + * algorithm steps for the weak type resolution are not performed, 1.96 + * and all levels are set to the paragraph level. 1.97 + * 1.98 + * If there are no explicit embedding codes, then the (Xn) steps 1.99 + * are not performed. 1.100 + * 1.101 + * If embedding levels are supplied as a parameter, then all 1.102 + * explicit embedding codes are ignored, and the (Xn) steps 1.103 + * are not performed. 1.104 + * 1.105 + * White Space types could get the level of the run they belong to, 1.106 + * and are checked with a test of (flags&MASK_EMBEDDING) to 1.107 + * consider if the paragraph direction should be considered in 1.108 + * the flags variable. 1.109 + * 1.110 + * If there are no White Space types in the paragraph, then 1.111 + * (L1) is not necessary in adjustWSLevels(). 1.112 + */ 1.113 + 1.114 +/* to avoid some conditional statements, use tiny constant arrays */ 1.115 +static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; 1.116 +static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; 1.117 +static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; 1.118 + 1.119 +#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] 1.120 +#define DIRPROP_FLAG_E(level) flagE[(level)&1] 1.121 +#define DIRPROP_FLAG_O(level) flagO[(level)&1] 1.122 + 1.123 +#define DIR_FROM_STRONG(strong) ((strong)==L ? 
L : R) 1.124 + 1.125 +/* UBiDi object management -------------------------------------------------- */ 1.126 + 1.127 +U_CAPI UBiDi * U_EXPORT2 1.128 +ubidi_open(void) 1.129 +{ 1.130 + UErrorCode errorCode=U_ZERO_ERROR; 1.131 + return ubidi_openSized(0, 0, &errorCode); 1.132 +} 1.133 + 1.134 +U_CAPI UBiDi * U_EXPORT2 1.135 +ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { 1.136 + UBiDi *pBiDi; 1.137 + 1.138 + /* check the argument values */ 1.139 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.140 + return NULL; 1.141 + } else if(maxLength<0 || maxRunCount<0) { 1.142 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.143 + return NULL; /* invalid arguments */ 1.144 + } 1.145 + 1.146 + /* allocate memory for the object */ 1.147 + pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); 1.148 + if(pBiDi==NULL) { 1.149 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.150 + return NULL; 1.151 + } 1.152 + 1.153 + /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ 1.154 + uprv_memset(pBiDi, 0, sizeof(UBiDi)); 1.155 + 1.156 + /* get BiDi properties */ 1.157 + pBiDi->bdp=ubidi_getSingleton(); 1.158 + 1.159 + /* allocate memory for arrays as requested */ 1.160 + if(maxLength>0) { 1.161 + if( !getInitialDirPropsMemory(pBiDi, maxLength) || 1.162 + !getInitialLevelsMemory(pBiDi, maxLength) 1.163 + ) { 1.164 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.165 + } 1.166 + } else { 1.167 + pBiDi->mayAllocateText=TRUE; 1.168 + } 1.169 + 1.170 + if(maxRunCount>0) { 1.171 + if(maxRunCount==1) { 1.172 + /* use simpleRuns[] */ 1.173 + pBiDi->runsSize=sizeof(Run); 1.174 + } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { 1.175 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.176 + } 1.177 + } else { 1.178 + pBiDi->mayAllocateRuns=TRUE; 1.179 + } 1.180 + 1.181 + if(U_SUCCESS(*pErrorCode)) { 1.182 + return pBiDi; 1.183 + } else { 1.184 + ubidi_close(pBiDi); 1.185 + return NULL; 1.186 + } 1.187 +} 1.188 + 1.189 +/* 1.190 + * We are allowed to allocate memory if 
memory==NULL or 1.191 + * mayAllocate==TRUE for each array that we need. 1.192 + * We also try to grow memory as needed if we 1.193 + * allocate it. 1.194 + * 1.195 + * Assume sizeNeeded>0. 1.196 + * If *pMemory!=NULL, then assume *pSize>0. 1.197 + * 1.198 + * ### this realloc() may unnecessarily copy the old data, 1.199 + * which we know we don't need any more; 1.200 + * is this the best way to do this?? 1.201 + */ 1.202 +U_CFUNC UBool 1.203 +ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { 1.204 + void **pMemory = (void **)bidiMem; 1.205 + /* check for existing memory */ 1.206 + if(*pMemory==NULL) { 1.207 + /* we need to allocate memory */ 1.208 + if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { 1.209 + *pSize=sizeNeeded; 1.210 + return TRUE; 1.211 + } else { 1.212 + return FALSE; 1.213 + } 1.214 + } else { 1.215 + if(sizeNeeded<=*pSize) { 1.216 + /* there is already enough memory */ 1.217 + return TRUE; 1.218 + } 1.219 + else if(!mayAllocate) { 1.220 + /* not enough memory, and we must not allocate */ 1.221 + return FALSE; 1.222 + } else { 1.223 + /* we try to grow */ 1.224 + void *memory; 1.225 + /* in most cases, we do not need the copy-old-data part of 1.226 + * realloc, but it is needed when adding runs using getRunsMemory() 1.227 + * in setParaRunsOnly() 1.228 + */ 1.229 + if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { 1.230 + *pMemory=memory; 1.231 + *pSize=sizeNeeded; 1.232 + return TRUE; 1.233 + } else { 1.234 + /* we failed to grow */ 1.235 + return FALSE; 1.236 + } 1.237 + } 1.238 + } 1.239 +} 1.240 + 1.241 +U_CAPI void U_EXPORT2 1.242 +ubidi_close(UBiDi *pBiDi) { 1.243 + if(pBiDi!=NULL) { 1.244 + pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ 1.245 + if(pBiDi->dirPropsMemory!=NULL) { 1.246 + uprv_free(pBiDi->dirPropsMemory); 1.247 + } 1.248 + if(pBiDi->levelsMemory!=NULL) { 1.249 + uprv_free(pBiDi->levelsMemory); 1.250 + } 1.251 + 
if(pBiDi->openingsMemory!=NULL) { 1.252 + uprv_free(pBiDi->openingsMemory); 1.253 + } 1.254 + if(pBiDi->parasMemory!=NULL) { 1.255 + uprv_free(pBiDi->parasMemory); 1.256 + } 1.257 + if(pBiDi->runsMemory!=NULL) { 1.258 + uprv_free(pBiDi->runsMemory); 1.259 + } 1.260 + if(pBiDi->isolatesMemory!=NULL) { 1.261 + uprv_free(pBiDi->isolatesMemory); 1.262 + } 1.263 + if(pBiDi->insertPoints.points!=NULL) { 1.264 + uprv_free(pBiDi->insertPoints.points); 1.265 + } 1.266 + 1.267 + uprv_free(pBiDi); 1.268 + } 1.269 +} 1.270 + 1.271 +/* set to approximate "inverse BiDi" ---------------------------------------- */ 1.272 + 1.273 +U_CAPI void U_EXPORT2 1.274 +ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { 1.275 + if(pBiDi!=NULL) { 1.276 + pBiDi->isInverse=isInverse; 1.277 + pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L 1.278 + : UBIDI_REORDER_DEFAULT; 1.279 + } 1.280 +} 1.281 + 1.282 +U_CAPI UBool U_EXPORT2 1.283 +ubidi_isInverse(UBiDi *pBiDi) { 1.284 + if(pBiDi!=NULL) { 1.285 + return pBiDi->isInverse; 1.286 + } else { 1.287 + return FALSE; 1.288 + } 1.289 +} 1.290 + 1.291 +/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of 1.292 + * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre 1.293 + * concept of RUNS_ONLY which is a double operation. 1.294 + * It could be advantageous to divide this into 3 concepts: 1.295 + * a) Operation: direct / inverse / RUNS_ONLY 1.296 + * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R 1.297 + * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL 1.298 + * This would allow combinations not possible today like RUNS_ONLY with 1.299 + * NUMBERS_SPECIAL. 1.300 + * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and 1.301 + * REMOVE_CONTROLS for the inverse step. 1.302 + * Not all combinations would be supported, and probably not all do make sense. 
1.303 + * This would need to document which ones are supported and what are the 1.304 + * fallbacks for unsupported combinations. 1.305 + */ 1.306 +U_CAPI void U_EXPORT2 1.307 +ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { 1.308 + if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) 1.309 + && (reorderingMode < UBIDI_REORDER_COUNT)) { 1.310 + pBiDi->reorderingMode = reorderingMode; 1.311 + pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); 1.312 + } 1.313 +} 1.314 + 1.315 +U_CAPI UBiDiReorderingMode U_EXPORT2 1.316 +ubidi_getReorderingMode(UBiDi *pBiDi) { 1.317 + if (pBiDi!=NULL) { 1.318 + return pBiDi->reorderingMode; 1.319 + } else { 1.320 + return UBIDI_REORDER_DEFAULT; 1.321 + } 1.322 +} 1.323 + 1.324 +U_CAPI void U_EXPORT2 1.325 +ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { 1.326 + if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 1.327 + reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 1.328 + } 1.329 + if (pBiDi!=NULL) { 1.330 + pBiDi->reorderingOptions=reorderingOptions; 1.331 + } 1.332 +} 1.333 + 1.334 +U_CAPI uint32_t U_EXPORT2 1.335 +ubidi_getReorderingOptions(UBiDi *pBiDi) { 1.336 + if (pBiDi!=NULL) { 1.337 + return pBiDi->reorderingOptions; 1.338 + } else { 1.339 + return 0; 1.340 + } 1.341 +} 1.342 + 1.343 +U_CAPI UBiDiDirection U_EXPORT2 1.344 +ubidi_getBaseDirection(const UChar *text, 1.345 +int32_t length){ 1.346 + 1.347 + int32_t i; 1.348 + UChar32 uchar; 1.349 + UCharDirection dir; 1.350 + 1.351 + if( text==NULL || length<-1 ){ 1.352 + return UBIDI_NEUTRAL; 1.353 + } 1.354 + 1.355 + if(length==-1) { 1.356 + length=u_strlen(text); 1.357 + } 1.358 + 1.359 + for( i = 0 ; i < length; ) { 1.360 + /* i is incremented by U16_NEXT */ 1.361 + U16_NEXT(text, i, length, uchar); 1.362 + dir = u_charDirection(uchar); 1.363 + if( dir == U_LEFT_TO_RIGHT ) 1.364 + return UBIDI_LTR; 1.365 + if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) 1.366 + 
return UBIDI_RTL; 1.367 + } 1.368 + return UBIDI_NEUTRAL; 1.369 +} 1.370 + 1.371 +/* perform (P2)..(P3) ------------------------------------------------------- */ 1.372 + 1.373 +/** 1.374 + * Returns the directionality of the first strong character 1.375 + * after the last B in prologue, if any. 1.376 + * Requires prologue!=null. 1.377 + */ 1.378 +static DirProp 1.379 +firstL_R_AL(UBiDi *pBiDi) { 1.380 + const UChar *text=pBiDi->prologue; 1.381 + int32_t length=pBiDi->proLength; 1.382 + int32_t i; 1.383 + UChar32 uchar; 1.384 + DirProp dirProp, result=ON; 1.385 + for(i=0; i<length; ) { 1.386 + /* i is incremented by U16_NEXT */ 1.387 + U16_NEXT(text, i, length, uchar); 1.388 + dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); 1.389 + if(result==ON) { 1.390 + if(dirProp==L || dirProp==R || dirProp==AL) { 1.391 + result=dirProp; 1.392 + } 1.393 + } else { 1.394 + if(dirProp==B) { 1.395 + result=ON; 1.396 + } 1.397 + } 1.398 + } 1.399 + return result; 1.400 +} 1.401 + 1.402 +/* 1.403 + * Check that there are enough entries in the array pointed to by pBiDi->paras 1.404 + */ 1.405 +static UBool 1.406 +checkParaCount(UBiDi *pBiDi) { 1.407 + int32_t count=pBiDi->paraCount; 1.408 + if(pBiDi->paras==pBiDi->simpleParas) { 1.409 + if(count<=SIMPLE_PARAS_SIZE) 1.410 + return TRUE; 1.411 + if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2)) 1.412 + return FALSE; 1.413 + pBiDi->paras=pBiDi->parasMemory; 1.414 + uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para)); 1.415 + return TRUE; 1.416 + } 1.417 + if(!getInitialParasMemory(pBiDi, count * 2)) 1.418 + return FALSE; 1.419 + pBiDi->paras=pBiDi->parasMemory; 1.420 + return TRUE; 1.421 +} 1.422 + 1.423 +/* 1.424 + * Get the directional properties for the text, calculate the flags bit-set, and 1.425 + * determine the paragraph level if necessary (in pBiDi->paras[i].level). 1.426 + * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. 
1.427 + */ 1.428 +static UBool 1.429 +getDirProps(UBiDi *pBiDi) { 1.430 + const UChar *text=pBiDi->text; 1.431 + DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ 1.432 + 1.433 + int32_t i=0, originalLength=pBiDi->originalLength; 1.434 + Flags flags=0; /* collect all directionalities in the text */ 1.435 + UChar32 uchar; 1.436 + DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ 1.437 + UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); 1.438 + /* for inverse BiDi, the default para level is set to RTL if there is a 1.439 + strong R or AL character at either end of the text */ 1.440 + UBool isDefaultLevelInverse=isDefaultLevel && (UBool) 1.441 + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || 1.442 + pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); 1.443 + int32_t lastArabicPos=-1; 1.444 + int32_t controlCount=0; 1.445 + UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & 1.446 + UBIDI_OPTION_REMOVE_CONTROLS); 1.447 + 1.448 + typedef enum { 1.449 + NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ 1.450 + SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ 1.451 + SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ 1.452 + LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ 1.453 + } State; 1.454 + State state; 1.455 + DirProp lastStrong=ON; /* for default level & inverse BiDi */ 1.456 + /* The following stacks are used to manage isolate sequences. Those 1.457 + sequences may be nested, but obviously never more deeply than the 1.458 + maximum explicit embedding level. 1.459 + lastStack is the index of the last used entry in the stack. A value of -1 1.460 + means that there is no open isolate sequence. 1.461 + lastStack is reset to -1 on paragraph boundaries. 
*/ 1.462 + /* The following stack contains the position of the initiator of 1.463 + each open isolate sequence */ 1.464 + int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; 1.465 + /* The following stack contains the last known state before 1.466 + encountering the initiator of an isolate sequence */ 1.467 + int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; 1.468 + int32_t stackLast=-1; 1.469 + 1.470 + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) 1.471 + pBiDi->length=0; 1.472 + defaultParaLevel=pBiDi->paraLevel&1; 1.473 + if(isDefaultLevel) { 1.474 + pBiDi->paras[0].level=defaultParaLevel; 1.475 + lastStrong=defaultParaLevel; 1.476 + if(pBiDi->proLength>0 && /* there is a prologue */ 1.477 + (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ 1.478 + if(dirProp==L) 1.479 + pBiDi->paras[0].level=0; /* set the default para level */ 1.480 + else 1.481 + pBiDi->paras[0].level=1; /* set the default para level */ 1.482 + state=NOT_SEEKING_STRONG; 1.483 + } else { 1.484 + state=SEEKING_STRONG_FOR_PARA; 1.485 + } 1.486 + } else { 1.487 + pBiDi->paras[0].level=pBiDi->paraLevel; 1.488 + state=NOT_SEEKING_STRONG; 1.489 + } 1.490 + /* count paragraphs and determine the paragraph level (P2..P3) */ 1.491 + /* 1.492 + * see comment in ubidi.h: 1.493 + * the UBIDI_DEFAULT_XXX values are designed so that 1.494 + * their bit 0 alone yields the intended default 1.495 + */ 1.496 + for( /* i=0 above */ ; i<originalLength; ) { 1.497 + /* i is incremented by U16_NEXT */ 1.498 + U16_NEXT(text, i, originalLength, uchar); 1.499 + flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); 1.500 + dirProps[i-1]=dirProp; 1.501 + if(uchar>0xffff) { /* set the lead surrogate's property to BN */ 1.502 + flags|=DIRPROP_FLAG(BN); 1.503 + dirProps[i-2]=BN; 1.504 + } 1.505 + if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) 1.506 + controlCount++; 1.507 + if(dirProp==L) { 1.508 + if(state==SEEKING_STRONG_FOR_PARA) { 1.509 + 
pBiDi->paras[pBiDi->paraCount-1].level=0; 1.510 + state=NOT_SEEKING_STRONG; 1.511 + } 1.512 + else if(state==SEEKING_STRONG_FOR_FSI) { 1.513 + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 1.514 + dirProps[isolateStartStack[stackLast]]=LRI; 1.515 + flags|=DIRPROP_FLAG(LRI); 1.516 + } 1.517 + state=LOOKING_FOR_PDI; 1.518 + } 1.519 + lastStrong=L; 1.520 + continue; 1.521 + } 1.522 + if(dirProp==R || dirProp==AL) { 1.523 + if(state==SEEKING_STRONG_FOR_PARA) { 1.524 + pBiDi->paras[pBiDi->paraCount-1].level=1; 1.525 + state=NOT_SEEKING_STRONG; 1.526 + } 1.527 + else if(state==SEEKING_STRONG_FOR_FSI) { 1.528 + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 1.529 + dirProps[isolateStartStack[stackLast]]=RLI; 1.530 + flags|=DIRPROP_FLAG(RLI); 1.531 + } 1.532 + state=LOOKING_FOR_PDI; 1.533 + } 1.534 + lastStrong=R; 1.535 + if(dirProp==AL) 1.536 + lastArabicPos=i-1; 1.537 + continue; 1.538 + } 1.539 + if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ 1.540 + stackLast++; 1.541 + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 1.542 + isolateStartStack[stackLast]=i-1; 1.543 + previousStateStack[stackLast]=state; 1.544 + } 1.545 + if(dirProp==FSI) 1.546 + state=SEEKING_STRONG_FOR_FSI; 1.547 + else 1.548 + state=LOOKING_FOR_PDI; 1.549 + continue; 1.550 + } 1.551 + if(dirProp==PDI) { 1.552 + if(state==SEEKING_STRONG_FOR_FSI) { 1.553 + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { 1.554 + dirProps[isolateStartStack[stackLast]]=LRI; 1.555 + flags|=DIRPROP_FLAG(LRI); 1.556 + } 1.557 + } 1.558 + if(stackLast>=0) { 1.559 + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) 1.560 + state=previousStateStack[stackLast]; 1.561 + stackLast--; 1.562 + } 1.563 + continue; 1.564 + } 1.565 + if(dirProp==B) { 1.566 + if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */ 1.567 + continue; 1.568 + pBiDi->paras[pBiDi->paraCount-1].limit=i; 1.569 + if(isDefaultLevelInverse && lastStrong==R) 1.570 + pBiDi->paras[pBiDi->paraCount-1].level=1; 1.571 + if(pBiDi->reorderingOptions & 
UBIDI_OPTION_STREAMING) { 1.572 + /* When streaming, we only process whole paragraphs 1.573 + thus some updates are only done on paragraph boundaries */ 1.574 + pBiDi->length=i; /* i is index to next character */ 1.575 + pBiDi->controlCount=controlCount; 1.576 + } 1.577 + if(i<originalLength) { /* B not last char in text */ 1.578 + pBiDi->paraCount++; 1.579 + if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ 1.580 + return FALSE; 1.581 + if(isDefaultLevel) { 1.582 + pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; 1.583 + state=SEEKING_STRONG_FOR_PARA; 1.584 + lastStrong=defaultParaLevel; 1.585 + } else { 1.586 + pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; 1.587 + state=NOT_SEEKING_STRONG; 1.588 + } 1.589 + stackLast=-1; 1.590 + } 1.591 + continue; 1.592 + } 1.593 + } 1.594 + /* Ignore still open isolate sequences with overflow */ 1.595 + if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { 1.596 + stackLast=UBIDI_MAX_EXPLICIT_LEVEL; 1.597 + if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI) 1.598 + state=LOOKING_FOR_PDI; 1.599 + } 1.600 + /* Resolve direction of still unresolved open FSI sequences */ 1.601 + while(stackLast>=0) { 1.602 + if(state==SEEKING_STRONG_FOR_FSI) { 1.603 + dirProps[isolateStartStack[stackLast]]=LRI; 1.604 + flags|=DIRPROP_FLAG(LRI); 1.605 + } 1.606 + state=previousStateStack[stackLast]; 1.607 + stackLast--; 1.608 + } 1.609 + /* When streaming, ignore text after the last paragraph separator */ 1.610 + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { 1.611 + if(pBiDi->length<originalLength) 1.612 + pBiDi->paraCount--; 1.613 + } else { 1.614 + pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; 1.615 + pBiDi->controlCount=controlCount; 1.616 + } 1.617 + /* For inverse bidi, default para direction is RTL if there is 1.618 + a strong R or AL at either end of the paragraph */ 1.619 + if(isDefaultLevelInverse && lastStrong==R) { 1.620 + pBiDi->paras[pBiDi->paraCount-1].level=1; 1.621 + } 
1.622 + if(isDefaultLevel) { 1.623 + pBiDi->paraLevel=pBiDi->paras[0].level; 1.624 + } 1.625 + /* The following is needed to resolve the text direction for default level 1.626 + paragraphs containing no strong character */ 1.627 + for(i=0; i<pBiDi->paraCount; i++) 1.628 + flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); 1.629 + 1.630 + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 1.631 + flags|=DIRPROP_FLAG(L); 1.632 + } 1.633 + pBiDi->flags=flags; 1.634 + pBiDi->lastArabicPos=lastArabicPos; 1.635 + return TRUE; 1.636 +} 1.637 + 1.638 +/* determine the paragraph level at position index */ 1.639 +U_CFUNC UBiDiLevel 1.640 +ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { 1.641 + int32_t i; 1.642 + for(i=0; i<pBiDi->paraCount; i++) 1.643 + if(pindex<pBiDi->paras[i].limit) 1.644 + break; 1.645 + if(i>=pBiDi->paraCount) 1.646 + i=pBiDi->paraCount-1; 1.647 + return (UBiDiLevel)(pBiDi->paras[i].level); 1.648 +} 1.649 + 1.650 +/* Functions for handling paired brackets ----------------------------------- */ 1.651 + 1.652 +/* In the isoRuns array, the first entry is used for text outside of any 1.653 + isolate sequence. Higher entries are used for each more deeply nested 1.654 + isolate sequence. isoRunLast is the index of the last used entry. The 1.655 + openings array is used to note the data of opening brackets not yet 1.656 + matched by a closing bracket, or matched but still susceptible to change 1.657 + level. 1.658 + Each isoRun entry contains the index of the first and 1.659 + one-after-last openings entries for pending opening brackets it 1.660 + contains. The next openings entry to use is the one-after-last of the 1.661 + most deeply nested isoRun entry. 1.662 + isoRun entries also contain their current embedding level and the last 1.663 + encountered strong character, since these will be needed to resolve 1.664 + the level of paired brackets. 
*/ 1.665 + 1.666 +static void 1.667 +bracketInit(UBiDi *pBiDi, BracketData *bd) { 1.668 + bd->pBiDi=pBiDi; 1.669 + bd->isoRunLast=0; 1.670 + bd->isoRuns[0].start=0; 1.671 + bd->isoRuns[0].limit=0; 1.672 + bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); 1.673 + bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1; 1.674 + bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; 1.675 + if(pBiDi->openingsMemory) { 1.676 + bd->openings=pBiDi->openingsMemory; 1.677 + bd->openingsSize=pBiDi->openingsSize; 1.678 + } else { 1.679 + bd->openings=bd->simpleOpenings; 1.680 + bd->openingsSize=SIMPLE_OPENINGS_SIZE; 1.681 + } 1.682 + bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || 1.683 + bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; 1.684 +} 1.685 + 1.686 +/* paragraph boundary */ 1.687 +static void 1.688 +bracketProcessB(BracketData *bd, UBiDiLevel level) { 1.689 + bd->isoRunLast=0; 1.690 + bd->isoRuns[0].limit=0; 1.691 + bd->isoRuns[0].level=level; 1.692 + bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1; 1.693 + bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; 1.694 +} 1.695 + 1.696 +/* LRE, LRO, RLE, RLO, PDF */ 1.697 +static void 1.698 +bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, 1.699 + UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { 1.700 + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 1.701 + DirProp *dirProps=bd->pBiDi->dirProps; 1.702 + if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ 1.703 + return; 1.704 + if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)> 1.705 + (contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */ 1.706 + contextLevel=embeddingLevel; 1.707 + pLastIsoRun->limit=pLastIsoRun->start; 1.708 + pLastIsoRun->level=embeddingLevel; 1.709 + pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1; 1.710 + pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos; 1.711 +} 1.712 + 1.713 +/* LRI or RLI */ 1.714 
+static void 1.715 +bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { 1.716 + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 1.717 + int16_t lastLimit; 1.718 + lastLimit=pLastIsoRun->limit; 1.719 + bd->isoRunLast++; 1.720 + pLastIsoRun++; 1.721 + pLastIsoRun->start=pLastIsoRun->limit=lastLimit; 1.722 + pLastIsoRun->level=level; 1.723 + pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1; 1.724 + pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0; 1.725 +} 1.726 + 1.727 +/* PDI */ 1.728 +static void 1.729 +bracketProcessPDI(BracketData *bd) { 1.730 + bd->isoRunLast--; 1.731 +} 1.732 + 1.733 +/* newly found opening bracket: create an openings entry */ 1.734 +static UBool /* return TRUE if success */ 1.735 +bracketAddOpening(BracketData *bd, UChar match, int32_t position) { 1.736 + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; 1.737 + Opening *pOpening; 1.738 + if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */ 1.739 + UBiDi *pBiDi=bd->pBiDi; 1.740 + if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) 1.741 + return FALSE; 1.742 + if(bd->openings==bd->simpleOpenings) 1.743 + uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, 1.744 + SIMPLE_OPENINGS_SIZE * sizeof(Opening)); 1.745 + bd->openings=pBiDi->openingsMemory; /* may have changed */ 1.746 + bd->openingsSize=pBiDi->openingsSize; 1.747 + } 1.748 + pOpening=&bd->openings[pLastIsoRun->limit]; 1.749 + pOpening->position=position; 1.750 + pOpening->match=match; 1.751 + pOpening->contextDir=pLastIsoRun->contextDir; 1.752 + pOpening->contextPos=pLastIsoRun->contextPos; 1.753 + pOpening->flags=0; 1.754 + pLastIsoRun->limit++; 1.755 + return TRUE; 1.756 +} 1.757 + 1.758 +/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ 1.759 +static void 1.760 +fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { 1.761 + /* This function calls itself recursively */ 1.762 + IsoRun 
*pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    Opening *qOpening;
    DirProp *dirProps=bd->pBiDi->dirProps;
    int32_t k, openingPosition, closingPosition;
    for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
        if(qOpening->match>=0)      /* not an N0c match */
            continue;
        if(newPropPosition<qOpening->contextPos)
            break;
        if(newPropPosition>=qOpening->position)
            continue;
        if(newProp==qOpening->contextDir)
            break;
        openingPosition=qOpening->position;
        dirProps[openingPosition]=dirProps[newPropPosition];
        closingPosition=-(qOpening->match);
        dirProps[closingPosition]= newProp; /* can never be AL */
        qOpening->match=0;                  /* prevent further changes */
        fixN0c(bd, k, openingPosition, newProp);
        fixN0c(bd, k, closingPosition, newProp);
    }
}

/* handle strong characters, digits and candidates for closing brackets */
/*
 * Feed one character into the bracket-pairing state of the last isolate run.
 *
 * bd        bracket state; its openings[] entries, the last isoRuns[] entry
 *           and bd->pBiDi->dirProps[] may be modified
 * position  index of the character in bd->pBiDi->text / dirProps
 * dirProp   directional property of that character as seen by the caller
 *
 * Three cases are handled:
 *  - L, R, AL, EN or AN: the context of the last isolate run is updated and
 *    the resulting strong direction is OR-ed into the flags of every opening
 *    of that run. Unless bd->isNumbersSpecial is set, an EN has its
 *    dirProps[] entry rewritten to ENL, ENR or AN.
 *  - anything else that is not ON: nothing to do.
 *  - ON: the openings of the run are searched from the most recent one down
 *    for an entry whose match equals the character; if one is found the pair
 *    is resolved (N0b, N0c or N0d). Otherwise, if the character is an
 *    opening bracket, one entry (two for the angle-bracket synonyms) is added.
 *
 * Returns FALSE only when bracketAddOpening() returns FALSE; TRUE otherwise.
 */
static UBool                            /* return TRUE if success */
bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) {
    IsoRun *pLastIsoRun;
    Opening *pOpening, *qOpening;
    DirProp *dirProps, newProp;
    UBiDiDirection direction;
    uint16_t flag;
    int32_t i, k;
    UBool stable;
    UChar c, match;
    dirProps=bd->pBiDi->dirProps;
    if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */
        pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
        /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */
        if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL))
            dirProp=pLastIsoRun->lastStrong;
        /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */
        if(dirProp==EN) {
            if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) {
                dirProp=L;
                if(!bd->isNumbersSpecial)
                    dirProps[position]=ENL;
            }
            else {
                dirProp=pLastIsoRun->lastStrong;    /* may be R or AL */
                if(!bd->isNumbersSpecial)
                    dirProps[position]= dirProp==AL ? AN : ENR;
            }
        }
        /* remember the (possibly adjusted) property as the run's context */
        pLastIsoRun->lastStrong=dirProp;
        pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp);
        pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position;
        /* for the openings' flags, AL and AN are recorded as R */
        if(dirProp==AL || dirProp==AN)
            dirProp=R;
        flag=DIRPROP_FLAG(dirProp);
        /* strong characters found after an unmatched opening bracket
           must be noted for possibly applying N0b */
        for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
            bd->openings[i].flags|=flag;
        return TRUE;
    }
    if(dirProp!=ON)
        return TRUE;
    /* First see if it is a matching closing bracket. Hopefully, this is more
       efficient than checking if it is a closing bracket at all */
    c=bd->pBiDi->text[position];
    pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    /* scan from the most recently added opening down to the start of the run */
    for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) {
        if(bd->openings[i].match!=c)
            continue;
        /* We have a match */
        pOpening=&bd->openings[i];
        direction=pLastIsoRun->level&1;     /* 0 for an even level, 1 for an odd level */
        stable=TRUE;            /* assume stable until proved otherwise */

        /* The stable flag is set when brackets are paired and their
           level is resolved and cannot be changed by what will be
           found later in the source string.
           An unstable match can occur only when applying N0c, where
           the resolved level depends on the preceding context, and
           this context may be affected by text occurring later.
           Example: RTL paragraph containing:  abc[(latin) HEBREW]
           When the closing parenthesis is encountered, it appears
           that N0c1 must be applied since 'abc' sets an opposite
           direction context and both parentheses receive level 2.
           However, when the closing square bracket is processed,
           N0b applies because of 'HEBREW' being included within the
           brackets, thus the square brackets are treated like R and
           receive level 1. However, this changes the preceding
           context of the opening parenthesis, and it now appears
           that N0c2 must be applied to the parentheses rather than
           N0c1. */

        if((direction==0 && pOpening->flags&FOUND_L) ||
           (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
            newProp=direction;
        }
        else if(pOpening->flags&(FOUND_L|FOUND_R)) {    /* N0c */
            if(direction!=pOpening->contextDir) {
                newProp=pOpening->contextDir;           /* N0c1 */
                /* it is stable if there is no preceding text or in
                   conditions too complicated and not worth checking */
                stable=(i==pLastIsoRun->start);
            }
            else
                newProp=direction;                      /* N0c2 */
        }
        else {
            newProp=BN;                                 /* N0d */
        }
        /* BN is used above as the "leave both brackets unchanged" marker */
        if(newProp!=BN) {
            dirProps[pOpening->position]=newProp;
            dirProps[position]=newProp;
            pLastIsoRun->contextDir=newProp;
            pLastIsoRun->contextPos=position;
        }
        /* Update nested N0c pairs that may be affected */
        if(newProp==direction)
            fixN0c(bd, i, pOpening->position, newProp);
        if(stable) {
            pLastIsoRun->limit=i;   /* forget any brackets nested within this pair */
            /* remove lower located synonyms if any */
            while(pLastIsoRun->limit>pLastIsoRun->start &&
                  bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
                pLastIsoRun->limit--;
        }
        else {
            /* keep the entry, recording the closing position as a negative
               match value (the form that fixN0c() looks for) */
            pOpening->match=-position;
            /* neutralize lower located synonyms if any */
            k=i-1;
            while(k>=pLastIsoRun->start &&
                  bd->openings[k].position==pOpening->position)
                bd->openings[k--].match=0;
            /* neutralize any unmatched opening between the current pair;
               this will also neutralize higher located synonyms if any */
            for(k=i+1; k<pLastIsoRun->limit; k++) {
                qOpening=&bd->openings[k];
                if(qOpening->position>=position)
                    break;
                if(qOpening->match>0)
                    qOpening->match=0;
            }
        }
        return TRUE;
    }
    /* We get here only if the ON character was not a matching closing bracket */
    /* Now see if it is an opening bracket */
    match=u_getBidiPairedBracket(c);    /* get the matching char */
    if(match==c)                        /* if no matching char */
        return TRUE;
    if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN)
        return TRUE;                    /* not an opening bracket */
    /* special case: process synonyms
       create an opening entry for each synonym */
    if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
        if(!bracketAddOpening(bd, 0x3009, position))
            return FALSE;
    }
    else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
        if(!bracketAddOpening(bd, 0x232A, position))
            return FALSE;
    }
    return bracketAddOpening(bd, match, position);
}

/* perform (X1)..(X9) ------------------------------------------------------- */

/* determine if the text is mixed-directional or single-directional */
/*
 * Reads pBiDi->flags only; nothing is modified.
 * Returns UBIDI_LTR when no MASK_RTL bit is set and the text does not combine
 * AN with a MASK_POSSIBLE_N bit; otherwise UBIDI_RTL when no MASK_LTR bit is
 * set; otherwise UBIDI_MIXED.
 */
static UBiDiDirection
directionFromFlags(UBiDi *pBiDi) {
    Flags flags=pBiDi->flags;
    /* if the text contains AN and neutrals, then some neutrals may become RTL */
    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
        return UBIDI_LTR;
    } else if(!(flags&MASK_LTR)) {
        return UBIDI_RTL;
    } else {
        return UBIDI_MIXED;
    }
}

/*
 * Resolve the explicit levels as specified by explicit embedding codes.
 * Recalculate the flags to have them reflect the real properties
 * after taking the explicit embeddings into account.
1.951 + * 1.952 + * The BiDi algorithm is designed to result in the same behavior whether embedding 1.953 + * levels are externally specified (from "styled text", supposedly the preferred 1.954 + * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. 1.955 + * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). 1.956 + * However, in a real implementation, the removal of these codes and their index 1.957 + * positions in the plain text is undesirable since it would result in 1.958 + * reallocated, reindexed text. 1.959 + * Instead, this implementation leaves the codes in there and just ignores them 1.960 + * in the subsequent processing. 1.961 + * In order to get the same reordering behavior, positions with a BN or a not-isolate 1.962 + * explicit embedding code just get the same level assigned as the last "real" 1.963 + * character. 1.964 + * 1.965 + * Some implementations, not this one, then overwrite some of these 1.966 + * directionality properties at "real" same-level-run boundaries by 1.967 + * L or R codes so that the resolution of weak types can be performed on the 1.968 + * entire paragraph at once instead of having to parse it once more and 1.969 + * perform that resolution on same-level-runs. 1.970 + * This limits the scope of the implicit rules in effectively 1.971 + * the same way as the run limits. 1.972 + * 1.973 + * Instead, this implementation does not modify these codes, except for 1.974 + * paired brackets whose properties (ON) may be replaced by L or R. 1.975 + * On one hand, the paragraph has to be scanned for same-level-runs, but 1.976 + * on the other hand, this saves another loop to reset these codes, 1.977 + * or saves making and modifying a copy of dirProps[]. 1.978 + * 1.979 + * 1.980 + * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 
1.981 + * 1.982 + * 1.983 + * Handling the stack of explicit levels (Xn): 1.984 + * 1.985 + * With the BiDi stack of explicit levels, as pushed with each 1.986 + * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI, 1.987 + * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. 1.988 + * 1.989 + * In order to have a correct push-pop semantics even in the case of overflows, 1.990 + * overflow counters and a valid isolate counter are used as described in UAX#9 1.991 + * section 3.3.2 "Explicit Levels and Directions". 1.992 + * 1.993 + * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 1.994 + */ 1.995 +static UBiDiDirection 1.996 +resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 1.997 + DirProp *dirProps=pBiDi->dirProps; 1.998 + UBiDiLevel *levels=pBiDi->levels; 1.999 + const UChar *text=pBiDi->text; 1.1000 + 1.1001 + int32_t i=0, length=pBiDi->length; 1.1002 + Flags flags=pBiDi->flags; /* collect all directionalities in the text */ 1.1003 + DirProp dirProp; 1.1004 + UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); 1.1005 + UBiDiDirection direction; 1.1006 + pBiDi->isolateCount=0; 1.1007 + 1.1008 + if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } 1.1009 + 1.1010 + /* determine if the text is mixed-directional or single-directional */ 1.1011 + direction=directionFromFlags(pBiDi); 1.1012 + 1.1013 + /* we may not need to resolve any explicit levels */ 1.1014 + if((direction!=UBIDI_MIXED)) { 1.1015 + /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ 1.1016 + return direction; 1.1017 + } 1.1018 + if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { 1.1019 + /* inverse BiDi: mixed, but all characters are at the same embedding level */ 1.1020 + /* set all levels to the paragraph level */ 1.1021 + int32_t paraIndex, start, limit; 1.1022 + for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { 1.1023 + if(paraIndex==0) 1.1024 + start=0; 1.1025 + else 1.1026 + 
start=pBiDi->paras[paraIndex-1].limit; 1.1027 + limit=pBiDi->paras[paraIndex].limit; 1.1028 + level=pBiDi->paras[paraIndex].level; 1.1029 + for(i=start; i<limit; i++) 1.1030 + levels[i]=level; 1.1031 + } 1.1032 + return direction; /* no bracket matching for inverse BiDi */ 1.1033 + } 1.1034 + if(!(flags&(MASK_EXPLICIT|MASK_ISO))) { 1.1035 + /* no embeddings, set all levels to the paragraph level */ 1.1036 + /* we still have to perform bracket matching */ 1.1037 + int32_t paraIndex, start, limit; 1.1038 + BracketData bracketData; 1.1039 + bracketInit(pBiDi, &bracketData); 1.1040 + for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { 1.1041 + if(paraIndex==0) 1.1042 + start=0; 1.1043 + else 1.1044 + start=pBiDi->paras[paraIndex-1].limit; 1.1045 + limit=pBiDi->paras[paraIndex].limit; 1.1046 + level=pBiDi->paras[paraIndex].level; 1.1047 + for(i=start; i<limit; i++) { 1.1048 + levels[i]=level; 1.1049 + dirProp=dirProps[i]; 1.1050 + if(dirProp==B) { 1.1051 + if((i+1)<length) { 1.1052 + if(text[i]==CR && text[i+1]==LF) 1.1053 + continue; /* skip CR when followed by LF */ 1.1054 + bracketProcessB(&bracketData, level); 1.1055 + } 1.1056 + continue; 1.1057 + } 1.1058 + if(!bracketProcessChar(&bracketData, i, dirProp)) { 1.1059 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.1060 + return UBIDI_LTR; 1.1061 + } 1.1062 + } 1.1063 + } 1.1064 + return direction; 1.1065 + } 1.1066 + { 1.1067 + /* continue to perform (Xn) */ 1.1068 + 1.1069 + /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ 1.1070 + /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ 1.1071 + UBiDiLevel embeddingLevel=level, newLevel; 1.1072 + UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */ 1.1073 + int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */ 1.1074 + 1.1075 + uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL 1.1076 + but we 
need one more entry as base */ 1.1077 + uint32_t stackLast=0; 1.1078 + int32_t overflowIsolateCount=0; 1.1079 + int32_t overflowEmbeddingCount=0; 1.1080 + int32_t validIsolateCount=0; 1.1081 + BracketData bracketData; 1.1082 + bracketInit(pBiDi, &bracketData); 1.1083 + stack[0]=level; /* initialize base entry to para level, no override, no isolate */ 1.1084 + 1.1085 + /* recalculate the flags */ 1.1086 + flags=0; 1.1087 + 1.1088 + for(i=0; i<length; ++i) { 1.1089 + dirProp=dirProps[i]; 1.1090 + switch(dirProp) { 1.1091 + case LRE: 1.1092 + case RLE: 1.1093 + case LRO: 1.1094 + case RLO: 1.1095 + /* (X2, X3, X4, X5) */ 1.1096 + flags|=DIRPROP_FLAG(BN); 1.1097 + if (dirProp==LRE || dirProp==LRO) 1.1098 + newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 1.1099 + else 1.1100 + newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 1.1101 + if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && 1.1102 + overflowEmbeddingCount==0) { 1.1103 + lastCcPos=i; 1.1104 + embeddingLevel=newLevel; 1.1105 + if(dirProp==LRO || dirProp==RLO) 1.1106 + embeddingLevel|=UBIDI_LEVEL_OVERRIDE; 1.1107 + stackLast++; 1.1108 + stack[stackLast]=embeddingLevel; 1.1109 + /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE 1.1110 + since this has already been done for newLevel which is 1.1111 + the source for embeddingLevel. 
1.1112 + */ 1.1113 + } else { 1.1114 + dirProps[i]|=IGNORE_CC; 1.1115 + if(overflowIsolateCount==0) 1.1116 + overflowEmbeddingCount++; 1.1117 + } 1.1118 + break; 1.1119 + case PDF: 1.1120 + /* (X7) */ 1.1121 + flags|=DIRPROP_FLAG(BN); 1.1122 + /* handle all the overflow cases first */ 1.1123 + if(overflowIsolateCount) { 1.1124 + dirProps[i]|=IGNORE_CC; 1.1125 + break; 1.1126 + } 1.1127 + if(overflowEmbeddingCount) { 1.1128 + dirProps[i]|=IGNORE_CC; 1.1129 + overflowEmbeddingCount--; 1.1130 + break; 1.1131 + } 1.1132 + if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */ 1.1133 + lastCcPos=i; 1.1134 + stackLast--; 1.1135 + embeddingLevel=(UBiDiLevel)stack[stackLast]; 1.1136 + } else 1.1137 + dirProps[i]|=IGNORE_CC; 1.1138 + break; 1.1139 + case LRI: 1.1140 + case RLI: 1.1141 + if(embeddingLevel!=previousLevel) { 1.1142 + bracketProcessBoundary(&bracketData, lastCcPos, 1.1143 + previousLevel, embeddingLevel); 1.1144 + previousLevel=embeddingLevel; 1.1145 + } 1.1146 + /* (X5a, X5b) */ 1.1147 + flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel); 1.1148 + level=embeddingLevel; 1.1149 + if(dirProp==LRI) 1.1150 + newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ 1.1151 + else 1.1152 + newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ 1.1153 + if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && 1.1154 + overflowEmbeddingCount==0) { 1.1155 + lastCcPos=i; 1.1156 + previousLevel=embeddingLevel; 1.1157 + validIsolateCount++; 1.1158 + if(validIsolateCount>pBiDi->isolateCount) 1.1159 + pBiDi->isolateCount=validIsolateCount; 1.1160 + embeddingLevel=newLevel; 1.1161 + stackLast++; 1.1162 + stack[stackLast]=embeddingLevel+ISOLATE; 1.1163 + bracketProcessLRI_RLI(&bracketData, embeddingLevel); 1.1164 + } else { 1.1165 + dirProps[i]|=IGNORE_CC; 1.1166 + overflowIsolateCount++; 1.1167 + } 1.1168 + break; 1.1169 + case PDI: 
1.1170 + if(embeddingLevel!=previousLevel) { 1.1171 + bracketProcessBoundary(&bracketData, lastCcPos, 1.1172 + previousLevel, embeddingLevel); 1.1173 + } 1.1174 + /* (X6a) */ 1.1175 + if(overflowIsolateCount) { 1.1176 + dirProps[i]|=IGNORE_CC; 1.1177 + overflowIsolateCount--; 1.1178 + } 1.1179 + else if(validIsolateCount) { 1.1180 + lastCcPos=i; 1.1181 + overflowEmbeddingCount=0; 1.1182 + while(stack[stackLast]<ISOLATE) /* pop embedding entries */ 1.1183 + stackLast--; /* until the last isolate entry */ 1.1184 + stackLast--; /* pop also the last isolate entry */ 1.1185 + validIsolateCount--; 1.1186 + bracketProcessPDI(&bracketData); 1.1187 + } else 1.1188 + dirProps[i]|=IGNORE_CC; 1.1189 + embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE; 1.1190 + previousLevel=level=embeddingLevel; 1.1191 + flags|= DIRPROP_FLAG(ON) | DIRPROP_FLAG(BN) | DIRPROP_FLAG_LR(embeddingLevel); 1.1192 + break; 1.1193 + case B: 1.1194 + level=GET_PARALEVEL(pBiDi, i); 1.1195 + if((i+1)<length) { 1.1196 + if(text[i]==CR && text[i+1]==LF) 1.1197 + break; /* skip CR when followed by LF */ 1.1198 + overflowEmbeddingCount=overflowIsolateCount=0; 1.1199 + validIsolateCount=0; 1.1200 + stackLast=0; 1.1201 + stack[0]=level; /* initialize base entry to para level, no override, no isolate */ 1.1202 + previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1); 1.1203 + bracketProcessB(&bracketData, embeddingLevel); 1.1204 + } 1.1205 + flags|=DIRPROP_FLAG(B); 1.1206 + break; 1.1207 + case BN: 1.1208 + /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ 1.1209 + /* they will get their levels set correctly in adjustWSLevels() */ 1.1210 + flags|=DIRPROP_FLAG(BN); 1.1211 + break; 1.1212 + default: 1.1213 + /* all other types get the "real" level */ 1.1214 + level=embeddingLevel; 1.1215 + if(embeddingLevel!=previousLevel) { 1.1216 + bracketProcessBoundary(&bracketData, lastCcPos, 1.1217 + previousLevel, embeddingLevel); 1.1218 + previousLevel=embeddingLevel; 1.1219 + } 1.1220 + 
if(level&UBIDI_LEVEL_OVERRIDE) 1.1221 + flags|=DIRPROP_FLAG_LR(level); 1.1222 + else 1.1223 + flags|=DIRPROP_FLAG(dirProp); 1.1224 + if(!bracketProcessChar(&bracketData, i, dirProp)) 1.1225 + return -1; 1.1226 + break; 1.1227 + } 1.1228 + 1.1229 + /* 1.1230 + * We need to set reasonable levels even on BN codes and 1.1231 + * explicit codes because we will later look at same-level runs (X10). 1.1232 + */ 1.1233 + levels[i]=level; 1.1234 + if(i>0 && levels[i-1]!=level) { 1.1235 + flags|=DIRPROP_FLAG_MULTI_RUNS; 1.1236 + if(level&UBIDI_LEVEL_OVERRIDE) 1.1237 + flags|=DIRPROP_FLAG_O(level); 1.1238 + else 1.1239 + flags|=DIRPROP_FLAG_E(level); 1.1240 + } 1.1241 + if(DIRPROP_FLAG(dirProp)&MASK_ISO) 1.1242 + level=embeddingLevel; 1.1243 + } 1.1244 + if(flags&MASK_EMBEDDING) { 1.1245 + flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 1.1246 + } 1.1247 + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { 1.1248 + flags|=DIRPROP_FLAG(L); 1.1249 + } 1.1250 + 1.1251 + /* subsequently, ignore the explicit codes and BN (X9) */ 1.1252 + 1.1253 + /* again, determine if the text is mixed-directional or single-directional */ 1.1254 + pBiDi->flags=flags; 1.1255 + direction=directionFromFlags(pBiDi); 1.1256 + } 1.1257 + return direction; 1.1258 +} 1.1259 + 1.1260 +/* 1.1261 + * Use a pre-specified embedding levels array: 1.1262 + * 1.1263 + * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), 1.1264 + * ignore all explicit codes (X9), 1.1265 + * and check all the preset levels. 1.1266 + * 1.1267 + * Recalculate the flags to have them reflect the real properties 1.1268 + * after taking the explicit embeddings into account. 
1.1269 + */ 1.1270 +static UBiDiDirection 1.1271 +checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { 1.1272 + DirProp *dirProps=pBiDi->dirProps; 1.1273 + DirProp dirProp; 1.1274 + UBiDiLevel *levels=pBiDi->levels; 1.1275 + int32_t isolateCount=0; 1.1276 + 1.1277 + int32_t i, length=pBiDi->length; 1.1278 + Flags flags=0; /* collect all directionalities in the text */ 1.1279 + UBiDiLevel level; 1.1280 + pBiDi->isolateCount=0; 1.1281 + 1.1282 + for(i=0; i<length; ++i) { 1.1283 + level=levels[i]; 1.1284 + dirProp=dirProps[i]; 1.1285 + if(dirProp==LRI || dirProp==RLI) { 1.1286 + isolateCount++; 1.1287 + if(isolateCount>pBiDi->isolateCount) 1.1288 + pBiDi->isolateCount=isolateCount; 1.1289 + } 1.1290 + else if(dirProp==PDI) 1.1291 + isolateCount--; 1.1292 + else if(dirProp==B) 1.1293 + isolateCount=0; 1.1294 + if(level&UBIDI_LEVEL_OVERRIDE) { 1.1295 + /* keep the override flag in levels[i] but adjust the flags */ 1.1296 + level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ 1.1297 + flags|=DIRPROP_FLAG_O(level); 1.1298 + } else { 1.1299 + /* set the flags */ 1.1300 + flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); 1.1301 + } 1.1302 + if((level<GET_PARALEVEL(pBiDi, i) && 1.1303 + !((0==level)&&(dirProp==B))) || 1.1304 + (UBIDI_MAX_EXPLICIT_LEVEL<level)) { 1.1305 + /* level out of bounds */ 1.1306 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.1307 + return UBIDI_LTR; 1.1308 + } 1.1309 + } 1.1310 + if(flags&MASK_EMBEDDING) { 1.1311 + flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); 1.1312 + } 1.1313 + 1.1314 + /* determine if the text is mixed-directional or single-directional */ 1.1315 + pBiDi->flags=flags; 1.1316 + return directionFromFlags(pBiDi); 1.1317 +} 1.1318 + 1.1319 +/****************************************************************** 1.1320 + The Properties state machine table 1.1321 +******************************************************************* 1.1322 + 1.1323 + All table cells are 8 bits: 1.1324 + bits 0..4: next state 1.1325 + bits 
5..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

*******************************************************************
 Definitions and type for properties state table
*******************************************************************
*/
/* number of columns of impTabProps: 15 property columns plus the Res column */
#define IMPTABPROPS_COLUMNS 16
/* index of the rightmost (Res) column */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* low 5 bits of a cell: next state */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* bits 5..7 of a cell: action */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* build a cell from an action and a next state (properties-table layout) */
#define s(action, newState) ((uint8_t)(newState+(action<<5)))

/* One entry per dirProp value, in the order given by the header line below.
   The values 0..14 follow the column order of the impTabProps header
   (L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, ENL, ENR);
   presumably used to pick the impTabProps column - confirm at the use site. */
static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
};
enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */

/******************************************************************

      PROPERTIES  STATE  TABLE

 In table impTabProps,
      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
      - the Res column is the reduced property assigned to a run

 Action 1: process current run1, init new run1
        2: init new run2
        3: process run1, process run2, init new run1
        4: process run1, set run1=run2, init new run2

 Notes:
  1) This table is used in resolveImplicitLevels().
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.


*/
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
{
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
};

/*  we must undef macro s because the levels tables have a different
 *  structure (4 bits for action and 4 bits for next state).
 */
#undef s

/******************************************************************
 The levels state machine tables
*******************************************************************

 All table cells are 8 bits:
      bits 0..3:  next state
      bits 4..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

 This format limits each table to 16 states each and to 15 actions.

*******************************************************************
 Definitions and type for levels state tables
*******************************************************************
*/
/* one column per reduced dirProp (DirProp_L..DirProp_B) plus the Res column */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* index of the rightmost (Res) column */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* low 4 bits of a cell: next state */
#define GET_STATE(cell) ((cell)&0x0f)
/* high 4 bits of a cell: table-local action number */
#define GET_ACTION(cell) ((cell)>>4)
/* build a cell from an action and a next state (levels-table layout) */
#define s(action, newState) ((uint8_t)(newState+(action<<4)))

typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
typedef uint8_t ImpAct[];

/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
 * instead of having a pair of ImpTab and a pair of ImpAct.
 */
/* In every initializer below, entry [0] is the "L" table and entry [1] is the
 * "R" table; the default tables are commented as "Even paragraph level" and
 * "Odd paragraph level" respectively. Presumably the index is the level
 * parity - confirm at the use site.
 */
typedef struct ImpTabPair {
    const void * pImpTab[2];
    const void * pImpAct[2];
} ImpTabPair;

/******************************************************************

      LEVELS  STATE  TABLES

 In all levels state tables,
      - state 0 is the initial state
      - the Res column is the increment to add to the text level
        for this property sequence.

 The impAct arrays for each table of a pair map the local action
 numbers of the table to the total list of actions. For instance,
 action 2 in a given table corresponds to the action number which
 appears in entry [2] of the impAct array for that table.
 The first entry of all impAct arrays must be 0.

 Action 1: init conditional sequence
        2: prepend conditional sequence to current sequence
        3: set ON sequence to new level - 1
        4: init EN/AN/ON sequence
        5: fix EN/AN/ON sequence followed by R
        6: set previous level sequence to level 2

 Notes:
  1) These tables are used in processPropertySeq(). The input
     is property sequences as determined by resolveImplicitLevels.
  2) Most such property sequences are processed immediately
     (levels are assigned).
  3) However, some sequences cannot be assigned a final level till
     one or more following sequences are received. For instance,
     ON following an R sequence within an even-level paragraph.
     If the following sequence is R, the ON sequence will be
     assigned basic run level+1, and so will the R sequence.
  4) S is generally handled like ON, since its level will be fixed
     to paragraph level in adjustWSLevels().

*/

static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
};
/* identity mapping: local action n is action n of the list above */
static const ImpAct impAct0 = {0,1,2,3,4,5,6};
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
                                           &impTabR_DEFAULT},
                                          {&impAct0, &impAct0}};

static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  2 },
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3),     0 ,     0 ,  1 },
/* 3 : R+ON       */ { s(2,0),     2 ,     4 ,     4 ,     3 ,     3 , s(2,0),  1 },
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
  };
/* this pair reuses the default R table */
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
                                                   &impTabR_DEFAULT},
                                                  {&impAct0, &impAct0}};

static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
};
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
};
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
                        {&impTabL_GROUP_NUMBERS_WITH_R,
                         &impTabR_GROUP_NUMBERS_WITH_R},
                        {&impAct0, &impAct0}};


static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default LTR table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default RTL table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
};
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
                        {&impTabL_INVERSE_NUMBERS_AS_L,
                         &impTabR_INVERSE_NUMBERS_AS_L},
                        {&impAct0, &impAct0}};

static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
};
/* local actions 2 and 3 map to actions 11 and 12, which are not part of the
   action list in the comment above; presumably they are described where
   processPropertySeq() handles them - confirm there */
static const ImpAct impAct1 = {0,1,11,12};
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
 */
/* this pair reuses the default L table */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
                        {&impTabL_DEFAULT,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};

static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
};
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The cases handled in this table are (visually):  R EN L
                                                     R L AN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
/* 2 : R+EN/AN+ON */ { s(2,3),     0 
, 1 , 1 , 2 , s(4,0), 0 , 0 }, 1.1644 +/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, 1.1645 +/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, 1.1646 +/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, 1.1647 +/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } 1.1648 +}; 1.1649 +static const ImpAct impAct2 = {0,1,7,8,9,10}; 1.1650 +static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { 1.1651 + {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, 1.1652 + &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1.1653 + {&impAct0, &impAct2}}; 1.1654 + 1.1655 +static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { 1.1656 + {&impTabL_NUMBERS_SPECIAL, 1.1657 + &impTabR_INVERSE_LIKE_DIRECT}, 1.1658 + {&impAct0, &impAct1}}; 1.1659 + 1.1660 +static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = 1.1661 +/* The case handled in this table is (visually): R EN L 1.1662 +*/ 1.1663 +{ 1.1664 +/* L , R , EN , AN , ON , S , B , Res */ 1.1665 +/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, 1.1666 +/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, 1.1667 +/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, 1.1668 +/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, 1.1669 +/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } 1.1670 +}; 1.1671 +static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { 1.1672 + {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, 1.1673 + &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, 1.1674 + {&impAct0, &impAct2}}; 1.1675 + 1.1676 +#undef s 1.1677 + 1.1678 +typedef struct { 1.1679 + const ImpTab * pImpTab; /* level table pointer */ 1.1680 + const ImpAct * pImpAct; /* action map array */ 1.1681 + int32_t startON; /* start of ON sequence */ 1.1682 + int32_t startL2EN; /* start of level 2 sequence */ 1.1683 + int32_t lastStrongRTL; /* index of last found R or AL */ 1.1684 + int32_t state; /* 
current state */ 1.1685 + int32_t runStart; /* start position of the run */ 1.1686 + UBiDiLevel runLevel; /* run level before implicit solving */ 1.1687 +} LevState; 1.1688 + 1.1689 +/*------------------------------------------------------------------------*/ 1.1690 + 1.1691 +static void 1.1692 +addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) 1.1693 + /* param pos: position where to insert 1.1694 + param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER 1.1695 + */ 1.1696 +{ 1.1697 +#define FIRSTALLOC 10 1.1698 + Point point; 1.1699 + InsertPoints * pInsertPoints=&(pBiDi->insertPoints); 1.1700 + 1.1701 + if (pInsertPoints->capacity == 0) 1.1702 + { 1.1703 + pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); 1.1704 + if (pInsertPoints->points == NULL) 1.1705 + { 1.1706 + pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1.1707 + return; 1.1708 + } 1.1709 + pInsertPoints->capacity=FIRSTALLOC; 1.1710 + } 1.1711 + if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ 1.1712 + { 1.1713 + void * savePoints=pInsertPoints->points; 1.1714 + pInsertPoints->points=uprv_realloc(pInsertPoints->points, 1.1715 + pInsertPoints->capacity*2*sizeof(Point)); 1.1716 + if (pInsertPoints->points == NULL) 1.1717 + { 1.1718 + pInsertPoints->points=savePoints; 1.1719 + pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; 1.1720 + return; 1.1721 + } 1.1722 + else pInsertPoints->capacity*=2; 1.1723 + } 1.1724 + point.pos=pos; 1.1725 + point.flag=flag; 1.1726 + pInsertPoints->points[pInsertPoints->size]=point; 1.1727 + pInsertPoints->size++; 1.1728 +#undef FIRSTALLOC 1.1729 +} 1.1730 + 1.1731 +/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ 1.1732 + 1.1733 +/* 1.1734 + * This implementation of the (Wn) rules applies all rules in one pass. 
 * In order to do so, it needs a look-ahead of typically 1 character
 * (except for W5: sequences of ET) and keeps track of changes
 * in a rule Wp that affect a later Wq (p<q).
 *
 * The (Nn) and (In) rules are also performed in that same single loop,
 * but effectively one iteration behind for white space.
 *
 * Since all implicit rules are performed in one step, it is not necessary
 * to actually store the intermediate directional properties in dirProps[].
 */

/*
 * Feed one sequence of characters sharing the resolved property _prop to the
 * levels state machine.
 *
 * The cell for (current state, _prop) is looked up in the level table; the
 * new state is stored in pLevState->state, and the action number of the cell
 * is translated through the action map into one of the cases below.
 * After the action, if the Res column of the new state is non-zero or the
 * action moved start back before the original start, the level
 * runLevel+Res is written into levels[start..limit-1].
 *
 * param pBiDi:     object whose levels[] and insertPoints are updated
 * param pLevState: state of the levels state machine for the current run
 * param _prop:     column index into the level table
 * param start:     index of the first character of the sequence
 * param limit:     index after the last character of the sequence
 */
static void
processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
                   int32_t start, int32_t limit) {
    uint8_t cell, oldStateSeq, actionSeq;
    const ImpTab * pImpTab=pLevState->pImpTab;
    const ImpAct * pImpAct=pLevState->pImpAct;
    UBiDiLevel * levels=pBiDi->levels;
    UBiDiLevel level, addLevel;
    InsertPoints * pInsertPoints;
    int32_t start0, k;

    start0=start;                           /* save original start position */
    oldStateSeq=(uint8_t)pLevState->state;
    cell=(*pImpTab)[oldStateSeq][_prop];
    pLevState->state=GET_STATE(cell);       /* isolate the new state */
    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];

    if(actionSeq) {
        switch(actionSeq) {
        case 1:                         /* init ON seq */
            pLevState->startON=start0;
            break;

        case 2:                         /* prepend ON seq to current seq */
            start=pLevState->startON;
            break;

        case 3:                         /* L or S after possible relevant EN/AN */
            /* check if we had EN after R/AL */
            if (pLevState->startL2EN >= 0) {
                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
            }
            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
            /* check if we had any relevant EN/AN after R/AL */
            pInsertPoints=&(pBiDi->insertPoints);
            if ((pInsertPoints->capacity == 0) ||
                (pInsertPoints->size <= pInsertPoints->confirmed))
            {
                /* nothing, just clean up */
                pLevState->lastStrongRTL=-1;
                /* check if we have a pending conditional segment */
                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
                    start=pLevState->startON;   /* reset to basic run level */
                }
                if (_prop == DirProp_S)                /* add LRM before S */
                {
                    addPoint(pBiDi, start0, LRM_BEFORE);
                    pInsertPoints->confirmed=pInsertPoints->size;
                }
                break;
            }
            /* reset previous RTL cont to level for LTR text */
            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
            {
                /* reset odd level, leave runLevel+2 as is */
                levels[k]=(levels[k] - 2) & ~1;
            }
            /* mark insert points as confirmed */
            pInsertPoints->confirmed=pInsertPoints->size;
            pLevState->lastStrongRTL=-1;
            if (_prop == DirProp_S)            /* add LRM before S */
            {
                addPoint(pBiDi, start0, LRM_BEFORE);
                pInsertPoints->confirmed=pInsertPoints->size;
            }
            break;

        case 4:                         /* R/AL after possible relevant EN/AN */
            /* just clean up */
            pInsertPoints=&(pBiDi->insertPoints);
            if (pInsertPoints->capacity > 0)
                /* remove all non confirmed insert points */
                pInsertPoints->size=pInsertPoints->confirmed;
            pLevState->startON=-1;
            pLevState->startL2EN=-1;
            pLevState->lastStrongRTL=limit - 1;
            break;

        case 5:                         /* EN/AN after R/AL + possible cont */
            /* check for real AN */
            if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
            {
                /* real AN */
                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
                {
                    /* just note the rightmost digit as a strong RTL */
                    pLevState->lastStrongRTL=limit - 1;
                    break;
                }
                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
                {
                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
                    pLevState->startL2EN=-2;
                }
                /* note AN */
                addPoint(pBiDi, start0, LRM_BEFORE);
                break;
            }
            /* if first EN/AN after R/AL */
            if (pLevState->startL2EN == -1) {
                pLevState->startL2EN=start0;
            }
            break;

        case 6:                         /* note location of latest R/AL */
            pLevState->lastStrongRTL=limit - 1;
            pLevState->startON=-1;
            break;

        case 7:                         /* L after R+ON/EN/AN */
            /* include possible adjacent number on the left */
            /* scan left for the nearest position with an odd level */
            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
            if(k>=0) {
                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
                pInsertPoints=&(pBiDi->insertPoints);
                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
            }
            pLevState->startON=start0;
            break;

        case 8:                         /* AN after L */
            /* AN numbers between L text on both sides may be trouble. */
            /* tentatively bracket with LRMs; will be confirmed if followed by L */
            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
            break;

        case 9:                         /* R after L+ON/EN/AN */
            /* false alert, infirm LRMs around previous AN */
            pInsertPoints=&(pBiDi->insertPoints);
            pInsertPoints->size=pInsertPoints->confirmed;
            if (_prop == DirProp_S)            /* add RLM before S */
            {
                addPoint(pBiDi, start0, RLM_BEFORE);
                pInsertPoints->confirmed=pInsertPoints->size;
            }
            break;

        case 10:                        /* L after L+ON/AN */
            level=pLevState->runLevel + addLevel;
            /* raise, but never lower, the levels of the pending sequence */
            for(k=pLevState->startON; k<start0; k++) {
                if (levels[k]<level)
                    levels[k]=level;
            }
            pInsertPoints=&(pBiDi->insertPoints);
            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
            pLevState->startON=start0;
            break;

        case 11:                        /* L after L+ON+EN/AN/ON */
            level=pLevState->runLevel;
            /* NOTE(review): the two inner while loops decrement k without
               comparing it to startON or to 0, and the statements after them
               use levels[k] again - confirm that the levels written earlier
               always stop these loops inside [startON, start0). */
            for(k=start0-1; k>=pLevState->startON; k--) {
                if(levels[k]==level+3) {
                    while(levels[k]==level+3) {
                        levels[k--]-=2;
                    }
                    while(levels[k]==level) {
                        k--;
                    }
                }
                if(levels[k]==level+2) {
                    levels[k]=level;
                    continue;
                }
                levels[k]=level+1;
            }
            break;

        case 12:                        /* R after L+ON+EN/AN/ON */
            level=pLevState->runLevel+1;
            /* lower by 2 every level above runLevel+1 in the pending sequence */
            for(k=start0-1; k>=pLevState->startON; k--) {
                if(levels[k]>level) {
                    levels[k]-=2;
                }
            }
            break;

        default:                        /* we should never get here */
            U_ASSERT(FALSE);
            break;
        }
    }
    if((addLevel) || (start < start0)) {
        level=pLevState->runLevel + addLevel;
        if(start>=pLevState->runStart) {
            for(k=start; k<limit; k++) {
                levels[k]=level;
            }
        } else {
            /* The sequence starts before the current run: only write levels
               while isolateCount is 0.  The count is raised after an LRI/RLI
               and lowered at a PDI, so the initiator and the PDI themselves
               are written but the characters between them are not. */
            DirProp *dirProps=pBiDi->dirProps, dirProp;
            int32_t isolateCount=0;
            for(k=start; k<limit; k++) {
                dirProp=dirProps[k];
                if(dirProp==PDI)
                    isolateCount--;
                if(isolateCount==0)
                    levels[k]=level;
                if(dirProp==LRI || dirProp==RLI)
                    isolateCount++;
            }
        }
    }
}

/**
 * Returns the directionality of the last strong character at the end of the prologue, if any.
 * Requires prologue!=null.
 *
 * The prologue is scanned backwards: L yields DirProp_L, R or AL yields
 * DirProp_R, and the scan stops with DirProp_ON at a B character or when
 * the start of the prologue is reached.
 */
static DirProp
lastL_R_AL(UBiDi *pBiDi) {
    const UChar *text=pBiDi->prologue;
    int32_t length=pBiDi->proLength;
    int32_t i;
    UChar32 uchar;
    DirProp dirProp;
    for(i=length; i>0; ) {
        /* i is decremented by U16_PREV */
        U16_PREV(text, 0, i, uchar);
        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
        if(dirProp==L) {
            return DirProp_L;
        }
        if(dirProp==R || dirProp==AL) {
            return DirProp_R;
        }
        if(dirProp==B) {
            return DirProp_ON;
        }
    }
    return DirProp_ON;
}

/**
 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
 * Requires epilogue!=null.
 */
static DirProp
firstL_R_AL_EN_AN(UBiDi *pBiDi) {
    const UChar *text=pBiDi->epilogue;
    int32_t length=pBiDi->epiLength;
    int32_t i;
    UChar32 uchar;
    DirProp dirProp;
    /* forward scan; the first L, R/AL, EN or AN decides, otherwise ON */
    for(i=0; i<length; ) {
        /* i is incremented by U16_NEXT */
        U16_NEXT(text, i, length, uchar);
        dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
        if(dirProp==L) {
            return DirProp_L;
        }
        if(dirProp==R || dirProp==AL) {
            return DirProp_R;
        }
        if(dirProp==EN) {
            return DirProp_EN;
        }
        if(dirProp==AN) {
            return DirProp_AN;
        }
    }
    return DirProp_ON;
}

/*
 * Run the properties state machine (impTabProps) over dirProps[start..limit-1]
 * and hand every completed sequence to processPropertySeq(), which drives the
 * levels state machine and writes pBiDi->levels.
 *
 * param start: index of the first character of the run
 * param limit: index after the last character of the run
 * param sor:   property fed to the levels machine before the first character;
 *              replaced by the last strong type of the prologue when start==0
 *              and a prologue with a strong character is set
 * param eor:   property fed after the last character; replaced by the first
 *              strong type or digit of the epilogue when limit==pBiDi->length
 *              and the epilogue contains one
 *
 * When the run starts with PDI, the state saved in pBiDi->isolates[] is
 * restored instead of starting afresh; when the run ends with LRI/RLI before
 * the end of the text, the current state is saved there instead of being
 * closed with eor.
 */
static void
resolveImplicitLevels(UBiDi *pBiDi,
                      int32_t start, int32_t limit,
                      DirProp sor, DirProp eor) {
    const DirProp *dirProps=pBiDi->dirProps;
    DirProp dirProp;
    LevState levState;
    int32_t i, start1, start2;
    uint16_t oldStateImp, stateImp, actionImp;
    uint8_t gprop, resProp, cell;
    UBool inverseRTL;
    DirProp nextStrongProp=R;
    int32_t nextStrongPos=-1;

    /* check for RTL inverse BiDi mode */
    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
     * loop on the text characters from end to start.
     * This would need a different properties state table (at least different
     * actions) and different levels state tables (maybe very similar to the
     * LTR corresponding ones.
     */
    inverseRTL=(UBool)
        ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
         (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT  ||
          pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));

    /* initialize for property and levels state tables */
    levState.startON=-1;
    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
    levState.runStart=start;
    levState.runLevel=pBiDi->levels[start];
    /* pick the table and action map for the parity of the run level */
    levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
    levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
    if(start==0 && pBiDi->proLength>0) {
        DirProp lastStrong=lastL_R_AL(pBiDi);
        if(lastStrong!=DirProp_ON) {
            sor=lastStrong;
        }
    }
    /* The isolates[] entries contain enough information to
       resume the bidi algorithm in the same state as it was
       when it was interrupted by an isolate sequence. */
    if(dirProps[start]==PDI) {
        start1=pBiDi->isolates[pBiDi->isolateCount].start1;
        stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
        levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
        pBiDi->isolateCount--;
    } else {
        start1=start;
        if(dirProps[start]==NSM)
            stateImp = 1 + sor;
        else
            stateImp=0;
        levState.state=0;
        /* empty sequence [start, start): only feeds sor to the levels machine */
        processPropertySeq(pBiDi, &levState, sor, start, start);
    }
    start2=start;

    /* the loop runs one step past the last character so that the pending
       sequence is closed with eor */
    for(i=start; i<=limit; i++) {
        if(i>=limit) {
            if(limit>start) {
                dirProp=pBiDi->dirProps[limit-1];
                if(dirProp==LRI || dirProp==RLI)
                    break;  /* no forced closing for sequence ending with LRI/RLI */
            }
            gprop=eor;
        } else {
            DirProp prop, prop1;
            prop=PURE_DIRPROP(dirProps[i]);
            if(inverseRTL) {
                if(prop==AL) {
                    /* AL before EN does not make it AN */
                    prop=R;
                } else if(prop==EN) {
                    if(nextStrongPos<=i) {
                        /* look for next strong char (L/R/AL) */
                        /* NOTE(review): dirProps[j] is compared here without
                           PURE_DIRPROP() although prop above is masked -
                           confirm that this is intended. */
                        int32_t j;
                        nextStrongProp=R;   /* set default */
                        nextStrongPos=limit;
                        for(j=i+1; j<limit; j++) {
                            prop1=dirProps[j];
                            if(prop1==L || prop1==R || prop1==AL) {
                                nextStrongProp=prop1;
                                nextStrongPos=j;
                                break;
                            }
                        }
                    }
                    if(nextStrongProp==AL) {
                        prop=AN;
                    }
                }
            }
            gprop=groupProp[prop];
        }
        oldStateImp=stateImp;
        cell=impTabProps[oldStateImp][gprop];
        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
        if((i==limit) && (actionImp==0)) {
            /* there is an unprocessed sequence if its property == eor   */
            actionImp=1;                    /* process the last sequence */
        }
        if(actionImp) {
            /* resolved property of the sequence that ends here */
            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
            switch(actionImp) {
            case 1:             /* process current seq1, init new seq1 */
                processPropertySeq(pBiDi, &levState, resProp, start1, i);
                start1=i;
                break;
            case 2:             /* init new seq2 */
                start2=i;
                break;
            case 3:             /* process seq1, process seq2, init new seq1 */
                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
                start1=i;
                break;
            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
                start1=start2;
                start2=i;
                break;
            default:            /* we should never get here */
                U_ASSERT(FALSE);
                break;
            }
        }
    }

    /* flush possible pending sequence, e.g. ON */
    if(limit==pBiDi->length && pBiDi->epiLength>0) {
        DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
        if(firstStrong!=DirProp_ON) {
            eor=firstStrong;
        }
    }

    dirProp=dirProps[limit-1];
    if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
        /* the run is interrupted by an isolate: save the state so that the
           run starting with the matching PDI can resume from it */
        pBiDi->isolateCount++;
        pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
        pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
        pBiDi->isolates[pBiDi->isolateCount].start1=start1;
    }
    else
        processPropertySeq(pBiDi, &levState, eor, limit, limit);
}

/* perform (L1) and (X9) ---------------------------------------------------- */

/*
 * Reset the embedding levels for some non-graphic characters (L1).
 * This function also sets appropriate levels for BN, and
 * explicit embedding types that are supposed to have been removed
 * from the paragraph in (X9).
1.2169 + */ 1.2170 +static void 1.2171 +adjustWSLevels(UBiDi *pBiDi) { 1.2172 + const DirProp *dirProps=pBiDi->dirProps; 1.2173 + UBiDiLevel *levels=pBiDi->levels; 1.2174 + int32_t i; 1.2175 + 1.2176 + if(pBiDi->flags&MASK_WS) { 1.2177 + UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; 1.2178 + Flags flag; 1.2179 + 1.2180 + i=pBiDi->trailingWSStart; 1.2181 + while(i>0) { 1.2182 + /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ 1.2183 + while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) { 1.2184 + if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1.2185 + levels[i]=0; 1.2186 + } else { 1.2187 + levels[i]=GET_PARALEVEL(pBiDi, i); 1.2188 + } 1.2189 + } 1.2190 + 1.2191 + /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ 1.2192 + /* here, i+1 is guaranteed to be <length */ 1.2193 + while(i>0) { 1.2194 + flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])); 1.2195 + if(flag&MASK_BN_EXPLICIT) { 1.2196 + levels[i]=levels[i+1]; 1.2197 + } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { 1.2198 + levels[i]=0; 1.2199 + break; 1.2200 + } else if(flag&MASK_B_S) { 1.2201 + levels[i]=GET_PARALEVEL(pBiDi, i); 1.2202 + break; 1.2203 + } 1.2204 + } 1.2205 + } 1.2206 + } 1.2207 +} 1.2208 + 1.2209 +U_CAPI void U_EXPORT2 1.2210 +ubidi_setContext(UBiDi *pBiDi, 1.2211 + const UChar *prologue, int32_t proLength, 1.2212 + const UChar *epilogue, int32_t epiLength, 1.2213 + UErrorCode *pErrorCode) { 1.2214 + /* check the argument values */ 1.2215 + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1.2216 + if(pBiDi==NULL || proLength<-1 || epiLength<-1 || 1.2217 + (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { 1.2218 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.2219 + return; 1.2220 + } 1.2221 + 1.2222 + if(proLength==-1) { 1.2223 + pBiDi->proLength=u_strlen(prologue); 1.2224 + } else { 1.2225 + pBiDi->proLength=proLength; 1.2226 + } 1.2227 + if(epiLength==-1) { 1.2228 + 
pBiDi->epiLength=u_strlen(epilogue); 1.2229 + } else { 1.2230 + pBiDi->epiLength=epiLength; 1.2231 + } 1.2232 + pBiDi->prologue=prologue; 1.2233 + pBiDi->epilogue=epilogue; 1.2234 +} 1.2235 + 1.2236 +static void 1.2237 +setParaSuccess(UBiDi *pBiDi) { 1.2238 + pBiDi->proLength=0; /* forget the last context */ 1.2239 + pBiDi->epiLength=0; 1.2240 + pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ 1.2241 +} 1.2242 + 1.2243 +#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) 1.2244 +#define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) 1.2245 + 1.2246 +static void 1.2247 +setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, 1.2248 + UBiDiLevel paraLevel, UErrorCode *pErrorCode) { 1.2249 + void *runsOnlyMemory; 1.2250 + int32_t *visualMap; 1.2251 + UChar *visualText; 1.2252 + int32_t saveLength, saveTrailingWSStart; 1.2253 + const UBiDiLevel *levels; 1.2254 + UBiDiLevel *saveLevels; 1.2255 + UBiDiDirection saveDirection; 1.2256 + UBool saveMayAllocateText; 1.2257 + Run *runs; 1.2258 + int32_t visualLength, i, j, visualStart, logicalStart, 1.2259 + runCount, runLength, addedRuns, insertRemove, 1.2260 + start, limit, step, indexOddBit, logicalPos, 1.2261 + index0, index1; 1.2262 + uint32_t saveOptions; 1.2263 + 1.2264 + pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; 1.2265 + if(length==0) { 1.2266 + ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1.2267 + goto cleanup3; 1.2268 + } 1.2269 + /* obtain memory for mapping table and visual text */ 1.2270 + runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); 1.2271 + if(runsOnlyMemory==NULL) { 1.2272 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.2273 + goto cleanup3; 1.2274 + } 1.2275 + visualMap=runsOnlyMemory; 1.2276 + visualText=(UChar *)&visualMap[length]; 1.2277 + saveLevels=(UBiDiLevel *)&visualText[length]; 1.2278 + saveOptions=pBiDi->reorderingOptions; 1.2279 + if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { 1.2280 + pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; 
1.2281 + pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; 1.2282 + } 1.2283 + paraLevel&=1; /* accept only 0 or 1 */ 1.2284 + ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); 1.2285 + if(U_FAILURE(*pErrorCode)) { 1.2286 + goto cleanup3; 1.2287 + } 1.2288 + /* we cannot access directly pBiDi->levels since it is not yet set if 1.2289 + * direction is not MIXED 1.2290 + */ 1.2291 + levels=ubidi_getLevels(pBiDi, pErrorCode); 1.2292 + uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); 1.2293 + saveTrailingWSStart=pBiDi->trailingWSStart; 1.2294 + saveLength=pBiDi->length; 1.2295 + saveDirection=pBiDi->direction; 1.2296 + 1.2297 + /* FOOD FOR THOUGHT: instead of writing the visual text, we could use 1.2298 + * the visual map and the dirProps array to drive the second call 1.2299 + * to ubidi_setPara (but must make provision for possible removal of 1.2300 + * BiDi controls. Alternatively, only use the dirProps array via 1.2301 + * customized classifier callback. 1.2302 + */ 1.2303 + visualLength=ubidi_writeReordered(pBiDi, visualText, length, 1.2304 + UBIDI_DO_MIRRORING, pErrorCode); 1.2305 + ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); 1.2306 + if(U_FAILURE(*pErrorCode)) { 1.2307 + goto cleanup2; 1.2308 + } 1.2309 + pBiDi->reorderingOptions=saveOptions; 1.2310 + 1.2311 + pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; 1.2312 + paraLevel^=1; 1.2313 + /* Because what we did with reorderingOptions, visualText may be shorter 1.2314 + * than the original text. But we don't want the levels memory to be 1.2315 + * reallocated shorter than the original length, since we need to restore 1.2316 + * the levels as after the first call to ubidi_setpara() before returning. 1.2317 + * We will force mayAllocateText to FALSE before the second call to 1.2318 + * ubidi_setpara(), and will restore it afterwards. 
1.2319 + */ 1.2320 + saveMayAllocateText=pBiDi->mayAllocateText; 1.2321 + pBiDi->mayAllocateText=FALSE; 1.2322 + ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); 1.2323 + pBiDi->mayAllocateText=saveMayAllocateText; 1.2324 + ubidi_getRuns(pBiDi, pErrorCode); 1.2325 + if(U_FAILURE(*pErrorCode)) { 1.2326 + goto cleanup1; 1.2327 + } 1.2328 + /* check if some runs must be split, count how many splits */ 1.2329 + addedRuns=0; 1.2330 + runCount=pBiDi->runCount; 1.2331 + runs=pBiDi->runs; 1.2332 + visualStart=0; 1.2333 + for(i=0; i<runCount; i++, visualStart+=runLength) { 1.2334 + runLength=runs[i].visualLimit-visualStart; 1.2335 + if(runLength<2) { 1.2336 + continue; 1.2337 + } 1.2338 + logicalStart=GET_INDEX(runs[i].logicalStart); 1.2339 + for(j=logicalStart+1; j<logicalStart+runLength; j++) { 1.2340 + index0=visualMap[j]; 1.2341 + index1=visualMap[j-1]; 1.2342 + if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1.2343 + addedRuns++; 1.2344 + } 1.2345 + } 1.2346 + } 1.2347 + if(addedRuns) { 1.2348 + if(getRunsMemory(pBiDi, runCount+addedRuns)) { 1.2349 + if(runCount==1) { 1.2350 + /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ 1.2351 + pBiDi->runsMemory[0]=runs[0]; 1.2352 + } 1.2353 + runs=pBiDi->runs=pBiDi->runsMemory; 1.2354 + pBiDi->runCount+=addedRuns; 1.2355 + } else { 1.2356 + goto cleanup1; 1.2357 + } 1.2358 + } 1.2359 + /* split runs which are not consecutive in source text */ 1.2360 + for(i=runCount-1; i>=0; i--) { 1.2361 + runLength= i==0 ? 
runs[0].visualLimit : 1.2362 + runs[i].visualLimit-runs[i-1].visualLimit; 1.2363 + logicalStart=runs[i].logicalStart; 1.2364 + indexOddBit=GET_ODD_BIT(logicalStart); 1.2365 + logicalStart=GET_INDEX(logicalStart); 1.2366 + if(runLength<2) { 1.2367 + if(addedRuns) { 1.2368 + runs[i+addedRuns]=runs[i]; 1.2369 + } 1.2370 + logicalPos=visualMap[logicalStart]; 1.2371 + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1.2372 + saveLevels[logicalPos]^indexOddBit); 1.2373 + continue; 1.2374 + } 1.2375 + if(indexOddBit) { 1.2376 + start=logicalStart; 1.2377 + limit=logicalStart+runLength-1; 1.2378 + step=1; 1.2379 + } else { 1.2380 + start=logicalStart+runLength-1; 1.2381 + limit=logicalStart; 1.2382 + step=-1; 1.2383 + } 1.2384 + for(j=start; j!=limit; j+=step) { 1.2385 + index0=visualMap[j]; 1.2386 + index1=visualMap[j+step]; 1.2387 + if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { 1.2388 + logicalPos=BIDI_MIN(visualMap[start], index0); 1.2389 + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1.2390 + saveLevels[logicalPos]^indexOddBit); 1.2391 + runs[i+addedRuns].visualLimit=runs[i].visualLimit; 1.2392 + runs[i].visualLimit-=BIDI_ABS(j-start)+1; 1.2393 + insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); 1.2394 + runs[i+addedRuns].insertRemove=insertRemove; 1.2395 + runs[i].insertRemove&=~insertRemove; 1.2396 + start=j+step; 1.2397 + addedRuns--; 1.2398 + } 1.2399 + } 1.2400 + if(addedRuns) { 1.2401 + runs[i+addedRuns]=runs[i]; 1.2402 + } 1.2403 + logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); 1.2404 + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, 1.2405 + saveLevels[logicalPos]^indexOddBit); 1.2406 + } 1.2407 + 1.2408 + cleanup1: 1.2409 + /* restore initial paraLevel */ 1.2410 + pBiDi->paraLevel^=1; 1.2411 + cleanup2: 1.2412 + /* restore real text */ 1.2413 + pBiDi->text=text; 1.2414 + pBiDi->length=saveLength; 1.2415 + pBiDi->originalLength=length; 1.2416 + 
pBiDi->direction=saveDirection; 1.2417 + /* the saved levels should never excess levelsSize, but we check anyway */ 1.2418 + if(saveLength>pBiDi->levelsSize) { 1.2419 + saveLength=pBiDi->levelsSize; 1.2420 + } 1.2421 + uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); 1.2422 + pBiDi->trailingWSStart=saveTrailingWSStart; 1.2423 + /* free memory for mapping table and visual text */ 1.2424 + uprv_free(runsOnlyMemory); 1.2425 + if(pBiDi->runCount>1) { 1.2426 + pBiDi->direction=UBIDI_MIXED; 1.2427 + } 1.2428 + cleanup3: 1.2429 + pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; 1.2430 +} 1.2431 + 1.2432 +/* ubidi_setPara ------------------------------------------------------------ */ 1.2433 + 1.2434 +U_CAPI void U_EXPORT2 1.2435 +ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, 1.2436 + UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, 1.2437 + UErrorCode *pErrorCode) { 1.2438 + UBiDiDirection direction; 1.2439 + 1.2440 + /* check the argument values */ 1.2441 + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1.2442 + if(pBiDi==NULL || text==NULL || length<-1 || 1.2443 + (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { 1.2444 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.2445 + return; 1.2446 + } 1.2447 + 1.2448 + if(length==-1) { 1.2449 + length=u_strlen(text); 1.2450 + } 1.2451 + 1.2452 + /* special treatment for RUNS_ONLY mode */ 1.2453 + if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { 1.2454 + setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); 1.2455 + return; 1.2456 + } 1.2457 + 1.2458 + /* initialize the UBiDi structure */ 1.2459 + pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ 1.2460 + pBiDi->text=text; 1.2461 + pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; 1.2462 + pBiDi->paraLevel=paraLevel; 1.2463 + pBiDi->direction=paraLevel&1; 1.2464 + pBiDi->paraCount=1; 1.2465 + 1.2466 + pBiDi->dirProps=NULL; 1.2467 + pBiDi->levels=NULL; 1.2468 + pBiDi->runs=NULL; 1.2469 + 
pBiDi->insertPoints.size=0; /* clean up from last call */ 1.2470 + pBiDi->insertPoints.confirmed=0; /* clean up from last call */ 1.2471 + 1.2472 + /* 1.2473 + * Save the original paraLevel if contextual; otherwise, set to 0. 1.2474 + */ 1.2475 + pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); 1.2476 + 1.2477 + if(length==0) { 1.2478 + /* 1.2479 + * For an empty paragraph, create a UBiDi object with the paraLevel and 1.2480 + * the flags and the direction set but without allocating zero-length arrays. 1.2481 + * There is nothing more to do. 1.2482 + */ 1.2483 + if(IS_DEFAULT_LEVEL(paraLevel)) { 1.2484 + pBiDi->paraLevel&=1; 1.2485 + pBiDi->defaultParaLevel=0; 1.2486 + } 1.2487 + pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); 1.2488 + pBiDi->runCount=0; 1.2489 + pBiDi->paraCount=0; 1.2490 + setParaSuccess(pBiDi); /* mark successful setPara */ 1.2491 + return; 1.2492 + } 1.2493 + 1.2494 + pBiDi->runCount=-1; 1.2495 + 1.2496 + /* allocate paras memory */ 1.2497 + if(pBiDi->parasMemory) 1.2498 + pBiDi->paras=pBiDi->parasMemory; 1.2499 + else 1.2500 + pBiDi->paras=pBiDi->simpleParas; 1.2501 + 1.2502 + /* 1.2503 + * Get the directional properties, 1.2504 + * the flags bit-set, and 1.2505 + * determine the paragraph level if necessary. 1.2506 + */ 1.2507 + if(getDirPropsMemory(pBiDi, length)) { 1.2508 + pBiDi->dirProps=pBiDi->dirPropsMemory; 1.2509 + if(!getDirProps(pBiDi)) { 1.2510 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.2511 + return; 1.2512 + } 1.2513 + } else { 1.2514 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.2515 + return; 1.2516 + } 1.2517 + /* the processed length may have changed if UBIDI_OPTION_STREAMING */ 1.2518 + length= pBiDi->length; 1.2519 + pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ 1.2520 + 1.2521 + /* are explicit levels specified? 
*/ 1.2522 + if(embeddingLevels==NULL) { 1.2523 + /* no: determine explicit levels according to the (Xn) rules */\ 1.2524 + if(getLevelsMemory(pBiDi, length)) { 1.2525 + pBiDi->levels=pBiDi->levelsMemory; 1.2526 + direction=resolveExplicitLevels(pBiDi, pErrorCode); 1.2527 + if(U_FAILURE(*pErrorCode)) { 1.2528 + return; 1.2529 + } 1.2530 + } else { 1.2531 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.2532 + return; 1.2533 + } 1.2534 + } else { 1.2535 + /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ 1.2536 + pBiDi->levels=embeddingLevels; 1.2537 + direction=checkExplicitLevels(pBiDi, pErrorCode); 1.2538 + if(U_FAILURE(*pErrorCode)) { 1.2539 + return; 1.2540 + } 1.2541 + } 1.2542 + 1.2543 + /* allocate isolate memory */ 1.2544 + if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE) 1.2545 + pBiDi->isolates=pBiDi->simpleIsolates; 1.2546 + else 1.2547 + if(pBiDi->isolateCount<=pBiDi->isolatesSize) 1.2548 + pBiDi->isolates=pBiDi->isolatesMemory; 1.2549 + else { 1.2550 + if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { 1.2551 + pBiDi->isolates=pBiDi->isolatesMemory; 1.2552 + } else { 1.2553 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.2554 + return; 1.2555 + } 1.2556 + } 1.2557 + pBiDi->isolateCount=-1; /* current isolates stack entry == none */ 1.2558 + 1.2559 + /* 1.2560 + * The steps after (X9) in the UBiDi algorithm are performed only if 1.2561 + * the paragraph text has mixed directionality! 
1.2562 + */ 1.2563 + pBiDi->direction=direction; 1.2564 + switch(direction) { 1.2565 + case UBIDI_LTR: 1.2566 + /* make sure paraLevel is even */ 1.2567 + pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); 1.2568 + 1.2569 + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 1.2570 + pBiDi->trailingWSStart=0; 1.2571 + break; 1.2572 + case UBIDI_RTL: 1.2573 + /* make sure paraLevel is odd */ 1.2574 + pBiDi->paraLevel|=1; 1.2575 + 1.2576 + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ 1.2577 + pBiDi->trailingWSStart=0; 1.2578 + break; 1.2579 + default: 1.2580 + /* 1.2581 + * Choose the right implicit state table 1.2582 + */ 1.2583 + switch(pBiDi->reorderingMode) { 1.2584 + case UBIDI_REORDER_DEFAULT: 1.2585 + pBiDi->pImpTabPair=&impTab_DEFAULT; 1.2586 + break; 1.2587 + case UBIDI_REORDER_NUMBERS_SPECIAL: 1.2588 + pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; 1.2589 + break; 1.2590 + case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: 1.2591 + pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; 1.2592 + break; 1.2593 + case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: 1.2594 + pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; 1.2595 + break; 1.2596 + case UBIDI_REORDER_INVERSE_LIKE_DIRECT: 1.2597 + if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 1.2598 + pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; 1.2599 + } else { 1.2600 + pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; 1.2601 + } 1.2602 + break; 1.2603 + case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: 1.2604 + if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { 1.2605 + pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; 1.2606 + } else { 1.2607 + pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; 1.2608 + } 1.2609 + break; 1.2610 + default: 1.2611 + /* we should never get here */ 1.2612 + U_ASSERT(FALSE); 1.2613 + break; 1.2614 + } 1.2615 + /* 1.2616 + * If there are no external levels specified and there 1.2617 + * are no 
significant explicit level codes in the text, 1.2618 + * then we can treat the entire paragraph as one run. 1.2619 + * Otherwise, we need to perform the following rules on runs of 1.2620 + * the text with the same embedding levels. (X10) 1.2621 + * "Significant" explicit level codes are ones that actually 1.2622 + * affect non-BN characters. 1.2623 + * Examples for "insignificant" ones are empty embeddings 1.2624 + * LRE-PDF, LRE-RLE-PDF-PDF, etc. 1.2625 + */ 1.2626 + if(embeddingLevels==NULL && pBiDi->paraCount<=1 && 1.2627 + !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { 1.2628 + resolveImplicitLevels(pBiDi, 0, length, 1.2629 + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), 1.2630 + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); 1.2631 + } else { 1.2632 + /* sor, eor: start and end types of same-level-run */ 1.2633 + UBiDiLevel *levels=pBiDi->levels; 1.2634 + int32_t start, limit=0; 1.2635 + UBiDiLevel level, nextLevel; 1.2636 + DirProp sor, eor; 1.2637 + 1.2638 + /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ 1.2639 + level=GET_PARALEVEL(pBiDi, 0); 1.2640 + nextLevel=levels[0]; 1.2641 + if(level<nextLevel) { 1.2642 + eor=GET_LR_FROM_LEVEL(nextLevel); 1.2643 + } else { 1.2644 + eor=GET_LR_FROM_LEVEL(level); 1.2645 + } 1.2646 + 1.2647 + do { 1.2648 + /* determine start and limit of the run (end points just behind the run) */ 1.2649 + 1.2650 + /* the values for this run's start are the same as for the previous run's end */ 1.2651 + start=limit; 1.2652 + level=nextLevel; 1.2653 + if((start>0) && (pBiDi->dirProps[start-1]==B)) { 1.2654 + /* except if this is a new paragraph, then set sor = para level */ 1.2655 + sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); 1.2656 + } else { 1.2657 + sor=eor; 1.2658 + } 1.2659 + 1.2660 + /* search for the limit of this run */ 1.2661 + while(++limit<length && levels[limit]==level) {} 1.2662 + 1.2663 + /* get the correct level of the next run */ 1.2664 + if(limit<length) { 1.2665 + 
nextLevel=levels[limit]; 1.2666 + } else { 1.2667 + nextLevel=GET_PARALEVEL(pBiDi, length-1); 1.2668 + } 1.2669 + 1.2670 + /* determine eor from max(level, nextLevel); sor is last run's eor */ 1.2671 + if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { 1.2672 + eor=GET_LR_FROM_LEVEL(nextLevel); 1.2673 + } else { 1.2674 + eor=GET_LR_FROM_LEVEL(level); 1.2675 + } 1.2676 + 1.2677 + /* if the run consists of overridden directional types, then there 1.2678 + are no implicit types to be resolved */ 1.2679 + if(!(level&UBIDI_LEVEL_OVERRIDE)) { 1.2680 + resolveImplicitLevels(pBiDi, start, limit, sor, eor); 1.2681 + } else { 1.2682 + /* remove the UBIDI_LEVEL_OVERRIDE flags */ 1.2683 + do { 1.2684 + levels[start++]&=~UBIDI_LEVEL_OVERRIDE; 1.2685 + } while(start<limit); 1.2686 + } 1.2687 + } while(limit<length); 1.2688 + } 1.2689 + /* check if we got any memory shortage while adding insert points */ 1.2690 + if (U_FAILURE(pBiDi->insertPoints.errorCode)) 1.2691 + { 1.2692 + *pErrorCode=pBiDi->insertPoints.errorCode; 1.2693 + return; 1.2694 + } 1.2695 + /* reset the embedding levels for some non-graphic characters (L1), (X9) */ 1.2696 + adjustWSLevels(pBiDi); 1.2697 + break; 1.2698 + } 1.2699 + /* add RLM for inverse Bidi with contextual orientation resolving 1.2700 + * to RTL which would not round-trip otherwise 1.2701 + */ 1.2702 + if((pBiDi->defaultParaLevel>0) && 1.2703 + (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && 1.2704 + ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || 1.2705 + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { 1.2706 + int32_t i, j, start, last; 1.2707 + UBiDiLevel level; 1.2708 + DirProp dirProp; 1.2709 + for(i=0; i<pBiDi->paraCount; i++) { 1.2710 + last=(pBiDi->paras[i].limit)-1; 1.2711 + level=pBiDi->paras[i].level; 1.2712 + if(level==0) 1.2713 + continue; /* LTR paragraph */ 1.2714 + start= i==0 ? 
0 : pBiDi->paras[i-1].limit; 1.2715 + for(j=last; j>=start; j--) { 1.2716 + dirProp=pBiDi->dirProps[j]; 1.2717 + if(dirProp==L) { 1.2718 + if(j<last) { 1.2719 + while(pBiDi->dirProps[last]==B) { 1.2720 + last--; 1.2721 + } 1.2722 + } 1.2723 + addPoint(pBiDi, last, RLM_BEFORE); 1.2724 + break; 1.2725 + } 1.2726 + if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { 1.2727 + break; 1.2728 + } 1.2729 + } 1.2730 + } 1.2731 + } 1.2732 + 1.2733 + if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { 1.2734 + pBiDi->resultLength -= pBiDi->controlCount; 1.2735 + } else { 1.2736 + pBiDi->resultLength += pBiDi->insertPoints.size; 1.2737 + } 1.2738 + setParaSuccess(pBiDi); /* mark successful setPara */ 1.2739 +} 1.2740 + 1.2741 +U_CAPI void U_EXPORT2 1.2742 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { 1.2743 + if(pBiDi!=NULL) { 1.2744 + pBiDi->orderParagraphsLTR=orderParagraphsLTR; 1.2745 + } 1.2746 +} 1.2747 + 1.2748 +U_CAPI UBool U_EXPORT2 1.2749 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { 1.2750 + if(pBiDi!=NULL) { 1.2751 + return pBiDi->orderParagraphsLTR; 1.2752 + } else { 1.2753 + return FALSE; 1.2754 + } 1.2755 +} 1.2756 + 1.2757 +U_CAPI UBiDiDirection U_EXPORT2 1.2758 +ubidi_getDirection(const UBiDi *pBiDi) { 1.2759 + if(IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2760 + return pBiDi->direction; 1.2761 + } else { 1.2762 + return UBIDI_LTR; 1.2763 + } 1.2764 +} 1.2765 + 1.2766 +U_CAPI const UChar * U_EXPORT2 1.2767 +ubidi_getText(const UBiDi *pBiDi) { 1.2768 + if(IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2769 + return pBiDi->text; 1.2770 + } else { 1.2771 + return NULL; 1.2772 + } 1.2773 +} 1.2774 + 1.2775 +U_CAPI int32_t U_EXPORT2 1.2776 +ubidi_getLength(const UBiDi *pBiDi) { 1.2777 + if(IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2778 + return pBiDi->originalLength; 1.2779 + } else { 1.2780 + return 0; 1.2781 + } 1.2782 +} 1.2783 + 1.2784 +U_CAPI int32_t U_EXPORT2 1.2785 +ubidi_getProcessedLength(const UBiDi *pBiDi) { 1.2786 + if(IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2787 + return 
pBiDi->length; 1.2788 + } else { 1.2789 + return 0; 1.2790 + } 1.2791 +} 1.2792 + 1.2793 +U_CAPI int32_t U_EXPORT2 1.2794 +ubidi_getResultLength(const UBiDi *pBiDi) { 1.2795 + if(IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2796 + return pBiDi->resultLength; 1.2797 + } else { 1.2798 + return 0; 1.2799 + } 1.2800 +} 1.2801 + 1.2802 +/* paragraphs API functions ------------------------------------------------- */ 1.2803 + 1.2804 +U_CAPI UBiDiLevel U_EXPORT2 1.2805 +ubidi_getParaLevel(const UBiDi *pBiDi) { 1.2806 + if(IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2807 + return pBiDi->paraLevel; 1.2808 + } else { 1.2809 + return 0; 1.2810 + } 1.2811 +} 1.2812 + 1.2813 +U_CAPI int32_t U_EXPORT2 1.2814 +ubidi_countParagraphs(UBiDi *pBiDi) { 1.2815 + if(!IS_VALID_PARA_OR_LINE(pBiDi)) { 1.2816 + return 0; 1.2817 + } else { 1.2818 + return pBiDi->paraCount; 1.2819 + } 1.2820 +} 1.2821 + 1.2822 +U_CAPI void U_EXPORT2 1.2823 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, 1.2824 + int32_t *pParaStart, int32_t *pParaLimit, 1.2825 + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 1.2826 + int32_t paraStart; 1.2827 + 1.2828 + /* check the argument values */ 1.2829 + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1.2830 + RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); 1.2831 + RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); 1.2832 + 1.2833 + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 1.2834 + if(paraIndex) { 1.2835 + paraStart=pBiDi->paras[paraIndex-1].limit; 1.2836 + } else { 1.2837 + paraStart=0; 1.2838 + } 1.2839 + if(pParaStart!=NULL) { 1.2840 + *pParaStart=paraStart; 1.2841 + } 1.2842 + if(pParaLimit!=NULL) { 1.2843 + *pParaLimit=pBiDi->paras[paraIndex].limit; 1.2844 + } 1.2845 + if(pParaLevel!=NULL) { 1.2846 + *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); 1.2847 + } 1.2848 +} 1.2849 + 1.2850 +U_CAPI int32_t U_EXPORT2 1.2851 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, 1.2852 + int32_t *pParaStart, int32_t 
*pParaLimit, 1.2853 + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { 1.2854 + int32_t paraIndex; 1.2855 + 1.2856 + /* check the argument values */ 1.2857 + /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ 1.2858 + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); 1.2859 + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); 1.2860 + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ 1.2861 + RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); 1.2862 + 1.2863 + for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); 1.2864 + ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); 1.2865 + return paraIndex; 1.2866 +} 1.2867 + 1.2868 +U_CAPI void U_EXPORT2 1.2869 +ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, 1.2870 + const void *newContext, UBiDiClassCallback **oldFn, 1.2871 + const void **oldContext, UErrorCode *pErrorCode) 1.2872 +{ 1.2873 + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); 1.2874 + if(pBiDi==NULL) { 1.2875 + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1.2876 + return; 1.2877 + } 1.2878 + if( oldFn ) 1.2879 + { 1.2880 + *oldFn = pBiDi->fnClassCallback; 1.2881 + } 1.2882 + if( oldContext ) 1.2883 + { 1.2884 + *oldContext = pBiDi->coClassCallback; 1.2885 + } 1.2886 + pBiDi->fnClassCallback = newFn; 1.2887 + pBiDi->coClassCallback = newContext; 1.2888 +} 1.2889 + 1.2890 +U_CAPI void U_EXPORT2 1.2891 +ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) 1.2892 +{ 1.2893 + if(pBiDi==NULL) { 1.2894 + return; 1.2895 + } 1.2896 + if( fn ) 1.2897 + { 1.2898 + *fn = pBiDi->fnClassCallback; 1.2899 + } 1.2900 + if( context ) 1.2901 + { 1.2902 + *context = pBiDi->coClassCallback; 1.2903 + } 1.2904 +} 1.2905 + 1.2906 +U_CAPI UCharDirection U_EXPORT2 1.2907 +ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) 1.2908 +{ 1.2909 + UCharDirection dir; 1.2910 + 1.2911 + if( pBiDi->fnClassCallback == NULL || 1.2912 
+ (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) 1.2913 + { 1.2914 + dir = ubidi_getClass(pBiDi->bdp, c); 1.2915 + } 1.2916 + if(dir >= U_CHAR_DIRECTION_COUNT) { 1.2917 + dir = ON; 1.2918 + } 1.2919 + return dir; 1.2920 +}