michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 1999-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * file name: ubidi.c michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 1999jul27 michael@0: * created by: Markus W. Scherer, updated by Matitiahu Allouche michael@0: * michael@0: */ michael@0: michael@0: #include "cmemory.h" michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/ubidi.h" michael@0: #include "unicode/utf16.h" michael@0: #include "ubidi_props.h" michael@0: #include "ubidiimp.h" michael@0: #include "uassert.h" michael@0: michael@0: /* michael@0: * General implementation notes: michael@0: * michael@0: * Throughout the implementation, there are comments like (W2) that refer to michael@0: * rules of the BiDi algorithm in its version 5, in this example to the second michael@0: * rule of the resolution of weak types. michael@0: * michael@0: * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) michael@0: * character according to UTF-16, the second UChar gets the directional property of michael@0: * the entire character assigned, while the first one gets a BN, a boundary michael@0: * neutral, type, which is ignored by most of the algorithm according to michael@0: * rule (X9) and the implementation suggestions of the BiDi algorithm. michael@0: * michael@0: * Later, adjustWSLevels() will set the level for each BN to that of the michael@0: * following character (UChar), which results in surrogate pairs getting the michael@0: * same level on each of their surrogates. 
michael@0: * michael@0: * In a UTF-8 implementation, the same thing could be done: the last byte of michael@0: * a multi-byte sequence would get the "real" property, while all previous michael@0: * bytes of that sequence would get BN. michael@0: * michael@0: * It is not possible to assign all those parts of a character the same real michael@0: * property because this would fail in the resolution of weak types with rules michael@0: * that look at immediately surrounding types. michael@0: * michael@0: * As a related topic, this implementation does not remove Boundary Neutral michael@0: * types from the input, but ignores them wherever this is relevant. michael@0: * For example, the loop for the resolution of the weak types reads michael@0: * types until it finds a non-BN. michael@0: * Also, explicit embedding codes are neither changed into BN nor removed. michael@0: * They are only treated the same way real BNs are. michael@0: * As stated before, adjustWSLevels() takes care of them at the end. michael@0: * For the purpose of conformance, the levels of all these codes michael@0: * do not matter. michael@0: * michael@0: * Note that this implementation never modifies the dirProps michael@0: * after the initial setup, except for FSI which is changed to either michael@0: * LRI or RLI in getDirProps(), and paired brackets which may be changed michael@0: * to L or R according to N0. michael@0: * michael@0: * michael@0: * In this implementation, the resolution of weak types (Wn), michael@0: * neutrals (Nn), and the assignment of the resolved level (In) michael@0: * are all done in one single loop, in resolveImplicitLevels(). michael@0: * Changes of dirProp values are done on the fly, without writing michael@0: * them back to the dirProps array. 
michael@0: * michael@0: * michael@0: * This implementation contains code that allows to bypass steps of the michael@0: * algorithm that are not needed on the specific paragraph michael@0: * in order to speed up the most common cases considerably, michael@0: * like text that is entirely LTR, or RTL text without numbers. michael@0: * michael@0: * Most of this is done by setting a bit for each directional property michael@0: * in a flags variable and later checking for whether there are michael@0: * any LTR characters or any RTL characters, or both, whether michael@0: * there are any explicit embedding codes, etc. michael@0: * michael@0: * If the (Xn) steps are performed, then the flags are re-evaluated, michael@0: * because they will then not contain the embedding codes any more michael@0: * and will be adjusted for override codes, so that subsequently michael@0: * more bypassing may be possible than what the initial flags suggested. michael@0: * michael@0: * If the text is not mixed-directional, then the michael@0: * algorithm steps for the weak type resolution are not performed, michael@0: * and all levels are set to the paragraph level. michael@0: * michael@0: * If there are no explicit embedding codes, then the (Xn) steps michael@0: * are not performed. michael@0: * michael@0: * If embedding levels are supplied as a parameter, then all michael@0: * explicit embedding codes are ignored, and the (Xn) steps michael@0: * are not performed. michael@0: * michael@0: * White Space types could get the level of the run they belong to, michael@0: * and are checked with a test of (flags&MASK_EMBEDDING) to michael@0: * consider if the paragraph direction should be considered in michael@0: * the flags variable. michael@0: * michael@0: * If there are no White Space types in the paragraph, then michael@0: * (L1) is not necessary in adjustWSLevels(). 
michael@0: */ michael@0: michael@0: /* to avoid some conditional statements, use tiny constant arrays */ michael@0: static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; michael@0: static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; michael@0: static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; michael@0: michael@0: #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] michael@0: #define DIRPROP_FLAG_E(level) flagE[(level)&1] michael@0: #define DIRPROP_FLAG_O(level) flagO[(level)&1] michael@0: michael@0: #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) michael@0: michael@0: /* UBiDi object management -------------------------------------------------- */ michael@0: michael@0: U_CAPI UBiDi * U_EXPORT2 michael@0: ubidi_open(void) michael@0: { michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: return ubidi_openSized(0, 0, &errorCode); michael@0: } michael@0: michael@0: U_CAPI UBiDi * U_EXPORT2 michael@0: ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { michael@0: UBiDi *pBiDi; michael@0: michael@0: /* check the argument values */ michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return NULL; michael@0: } else if(maxLength<0 || maxRunCount<0) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; /* invalid arguments */ michael@0: } michael@0: michael@0: /* allocate memory for the object */ michael@0: pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); michael@0: if(pBiDi==NULL) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ michael@0: uprv_memset(pBiDi, 0, sizeof(UBiDi)); michael@0: michael@0: /* get BiDi properties */ michael@0: pBiDi->bdp=ubidi_getSingleton(); michael@0: michael@0: /* allocate memory for arrays as requested */ michael@0: if(maxLength>0) { michael@0: if( !getInitialDirPropsMemory(pBiDi, maxLength) || 
michael@0: !getInitialLevelsMemory(pBiDi, maxLength) michael@0: ) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: } else { michael@0: pBiDi->mayAllocateText=TRUE; michael@0: } michael@0: michael@0: if(maxRunCount>0) { michael@0: if(maxRunCount==1) { michael@0: /* use simpleRuns[] */ michael@0: pBiDi->runsSize=sizeof(Run); michael@0: } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: } else { michael@0: pBiDi->mayAllocateRuns=TRUE; michael@0: } michael@0: michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: return pBiDi; michael@0: } else { michael@0: ubidi_close(pBiDi); michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * We are allowed to allocate memory if memory==NULL or michael@0: * mayAllocate==TRUE for each array that we need. michael@0: * We also try to grow memory as needed if we michael@0: * allocate it. michael@0: * michael@0: * Assume sizeNeeded>0. michael@0: * If *pMemory!=NULL, then assume *pSize>0. michael@0: * michael@0: * ### this realloc() may unnecessarily copy the old data, michael@0: * which we know we don't need any more; michael@0: * is this the best way to do this?? 
michael@0: */ michael@0: U_CFUNC UBool michael@0: ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { michael@0: void **pMemory = (void **)bidiMem; michael@0: /* check for existing memory */ michael@0: if(*pMemory==NULL) { michael@0: /* we need to allocate memory */ michael@0: if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { michael@0: *pSize=sizeNeeded; michael@0: return TRUE; michael@0: } else { michael@0: return FALSE; michael@0: } michael@0: } else { michael@0: if(sizeNeeded<=*pSize) { michael@0: /* there is already enough memory */ michael@0: return TRUE; michael@0: } michael@0: else if(!mayAllocate) { michael@0: /* not enough memory, and we must not allocate */ michael@0: return FALSE; michael@0: } else { michael@0: /* we try to grow */ michael@0: void *memory; michael@0: /* in most cases, we do not need the copy-old-data part of michael@0: * realloc, but it is needed when adding runs using getRunsMemory() michael@0: * in setParaRunsOnly() michael@0: */ michael@0: if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { michael@0: *pMemory=memory; michael@0: *pSize=sizeNeeded; michael@0: return TRUE; michael@0: } else { michael@0: /* we failed to grow */ michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_close(UBiDi *pBiDi) { michael@0: if(pBiDi!=NULL) { michael@0: pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ michael@0: if(pBiDi->dirPropsMemory!=NULL) { michael@0: uprv_free(pBiDi->dirPropsMemory); michael@0: } michael@0: if(pBiDi->levelsMemory!=NULL) { michael@0: uprv_free(pBiDi->levelsMemory); michael@0: } michael@0: if(pBiDi->openingsMemory!=NULL) { michael@0: uprv_free(pBiDi->openingsMemory); michael@0: } michael@0: if(pBiDi->parasMemory!=NULL) { michael@0: uprv_free(pBiDi->parasMemory); michael@0: } michael@0: if(pBiDi->runsMemory!=NULL) { michael@0: uprv_free(pBiDi->runsMemory); 
michael@0: } michael@0: if(pBiDi->isolatesMemory!=NULL) { michael@0: uprv_free(pBiDi->isolatesMemory); michael@0: } michael@0: if(pBiDi->insertPoints.points!=NULL) { michael@0: uprv_free(pBiDi->insertPoints.points); michael@0: } michael@0: michael@0: uprv_free(pBiDi); michael@0: } michael@0: } michael@0: michael@0: /* set to approximate "inverse BiDi" ---------------------------------------- */ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { michael@0: if(pBiDi!=NULL) { michael@0: pBiDi->isInverse=isInverse; michael@0: pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L michael@0: : UBIDI_REORDER_DEFAULT; michael@0: } michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: ubidi_isInverse(UBiDi *pBiDi) { michael@0: if(pBiDi!=NULL) { michael@0: return pBiDi->isInverse; michael@0: } else { michael@0: return FALSE; michael@0: } michael@0: } michael@0: michael@0: /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of michael@0: * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre michael@0: * concept of RUNS_ONLY which is a double operation. michael@0: * It could be advantageous to divide this into 3 concepts: michael@0: * a) Operation: direct / inverse / RUNS_ONLY michael@0: * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R michael@0: * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL michael@0: * This would allow combinations not possible today like RUNS_ONLY with michael@0: * NUMBERS_SPECIAL. michael@0: * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and michael@0: * REMOVE_CONTROLS for the inverse step. michael@0: * Not all combinations would be supported, and probably not all do make sense. michael@0: * This would need to document which ones are supported and what are the michael@0: * fallbacks for unsupported combinations. 
michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { michael@0: if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) michael@0: && (reorderingMode < UBIDI_REORDER_COUNT)) { michael@0: pBiDi->reorderingMode = reorderingMode; michael@0: pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); michael@0: } michael@0: } michael@0: michael@0: U_CAPI UBiDiReorderingMode U_EXPORT2 michael@0: ubidi_getReorderingMode(UBiDi *pBiDi) { michael@0: if (pBiDi!=NULL) { michael@0: return pBiDi->reorderingMode; michael@0: } else { michael@0: return UBIDI_REORDER_DEFAULT; michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { michael@0: if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { michael@0: reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; michael@0: } michael@0: if (pBiDi!=NULL) { michael@0: pBiDi->reorderingOptions=reorderingOptions; michael@0: } michael@0: } michael@0: michael@0: U_CAPI uint32_t U_EXPORT2 michael@0: ubidi_getReorderingOptions(UBiDi *pBiDi) { michael@0: if (pBiDi!=NULL) { michael@0: return pBiDi->reorderingOptions; michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: U_CAPI UBiDiDirection U_EXPORT2 michael@0: ubidi_getBaseDirection(const UChar *text, michael@0: int32_t length){ michael@0: michael@0: int32_t i; michael@0: UChar32 uchar; michael@0: UCharDirection dir; michael@0: michael@0: if( text==NULL || length<-1 ){ michael@0: return UBIDI_NEUTRAL; michael@0: } michael@0: michael@0: if(length==-1) { michael@0: length=u_strlen(text); michael@0: } michael@0: michael@0: for( i = 0 ; i < length; ) { michael@0: /* i is incremented by U16_NEXT */ michael@0: U16_NEXT(text, i, length, uchar); michael@0: dir = u_charDirection(uchar); michael@0: if( dir == U_LEFT_TO_RIGHT ) michael@0: return UBIDI_LTR; michael@0: if( 
dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) michael@0: return UBIDI_RTL; michael@0: } michael@0: return UBIDI_NEUTRAL; michael@0: } michael@0: michael@0: /* perform (P2)..(P3) ------------------------------------------------------- */ michael@0: michael@0: /** michael@0: * Returns the directionality of the first strong character michael@0: * after the last B in prologue, if any. michael@0: * Requires prologue!=null. michael@0: */ michael@0: static DirProp michael@0: firstL_R_AL(UBiDi *pBiDi) { michael@0: const UChar *text=pBiDi->prologue; michael@0: int32_t length=pBiDi->proLength; michael@0: int32_t i; michael@0: UChar32 uchar; michael@0: DirProp dirProp, result=ON; michael@0: for(i=0; iparas michael@0: */ michael@0: static UBool michael@0: checkParaCount(UBiDi *pBiDi) { michael@0: int32_t count=pBiDi->paraCount; michael@0: if(pBiDi->paras==pBiDi->simpleParas) { michael@0: if(count<=SIMPLE_PARAS_SIZE) michael@0: return TRUE; michael@0: if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2)) michael@0: return FALSE; michael@0: pBiDi->paras=pBiDi->parasMemory; michael@0: uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para)); michael@0: return TRUE; michael@0: } michael@0: if(!getInitialParasMemory(pBiDi, count * 2)) michael@0: return FALSE; michael@0: pBiDi->paras=pBiDi->parasMemory; michael@0: return TRUE; michael@0: } michael@0: michael@0: /* michael@0: * Get the directional properties for the text, calculate the flags bit-set, and michael@0: * determine the paragraph level if necessary (in pBiDi->paras[i].level). michael@0: * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. 
michael@0: */ michael@0: static UBool michael@0: getDirProps(UBiDi *pBiDi) { michael@0: const UChar *text=pBiDi->text; michael@0: DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ michael@0: michael@0: int32_t i=0, originalLength=pBiDi->originalLength; michael@0: Flags flags=0; /* collect all directionalities in the text */ michael@0: UChar32 uchar; michael@0: DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ michael@0: UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); michael@0: /* for inverse BiDi, the default para level is set to RTL if there is a michael@0: strong R or AL character at either end of the text */ michael@0: UBool isDefaultLevelInverse=isDefaultLevel && (UBool) michael@0: (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || michael@0: pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); michael@0: int32_t lastArabicPos=-1; michael@0: int32_t controlCount=0; michael@0: UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & michael@0: UBIDI_OPTION_REMOVE_CONTROLS); michael@0: michael@0: typedef enum { michael@0: NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ michael@0: SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ michael@0: SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ michael@0: LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ michael@0: } State; michael@0: State state; michael@0: DirProp lastStrong=ON; /* for default level & inverse BiDi */ michael@0: /* The following stacks are used to manage isolate sequences. Those michael@0: sequences may be nested, but obviously never more deeply than the michael@0: maximum explicit embedding level. michael@0: lastStack is the index of the last used entry in the stack. A value of -1 michael@0: means that there is no open isolate sequence. michael@0: lastStack is reset to -1 on paragraph boundaries. 
*/ michael@0: /* The following stack contains the position of the initiator of michael@0: each open isolate sequence */ michael@0: int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; michael@0: /* The following stack contains the last known state before michael@0: encountering the initiator of an isolate sequence */ michael@0: int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; michael@0: int32_t stackLast=-1; michael@0: michael@0: if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) michael@0: pBiDi->length=0; michael@0: defaultParaLevel=pBiDi->paraLevel&1; michael@0: if(isDefaultLevel) { michael@0: pBiDi->paras[0].level=defaultParaLevel; michael@0: lastStrong=defaultParaLevel; michael@0: if(pBiDi->proLength>0 && /* there is a prologue */ michael@0: (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ michael@0: if(dirProp==L) michael@0: pBiDi->paras[0].level=0; /* set the default para level */ michael@0: else michael@0: pBiDi->paras[0].level=1; /* set the default para level */ michael@0: state=NOT_SEEKING_STRONG; michael@0: } else { michael@0: state=SEEKING_STRONG_FOR_PARA; michael@0: } michael@0: } else { michael@0: pBiDi->paras[0].level=pBiDi->paraLevel; michael@0: state=NOT_SEEKING_STRONG; michael@0: } michael@0: /* count paragraphs and determine the paragraph level (P2..P3) */ michael@0: /* michael@0: * see comment in ubidi.h: michael@0: * the UBIDI_DEFAULT_XXX values are designed so that michael@0: * their bit 0 alone yields the intended default michael@0: */ michael@0: for( /* i=0 above */ ; i0xffff) { /* set the lead surrogate's property to BN */ michael@0: flags|=DIRPROP_FLAG(BN); michael@0: dirProps[i-2]=BN; michael@0: } michael@0: if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) michael@0: controlCount++; michael@0: if(dirProp==L) { michael@0: if(state==SEEKING_STRONG_FOR_PARA) { michael@0: pBiDi->paras[pBiDi->paraCount-1].level=0; michael@0: state=NOT_SEEKING_STRONG; michael@0: } michael@0: else 
if(state==SEEKING_STRONG_FOR_FSI) { michael@0: if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { michael@0: dirProps[isolateStartStack[stackLast]]=LRI; michael@0: flags|=DIRPROP_FLAG(LRI); michael@0: } michael@0: state=LOOKING_FOR_PDI; michael@0: } michael@0: lastStrong=L; michael@0: continue; michael@0: } michael@0: if(dirProp==R || dirProp==AL) { michael@0: if(state==SEEKING_STRONG_FOR_PARA) { michael@0: pBiDi->paras[pBiDi->paraCount-1].level=1; michael@0: state=NOT_SEEKING_STRONG; michael@0: } michael@0: else if(state==SEEKING_STRONG_FOR_FSI) { michael@0: if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { michael@0: dirProps[isolateStartStack[stackLast]]=RLI; michael@0: flags|=DIRPROP_FLAG(RLI); michael@0: } michael@0: state=LOOKING_FOR_PDI; michael@0: } michael@0: lastStrong=R; michael@0: if(dirProp==AL) michael@0: lastArabicPos=i-1; michael@0: continue; michael@0: } michael@0: if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ michael@0: stackLast++; michael@0: if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { michael@0: isolateStartStack[stackLast]=i-1; michael@0: previousStateStack[stackLast]=state; michael@0: } michael@0: if(dirProp==FSI) michael@0: state=SEEKING_STRONG_FOR_FSI; michael@0: else michael@0: state=LOOKING_FOR_PDI; michael@0: continue; michael@0: } michael@0: if(dirProp==PDI) { michael@0: if(state==SEEKING_STRONG_FOR_FSI) { michael@0: if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { michael@0: dirProps[isolateStartStack[stackLast]]=LRI; michael@0: flags|=DIRPROP_FLAG(LRI); michael@0: } michael@0: } michael@0: if(stackLast>=0) { michael@0: if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) michael@0: state=previousStateStack[stackLast]; michael@0: stackLast--; michael@0: } michael@0: continue; michael@0: } michael@0: if(dirProp==B) { michael@0: if(iparas[pBiDi->paraCount-1].limit=i; michael@0: if(isDefaultLevelInverse && lastStrong==R) michael@0: pBiDi->paras[pBiDi->paraCount-1].level=1; michael@0: if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { michael@0: /* When 
streaming, we only process whole paragraphs michael@0: thus some updates are only done on paragraph boundaries */ michael@0: pBiDi->length=i; /* i is index to next character */ michael@0: pBiDi->controlCount=controlCount; michael@0: } michael@0: if(iparaCount++; michael@0: if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ michael@0: return FALSE; michael@0: if(isDefaultLevel) { michael@0: pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; michael@0: state=SEEKING_STRONG_FOR_PARA; michael@0: lastStrong=defaultParaLevel; michael@0: } else { michael@0: pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; michael@0: state=NOT_SEEKING_STRONG; michael@0: } michael@0: stackLast=-1; michael@0: } michael@0: continue; michael@0: } michael@0: } michael@0: /* Ignore still open isolate sequences with overflow */ michael@0: if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { michael@0: stackLast=UBIDI_MAX_EXPLICIT_LEVEL; michael@0: if(dirProps[previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL]]!=FSI) michael@0: state=LOOKING_FOR_PDI; michael@0: } michael@0: /* Resolve direction of still unresolved open FSI sequences */ michael@0: while(stackLast>=0) { michael@0: if(state==SEEKING_STRONG_FOR_FSI) { michael@0: dirProps[isolateStartStack[stackLast]]=LRI; michael@0: flags|=DIRPROP_FLAG(LRI); michael@0: } michael@0: state=previousStateStack[stackLast]; michael@0: stackLast--; michael@0: } michael@0: /* When streaming, ignore text after the last paragraph separator */ michael@0: if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { michael@0: if(pBiDi->lengthparaCount--; michael@0: } else { michael@0: pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; michael@0: pBiDi->controlCount=controlCount; michael@0: } michael@0: /* For inverse bidi, default para direction is RTL if there is michael@0: a strong R or AL at either end of the paragraph */ michael@0: if(isDefaultLevelInverse && lastStrong==R) { michael@0: pBiDi->paras[pBiDi->paraCount-1].level=1; 
michael@0: } michael@0: if(isDefaultLevel) { michael@0: pBiDi->paraLevel=pBiDi->paras[0].level; michael@0: } michael@0: /* The following is needed to resolve the text direction for default level michael@0: paragraphs containing no strong character */ michael@0: for(i=0; iparaCount; i++) michael@0: flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); michael@0: michael@0: if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { michael@0: flags|=DIRPROP_FLAG(L); michael@0: } michael@0: pBiDi->flags=flags; michael@0: pBiDi->lastArabicPos=lastArabicPos; michael@0: return TRUE; michael@0: } michael@0: michael@0: /* determine the paragraph level at position index */ michael@0: U_CFUNC UBiDiLevel michael@0: ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { michael@0: int32_t i; michael@0: for(i=0; iparaCount; i++) michael@0: if(pindexparas[i].limit) michael@0: break; michael@0: if(i>=pBiDi->paraCount) michael@0: i=pBiDi->paraCount-1; michael@0: return (UBiDiLevel)(pBiDi->paras[i].level); michael@0: } michael@0: michael@0: /* Functions for handling paired brackets ----------------------------------- */ michael@0: michael@0: /* In the isoRuns array, the first entry is used for text outside of any michael@0: isolate sequence. Higher entries are used for each more deeply nested michael@0: isolate sequence. isoRunLast is the index of the last used entry. The michael@0: openings array is used to note the data of opening brackets not yet michael@0: matched by a closing bracket, or matched but still susceptible to change michael@0: level. michael@0: Each isoRun entry contains the index of the first and michael@0: one-after-last openings entries for pending opening brackets it michael@0: contains. The next openings entry to use is the one-after-last of the michael@0: most deeply nested isoRun entry. 
michael@0: isoRun entries also contain their current embedding level and the last michael@0: encountered strong character, since these will be needed to resolve michael@0: the level of paired brackets. */ michael@0: michael@0: static void michael@0: bracketInit(UBiDi *pBiDi, BracketData *bd) { michael@0: bd->pBiDi=pBiDi; michael@0: bd->isoRunLast=0; michael@0: bd->isoRuns[0].start=0; michael@0: bd->isoRuns[0].limit=0; michael@0: bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); michael@0: bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1; michael@0: bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; michael@0: if(pBiDi->openingsMemory) { michael@0: bd->openings=pBiDi->openingsMemory; michael@0: bd->openingsSize=pBiDi->openingsSize; michael@0: } else { michael@0: bd->openings=bd->simpleOpenings; michael@0: bd->openingsSize=SIMPLE_OPENINGS_SIZE; michael@0: } michael@0: bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || michael@0: bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; michael@0: } michael@0: michael@0: /* paragraph boundary */ michael@0: static void michael@0: bracketProcessB(BracketData *bd, UBiDiLevel level) { michael@0: bd->isoRunLast=0; michael@0: bd->isoRuns[0].limit=0; michael@0: bd->isoRuns[0].level=level; michael@0: bd->isoRuns[0].lastStrong=bd->isoRuns[0].contextDir=level&1; michael@0: bd->isoRuns[0].lastStrongPos=bd->isoRuns[0].contextPos=0; michael@0: } michael@0: michael@0: /* LRE, LRO, RLE, RLO, PDF */ michael@0: static void michael@0: bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, michael@0: UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { michael@0: IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; michael@0: DirProp *dirProps=bd->pBiDi->dirProps; michael@0: if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ michael@0: return; michael@0: if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)> michael@0: 
(contextLevel&~UBIDI_LEVEL_OVERRIDE)) /* not a PDF */ michael@0: contextLevel=embeddingLevel; michael@0: pLastIsoRun->limit=pLastIsoRun->start; michael@0: pLastIsoRun->level=embeddingLevel; michael@0: pLastIsoRun->lastStrong=pLastIsoRun->contextDir=contextLevel&1; michael@0: pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=lastCcPos; michael@0: } michael@0: michael@0: /* LRI or RLI */ michael@0: static void michael@0: bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { michael@0: IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; michael@0: int16_t lastLimit; michael@0: lastLimit=pLastIsoRun->limit; michael@0: bd->isoRunLast++; michael@0: pLastIsoRun++; michael@0: pLastIsoRun->start=pLastIsoRun->limit=lastLimit; michael@0: pLastIsoRun->level=level; michael@0: pLastIsoRun->lastStrong=pLastIsoRun->contextDir=level&1; michael@0: pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=0; michael@0: } michael@0: michael@0: /* PDI */ michael@0: static void michael@0: bracketProcessPDI(BracketData *bd) { michael@0: bd->isoRunLast--; michael@0: } michael@0: michael@0: /* newly found opening bracket: create an openings entry */ michael@0: static UBool /* return TRUE if success */ michael@0: bracketAddOpening(BracketData *bd, UChar match, int32_t position) { michael@0: IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; michael@0: Opening *pOpening; michael@0: if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */ michael@0: UBiDi *pBiDi=bd->pBiDi; michael@0: if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) michael@0: return FALSE; michael@0: if(bd->openings==bd->simpleOpenings) michael@0: uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, michael@0: SIMPLE_OPENINGS_SIZE * sizeof(Opening)); michael@0: bd->openings=pBiDi->openingsMemory; /* may have changed */ michael@0: bd->openingsSize=pBiDi->openingsSize; michael@0: } michael@0: pOpening=&bd->openings[pLastIsoRun->limit]; michael@0: pOpening->position=position; michael@0: 
pOpening->match=match; michael@0: pOpening->contextDir=pLastIsoRun->contextDir; michael@0: pOpening->contextPos=pLastIsoRun->contextPos; michael@0: pOpening->flags=0; michael@0: pLastIsoRun->limit++; michael@0: return TRUE; michael@0: } michael@0: michael@0: /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ michael@0: static void michael@0: fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { michael@0: /* This function calls itself recursively */ michael@0: IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; michael@0: Opening *qOpening; michael@0: DirProp *dirProps=bd->pBiDi->dirProps; michael@0: int32_t k, openingPosition, closingPosition; michael@0: for(k=openingIndex+1, qOpening=&bd->openings[k]; klimit; k++, qOpening++) { michael@0: if(qOpening->match>=0) /* not an N0c match */ michael@0: continue; michael@0: if(newPropPositioncontextPos) michael@0: break; michael@0: if(newPropPosition>=qOpening->position) michael@0: continue; michael@0: if(newProp==qOpening->contextDir) michael@0: break; michael@0: openingPosition=qOpening->position; michael@0: dirProps[openingPosition]=dirProps[newPropPosition]; michael@0: closingPosition=-(qOpening->match); michael@0: dirProps[closingPosition]= newProp; /* can never be AL */ michael@0: qOpening->match=0; /* prevent further changes */ michael@0: fixN0c(bd, k, openingPosition, newProp); michael@0: fixN0c(bd, k, closingPosition, newProp); michael@0: } michael@0: } michael@0: michael@0: /* handle strong characters, digits and candidates for closing brackets */ michael@0: static UBool /* return TRUE if success */ michael@0: bracketProcessChar(BracketData *bd, int32_t position, DirProp dirProp) { michael@0: IsoRun *pLastIsoRun; michael@0: Opening *pOpening, *qOpening; michael@0: DirProp *dirProps, newProp; michael@0: UBiDiDirection direction; michael@0: uint16_t flag; michael@0: int32_t i, k; michael@0: UBool stable; michael@0: UChar c, match; michael@0: 
dirProps=bd->pBiDi->dirProps; michael@0: if(DIRPROP_FLAG(dirProp)&MASK_STRONG_EN_AN) { /* L, R, AL, EN or AN */ michael@0: pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; michael@0: /* AN after R or AL becomes R or AL; after L or L+AN, it is kept as-is */ michael@0: if(dirProp==AN && (pLastIsoRun->lastStrong==R || pLastIsoRun->lastStrong==AL)) michael@0: dirProp=pLastIsoRun->lastStrong; michael@0: /* EN after L or L+AN becomes L; after R or AL, it becomes R or AL */ michael@0: if(dirProp==EN) { michael@0: if(pLastIsoRun->lastStrong==L || pLastIsoRun->lastStrong==AN) { michael@0: dirProp=L; michael@0: if(!bd->isNumbersSpecial) michael@0: dirProps[position]=ENL; michael@0: } michael@0: else { michael@0: dirProp=pLastIsoRun->lastStrong; /* may be R or AL */ michael@0: if(!bd->isNumbersSpecial) michael@0: dirProps[position]= dirProp==AL ? AN : ENR; michael@0: } michael@0: } michael@0: pLastIsoRun->lastStrong=dirProp; michael@0: pLastIsoRun->contextDir=DIR_FROM_STRONG(dirProp); michael@0: pLastIsoRun->lastStrongPos=pLastIsoRun->contextPos=position; michael@0: if(dirProp==AL || dirProp==AN) michael@0: dirProp=R; michael@0: flag=DIRPROP_FLAG(dirProp); michael@0: /* strong characters found after an unmatched opening bracket michael@0: must be noted for possibly applying N0b */ michael@0: for(i=pLastIsoRun->start; ilimit; i++) michael@0: bd->openings[i].flags|=flag; michael@0: return TRUE; michael@0: } michael@0: if(dirProp!=ON) michael@0: return TRUE; michael@0: /* First see if it is a matching closing bracket. 
Hopefully, this is more michael@0: efficient than checking if it is a closing bracket at all */ michael@0: c=bd->pBiDi->text[position]; michael@0: pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; michael@0: for(i=pLastIsoRun->limit-1; i>=pLastIsoRun->start; i--) { michael@0: if(bd->openings[i].match!=c) michael@0: continue; michael@0: /* We have a match */ michael@0: pOpening=&bd->openings[i]; michael@0: direction=pLastIsoRun->level&1; michael@0: stable=TRUE; /* assume stable until proved otherwise */ michael@0: michael@0: /* The stable flag is set when brackets are paired and their michael@0: level is resolved and cannot be changed by what will be michael@0: found later in the source string. michael@0: An unstable match can occur only when applying N0c, where michael@0: the resolved level depends on the preceding context, and michael@0: this context may be affected by text occurring later. michael@0: Example: RTL paragraph containing: abc[(latin) HEBREW] michael@0: When the closing parenthesis is encountered, it appears michael@0: that N0c1 must be applied since 'abc' sets an opposite michael@0: direction context and both parentheses receive level 2. michael@0: However, when the closing square bracket is processed, michael@0: N0b applies because of 'HEBREW' being included within the michael@0: brackets, thus the square brackets are treated like R and michael@0: receive level 1. However, this changes the preceding michael@0: context of the opening parenthesis, and it now appears michael@0: that N0c2 must be applied to the parentheses rather than michael@0: N0c1. 
*/ michael@0: michael@0: if((direction==0 && pOpening->flags&FOUND_L) || michael@0: (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ michael@0: newProp=direction; michael@0: } michael@0: else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ michael@0: if(direction!=pOpening->contextDir) { michael@0: newProp=pOpening->contextDir; /* N0c1 */ michael@0: /* it is stable if there is no preceding text or in michael@0: conditions too complicated and not worth checking */ michael@0: stable=(i==pLastIsoRun->start); michael@0: } michael@0: else michael@0: newProp=direction; /* N0c2 */ michael@0: } michael@0: else { michael@0: newProp=BN; /* N0d */ michael@0: } michael@0: if(newProp!=BN) { michael@0: dirProps[pOpening->position]=newProp; michael@0: dirProps[position]=newProp; michael@0: pLastIsoRun->contextDir=newProp; michael@0: pLastIsoRun->contextPos=position; michael@0: } michael@0: /* Update nested N0c pairs that may be affected */ michael@0: if(newProp==direction) michael@0: fixN0c(bd, i, pOpening->position, newProp); michael@0: if(stable) { michael@0: pLastIsoRun->limit=i; /* forget any brackets nested within this pair */ michael@0: /* remove lower located synonyms if any */ michael@0: while(pLastIsoRun->limit>pLastIsoRun->start && michael@0: bd->openings[pLastIsoRun->limit-1].position==pOpening->position) michael@0: pLastIsoRun->limit--; michael@0: } michael@0: else { michael@0: pOpening->match=-position; michael@0: /* neutralize lower located synonyms if any */ michael@0: k=i-1; michael@0: while(k>=pLastIsoRun->start && michael@0: bd->openings[k].position==pOpening->position) michael@0: bd->openings[k--].match=0; michael@0: /* neutralize any unmatched opening between the current pair; michael@0: this will also neutralize higher located synonyms if any */ michael@0: for(k=i+1; klimit; k++) { michael@0: qOpening=&bd->openings[k]; michael@0: if(qOpening->position>=position) michael@0: break; michael@0: if(qOpening->match>0) michael@0: qOpening->match=0; 
michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: /* We get here only if the ON character was not a matching closing bracket */ michael@0: /* Now see if it is an opening bracket */ michael@0: match=u_getBidiPairedBracket(c); /* get the matching char */ michael@0: if(match==c) /* if no matching char */ michael@0: return TRUE; michael@0: if(ubidi_getPairedBracketType(bd->pBiDi->bdp, c)!=U_BPT_OPEN) michael@0: return TRUE; /* not an opening bracket */ michael@0: /* special case: process synonyms michael@0: create an opening entry for each synonym */ michael@0: if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ michael@0: if(!bracketAddOpening(bd, 0x3009, position)) michael@0: return FALSE; michael@0: } michael@0: else if(match==0x3009) { /* RIGHT ANGLE BRACKET */ michael@0: if(!bracketAddOpening(bd, 0x232A, position)) michael@0: return FALSE; michael@0: } michael@0: return bracketAddOpening(bd, match, position); michael@0: } michael@0: michael@0: /* perform (X1)..(X9) ------------------------------------------------------- */ michael@0: michael@0: /* determine if the text is mixed-directional or single-directional */ michael@0: static UBiDiDirection michael@0: directionFromFlags(UBiDi *pBiDi) { michael@0: Flags flags=pBiDi->flags; michael@0: /* if the text contains AN and neutrals, then some neutrals may become RTL */ michael@0: if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { michael@0: return UBIDI_LTR; michael@0: } else if(!(flags&MASK_LTR)) { michael@0: return UBIDI_RTL; michael@0: } else { michael@0: return UBIDI_MIXED; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Resolve the explicit levels as specified by explicit embedding codes. michael@0: * Recalculate the flags to have them reflect the real properties michael@0: * after taking the explicit embeddings into account. 
michael@0: * michael@0: * The BiDi algorithm is designed to result in the same behavior whether embedding michael@0: * levels are externally specified (from "styled text", supposedly the preferred michael@0: * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. michael@0: * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). michael@0: * However, in a real implementation, the removal of these codes and their index michael@0: * positions in the plain text is undesirable since it would result in michael@0: * reallocated, reindexed text. michael@0: * Instead, this implementation leaves the codes in there and just ignores them michael@0: * in the subsequent processing. michael@0: * In order to get the same reordering behavior, positions with a BN or a not-isolate michael@0: * explicit embedding code just get the same level assigned as the last "real" michael@0: * character. michael@0: * michael@0: * Some implementations, not this one, then overwrite some of these michael@0: * directionality properties at "real" same-level-run boundaries by michael@0: * L or R codes so that the resolution of weak types can be performed on the michael@0: * entire paragraph at once instead of having to parse it once more and michael@0: * perform that resolution on same-level-runs. michael@0: * This limits the scope of the implicit rules in effectively michael@0: * the same way as the run limits. michael@0: * michael@0: * Instead, this implementation does not modify these codes, except for michael@0: * paired brackets whose properties (ON) may be replaced by L or R. michael@0: * On one hand, the paragraph has to be scanned for same-level-runs, but michael@0: * on the other hand, this saves another loop to reset these codes, michael@0: * or saves making and modifying a copy of dirProps[]. michael@0: * michael@0: * michael@0: * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. 
michael@0: * michael@0: * michael@0: * Handling the stack of explicit levels (Xn): michael@0: * michael@0: * With the BiDi stack of explicit levels, as pushed with each michael@0: * LRE, RLE, LRO, RLO, LRI, RLI and FSO and popped with each PDF and PDI, michael@0: * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. michael@0: * michael@0: * In order to have a correct push-pop semantics even in the case of overflows, michael@0: * overflow counters and a valid isolate counter are used as described in UAX#9 michael@0: * section 3.3.2 "Explicit Levels and Directions". michael@0: * michael@0: * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. michael@0: */ michael@0: static UBiDiDirection michael@0: resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { michael@0: DirProp *dirProps=pBiDi->dirProps; michael@0: UBiDiLevel *levels=pBiDi->levels; michael@0: const UChar *text=pBiDi->text; michael@0: michael@0: int32_t i=0, length=pBiDi->length; michael@0: Flags flags=pBiDi->flags; /* collect all directionalities in the text */ michael@0: DirProp dirProp; michael@0: UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); michael@0: UBiDiDirection direction; michael@0: pBiDi->isolateCount=0; michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } michael@0: michael@0: /* determine if the text is mixed-directional or single-directional */ michael@0: direction=directionFromFlags(pBiDi); michael@0: michael@0: /* we may not need to resolve any explicit levels */ michael@0: if((direction!=UBIDI_MIXED)) { michael@0: /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ michael@0: return direction; michael@0: } michael@0: if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { michael@0: /* inverse BiDi: mixed, but all characters are at the same embedding level */ michael@0: /* set all levels to the paragraph level */ michael@0: int32_t paraIndex, start, limit; michael@0: for(paraIndex=0; paraIndexparaCount; 
paraIndex++) { michael@0: if(paraIndex==0) michael@0: start=0; michael@0: else michael@0: start=pBiDi->paras[paraIndex-1].limit; michael@0: limit=pBiDi->paras[paraIndex].limit; michael@0: level=pBiDi->paras[paraIndex].level; michael@0: for(i=start; iparaCount; paraIndex++) { michael@0: if(paraIndex==0) michael@0: start=0; michael@0: else michael@0: start=pBiDi->paras[paraIndex-1].limit; michael@0: limit=pBiDi->paras[paraIndex].limit; michael@0: level=pBiDi->paras[paraIndex].level; michael@0: for(i=start; i=UBIDI_MAX_EXPLICIT_LEVEL michael@0: but we need one more entry as base */ michael@0: uint32_t stackLast=0; michael@0: int32_t overflowIsolateCount=0; michael@0: int32_t overflowEmbeddingCount=0; michael@0: int32_t validIsolateCount=0; michael@0: BracketData bracketData; michael@0: bracketInit(pBiDi, &bracketData); michael@0: stack[0]=level; /* initialize base entry to para level, no override, no isolate */ michael@0: michael@0: /* recalculate the flags */ michael@0: flags=0; michael@0: michael@0: for(i=0; i0 && stack[stackLast]pBiDi->isolateCount) michael@0: pBiDi->isolateCount=validIsolateCount; michael@0: embeddingLevel=newLevel; michael@0: stackLast++; michael@0: stack[stackLast]=embeddingLevel+ISOLATE; michael@0: bracketProcessLRI_RLI(&bracketData, embeddingLevel); michael@0: } else { michael@0: dirProps[i]|=IGNORE_CC; michael@0: overflowIsolateCount++; michael@0: } michael@0: break; michael@0: case PDI: michael@0: if(embeddingLevel!=previousLevel) { michael@0: bracketProcessBoundary(&bracketData, lastCcPos, michael@0: previousLevel, embeddingLevel); michael@0: } michael@0: /* (X6a) */ michael@0: if(overflowIsolateCount) { michael@0: dirProps[i]|=IGNORE_CC; michael@0: overflowIsolateCount--; michael@0: } michael@0: else if(validIsolateCount) { michael@0: lastCcPos=i; michael@0: overflowEmbeddingCount=0; michael@0: while(stack[stackLast]0 && levels[i-1]!=level) { michael@0: flags|=DIRPROP_FLAG_MULTI_RUNS; michael@0: if(level&UBIDI_LEVEL_OVERRIDE) michael@0: 
flags|=DIRPROP_FLAG_O(level); michael@0: else michael@0: flags|=DIRPROP_FLAG_E(level); michael@0: } michael@0: if(DIRPROP_FLAG(dirProp)&MASK_ISO) michael@0: level=embeddingLevel; michael@0: } michael@0: if(flags&MASK_EMBEDDING) { michael@0: flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); michael@0: } michael@0: if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { michael@0: flags|=DIRPROP_FLAG(L); michael@0: } michael@0: michael@0: /* subsequently, ignore the explicit codes and BN (X9) */ michael@0: michael@0: /* again, determine if the text is mixed-directional or single-directional */ michael@0: pBiDi->flags=flags; michael@0: direction=directionFromFlags(pBiDi); michael@0: } michael@0: return direction; michael@0: } michael@0: michael@0: /* michael@0: * Use a pre-specified embedding levels array: michael@0: * michael@0: * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), michael@0: * ignore all explicit codes (X9), michael@0: * and check all the preset levels. michael@0: * michael@0: * Recalculate the flags to have them reflect the real properties michael@0: * after taking the explicit embeddings into account. 
michael@0: */ michael@0: static UBiDiDirection michael@0: checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { michael@0: DirProp *dirProps=pBiDi->dirProps; michael@0: DirProp dirProp; michael@0: UBiDiLevel *levels=pBiDi->levels; michael@0: int32_t isolateCount=0; michael@0: michael@0: int32_t i, length=pBiDi->length; michael@0: Flags flags=0; /* collect all directionalities in the text */ michael@0: UBiDiLevel level; michael@0: pBiDi->isolateCount=0; michael@0: michael@0: for(i=0; ipBiDi->isolateCount) michael@0: pBiDi->isolateCount=isolateCount; michael@0: } michael@0: else if(dirProp==PDI) michael@0: isolateCount--; michael@0: else if(dirProp==B) michael@0: isolateCount=0; michael@0: if(level&UBIDI_LEVEL_OVERRIDE) { michael@0: /* keep the override flag in levels[i] but adjust the flags */ michael@0: level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ michael@0: flags|=DIRPROP_FLAG_O(level); michael@0: } else { michael@0: /* set the flags */ michael@0: flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); michael@0: } michael@0: if((levelparaLevel); michael@0: } michael@0: michael@0: /* determine if the text is mixed-directional or single-directional */ michael@0: pBiDi->flags=flags; michael@0: return directionFromFlags(pBiDi); michael@0: } michael@0: michael@0: /****************************************************************** michael@0: The Properties state machine table michael@0: ******************************************************************* michael@0: michael@0: All table cells are 8 bits: michael@0: bits 0..4: next state michael@0: bits 5..7: action to perform (if > 0) michael@0: michael@0: Cells may be of format "n" where n represents the next state michael@0: (except for the rightmost column). michael@0: Cells may also be of format "s(x,y)" where x represents an action michael@0: to perform and y represents the next state. 
/*
 * Definitions and type for the properties state table.
 *
 * Each cell is 8 bits: bits 0..4 hold the next state (GET_STATEPROPS) and
 * bits 5..7 hold the action to perform, if > 0 (GET_ACTIONPROPS).
 * s(action, newState) builds such a cell; its parameters are parenthesized
 * so that any expression can safely be passed.
 */
#define IMPTABPROPS_COLUMNS 16
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))

/* maps each dirProp value to its column in impTabProps */
static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
};
enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */

/******************************************************************

      PROPERTIES  STATE  TABLE

 In table impTabProps,
      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
      - the Res column is the reduced property assigned to a run

 Action 1: process current run1, init new run1
        2: init new run2
        3: process run1, process run2, init new run1
        4: process run1, set run1=run2, init new run2

 Notes:
  1) This table is used in resolveImplicitLevels().
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.

*/
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
{
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
};

/*  we must undef macro s because the levels tables have a different
 *  structure (4 bits for action and 4 bits for next state).
 */
#undef s

/******************************************************************
 The levels state machine tables
*******************************************************************

 All table cells are 8 bits:
      bits 0..3:  next state
      bits 4..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

 This format limits each table to 16 states each and to 15 actions.

*******************************************************************
 Definitions and type for levels state tables
*******************************************************************
*/
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))

typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
typedef uint8_t ImpAct[];

/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
 * instead of having a pair of ImpTab and a pair of ImpAct.
 */
typedef struct ImpTabPair {
    const void * pImpTab[2];
    const void * pImpAct[2];
} ImpTabPair;

/******************************************************************

      LEVELS  STATE  TABLES

 In all levels state tables,
      - state 0 is the initial state
      - the Res column is the increment to add to the text level
        for this property sequence.

 The impAct arrays for each table of a pair map the local action
 numbers of the table to the total list of actions. For instance,
 action 2 in a given table corresponds to the action number which
 appears in entry [2] of the impAct array for that table.
 The first entry of all impAct arrays must be 0.

 Action 1: init conditional sequence
        2: prepend conditional sequence to current sequence
        3: set ON sequence to new level - 1
        4: init EN/AN/ON sequence
        5: fix EN/AN/ON sequence followed by R
        6: set previous level sequence to level 2

 Notes:
  1) These tables are used in processPropertySeq(). The input
     is property sequences as determined by resolveImplicitLevels.
  2) Most such property sequences are processed immediately
     (levels are assigned).
  3) However, some sequences cannot be assigned a final level till
     one or more following sequences are received. For instance,
     ON following an R sequence within an even-level paragraph.
     If the following sequence is R, the ON sequence will be
     assigned basic run level+1, and so will the R sequence.
  4) S is generally handled like ON, since its level will be fixed
     to paragraph level in adjustWSLevels().

*/

static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
};
static const ImpAct impAct0 = {0,1,2,3,4,5,6};
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
                                           &impTabR_DEFAULT},
                                          {&impAct0, &impAct0}};

static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  2 },
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3),     0 ,     0 ,  1 },
/* 3 : R+ON       */ { s(2,0),     2 ,     4 ,     4 ,     3 ,     3 , s(2,0),  1 },
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
};
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
                                                   &impTabR_DEFAULT},
                                                  {&impAct0, &impAct0}};

static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
};
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
};
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
                        {&impTabL_GROUP_NUMBERS_WITH_R,
                         &impTabR_GROUP_NUMBERS_WITH_R},
                        {&impAct0, &impAct0}};


static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default LTR table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default RTL table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
};
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
                        {&impTabL_INVERSE_NUMBERS_AS_L,
                         &impTabR_INVERSE_NUMBERS_AS_L},
                        {&impAct0, &impAct0}};

static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
};
static const ImpAct impAct1 = {0,1,11,12};
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
 */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
                        {&impTabL_DEFAULT,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};

static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
};
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The cases handled in this table are (visually):  R EN L
                                                     R L AN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
};
static const ImpAct impAct2 = {0,1,7,8,9,10};
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct0, &impAct2}};

static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
                        {&impTabL_NUMBERS_SPECIAL,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};

static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
};
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct0, &impAct2}};

#undef s
*/ michael@0: int32_t startON; /* start of ON sequence */ michael@0: int32_t startL2EN; /* start of level 2 sequence */ michael@0: int32_t lastStrongRTL; /* index of last found R or AL */ michael@0: int32_t state; /* current state */ michael@0: int32_t runStart; /* start position of the run */ michael@0: UBiDiLevel runLevel; /* run level before implicit solving */ michael@0: } LevState; michael@0: michael@0: /*------------------------------------------------------------------------*/ michael@0: /* addPoint: append one (pos, flag) entry to pBiDi->insertPoints, growing the points array on demand. The function returns nothing; if memory cannot be obtained, insertPoints.errorCode is set to U_MEMORY_ALLOCATION_ERROR and the entry is not recorded. */ michael@0: static void michael@0: addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) michael@0: /* param pBiDi: object whose insertPoints list receives the new entry michael@0: param pos: position where to insert michael@0: param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER michael@0: */ michael@0: { michael@0: #define FIRSTALLOC 10 michael@0: Point point; michael@0: InsertPoints * pInsertPoints=&(pBiDi->insertPoints); michael@0: michael@0: if (pInsertPoints->capacity == 0) /* nothing allocated yet: start with room for FIRSTALLOC points */ michael@0: { michael@0: pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); michael@0: if (pInsertPoints->points == NULL) michael@0: { michael@0: pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: pInsertPoints->capacity=FIRSTALLOC; michael@0: } michael@0: if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ michael@0: { michael@0: void * savePoints=pInsertPoints->points; /* remember the current buffer in case the reallocation fails */ michael@0: pInsertPoints->points=uprv_realloc(pInsertPoints->points, michael@0: pInsertPoints->capacity*2*sizeof(Point)); michael@0: if (pInsertPoints->points == NULL) michael@0: { michael@0: pInsertPoints->points=savePoints; /* put the old pointer back so the already stored points stay reachable */ michael@0: pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: else pInsertPoints->capacity*=2; /* growth succeeded: record the doubled capacity */ michael@0: } michael@0: point.pos=pos; michael@0: point.flag=flag; michael@0: pInsertPoints->points[pInsertPoints->size]=point; /* store the new entry after the last used slot */ michael@0: pInsertPoints->size++; michael@0: #undef FIRSTALLOC michael@0: } michael@0: michael@0: /* perform rules (Wn), (Nn), and (In) on
a run of the text ------------------ */ michael@0: michael@0: /* michael@0: * This implementation of the (Wn) rules applies all rules in one pass. michael@0: * In order to do so, it needs a look-ahead of typically 1 character michael@0: * (except for W5: sequences of ET) and keeps track of changes michael@0: * in a rule Wp that affect a later Wq (ppImpTab; michael@0: const ImpAct * pImpAct=pLevState->pImpAct; michael@0: UBiDiLevel * levels=pBiDi->levels; michael@0: UBiDiLevel level, addLevel; michael@0: InsertPoints * pInsertPoints; michael@0: int32_t start0, k; michael@0: michael@0: start0=start; /* save original start position */ michael@0: oldStateSeq=(uint8_t)pLevState->state; michael@0: cell=(*pImpTab)[oldStateSeq][_prop]; michael@0: pLevState->state=GET_STATE(cell); /* isolate the new state */ michael@0: actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ michael@0: addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; michael@0: michael@0: if(actionSeq) { michael@0: switch(actionSeq) { michael@0: case 1: /* init ON seq */ michael@0: pLevState->startON=start0; michael@0: break; michael@0: michael@0: case 2: /* prepend ON seq to current seq */ michael@0: start=pLevState->startON; michael@0: break; michael@0: michael@0: case 3: /* L or S after possible relevant EN/AN */ michael@0: /* check if we had EN after R/AL */ michael@0: if (pLevState->startL2EN >= 0) { michael@0: addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); michael@0: } michael@0: pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ michael@0: /* check if we had any relevant EN/AN after R/AL */ michael@0: pInsertPoints=&(pBiDi->insertPoints); michael@0: if ((pInsertPoints->capacity == 0) || michael@0: (pInsertPoints->size <= pInsertPoints->confirmed)) michael@0: { michael@0: /* nothing, just clean up */ michael@0: pLevState->lastStrongRTL=-1; michael@0: /* check if we have a pending conditional segment */ michael@0: 
level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; michael@0: if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ michael@0: start=pLevState->startON; /* reset to basic run level */ michael@0: } michael@0: if (_prop == DirProp_S) /* add LRM before S */ michael@0: { michael@0: addPoint(pBiDi, start0, LRM_BEFORE); michael@0: pInsertPoints->confirmed=pInsertPoints->size; michael@0: } michael@0: break; michael@0: } michael@0: /* reset previous RTL cont to level for LTR text */ michael@0: for (k=pLevState->lastStrongRTL+1; kconfirmed=pInsertPoints->size; michael@0: pLevState->lastStrongRTL=-1; michael@0: if (_prop == DirProp_S) /* add LRM before S */ michael@0: { michael@0: addPoint(pBiDi, start0, LRM_BEFORE); michael@0: pInsertPoints->confirmed=pInsertPoints->size; michael@0: } michael@0: break; michael@0: michael@0: case 4: /* R/AL after possible relevant EN/AN */ michael@0: /* just clean up */ michael@0: pInsertPoints=&(pBiDi->insertPoints); michael@0: if (pInsertPoints->capacity > 0) michael@0: /* remove all non confirmed insert points */ michael@0: pInsertPoints->size=pInsertPoints->confirmed; michael@0: pLevState->startON=-1; michael@0: pLevState->startL2EN=-1; michael@0: pLevState->lastStrongRTL=limit - 1; michael@0: break; michael@0: michael@0: case 5: /* EN/AN after R/AL + possible cont */ michael@0: /* check for real AN */ michael@0: if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && michael@0: (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) michael@0: { michael@0: /* real AN */ michael@0: if (pLevState->startL2EN == -1) /* if no relevant EN already found */ michael@0: { michael@0: /* just note the righmost digit as a strong RTL */ michael@0: pLevState->lastStrongRTL=limit - 1; michael@0: break; michael@0: } michael@0: if (pLevState->startL2EN >= 0) /* after EN, no AN */ michael@0: { michael@0: addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); michael@0: pLevState->startL2EN=-2; michael@0: } michael@0: /* note AN */ 
michael@0: addPoint(pBiDi, start0, LRM_BEFORE); michael@0: break; michael@0: } michael@0: /* if first EN/AN after R/AL */ michael@0: if (pLevState->startL2EN == -1) { michael@0: pLevState->startL2EN=start0; michael@0: } michael@0: break; michael@0: michael@0: case 6: /* note location of latest R/AL */ michael@0: pLevState->lastStrongRTL=limit - 1; michael@0: pLevState->startON=-1; michael@0: break; michael@0: michael@0: case 7: /* L after R+ON/EN/AN */ michael@0: /* include possible adjacent number on the left */ michael@0: for (k=start0-1; k>=0 && !(levels[k]&1); k--); michael@0: if(k>=0) { michael@0: addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ michael@0: pInsertPoints=&(pBiDi->insertPoints); michael@0: pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ michael@0: } michael@0: pLevState->startON=start0; michael@0: break; michael@0: michael@0: case 8: /* AN after L */ michael@0: /* AN numbers between L text on both sides may be trouble. */ michael@0: /* tentatively bracket with LRMs; will be confirmed if followed by L */ michael@0: addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ michael@0: addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ michael@0: break; michael@0: michael@0: case 9: /* R after L+ON/EN/AN */ michael@0: /* false alert, infirm LRMs around previous AN */ michael@0: pInsertPoints=&(pBiDi->insertPoints); michael@0: pInsertPoints->size=pInsertPoints->confirmed; michael@0: if (_prop == DirProp_S) /* add RLM before S */ michael@0: { michael@0: addPoint(pBiDi, start0, RLM_BEFORE); michael@0: pInsertPoints->confirmed=pInsertPoints->size; michael@0: } michael@0: break; michael@0: michael@0: case 10: /* L after L+ON/AN */ michael@0: level=pLevState->runLevel + addLevel; michael@0: for(k=pLevState->startON; kinsertPoints); michael@0: pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ michael@0: pLevState->startON=start0; michael@0: break; michael@0: michael@0: case 11: /* L after L+ON+EN/AN/ON */ 
michael@0: level=pLevState->runLevel; michael@0: for(k=start0-1; k>=pLevState->startON; k--) { michael@0: if(levels[k]==level+3) { michael@0: while(levels[k]==level+3) { michael@0: levels[k--]-=2; michael@0: } michael@0: while(levels[k]==level) { michael@0: k--; michael@0: } michael@0: } michael@0: if(levels[k]==level+2) { michael@0: levels[k]=level; michael@0: continue; michael@0: } michael@0: levels[k]=level+1; michael@0: } michael@0: break; michael@0: michael@0: case 12: /* R after L+ON+EN/AN/ON */ michael@0: level=pLevState->runLevel+1; michael@0: for(k=start0-1; k>=pLevState->startON; k--) { michael@0: if(levels[k]>level) { michael@0: levels[k]-=2; michael@0: } michael@0: } michael@0: break; michael@0: michael@0: default: /* we should never get here */ michael@0: U_ASSERT(FALSE); michael@0: break; michael@0: } michael@0: } michael@0: if((addLevel) || (start < start0)) { michael@0: level=pLevState->runLevel + addLevel; michael@0: if(start>=pLevState->runStart) { michael@0: for(k=start; kdirProps, dirProp; michael@0: int32_t isolateCount=0; michael@0: for(k=start; kprologue; michael@0: int32_t length=pBiDi->proLength; michael@0: int32_t i; michael@0: UChar32 uchar; michael@0: DirProp dirProp; michael@0: for(i=length; i>0; ) { michael@0: /* i is decremented by U16_PREV */ michael@0: U16_PREV(text, 0, i, uchar); michael@0: dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); michael@0: if(dirProp==L) { michael@0: return DirProp_L; michael@0: } michael@0: if(dirProp==R || dirProp==AL) { michael@0: return DirProp_R; michael@0: } michael@0: if(dirProp==B) { michael@0: return DirProp_ON; michael@0: } michael@0: } michael@0: return DirProp_ON; michael@0: } michael@0: michael@0: /** michael@0: * Returns the directionality of the first strong character, or digit, in the epilogue, if any. michael@0: * Requires epilogue!=null. 
michael@0: */ michael@0: static DirProp michael@0: firstL_R_AL_EN_AN(UBiDi *pBiDi) { michael@0: const UChar *text=pBiDi->epilogue; michael@0: int32_t length=pBiDi->epiLength; michael@0: int32_t i; michael@0: UChar32 uchar; michael@0: DirProp dirProp; michael@0: for(i=0; idirProps; michael@0: DirProp dirProp; michael@0: LevState levState; michael@0: int32_t i, start1, start2; michael@0: uint16_t oldStateImp, stateImp, actionImp; michael@0: uint8_t gprop, resProp, cell; michael@0: UBool inverseRTL; michael@0: DirProp nextStrongProp=R; michael@0: int32_t nextStrongPos=-1; michael@0: michael@0: /* check for RTL inverse BiDi mode */ michael@0: /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to michael@0: * loop on the text characters from end to start. michael@0: * This would need a different properties state table (at least different michael@0: * actions) and different levels state tables (maybe very similar to the michael@0: * LTR corresponding ones. michael@0: */ michael@0: inverseRTL=(UBool) michael@0: ((startlastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && michael@0: (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || michael@0: pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); michael@0: michael@0: /* initialize for property and levels state tables */ michael@0: levState.startON=-1; michael@0: levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ michael@0: levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ michael@0: levState.runStart=start; michael@0: levState.runLevel=pBiDi->levels[start]; michael@0: levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; michael@0: levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; michael@0: if(start==0 && pBiDi->proLength>0) { michael@0: DirProp lastStrong=lastL_R_AL(pBiDi); michael@0: if(lastStrong!=DirProp_ON) { michael@0: sor=lastStrong; michael@0: } michael@0: } 
michael@0: /* The isolates[] entries contain enough information to michael@0: resume the bidi algorithm in the same state as it was michael@0: when it was interrupted by an isolate sequence. */ michael@0: if(dirProps[start]==PDI) { michael@0: start1=pBiDi->isolates[pBiDi->isolateCount].start1; michael@0: stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; michael@0: levState.state=pBiDi->isolates[pBiDi->isolateCount].state; michael@0: pBiDi->isolateCount--; michael@0: } else { michael@0: start1=start; michael@0: if(dirProps[start]==NSM) michael@0: stateImp = 1 + sor; michael@0: else michael@0: stateImp=0; michael@0: levState.state=0; michael@0: processPropertySeq(pBiDi, &levState, sor, start, start); michael@0: } michael@0: start2=start; michael@0: michael@0: for(i=start; i<=limit; i++) { michael@0: if(i>=limit) { michael@0: if(limit>start) { michael@0: dirProp=pBiDi->dirProps[limit-1]; michael@0: if(dirProp==LRI || dirProp==RLI) michael@0: break; /* no forced closing for sequence ending with LRI/RLI */ michael@0: } michael@0: gprop=eor; michael@0: } else { michael@0: DirProp prop, prop1; michael@0: prop=PURE_DIRPROP(dirProps[i]); michael@0: if(inverseRTL) { michael@0: if(prop==AL) { michael@0: /* AL before EN does not make it AN */ michael@0: prop=R; michael@0: } else if(prop==EN) { michael@0: if(nextStrongPos<=i) { michael@0: /* look for next strong char (L/R/AL) */ michael@0: int32_t j; michael@0: nextStrongProp=R; /* set default */ michael@0: nextStrongPos=limit; michael@0: for(j=i+1; jlength && pBiDi->epiLength>0) { michael@0: DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); michael@0: if(firstStrong!=DirProp_ON) { michael@0: eor=firstStrong; michael@0: } michael@0: } michael@0: michael@0: dirProp=dirProps[limit-1]; michael@0: if((dirProp==LRI || dirProp==RLI) && limitlength) { michael@0: pBiDi->isolateCount++; michael@0: pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; michael@0: pBiDi->isolates[pBiDi->isolateCount].state=levState.state; michael@0: 
pBiDi->isolates[pBiDi->isolateCount].start1=start1; michael@0: } michael@0: else michael@0: processPropertySeq(pBiDi, &levState, eor, limit, limit); michael@0: } michael@0: michael@0: /* perform (L1) and (X9) ---------------------------------------------------- */ michael@0: michael@0: /* michael@0: * Reset the embedding levels for some non-graphic characters (L1). michael@0: * This function also sets appropriate levels for BN, and michael@0: * explicit embedding types that are supposed to have been removed michael@0: * from the paragraph in (X9). michael@0: */ michael@0: static void michael@0: adjustWSLevels(UBiDi *pBiDi) { michael@0: const DirProp *dirProps=pBiDi->dirProps; michael@0: UBiDiLevel *levels=pBiDi->levels; michael@0: int32_t i; michael@0: michael@0: if(pBiDi->flags&MASK_WS) { michael@0: UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; michael@0: Flags flag; michael@0: michael@0: i=pBiDi->trailingWSStart; michael@0: while(i>0) { michael@0: /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ michael@0: while(i>0 && (flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])))&MASK_WS) { michael@0: if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { michael@0: levels[i]=0; michael@0: } else { michael@0: levels[i]=GET_PARALEVEL(pBiDi, i); michael@0: } michael@0: } michael@0: michael@0: /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ michael@0: /* here, i+1 is guaranteed to be 0) { michael@0: flag=DIRPROP_FLAG(PURE_DIRPROP(dirProps[--i])); michael@0: if(flag&MASK_BN_EXPLICIT) { michael@0: levels[i]=levels[i+1]; michael@0: } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { michael@0: levels[i]=0; michael@0: break; michael@0: } else if(flag&MASK_B_S) { michael@0: levels[i]=GET_PARALEVEL(pBiDi, i); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_setContext(UBiDi *pBiDi, michael@0: const UChar *prologue, 
int32_t proLength, michael@0: const UChar *epilogue, int32_t epiLength, michael@0: UErrorCode *pErrorCode) { michael@0: /* check the argument values: a NULL pBiDi, a length below -1, or a NULL text combined with a non-zero length (including -1) is rejected with U_ILLEGAL_ARGUMENT_ERROR */ michael@0: RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); michael@0: if(pBiDi==NULL || proLength<-1 || epiLength<-1 || michael@0: (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return; michael@0: } michael@0: /* a length of -1 asks for the length to be measured with u_strlen(); any other accepted value is stored as given */ michael@0: if(proLength==-1) { michael@0: pBiDi->proLength=u_strlen(prologue); michael@0: } else { michael@0: pBiDi->proLength=proLength; michael@0: } michael@0: if(epiLength==-1) { michael@0: pBiDi->epiLength=u_strlen(epilogue); michael@0: } else { michael@0: pBiDi->epiLength=epiLength; michael@0: } michael@0: /* only the pointers are stored here; the context text itself is not copied */ michael@0: pBiDi->prologue=prologue; michael@0: pBiDi->epilogue=epilogue; michael@0: } michael@0: /* setParaSuccess: record that a setPara call finished successfully by pointing pParaBiDi at the object itself, and reset both context lengths to 0 so the context is not reused. */ michael@0: static void michael@0: setParaSuccess(UBiDi *pBiDi) { michael@0: pBiDi->proLength=0; /* forget the last context */ michael@0: pBiDi->epiLength=0; michael@0: pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ michael@0: } michael@0: /* Helper macros: each argument can be evaluated more than once, so pass only expressions without side effects. */ michael@0: #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) michael@0: #define BIDI_ABS(x) ((x)>=0 ?
(x) : (-(x))) michael@0: michael@0: static void michael@0: setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, michael@0: UBiDiLevel paraLevel, UErrorCode *pErrorCode) { michael@0: void *runsOnlyMemory; michael@0: int32_t *visualMap; michael@0: UChar *visualText; michael@0: int32_t saveLength, saveTrailingWSStart; michael@0: const UBiDiLevel *levels; michael@0: UBiDiLevel *saveLevels; michael@0: UBiDiDirection saveDirection; michael@0: UBool saveMayAllocateText; michael@0: Run *runs; michael@0: int32_t visualLength, i, j, visualStart, logicalStart, michael@0: runCount, runLength, addedRuns, insertRemove, michael@0: start, limit, step, indexOddBit, logicalPos, michael@0: index0, index1; michael@0: uint32_t saveOptions; michael@0: michael@0: pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; michael@0: if(length==0) { michael@0: ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); michael@0: goto cleanup3; michael@0: } michael@0: /* obtain memory for mapping table and visual text */ michael@0: runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); michael@0: if(runsOnlyMemory==NULL) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: goto cleanup3; michael@0: } michael@0: visualMap=runsOnlyMemory; michael@0: visualText=(UChar *)&visualMap[length]; michael@0: saveLevels=(UBiDiLevel *)&visualText[length]; michael@0: saveOptions=pBiDi->reorderingOptions; michael@0: if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { michael@0: pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; michael@0: pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; michael@0: } michael@0: paraLevel&=1; /* accept only 0 or 1 */ michael@0: ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: goto cleanup3; michael@0: } michael@0: /* we cannot access directly pBiDi->levels since it is not yet set if michael@0: * direction is not MIXED michael@0: */ michael@0: 
levels=ubidi_getLevels(pBiDi, pErrorCode); michael@0: uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); michael@0: saveTrailingWSStart=pBiDi->trailingWSStart; michael@0: saveLength=pBiDi->length; michael@0: saveDirection=pBiDi->direction; michael@0: michael@0: /* FOOD FOR THOUGHT: instead of writing the visual text, we could use michael@0: * the visual map and the dirProps array to drive the second call michael@0: * to ubidi_setPara (but must make provision for possible removal of michael@0: * BiDi controls. Alternatively, only use the dirProps array via michael@0: * customized classifier callback. michael@0: */ michael@0: visualLength=ubidi_writeReordered(pBiDi, visualText, length, michael@0: UBIDI_DO_MIRRORING, pErrorCode); michael@0: ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: goto cleanup2; michael@0: } michael@0: pBiDi->reorderingOptions=saveOptions; michael@0: michael@0: pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; michael@0: paraLevel^=1; michael@0: /* Because what we did with reorderingOptions, visualText may be shorter michael@0: * than the original text. But we don't want the levels memory to be michael@0: * reallocated shorter than the original length, since we need to restore michael@0: * the levels as after the first call to ubidi_setpara() before returning. michael@0: * We will force mayAllocateText to FALSE before the second call to michael@0: * ubidi_setpara(), and will restore it afterwards. 
michael@0: */ michael@0: saveMayAllocateText=pBiDi->mayAllocateText; michael@0: pBiDi->mayAllocateText=FALSE; michael@0: ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); michael@0: pBiDi->mayAllocateText=saveMayAllocateText; michael@0: ubidi_getRuns(pBiDi, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: goto cleanup1; michael@0: } michael@0: /* check if some runs must be split, count how many splits */ michael@0: addedRuns=0; michael@0: runCount=pBiDi->runCount; michael@0: runs=pBiDi->runs; michael@0: visualStart=0; michael@0: for(i=0; irunsMemory[0]=runs[0]; michael@0: } michael@0: runs=pBiDi->runs=pBiDi->runsMemory; michael@0: pBiDi->runCount+=addedRuns; michael@0: } else { michael@0: goto cleanup1; michael@0: } michael@0: } michael@0: /* split runs which are not consecutive in source text */ michael@0: for(i=runCount-1; i>=0; i--) { michael@0: runLength= i==0 ? runs[0].visualLimit : michael@0: runs[i].visualLimit-runs[i-1].visualLimit; michael@0: logicalStart=runs[i].logicalStart; michael@0: indexOddBit=GET_ODD_BIT(logicalStart); michael@0: logicalStart=GET_INDEX(logicalStart); michael@0: if(runLength<2) { michael@0: if(addedRuns) { michael@0: runs[i+addedRuns]=runs[i]; michael@0: } michael@0: logicalPos=visualMap[logicalStart]; michael@0: runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, michael@0: saveLevels[logicalPos]^indexOddBit); michael@0: continue; michael@0: } michael@0: if(indexOddBit) { michael@0: start=logicalStart; michael@0: limit=logicalStart+runLength-1; michael@0: step=1; michael@0: } else { michael@0: start=logicalStart+runLength-1; michael@0: limit=logicalStart; michael@0: step=-1; michael@0: } michael@0: for(j=start; j!=limit; j+=step) { michael@0: index0=visualMap[j]; michael@0: index1=visualMap[j+step]; michael@0: if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { michael@0: logicalPos=BIDI_MIN(visualMap[start], index0); michael@0: 
runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, michael@0: saveLevels[logicalPos]^indexOddBit); michael@0: runs[i+addedRuns].visualLimit=runs[i].visualLimit; michael@0: runs[i].visualLimit-=BIDI_ABS(j-start)+1; michael@0: insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); michael@0: runs[i+addedRuns].insertRemove=insertRemove; michael@0: runs[i].insertRemove&=~insertRemove; michael@0: start=j+step; michael@0: addedRuns--; michael@0: } michael@0: } michael@0: if(addedRuns) { michael@0: runs[i+addedRuns]=runs[i]; michael@0: } michael@0: logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); michael@0: runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, michael@0: saveLevels[logicalPos]^indexOddBit); michael@0: } michael@0: michael@0: cleanup1: michael@0: /* restore initial paraLevel */ michael@0: pBiDi->paraLevel^=1; michael@0: cleanup2: michael@0: /* restore real text */ michael@0: pBiDi->text=text; michael@0: pBiDi->length=saveLength; michael@0: pBiDi->originalLength=length; michael@0: pBiDi->direction=saveDirection; michael@0: /* the saved levels should never excess levelsSize, but we check anyway */ michael@0: if(saveLength>pBiDi->levelsSize) { michael@0: saveLength=pBiDi->levelsSize; michael@0: } michael@0: uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); michael@0: pBiDi->trailingWSStart=saveTrailingWSStart; michael@0: /* free memory for mapping table and visual text */ michael@0: uprv_free(runsOnlyMemory); michael@0: if(pBiDi->runCount>1) { michael@0: pBiDi->direction=UBIDI_MIXED; michael@0: } michael@0: cleanup3: michael@0: pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; michael@0: } michael@0: michael@0: /* ubidi_setPara ------------------------------------------------------------ */ michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, michael@0: UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, michael@0: UErrorCode *pErrorCode) { 
michael@0: UBiDiDirection direction; michael@0: michael@0: /* check the argument values */ michael@0: RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); michael@0: if(pBiDi==NULL || text==NULL || length<-1 || michael@0: (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevelreorderingMode==UBIDI_REORDER_RUNS_ONLY) { michael@0: setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); michael@0: return; michael@0: } michael@0: michael@0: /* initialize the UBiDi structure */ michael@0: pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ michael@0: pBiDi->text=text; michael@0: pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; michael@0: pBiDi->paraLevel=paraLevel; michael@0: pBiDi->direction=paraLevel&1; michael@0: pBiDi->paraCount=1; michael@0: michael@0: pBiDi->dirProps=NULL; michael@0: pBiDi->levels=NULL; michael@0: pBiDi->runs=NULL; michael@0: pBiDi->insertPoints.size=0; /* clean up from last call */ michael@0: pBiDi->insertPoints.confirmed=0; /* clean up from last call */ michael@0: michael@0: /* michael@0: * Save the original paraLevel if contextual; otherwise, set to 0. michael@0: */ michael@0: pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); michael@0: michael@0: if(length==0) { michael@0: /* michael@0: * For an empty paragraph, create a UBiDi object with the paraLevel and michael@0: * the flags and the direction set but without allocating zero-length arrays. michael@0: * There is nothing more to do. 
michael@0: */ michael@0: if(IS_DEFAULT_LEVEL(paraLevel)) { michael@0: pBiDi->paraLevel&=1; michael@0: pBiDi->defaultParaLevel=0; michael@0: } michael@0: pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); michael@0: pBiDi->runCount=0; michael@0: pBiDi->paraCount=0; michael@0: setParaSuccess(pBiDi); /* mark successful setPara */ michael@0: return; michael@0: } michael@0: michael@0: pBiDi->runCount=-1; michael@0: michael@0: /* allocate paras memory */ michael@0: if(pBiDi->parasMemory) michael@0: pBiDi->paras=pBiDi->parasMemory; michael@0: else michael@0: pBiDi->paras=pBiDi->simpleParas; michael@0: michael@0: /* michael@0: * Get the directional properties, michael@0: * the flags bit-set, and michael@0: * determine the paragraph level if necessary. michael@0: */ michael@0: if(getDirPropsMemory(pBiDi, length)) { michael@0: pBiDi->dirProps=pBiDi->dirPropsMemory; michael@0: if(!getDirProps(pBiDi)) { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: } else { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: /* the processed length may have changed if UBIDI_OPTION_STREAMING */ michael@0: length= pBiDi->length; michael@0: pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ michael@0: michael@0: /* are explicit levels specified? 
*/ michael@0: if(embeddingLevels==NULL) { michael@0: /* no: determine explicit levels according to the (Xn) rules */\ michael@0: if(getLevelsMemory(pBiDi, length)) { michael@0: pBiDi->levels=pBiDi->levelsMemory; michael@0: direction=resolveExplicitLevels(pBiDi, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } michael@0: } else { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: } else { michael@0: /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ michael@0: pBiDi->levels=embeddingLevels; michael@0: direction=checkExplicitLevels(pBiDi, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return; michael@0: } michael@0: } michael@0: michael@0: /* allocate isolate memory */ michael@0: if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE) michael@0: pBiDi->isolates=pBiDi->simpleIsolates; michael@0: else michael@0: if(pBiDi->isolateCount<=pBiDi->isolatesSize) michael@0: pBiDi->isolates=pBiDi->isolatesMemory; michael@0: else { michael@0: if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { michael@0: pBiDi->isolates=pBiDi->isolatesMemory; michael@0: } else { michael@0: *pErrorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: } michael@0: pBiDi->isolateCount=-1; /* current isolates stack entry == none */ michael@0: michael@0: /* michael@0: * The steps after (X9) in the UBiDi algorithm are performed only if michael@0: * the paragraph text has mixed directionality! 
michael@0: */ michael@0: pBiDi->direction=direction; michael@0: switch(direction) { michael@0: case UBIDI_LTR: michael@0: /* make sure paraLevel is even */ michael@0: pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); michael@0: michael@0: /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ michael@0: pBiDi->trailingWSStart=0; michael@0: break; michael@0: case UBIDI_RTL: michael@0: /* make sure paraLevel is odd */ michael@0: pBiDi->paraLevel|=1; michael@0: michael@0: /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ michael@0: pBiDi->trailingWSStart=0; michael@0: break; michael@0: default: michael@0: /* michael@0: * Choose the right implicit state table michael@0: */ michael@0: switch(pBiDi->reorderingMode) { michael@0: case UBIDI_REORDER_DEFAULT: michael@0: pBiDi->pImpTabPair=&impTab_DEFAULT; michael@0: break; michael@0: case UBIDI_REORDER_NUMBERS_SPECIAL: michael@0: pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; michael@0: break; michael@0: case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: michael@0: pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; michael@0: break; michael@0: case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: michael@0: pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; michael@0: break; michael@0: case UBIDI_REORDER_INVERSE_LIKE_DIRECT: michael@0: if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { michael@0: pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; michael@0: } else { michael@0: pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; michael@0: } michael@0: break; michael@0: case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: michael@0: if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { michael@0: pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; michael@0: } else { michael@0: pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; michael@0: } michael@0: break; michael@0: default: michael@0: /* we should never get here */ michael@0: U_ASSERT(FALSE); michael@0: break; 
michael@0: } michael@0: /* michael@0: * If there are no external levels specified and there michael@0: * are no significant explicit level codes in the text, michael@0: * then we can treat the entire paragraph as one run. michael@0: * Otherwise, we need to perform the following rules on runs of michael@0: * the text with the same embedding levels. (X10) michael@0: * "Significant" explicit level codes are ones that actually michael@0: * affect non-BN characters. michael@0: * Examples for "insignificant" ones are empty embeddings michael@0: * LRE-PDF, LRE-RLE-PDF-PDF, etc. michael@0: */ michael@0: if(embeddingLevels==NULL && pBiDi->paraCount<=1 && michael@0: !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { michael@0: resolveImplicitLevels(pBiDi, 0, length, michael@0: GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), michael@0: GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); michael@0: } else { michael@0: /* sor, eor: start and end types of same-level-run */ michael@0: UBiDiLevel *levels=pBiDi->levels; michael@0: int32_t start, limit=0; michael@0: UBiDiLevel level, nextLevel; michael@0: DirProp sor, eor; michael@0: michael@0: /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ michael@0: level=GET_PARALEVEL(pBiDi, 0); michael@0: nextLevel=levels[0]; michael@0: if(level0) && (pBiDi->dirProps[start-1]==B)) { michael@0: /* except if this is a new paragraph, then set sor = para level */ michael@0: sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); michael@0: } else { michael@0: sor=eor; michael@0: } michael@0: michael@0: /* search for the limit of this run */ michael@0: while(++limitinsertPoints.errorCode)) michael@0: { michael@0: *pErrorCode=pBiDi->insertPoints.errorCode; michael@0: return; michael@0: } michael@0: /* reset the embedding levels for some non-graphic characters (L1), (X9) */ michael@0: adjustWSLevels(pBiDi); michael@0: break; michael@0: } michael@0: /* add RLM for inverse Bidi with contextual orientation resolving michael@0: * 
to RTL which would not round-trip otherwise
michael@0: */
michael@0: if((pBiDi->defaultParaLevel>0) &&
michael@0: (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
michael@0: ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
michael@0: (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
michael@0: int32_t i, j, start, last;
michael@0: UBiDiLevel level;
michael@0: DirProp dirProp;
michael@0: for(i=0; iparaCount; i++) { /* NOTE(review): loop condition looks truncated in this copy - confirm against a pristine source */
michael@0: last=(pBiDi->paras[i].limit)-1;
michael@0: level=pBiDi->paras[i].level;
michael@0: if(level==0)
michael@0: continue; /* LTR paragraph */
michael@0: start= i==0 ? 0 : pBiDi->paras[i-1].limit;
michael@0: for(j=last; j>=start; j--) {
michael@0: dirProp=pBiDi->dirProps[j];
michael@0: if(dirProp==L) {
michael@0: if(jdirProps[last]==B) { /* NOTE(review): text appears to be missing on this line in this copy - confirm against a pristine source */
michael@0: last--;
michael@0: }
michael@0: }
michael@0: addPoint(pBiDi, last, RLM_BEFORE);
michael@0: break;
michael@0: }
michael@0: if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
michael@0: break;
michael@0: }
michael@0: }
michael@0: }
michael@0: }
michael@0:
michael@0: if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
michael@0: pBiDi->resultLength -= pBiDi->controlCount;
michael@0: } else {
michael@0: pBiDi->resultLength += pBiDi->insertPoints.size;
michael@0: }
michael@0: setParaSuccess(pBiDi); /* mark successful setPara */
michael@0: }
michael@0: /*
michael@0:  * Stores the caller's orderParagraphsLTR flag in the UBiDi object.
michael@0:  * A NULL pBiDi is silently ignored; no error is reported.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
michael@0:     if(pBiDi!=NULL) {
michael@0:         pBiDi->orderParagraphsLTR=orderParagraphsLTR;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns the orderParagraphsLTR flag stored in the UBiDi object,
michael@0:  * or FALSE when pBiDi is NULL.
michael@0:  */
michael@0: U_CAPI UBool U_EXPORT2
michael@0: ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
michael@0:     if(pBiDi!=NULL) {
michael@0:         return pBiDi->orderParagraphsLTR;
michael@0:     } else {
michael@0:         return FALSE;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns pBiDi->direction when IS_VALID_PARA_OR_LINE(pBiDi) holds;
michael@0:  * otherwise falls back to UBIDI_LTR.
michael@0:  */
michael@0: U_CAPI UBiDiDirection U_EXPORT2
michael@0: ubidi_getDirection(const UBiDi *pBiDi) {
michael@0:     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:
return pBiDi->direction;
michael@0:     } else {
michael@0:         return UBIDI_LTR;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns pBiDi->text when IS_VALID_PARA_OR_LINE(pBiDi) holds,
michael@0:  * otherwise NULL.
michael@0:  */
michael@0: U_CAPI const UChar * U_EXPORT2
michael@0: ubidi_getText(const UBiDi *pBiDi) {
michael@0:     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:         return pBiDi->text;
michael@0:     } else {
michael@0:         return NULL;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns pBiDi->originalLength when IS_VALID_PARA_OR_LINE(pBiDi) holds,
michael@0:  * otherwise 0.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ubidi_getLength(const UBiDi *pBiDi) {
michael@0:     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:         return pBiDi->originalLength;
michael@0:     } else {
michael@0:         return 0;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns pBiDi->length when IS_VALID_PARA_OR_LINE(pBiDi) holds,
michael@0:  * otherwise 0. Note that this is a different field from the
michael@0:  * originalLength reported by ubidi_getLength().
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ubidi_getProcessedLength(const UBiDi *pBiDi) {
michael@0:     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:         return pBiDi->length;
michael@0:     } else {
michael@0:         return 0;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns pBiDi->resultLength when IS_VALID_PARA_OR_LINE(pBiDi) holds,
michael@0:  * otherwise 0.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ubidi_getResultLength(const UBiDi *pBiDi) {
michael@0:     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:         return pBiDi->resultLength;
michael@0:     } else {
michael@0:         return 0;
michael@0:     }
michael@0: }
michael@0:
michael@0: /* paragraphs API functions ------------------------------------------------- */
michael@0: /*
michael@0:  * Returns pBiDi->paraLevel when IS_VALID_PARA_OR_LINE(pBiDi) holds,
michael@0:  * otherwise 0.
michael@0:  */
michael@0: U_CAPI UBiDiLevel U_EXPORT2
michael@0: ubidi_getParaLevel(const UBiDi *pBiDi) {
michael@0:     if(IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:         return pBiDi->paraLevel;
michael@0:     } else {
michael@0:         return 0;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns pBiDi->paraCount when IS_VALID_PARA_OR_LINE(pBiDi) holds,
michael@0:  * otherwise 0.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ubidi_countParagraphs(UBiDi *pBiDi) {
michael@0:     if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
michael@0:         return 0;
michael@0:     } else {
michael@0:         return pBiDi->paraCount;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Reports the boundaries and level of the paragraph with index paraIndex.
michael@0:  *
michael@0:  * pBiDi       paragraph or line object; the data is read from its
michael@0:  *             pParaBiDi object.
michael@0:  * paraIndex   paragraph index, range-checked against pBiDi->paraCount.
michael@0:  * pParaStart  optional output (may be NULL): 0 for the first paragraph,
michael@0:  *             otherwise the limit of the preceding paragraph.
michael@0:  * pParaLimit  optional output (may be NULL): paras[paraIndex].limit.
michael@0:  * pParaLevel  optional output (may be NULL): GET_PARALEVEL() evaluated
michael@0:  *             at the paragraph start.
michael@0:  * pErrorCode  in/out error code; the RETURN_VOID_IF_* macros are defined
michael@0:  *             elsewhere and presumably set it and return early on
michael@0:  *             failure, leaving the outputs unwritten.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
michael@0:                           int32_t *pParaStart, int32_t *pParaLimit,
michael@0:                           UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
michael@0:     int32_t paraStart;
michael@0:
michael@0:     /* check the argument values */
michael@0:     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0:     RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
michael@0:     RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); /* NOTE(review): checked against the passed object's paraCount, while paras[] below is read from pParaBiDi - confirm both agree for line objects */
michael@0:
michael@0:     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
michael@0:     if(paraIndex) {
michael@0:         paraStart=pBiDi->paras[paraIndex-1].limit; /* a paragraph starts where the previous one ends */
michael@0:     } else {
michael@0:         paraStart=0;
michael@0:     }
michael@0:     if(pParaStart!=NULL) {
michael@0:         *pParaStart=paraStart;
michael@0:     }
michael@0:     if(pParaLimit!=NULL) {
michael@0:         *pParaLimit=pBiDi->paras[paraIndex].limit;
michael@0:     }
michael@0:     if(pParaLevel!=NULL) {
michael@0:         *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Finds the paragraph containing the character at charIndex, reports its
michael@0:  * boundaries and level through ubidi_getParagraphByIndex(), and returns
michael@0:  * the paragraph index.
michael@0:  *
michael@0:  * pBiDi       paragraph or line object; the data is read from its
michael@0:  *             pParaBiDi object.
michael@0:  * charIndex   character index, range-checked against the length of
michael@0:  *             the pParaBiDi object.
michael@0:  * pParaStart, pParaLimit, pParaLevel
michael@0:  *             optional outputs, forwarded unchanged to
michael@0:  *             ubidi_getParagraphByIndex().
michael@0:  * pErrorCode  in/out error code.
michael@0:  *
michael@0:  * Returns the paragraph index; -1 is the value handed to the RETURN_IF_*
michael@0:  * macros, presumably returned when one of the checks fails.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
michael@0:                    int32_t *pParaStart, int32_t *pParaLimit,
michael@0:                    UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
michael@0:     int32_t paraIndex;
michael@0:
michael@0:     /* check the argument values */
michael@0:     /* pErrorCode is checked here; it is also passed on to ubidi_getParagraphByIndex, which checks it again */
michael@0:     RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
michael@0:     RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
michael@0:     pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
michael@0:     RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
michael@0:
michael@0:     for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); /* empty body on purpose: stop at the first paragraph whose limit lies beyond charIndex; presumably bounded by the range check above */
michael@0:     ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
michael@0:     return paraIndex;
michael@0: }
michael@0: /*
michael@0:  * Installs a new class callback and its context on the UBiDi object,
michael@0:  * optionally handing back the previous pair.
michael@0:  *
michael@0:  * pBiDi       object to modify; NULL sets *pErrorCode to
michael@0:  *             U_ILLEGAL_ARGUMENT_ERROR and nothing is changed.
michael@0:  * newFn       stored as-is in fnClassCallback (it is not checked
michael@0:  *             for NULL here).
michael@0:  * newContext  stored as-is in coClassCallback.
michael@0:  * oldFn       optional output (may be NULL): previous fnClassCallback.
michael@0:  * oldContext  optional output (may be NULL): previous coClassCallback.
michael@0:  * pErrorCode  in/out error code, checked before anything else.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
michael@0:                        const void *newContext, UBiDiClassCallback **oldFn,
michael@0:                        const void **oldContext, UErrorCode *pErrorCode)
michael@0: {
michael@0:     RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
michael@0:     if(pBiDi==NULL) { /* no object to install the callback on: report an argument error */
michael@0:         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         return;
michael@0:     }
michael@0:     if( oldFn ) /* hand back the previous callback only if the caller asked for it */
michael@0:     {
michael@0:         *oldFn = pBiDi->fnClassCallback;
michael@0:     }
michael@0:     if( oldContext ) /* same for the previous context pointer */
michael@0:     {
michael@0:         *oldContext = pBiDi->coClassCallback;
michael@0:     }
michael@0:     pBiDi->fnClassCallback = newFn; /* the old values were read above, before being overwritten */
michael@0:     pBiDi->coClassCallback = newContext;
michael@0: }
michael@0: /*
michael@0:  * Reports the class callback and context currently stored on the
michael@0:  * UBiDi object.
michael@0:  *
michael@0:  * pBiDi    object to query; when NULL the function returns without
michael@0:  *          writing to either output.
michael@0:  * fn       optional output (may be NULL): current fnClassCallback.
michael@0:  * context  optional output (may be NULL): current coClassCallback.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
michael@0: {
michael@0:     if(pBiDi==NULL) {
michael@0:         return;
michael@0:     }
michael@0:     if( fn )
michael@0:     {
michael@0:         *fn = pBiDi->fnClassCallback;
michael@0:     }
michael@0:     if( context )
michael@0:     {
michael@0:         *context = pBiDi->coClassCallback;
michael@0:     }
michael@0: }
michael@0: /*
michael@0:  * Returns the directional class to use for code point c.
michael@0:  *
michael@0:  * If a class callback is installed it is called first with the stored
michael@0:  * context; when there is no callback, or it answers
michael@0:  * U_BIDI_CLASS_DEFAULT, the class comes from ubidi_getClass() using
michael@0:  * pBiDi->bdp. Any value at or above U_CHAR_DIRECTION_COUNT is replaced
michael@0:  * by ON.
michael@0:  *
michael@0:  * Unlike the callback accessors in this file, pBiDi is dereferenced
michael@0:  * here without a NULL check.
michael@0:  */
michael@0: U_CAPI UCharDirection U_EXPORT2
michael@0: ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
michael@0: {
michael@0:     UCharDirection dir;
michael@0:
michael@0:     if( pBiDi->fnClassCallback == NULL || /* short-circuit: dir is assigned by the callback only when one is installed */
michael@0:         (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
michael@0:     {
michael@0:         dir = ubidi_getClass(pBiDi->bdp, c);
michael@0:     }
michael@0:     if(dir >= U_CHAR_DIRECTION_COUNT) { /* out-of-range class value: fall back to ON */
michael@0:         dir = ON;
michael@0:     }
michael@0:     return dir;
michael@0: }