michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 2000-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * file name: ubidiwrt.c michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 1999aug06 michael@0: * created by: Markus W. Scherer, updated by Matitiahu Allouche michael@0: * michael@0: * This file contains implementations for BiDi functions that use michael@0: * the core algorithm and core API to write reordered text. michael@0: */ michael@0: michael@0: /* set import/export definitions */ michael@0: #ifndef U_COMMON_IMPLEMENTATION michael@0: # define U_COMMON_IMPLEMENTATION michael@0: #endif michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/ubidi.h" michael@0: #include "unicode/utf16.h" michael@0: #include "cmemory.h" michael@0: #include "ustr_imp.h" michael@0: #include "ubidiimp.h" michael@0: michael@0: /* michael@0: * The function implementations in this file are designed michael@0: * for UTF-16 and UTF-32, not for UTF-8. michael@0: * michael@0: * Assumptions that are not true for UTF-8: michael@0: * - Any code point always needs the same number of code units michael@0: * ("minimum-length-problem" of UTF-8) michael@0: * - The BiDi control characters need only one code unit each michael@0: * michael@0: * Further assumptions for all UTFs: michael@0: * - u_charMirror(c) needs the same number of code units as c michael@0: */ michael@0: #if UTF_SIZE==8 michael@0: # error reimplement ubidi_writeReordered() for UTF-8, see comment above michael@0: #endif michael@0: michael@0: #define IS_COMBINING(type) ((1UL<<(type))&(1UL<0); michael@0: return srcLength; michael@0: } michael@0: case UBIDI_DO_MIRRORING: { michael@0: /* do mirroring */ michael@0: int32_t i=0, j=0; michael@0: UChar32 c; michael@0: michael@0: if(destSize0) { michael@0: c=*src++; michael@0: if(!IS_BIDI_CONTROL_CHAR(c)) { michael@0: --remaining; michael@0: } michael@0: } michael@0: return destSize-remaining; michael@0: } michael@0: *dest++=c; michael@0: } michael@0: } while(--srcLength>0); michael@0: return destSize-remaining; michael@0: } michael@0: default: { michael@0: /* remove BiDi control characters and do mirroring */ michael@0: int32_t remaining=destSize; michael@0: int32_t i, j=0; michael@0: UChar32 c; michael@0: do { michael@0: i=0; michael@0: U16_NEXT(src, i, srcLength, c); michael@0: src+=i; michael@0: srcLength-=i; michael@0: if(!IS_BIDI_CONTROL_CHAR(c)) { michael@0: remaining-=i; michael@0: if(remaining<0) { michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: michael@0: /* preflight the length */ michael@0: while(srcLength>0) { michael@0: c=*src++; michael@0: if(!IS_BIDI_CONTROL_CHAR(c)) { michael@0: --remaining; michael@0: } michael@0: --srcLength; michael@0: } michael@0: return destSize-remaining; michael@0: } michael@0: c=u_charMirror(c); michael@0: U16_APPEND_UNSAFE(dest, j, c); michael@0: } michael@0: } while(srcLength>0); michael@0: return j; michael@0: } michael@0: } /* end of switch */ michael@0: } michael@0: michael@0: static int32_t michael@0: doWriteReverse(const UChar *src, int32_t srcLength, michael@0: UChar *dest, int32_t destSize, michael@0: uint16_t options, michael@0: UErrorCode *pErrorCode) { michael@0: /* michael@0: * RTL run - michael@0: * michael@0: * RTL runs need to be copied to the destination in reverse order michael@0: * of code points, not code units, to keep Unicode characters intact. michael@0: * michael@0: * The general strategy for this is to read the source text michael@0: * in backward order, collect all code units for a code point michael@0: * (and optionally following combining characters, see below), michael@0: * and copy all these code units in ascending order michael@0: * to the destination for this run. michael@0: * michael@0: * Several options request whether combining characters michael@0: * should be kept after their base characters, michael@0: * whether BiDi control characters should be removed, and michael@0: * whether characters should be replaced by their mirror-image michael@0: * equivalent Unicode characters. michael@0: */ michael@0: int32_t i, j; michael@0: UChar32 c; michael@0: michael@0: /* optimize for several combinations of options */ michael@0: switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) { michael@0: case 0: michael@0: /* michael@0: * With none of the "complicated" options set, the destination michael@0: * run will have the same length as the source run, michael@0: * and there is no mirroring and no keeping combining characters michael@0: * with their base characters. michael@0: */ michael@0: if(destSize0); michael@0: break; michael@0: case UBIDI_KEEP_BASE_COMBINING: michael@0: /* michael@0: * Here, too, the destination michael@0: * run will have the same length as the source run, michael@0: * and there is no mirroring. michael@0: * We do need to keep combining characters with their base characters. michael@0: */ michael@0: if(destSize0 && IS_COMBINING(u_charType(c))); michael@0: michael@0: /* copy this "user character" */ michael@0: j=srcLength; michael@0: do { michael@0: *dest++=src[j++]; michael@0: } while(j0); michael@0: break; michael@0: default: michael@0: /* michael@0: * With several "complicated" options set, this is the most michael@0: * general and the slowest copying of an RTL run. michael@0: * We will do mirroring, remove BiDi controls, and michael@0: * keep combining characters with their base characters michael@0: * as requested. michael@0: */ michael@0: if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) { michael@0: i=srcLength; michael@0: } else { michael@0: /* we need to find out the destination length of the run, michael@0: which will not include the BiDi control characters */ michael@0: int32_t length=srcLength; michael@0: UChar ch; michael@0: michael@0: i=0; michael@0: do { michael@0: ch=*src++; michael@0: if(!IS_BIDI_CONTROL_CHAR(ch)) { michael@0: ++i; michael@0: } michael@0: } while(--length>0); michael@0: src-=srcLength; michael@0: } michael@0: michael@0: if(destSize0 && IS_COMBINING(u_charType(c))) { michael@0: U16_PREV(src, 0, srcLength, c); michael@0: } michael@0: } michael@0: michael@0: if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) { michael@0: /* do not copy this BiDi control character */ michael@0: continue; michael@0: } michael@0: michael@0: /* copy this "user character" */ michael@0: j=srcLength; michael@0: if(options&UBIDI_DO_MIRRORING) { michael@0: /* mirror only the base character */ michael@0: int32_t k=0; michael@0: c=u_charMirror(c); michael@0: U16_APPEND_UNSAFE(dest, k, c); michael@0: dest+=k; michael@0: j+=k; michael@0: } michael@0: while(j0); michael@0: break; michael@0: } /* end of switch */ michael@0: michael@0: return destSize; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ubidi_writeReverse(const UChar *src, int32_t srcLength, michael@0: UChar *dest, int32_t destSize, michael@0: uint16_t options, michael@0: UErrorCode *pErrorCode) { michael@0: int32_t destLength; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* more error checking */ michael@0: if( src==NULL || srcLength<-1 || michael@0: destSize<0 || (destSize>0 && dest==NULL)) michael@0: { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* do input and output overlap? */ michael@0: if( dest!=NULL && michael@0: ((src>=dest && src=src && dest0) { michael@0: destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode); michael@0: } else { michael@0: /* nothing to do */ michael@0: destLength=0; michael@0: } michael@0: michael@0: return u_terminateUChars(dest, destSize, destLength, pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ubidi_writeReordered(UBiDi *pBiDi, michael@0: UChar *dest, int32_t destSize, michael@0: uint16_t options, michael@0: UErrorCode *pErrorCode) { michael@0: const UChar *text; michael@0: UChar *saveDest; michael@0: int32_t length, destCapacity; michael@0: int32_t run, runCount, logicalStart, runLength; michael@0: michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* more error checking */ michael@0: if( pBiDi==NULL || michael@0: (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 || michael@0: destSize<0 || (destSize>0 && dest==NULL)) michael@0: { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* do input and output overlap? */ michael@0: if( dest!=NULL && michael@0: ((text>=dest && text=text && destoriginalLength))) michael@0: { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: if(length==0) { michael@0: /* nothing to do */ michael@0: return u_terminateUChars(dest, destSize, 0, pErrorCode); michael@0: } michael@0: michael@0: runCount=ubidi_countRuns(pBiDi, pErrorCode); michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: michael@0: /* destSize shrinks, later destination length=destCapacity-destSize */ michael@0: saveDest=dest; michael@0: destCapacity=destSize; michael@0: michael@0: /* michael@0: * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the michael@0: * reordering mode (checked below) is appropriate. michael@0: */ michael@0: if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { michael@0: options|=UBIDI_INSERT_LRM_FOR_NUMERIC; michael@0: options&=~UBIDI_REMOVE_BIDI_CONTROLS; michael@0: } michael@0: /* michael@0: * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS michael@0: * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC. michael@0: */ michael@0: if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { michael@0: options|=UBIDI_REMOVE_BIDI_CONTROLS; michael@0: options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; michael@0: } michael@0: /* michael@0: * If we do not perform the "inverse BiDi" algorithm, then we michael@0: * don't need to insert any LRMs, and don't need to test for it. michael@0: */ michael@0: if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) && michael@0: (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) && michael@0: (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && michael@0: (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) { michael@0: options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; michael@0: } michael@0: /* michael@0: * Iterate through all visual runs and copy the run text segments to michael@0: * the destination, according to the options. michael@0: * michael@0: * The tests for where to insert LRMs ignore the fact that there may be michael@0: * BN codes or non-BMP code points at the beginning and end of a run; michael@0: * they may insert LRMs unnecessarily but the tests are faster this way michael@0: * (this would have to be improved for UTF-8). michael@0: * michael@0: * Note that the only errors that are set by doWriteXY() are buffer overflow michael@0: * errors. Ignore them until the end, and continue for preflighting. michael@0: */ michael@0: if(!(options&UBIDI_OUTPUT_REVERSE)) { michael@0: /* forward output */ michael@0: if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { michael@0: /* do not insert BiDi controls */ michael@0: for(run=0; rundirProps; michael@0: const UChar *src; michael@0: UChar uc; michael@0: UBiDiDirection dir; michael@0: int32_t markFlag; michael@0: michael@0: for(run=0; runruns[run].insertRemove; michael@0: if(markFlag<0) { /* BiDi controls count */ michael@0: markFlag=0; michael@0: } michael@0: michael@0: if(UBIDI_LTR==dir) { michael@0: if((pBiDi->isInverse) && michael@0: (/*run>0 &&*/ dirProps[logicalStart]!=L)) { michael@0: markFlag |= LRM_BEFORE; michael@0: } michael@0: if (markFlag & LRM_BEFORE) { michael@0: uc=LRM_CHAR; michael@0: } michael@0: else if (markFlag & RLM_BEFORE) { michael@0: uc=RLM_CHAR; michael@0: } michael@0: else uc=0; michael@0: if(uc) { michael@0: if(destSize>0) { michael@0: *dest++=uc; michael@0: } michael@0: --destSize; michael@0: } michael@0: michael@0: runLength=doWriteForward(src, runLength, michael@0: dest, destSize, michael@0: (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); michael@0: if(dest!=NULL) { michael@0: dest+=runLength; michael@0: } michael@0: destSize-=runLength; michael@0: michael@0: if((pBiDi->isInverse) && michael@0: (/*run0) { michael@0: *dest++=uc; michael@0: } michael@0: --destSize; michael@0: } michael@0: } else { /* RTL run */ michael@0: if((pBiDi->isInverse) && michael@0: (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) { michael@0: markFlag |= RLM_BEFORE; michael@0: } michael@0: if (markFlag & LRM_BEFORE) { michael@0: uc=LRM_CHAR; michael@0: } michael@0: else if (markFlag & RLM_BEFORE) { michael@0: uc=RLM_CHAR; michael@0: } michael@0: else uc=0; michael@0: if(uc) { michael@0: if(destSize>0) { michael@0: *dest++=uc; michael@0: } michael@0: --destSize; michael@0: } michael@0: michael@0: runLength=doWriteReverse(src, runLength, michael@0: dest, destSize, michael@0: options, pErrorCode); michael@0: if(dest!=NULL) { michael@0: dest+=runLength; michael@0: } michael@0: destSize-=runLength; michael@0: michael@0: if((pBiDi->isInverse) && michael@0: (/*run0) { michael@0: *dest++=uc; michael@0: } michael@0: --destSize; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } else { michael@0: /* reverse output */ michael@0: if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { michael@0: /* do not insert BiDi controls */ michael@0: for(run=runCount; --run>=0;) { michael@0: if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) { michael@0: runLength=doWriteReverse(text+logicalStart, runLength, michael@0: dest, destSize, michael@0: (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); michael@0: } else { michael@0: runLength=doWriteForward(text+logicalStart, runLength, michael@0: dest, destSize, michael@0: options, pErrorCode); michael@0: } michael@0: if(dest!=NULL) { michael@0: dest+=runLength; michael@0: } michael@0: destSize-=runLength; michael@0: } michael@0: } else { michael@0: /* insert BiDi controls for "inverse BiDi" */ michael@0: const DirProp *dirProps=pBiDi->dirProps; michael@0: const UChar *src; michael@0: UBiDiDirection dir; michael@0: michael@0: for(run=runCount; --run>=0;) { michael@0: /* reverse output */ michael@0: dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength); michael@0: src=text+logicalStart; michael@0: michael@0: if(UBIDI_LTR==dir) { michael@0: if(/*run0) { michael@0: *dest++=LRM_CHAR; michael@0: } michael@0: --destSize; michael@0: } michael@0: michael@0: runLength=doWriteReverse(src, runLength, michael@0: dest, destSize, michael@0: (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); michael@0: if(dest!=NULL) { michael@0: dest+=runLength; michael@0: } michael@0: destSize-=runLength; michael@0: michael@0: if(/*run>0 &&*/ dirProps[logicalStart]!=L) { michael@0: if(destSize>0) { michael@0: *dest++=LRM_CHAR; michael@0: } michael@0: --destSize; michael@0: } michael@0: } else { michael@0: if(/*run0) { michael@0: *dest++=RLM_CHAR; michael@0: } michael@0: --destSize; michael@0: } michael@0: michael@0: runLength=doWriteForward(src, runLength, michael@0: dest, destSize, michael@0: options, pErrorCode); michael@0: if(dest!=NULL) { michael@0: dest+=runLength; michael@0: } michael@0: destSize-=runLength; michael@0: michael@0: if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) { michael@0: if(destSize>0) { michael@0: *dest++=RLM_CHAR; michael@0: } michael@0: --destSize; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode); michael@0: }