michael@0: /* GRAPHITE2 LICENSING michael@0: michael@0: Copyright 2011, SIL International michael@0: All rights reserved. michael@0: michael@0: This library is free software; you can redistribute it and/or modify michael@0: it under the terms of the GNU Lesser General Public License as published michael@0: by the Free Software Foundation; either version 2.1 of License, or michael@0: (at your option) any later version. michael@0: michael@0: This program is distributed in the hope that it will be useful, michael@0: but WITHOUT ANY WARRANTY; without even the implied warranty of michael@0: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU michael@0: Lesser General Public License for more details. michael@0: michael@0: You should also have received a copy of the GNU Lesser General Public michael@0: License along with this library in the file named "LICENSE". michael@0: If not, write to the Free Software Foundation, 51 Franklin Street, michael@0: suite 500, Boston, MA 02110-1335, USA or visit their web page on the michael@0: internet at http://www.fsf.org/licenses/lgpl.html. michael@0: michael@0: Alternatively, the contents of this file may be used under the terms of the michael@0: Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public michael@0: License, as published by the Free Software Foundation, either version 2 michael@0: of the License or (at your option) any later version. michael@0: */ michael@0: #include "inc/Main.h" michael@0: #include "inc/Slot.h" michael@0: #include "inc/Segment.h" michael@0: #include "inc/Bidi.h" michael@0: michael@0: using namespace graphite2; michael@0: michael@0: enum DirCode { // Hungarian: dirc michael@0: Unk = -1, michael@0: N = 0, // other neutrals (default) - ON michael@0: L = 1, // left-to-right, strong - L michael@0: R = 2, // right-to-left, strong - R michael@0: AL = 3, // Arabic letter, right-to-left, strong, AR michael@0: EN = 4, // European number, left-to-right, weak - EN michael@0: EUS = 5, // European separator, left-to-right, weak - ES michael@0: ET = 6, // European number terminator, left-to-right, weak - ET michael@0: AN = 7, // Arabic number, left-to-right, weak - AN michael@0: CUS = 8, // Common number separator, left-to-right, weak - CS michael@0: WS = 9, // white space, neutral - WS michael@0: BN = 10, // boundary neutral - BN michael@0: michael@0: LRO = 11, // LTR override michael@0: RLO = 12, // RTL override michael@0: LRE = 13, // LTR embedding michael@0: RLE = 14, // RTL embedding michael@0: PDF = 15, // pop directional format michael@0: NSM = 16, // non-space mark michael@0: LRI = 17, // LRI isolate michael@0: RLI = 18, // RLI isolate michael@0: FSI = 19, // FSI isolate michael@0: PDI = 20, // pop isolate michael@0: OPP = 21, // opening paired parenthesis michael@0: CPP = 22, // closing paired parenthesis michael@0: michael@0: ON = N michael@0: }; michael@0: michael@0: enum DirMask { michael@0: WSflag = (1 << 7), // keep track of WS for eos handling michael@0: WSMask = ~(1 << 7) michael@0: }; michael@0: michael@0: inline uint8 BaseClass(Slot *s) { return s->getBidiClass() & WSMask; } michael@0: michael@0: unsigned int bidi_class_map[] = { 0, 1, 2, 5, 4, 8, 9, 3, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0 }; michael@0: // Algorithms based on Unicode reference standard code. Thanks Asmus Freitag. michael@0: michael@0: void resolveWeak(Slot *start, int sos, int eos); michael@0: void resolveNeutrals(Slot *s, int baseLevel, int sos, int eos); michael@0: void processParens(Slot *s, Segment *seg, uint8 aMirror, int level, BracketPairStack &stack); michael@0: michael@0: inline int calc_base_level(Slot *s) michael@0: { michael@0: int count = 0; michael@0: for ( ; s; s = s->next()) michael@0: { michael@0: int cls = s->getBidiClass(); michael@0: if (count) michael@0: { michael@0: switch(cls) michael@0: { michael@0: case LRI : michael@0: case RLI : michael@0: case FSI : michael@0: ++count; michael@0: break; michael@0: case PDI : michael@0: --count; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: switch(cls) michael@0: { michael@0: case L : michael@0: return 0; michael@0: case R : michael@0: case AL : michael@0: return 1; michael@0: case LRI : michael@0: case RLI : michael@0: case FSI : michael@0: ++count; michael@0: } michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: // inline or not? michael@0: void do_resolves(Slot *start, int level, int sos, int eos, int &bmask, Segment *seg, uint8 aMirror, BracketPairStack &stack) michael@0: { michael@0: if (bmask & 0x1F1178) michael@0: resolveWeak(start, sos, eos); michael@0: if (bmask & 0x200000) michael@0: processParens(start, seg, aMirror, level, stack); michael@0: if (bmask & 0x7E0361) michael@0: resolveNeutrals(start, level, sos, eos); michael@0: bmask = 0; michael@0: } michael@0: michael@0: enum maxs michael@0: { michael@0: MAX_LEVEL = 125, michael@0: }; michael@0: michael@0: // returns where we are up to in processing michael@0: Slot *process_bidi(Slot *start, int level, int prelevel, int &nextLevel, int dirover, int isol, int &cisol, int &isolerr, int &embederr, int init, Segment *seg, uint8 aMirror, BracketPairStack &bstack) michael@0: { michael@0: int bmask = 0; michael@0: Slot *s = start; michael@0: Slot *slast = start; michael@0: Slot *scurr = 0; michael@0: Slot *stemp; michael@0: int lnextLevel = nextLevel; michael@0: int newLevel; michael@0: int empty = 1; michael@0: for ( ; s; s = s ? s->next() : s) michael@0: { michael@0: int cls = s->getBidiClass(); michael@0: bmask |= (1 << cls); michael@0: s->setBidiLevel(level); michael@0: // we keep s->prev() pointing backwards for PDI repeating michael@0: michael@0: switch (cls) michael@0: { michael@0: case BN : michael@0: if (slast == s) slast = s->next(); // ignore if at front of text michael@0: continue; michael@0: case LRE : michael@0: case LRO : michael@0: case RLE : michael@0: case RLO : michael@0: switch (cls) michael@0: { michael@0: case LRE : michael@0: case LRO : michael@0: newLevel = level + (level & 1 ? 1 : 2); michael@0: break; michael@0: case RLE : michael@0: case RLO : michael@0: newLevel = level + (level & 1 ? 2 : 1); michael@0: break; michael@0: } michael@0: s->setBidiClass(BN); michael@0: if (isolerr || newLevel > MAX_LEVEL || embederr) michael@0: { michael@0: if (!isolerr) ++embederr; michael@0: break; michael@0: } michael@0: stemp = scurr; michael@0: if (scurr) michael@0: scurr->prev(0); // don't include control in string michael@0: lnextLevel = newLevel; michael@0: scurr = s; michael@0: s->setBidiLevel(newLevel); // to make it vanish michael@0: // recurse for the new subsequence. A sequence only contains text at the same level michael@0: s = process_bidi(s->next(), newLevel, level, lnextLevel, cls < LRE, 0, cisol, isolerr, embederr, 0, seg, aMirror, bstack); michael@0: // s points at PDF or end of sequence michael@0: // try to keep extending the run and not process it until we have to michael@0: if (lnextLevel != level || !s) // if the subsequence really had something in it, or we are at the end of the run michael@0: { michael@0: if (slast != scurr) // process the run now, don't try to extend it michael@0: { michael@0: // process text preceeding embedding michael@0: do_resolves(slast, level, (prelevel > level ? prelevel : level) & 1, lnextLevel & 1, bmask, seg, aMirror, bstack); michael@0: empty = 0; michael@0: nextLevel = level; michael@0: } michael@0: else if (lnextLevel != level) // the subsequence had something michael@0: { michael@0: empty = 0; // so we aren't empty either michael@0: nextLevel = lnextLevel; // but since we really are empty, pass back our level from the subsequence michael@0: } michael@0: if (s) // if still more to process michael@0: { michael@0: prelevel = lnextLevel; // future text starts out with sos of the higher subsequence michael@0: lnextLevel = level; // and eos is our level michael@0: } michael@0: slast = s ? s->next() : s; michael@0: } michael@0: else if (stemp) michael@0: stemp->prev(s); michael@0: break; michael@0: michael@0: case PDF : michael@0: s->setBidiClass(BN); michael@0: s->prev(0); // unstitch us since we skip final stitching code when we return michael@0: if (isol || isolerr || init) // boundary error conditions michael@0: break; michael@0: if (embederr) michael@0: { michael@0: --embederr; michael@0: break; michael@0: } michael@0: if (slast != s) michael@0: { michael@0: scurr->prev(0); // if slast, then scurr. Terminate before here michael@0: do_resolves(slast, level, level & 1, level & 1, bmask, seg, aMirror, bstack); michael@0: empty = 0; michael@0: } michael@0: if (empty) michael@0: { michael@0: nextLevel = prelevel; // no contents? set our level to that of parent michael@0: s->setBidiLevel(prelevel); michael@0: } michael@0: return s; michael@0: michael@0: case FSI : michael@0: case LRI : michael@0: case RLI : michael@0: switch (cls) michael@0: { michael@0: case FSI : michael@0: if (calc_base_level(s->next())) michael@0: newLevel = level + (level & 1 ? 2 : 1); michael@0: else michael@0: newLevel = level + (level & 1 ? 1 : 2); michael@0: break; michael@0: case LRI : michael@0: newLevel = level + (level & 1 ? 1 : 2); michael@0: break; michael@0: case RLI : michael@0: newLevel = level + (level & 1 ? 2 : 1); michael@0: break; michael@0: } michael@0: if (newLevel > MAX_LEVEL || isolerr) michael@0: { michael@0: ++isolerr; michael@0: s->setBidiClass(ON | WSflag); michael@0: break; michael@0: } michael@0: ++cisol; michael@0: if (scurr) scurr->prev(s); michael@0: scurr = s; // include FSI michael@0: lnextLevel = newLevel; michael@0: // recurse for the new sub sequence michael@0: s = process_bidi(s->next(), newLevel, newLevel, lnextLevel, 0, 1, cisol, isolerr, embederr, 0, seg, aMirror, bstack); michael@0: // s points at PDI michael@0: if (s) michael@0: { michael@0: bmask |= 1 << BaseClass(s); // include the PDI in the mask michael@0: s->setBidiLevel(level); // reset its level to our level michael@0: } michael@0: lnextLevel = level; michael@0: break; michael@0: michael@0: case PDI : michael@0: if (isolerr) michael@0: { michael@0: --isolerr; michael@0: s->setBidiClass(ON | WSflag); michael@0: break; michael@0: } michael@0: if (init || !cisol) michael@0: { michael@0: s->setBidiClass(ON | WSflag); michael@0: break; michael@0: } michael@0: embederr = 0; michael@0: if (!isol) // we are in an embedded subsequence, we have to return through all those michael@0: { michael@0: if (empty) // if empty, reset the level to tell embedded parent michael@0: nextLevel = prelevel; michael@0: return s->prev(); // keep working up the stack pointing at this PDI until we get to an isolate entry michael@0: } michael@0: else // we are terminating an isolate sequence michael@0: { michael@0: if (slast != s) // process any remaining content in this subseqence michael@0: { michael@0: scurr->prev(0); michael@0: do_resolves(slast, level, prelevel & 1, level & 1, bmask, seg, aMirror, bstack); michael@0: } michael@0: --cisol; // pop the isol sequence from the stack michael@0: return s; michael@0: } michael@0: michael@0: default : michael@0: if (dirover) michael@0: s->setBidiClass((level & 1 ? R : L) | (WSflag * (cls == WS))); michael@0: } michael@0: if (s) s->prev(0); // unstitch us michael@0: if (scurr) // stitch in text for processing michael@0: scurr->prev(s); michael@0: scurr = s; // add us to text to process michael@0: } michael@0: if (slast != s) michael@0: { michael@0: do_resolves(slast, level, (level > prelevel ? level : prelevel) & 1, lnextLevel & 1, bmask, seg, aMirror, bstack); michael@0: empty = 0; michael@0: } michael@0: if (empty || isol) michael@0: nextLevel = prelevel; michael@0: return s; michael@0: } michael@0: michael@0: // === RESOLVE WEAK TYPES ================================================ michael@0: michael@0: enum bidi_state // possible states michael@0: { michael@0: xa, // arabic letter michael@0: xr, // right leter michael@0: xl, // left letter michael@0: michael@0: ao, // arabic lett. foll by ON michael@0: ro, // right lett. foll by ON michael@0: lo, // left lett. foll by ON michael@0: michael@0: rt, // ET following R michael@0: lt, // ET following L michael@0: michael@0: cn, // EN, AN following AL michael@0: ra, // arabic number foll R michael@0: re, // european number foll R michael@0: la, // arabic number foll L michael@0: le, // european number foll L michael@0: michael@0: ac, // CS following cn michael@0: rc, // CS following ra michael@0: rs, // CS,ES following re michael@0: lc, // CS following la michael@0: ls, // CS,ES following le michael@0: michael@0: ret, // ET following re michael@0: let, // ET following le michael@0: } ; michael@0: michael@0: const bidi_state stateWeak[][10] = michael@0: { michael@0: // N, L, R, AN, EN, AL,NSM, CS, ES, ET, michael@0: { /*xa*/ ao, xl, xr, cn, cn, xa, xa, ao, ao, ao, /* arabic letter */ }, michael@0: { /*xr*/ ro, xl, xr, ra, re, xa, xr, ro, ro, rt, /* right letter */ }, michael@0: { /*xl*/ lo, xl, xr, la, le, xa, xl, lo, lo, lt, /* left letter */ }, michael@0: michael@0: { /*ao*/ ao, xl, xr, cn, cn, xa, ao, ao, ao, ao, /* arabic lett. foll by ON*/ }, michael@0: { /*ro*/ ro, xl, xr, ra, re, xa, ro, ro, ro, rt, /* right lett. foll by ON */ }, michael@0: { /*lo*/ lo, xl, xr, la, le, xa, lo, lo, lo, lt, /* left lett. foll by ON */ }, michael@0: michael@0: { /*rt*/ ro, xl, xr, ra, re, xa, rt, ro, ro, rt, /* ET following R */ }, michael@0: { /*lt*/ lo, xl, xr, la, le, xa, lt, lo, lo, lt, /* ET following L */ }, michael@0: michael@0: { /*cn*/ ao, xl, xr, cn, cn, xa, cn, ac, ao, ao, /* EN, AN following AL */ }, michael@0: { /*ra*/ ro, xl, xr, ra, re, xa, ra, rc, ro, rt, /* arabic number foll R */ }, michael@0: { /*re*/ ro, xl, xr, ra, re, xa, re, rs, rs,ret, /* european number foll R */ }, michael@0: { /*la*/ lo, xl, xr, la, le, xa, la, lc, lo, lt, /* arabic number foll L */ }, michael@0: { /*le*/ lo, xl, xr, la, le, xa, le, ls, ls,let, /* european number foll L */ }, michael@0: michael@0: { /*ac*/ ao, xl, xr, cn, cn, xa, ao, ao, ao, ao, /* CS following cn */ }, michael@0: { /*rc*/ ro, xl, xr, ra, re, xa, ro, ro, ro, rt, /* CS following ra */ }, michael@0: { /*rs*/ ro, xl, xr, ra, re, xa, ro, ro, ro, rt, /* CS,ES following re */ }, michael@0: { /*lc*/ lo, xl, xr, la, le, xa, lo, lo, lo, lt, /* CS following la */ }, michael@0: { /*ls*/ lo, xl, xr, la, le, xa, lo, lo, lo, lt, /* CS,ES following le */ }, michael@0: michael@0: { /*ret*/ ro, xl, xr, ra, re, xa,ret, ro, ro,ret, /* ET following re */ }, michael@0: { /*let*/ lo, xl, xr, la, le, xa,let, lo, lo,let, /* ET following le */ }, michael@0: michael@0: michael@0: }; michael@0: michael@0: enum bidi_action // possible actions michael@0: { michael@0: // primitives michael@0: IX = 0x100, // increment michael@0: XX = 0xF, // no-op michael@0: michael@0: // actions michael@0: xxx = (XX << 4) + XX, // no-op michael@0: xIx = IX + xxx, // increment run michael@0: xxN = (XX << 4) + ON, // set current to N michael@0: xxE = (XX << 4) + EN, // set current to EN michael@0: xxA = (XX << 4) + AN, // set current to AN michael@0: xxR = (XX << 4) + R, // set current to R michael@0: xxL = (XX << 4) + L, // set current to L michael@0: Nxx = (ON << 4) + 0xF, // set run to neutral michael@0: Axx = (AN << 4) + 0xF, // set run to AN michael@0: ExE = (EN << 4) + EN, // set run to EN, set current to EN michael@0: NIx = (ON << 4) + 0xF + IX, // set run to N, increment michael@0: NxN = (ON << 4) + ON, // set run to N, set current to N michael@0: NxR = (ON << 4) + R, // set run to N, set current to R michael@0: NxE = (ON << 4) + EN, // set run to N, set current to EN michael@0: michael@0: AxA = (AN << 4) + AN, // set run to AN, set current to AN michael@0: NxL = (ON << 4) + L, // set run to N, set current to L michael@0: LxL = (L << 4) + L, // set run to L, set current to L michael@0: }; michael@0: michael@0: michael@0: const bidi_action actionWeak[][10] = michael@0: { michael@0: // N,.. L, R, AN, EN, AL, NSM, CS,..ES, ET, michael@0: { /*xa*/ xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN, /* arabic letter */ }, michael@0: { /*xr*/ xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx, /* right leter */ }, michael@0: { /*xl*/ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx, /* left letter */ }, michael@0: michael@0: { /*ao*/ xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN, /* arabic lett. foll by ON */ }, michael@0: { /*ro*/ xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx, /* right lett. foll by ON */ }, michael@0: { /*lo*/ xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx, /* left lett. foll by ON */ }, michael@0: michael@0: { /*rt*/ Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx, /* ET following R */ }, michael@0: { /*lt*/ Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx, /* ET following L */ }, michael@0: michael@0: { /*cn*/ xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN, /* EN, AN following AL */ }, michael@0: { /*ra*/ xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx, /* arabic number foll R */ }, michael@0: { /*re*/ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE, /* european number foll R */ }, michael@0: { /*la*/ xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx, /* arabic number foll L */ }, michael@0: { /*le*/ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL, /* european number foll L */ }, michael@0: michael@0: { /*ac*/ Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN, /* CS following cn */ }, michael@0: { /*rc*/ Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx, /* CS following ra */ }, michael@0: { /*rs*/ Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx, /* CS,ES following re */ }, michael@0: { /*lc*/ Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx, /* CS following la */ }, michael@0: { /*ls*/ Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx, /* CS,ES following le */ }, michael@0: michael@0: { /*ret*/xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE, /* ET following re */ }, michael@0: { /*let*/xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL, /* ET following le */ }, michael@0: }; michael@0: michael@0: inline uint8 GetDeferredType(bidi_action a) { return (a >> 4) & 0xF; } michael@0: inline uint8 GetResolvedType(bidi_action a) { return a & 0xF; } michael@0: inline DirCode EmbeddingDirection(int l) { return l & 1 ? R : L; } michael@0: michael@0: // Neutrals michael@0: enum neutral_action michael@0: { michael@0: // action to resolve previous input michael@0: nL = L, // resolve EN to L michael@0: En = 3 << 4, // resolve neutrals run to embedding level direction michael@0: Rn = R << 4, // resolve neutrals run to strong right michael@0: Ln = L << 4, // resolved neutrals run to strong left michael@0: In = (1<<8), // increment count of deferred neutrals michael@0: LnL = (1<<4)+L, // set run and EN to L michael@0: }; michael@0: michael@0: // ->prev() here means ->next() michael@0: void SetDeferredRunClass(Slot *s, Slot *sRun, int nval) michael@0: { michael@0: if (!sRun || s == sRun) return; michael@0: for (Slot *p = sRun; p != s; p = p->prev()) michael@0: if (p->getBidiClass() == WS) p->setBidiClass(nval | WSflag); michael@0: else if (BaseClass(p) != BN) p->setBidiClass(nval | (p->getBidiClass() & WSflag)); michael@0: } michael@0: michael@0: void SetThisDeferredRunClass(Slot *s, Slot *sRun, int nval) michael@0: { michael@0: if (!sRun) return; michael@0: for (Slot *p = sRun, *e = s->prev(); p != e; p = p->prev()) michael@0: if (p->getBidiClass() == WS) p->setBidiClass(nval | WSflag); michael@0: else if (BaseClass(p) != BN) p->setBidiClass(nval | (p->getBidiClass() & WSflag)); michael@0: } michael@0: michael@0: void resolveWeak(Slot *start, int sos, int eos) michael@0: { michael@0: int state = (sos & 1) ? xr : xl; michael@0: int cls; michael@0: Slot *s = start; michael@0: Slot *sRun = NULL; michael@0: Slot *sLast = s; michael@0: michael@0: for ( ; s; s = s->prev()) michael@0: { michael@0: sLast = s; michael@0: cls = BaseClass(s); michael@0: switch (cls) michael@0: { michael@0: case BN : michael@0: if (s == start) start = s->prev(); // skip initial BNs for NSM resolving michael@0: continue; michael@0: case LRI : michael@0: case RLI : michael@0: case FSI : michael@0: case PDI : michael@0: { michael@0: Slot *snext = s->prev(); michael@0: if (snext && snext->getBidiClass() == NSM) michael@0: snext->setBidiClass(ON); michael@0: s->setBidiClass(ON | WSflag); michael@0: } michael@0: break; michael@0: michael@0: case NSM : michael@0: if (s == start) michael@0: { michael@0: cls = EmbeddingDirection(sos); michael@0: s->setBidiClass(cls); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: bidi_action action = actionWeak[state][bidi_class_map[cls]]; michael@0: int clsRun = GetDeferredType(action); michael@0: if (clsRun != XX) michael@0: { michael@0: SetDeferredRunClass(s, sRun, clsRun); michael@0: sRun = NULL; michael@0: } michael@0: int clsNew = GetResolvedType(action); michael@0: if (clsNew != XX) michael@0: s->setBidiClass(clsNew); michael@0: if (!sRun && (IX & action)) michael@0: sRun = s; michael@0: state = stateWeak[state][bidi_class_map[cls]]; michael@0: } michael@0: michael@0: cls = EmbeddingDirection(eos); michael@0: int clsRun = GetDeferredType(actionWeak[state][bidi_class_map[cls]]); michael@0: if (clsRun != XX) michael@0: SetThisDeferredRunClass(sLast, sRun, clsRun); michael@0: } michael@0: michael@0: void processParens(Slot *s, Segment *seg, uint8 aMirror, int level, BracketPairStack &stack) michael@0: { michael@0: uint8 mask = 0; michael@0: int8 lastDir = -1; michael@0: BracketPair *p; michael@0: for ( ; s; s = s->prev()) // walk the sequence michael@0: { michael@0: uint16 ogid = seg->glyphAttr(s->gid(), aMirror); michael@0: int cls = BaseClass(s); michael@0: michael@0: switch(cls) michael@0: { michael@0: case OPP : michael@0: stack.orin(mask); michael@0: stack.push(ogid, s, lastDir, lastDir != CPP); michael@0: mask = 0; michael@0: lastDir = OPP; michael@0: break; michael@0: case CPP : michael@0: stack.orin(mask); michael@0: p = stack.scan(s->gid()); michael@0: if (!p) break; michael@0: mask = 0; michael@0: stack.close(p, s); michael@0: lastDir = CPP; michael@0: break; michael@0: case L : michael@0: lastDir = L; michael@0: mask |= 1; michael@0: break; michael@0: case R : michael@0: case AL : michael@0: case AN : michael@0: case EN : michael@0: lastDir = R; michael@0: mask |= 2; michael@0: } michael@0: } michael@0: for (p = stack.start(); p; p =p->next()) // walk the stack michael@0: { michael@0: if (p->close() && p->mask()) michael@0: { michael@0: int dir = (level & 1) + 1; michael@0: if (p->mask() & dir) michael@0: { } michael@0: else if (p->mask() & (1 << (~level & 1))) // if inside has strong other embedding michael@0: { michael@0: int ldir = p->before(); michael@0: if ((p->before() == OPP || p->before() == CPP) && p->prev()) michael@0: { michael@0: for (BracketPair *q = p->prev(); q; q = q->prev()) michael@0: { michael@0: ldir = q->open()->getBidiClass(); michael@0: if (ldir < 3) break; michael@0: ldir = q->before(); michael@0: if (ldir < 3) break; michael@0: } michael@0: if (ldir > 2) ldir = 0; michael@0: } michael@0: if (ldir > 0 && (ldir - 1) != (level & 1)) // is dir given opp. to level dir (ldir == R or L) michael@0: dir = (~level & 1) + 1; michael@0: } michael@0: p->open()->setBidiClass(dir); michael@0: p->close()->setBidiClass(dir); michael@0: } michael@0: } michael@0: stack.clear(); michael@0: } michael@0: michael@0: int GetDeferredNeutrals(int action, int level) michael@0: { michael@0: action = (action >> 4) & 0xF; michael@0: if (action == (En >> 4)) michael@0: return EmbeddingDirection(level); michael@0: else michael@0: return action; michael@0: } michael@0: michael@0: int GetResolvedNeutrals(int action) michael@0: { michael@0: return action & 0xF; michael@0: } michael@0: michael@0: // state values michael@0: enum neutral_state michael@0: { michael@0: // new temporary class michael@0: r, // R and characters resolved to R michael@0: l, // L and characters resolved to L michael@0: rn, // N preceded by right michael@0: ln, // N preceded by left michael@0: a, // AN preceded by left (the abbrev 'la' is used up above) michael@0: na, // N preceeded by a michael@0: } ; michael@0: michael@0: const uint8 neutral_class_map[] = { 0, 1, 2, 0, 4, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; michael@0: michael@0: const int actionNeutrals[][5] = michael@0: { michael@0: // cls= N, L, R, AN, EN, state = michael@0: { In, 0, 0, 0, 0, }, // r right michael@0: { In, 0, 0, 0, L, }, // l left michael@0: michael@0: { In, En, Rn, Rn, Rn, }, // rn N preceded by right michael@0: { In, Ln, En, En, LnL, }, // ln N preceded by left michael@0: michael@0: { In, 0, 0, 0, L, }, // a AN preceded by left michael@0: { In, En, Rn, Rn, En, }, // na N preceded by a michael@0: } ; michael@0: michael@0: const int stateNeutrals[][5] = michael@0: { michael@0: // cls= N, L, R, AN, EN state = michael@0: { rn, l, r, r, r, }, // r right michael@0: { ln, l, r, a, l, }, // l left michael@0: michael@0: { rn, l, r, r, r, }, // rn N preceded by right michael@0: { ln, l, r, a, l, }, // ln N preceded by left michael@0: michael@0: { na, l, r, a, l, }, // a AN preceded by left michael@0: { na, l, r, a, l, }, // na N preceded by la michael@0: } ; michael@0: michael@0: void resolveNeutrals(Slot *s, int baseLevel, int sos, int eos) michael@0: { michael@0: int state = (sos & 1) ? r : l; michael@0: int cls; michael@0: Slot *sRun = NULL; michael@0: Slot *sLast = s; michael@0: int level = baseLevel; michael@0: michael@0: for ( ; s; s = s->prev()) michael@0: { michael@0: sLast = s; michael@0: cls = BaseClass(s); michael@0: switch (cls) michael@0: { michael@0: case BN : michael@0: continue; michael@0: case LRI : michael@0: case RLI : michael@0: case FSI : michael@0: s->setBidiClass(BN | WSflag); michael@0: continue; michael@0: michael@0: default : michael@0: int action = actionNeutrals[state][neutral_class_map[cls]]; michael@0: int clsRun = GetDeferredNeutrals(action, level); michael@0: if (clsRun != N) michael@0: { michael@0: SetDeferredRunClass(s, sRun, clsRun); michael@0: sRun = NULL; michael@0: } michael@0: int clsNew = GetResolvedNeutrals(action); michael@0: if (clsNew != N) michael@0: s->setBidiClass(clsNew); michael@0: if (!sRun && (action & In)) michael@0: sRun = s; michael@0: state = stateNeutrals[state][neutral_class_map[cls]]; michael@0: } michael@0: } michael@0: cls = EmbeddingDirection(eos); michael@0: int clsRun = GetDeferredNeutrals(actionNeutrals[state][neutral_class_map[cls]], level); michael@0: if (clsRun != N) michael@0: SetThisDeferredRunClass(sLast, sRun, clsRun); michael@0: } michael@0: michael@0: const int addLevel[][4] = michael@0: { michael@0: // cls = L, R, AN, EN level = michael@0: /* even */ { 0, 1, 2, 2, }, // EVEN michael@0: /* odd */ { 1, 0, 1, 1, }, // ODD michael@0: michael@0: }; michael@0: michael@0: void resolveImplicit(Slot *s, Segment *seg, uint8 aMirror) michael@0: { michael@0: bool rtl = seg->dir() & 1; michael@0: int level = rtl; michael@0: Slot *slast = 0; michael@0: for ( ; s; s = s->next()) michael@0: { michael@0: int cls = BaseClass(s); michael@0: s->prev(slast); // restitch the prev() side of the doubly linked list michael@0: slast = s; michael@0: if (cls == AN) michael@0: cls = AL; // use AL value as the index for AN, no property change michael@0: if (cls < 5 && cls > 0) michael@0: { michael@0: level = s->getBidiLevel(); michael@0: level += addLevel[level & 1][cls - 1]; michael@0: s->setBidiLevel(level); michael@0: } michael@0: if (aMirror) michael@0: { michael@0: int hasChar = seg->glyphAttr(s->gid(), aMirror + 1); michael@0: if ( ((level & 1) && (!(seg->dir() & 4) || !hasChar)) michael@0: || ((rtl ^ (level & 1)) && (seg->dir() & 4) && hasChar) ) michael@0: { michael@0: unsigned short g = seg->glyphAttr(s->gid(), aMirror); michael@0: if (g) s->setGlyph(seg, g); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: void resolveWhitespace(int baseLevel, Slot *s) michael@0: { michael@0: for ( ; s; s = s->prev()) michael@0: { michael@0: int8 cls = s->getBidiClass(); michael@0: if (cls == WS || cls & WSflag) michael@0: s->setBidiLevel(baseLevel); michael@0: else if (cls != BN) michael@0: break; michael@0: } michael@0: } michael@0: michael@0: michael@0: /* michael@0: Stitch two spans together to make another span (with ends tied together). michael@0: If the level is odd then swap the order of the two spans michael@0: */ michael@0: inline michael@0: Slot * join(int level, Slot * a, Slot * b) michael@0: { michael@0: if (!a) return b; michael@0: if (level & 1) { Slot * const t = a; a = b; b = t; } michael@0: Slot * const t = b->prev(); michael@0: a->prev()->next(b); b->prev(a->prev()); // splice middle michael@0: t->next(a); a->prev(t); // splice ends michael@0: return a; michael@0: } michael@0: michael@0: /* michael@0: Given the first slot in a run of slots with the same bidi level, turn the run michael@0: into it's own little doubly linked list ring (a span) with the two ends joined together. michael@0: If the run is rtl then reverse its direction. michael@0: Returns the first slot after the span michael@0: */ michael@0: Slot * span(Slot * & cs, const bool rtl) michael@0: { michael@0: Slot * r = cs, * re = cs; cs = cs->next(); michael@0: if (rtl) michael@0: { michael@0: Slot * t = r->next(); r->next(r->prev()); r->prev(t); michael@0: for (int l = r->getBidiLevel(); cs && (l == cs->getBidiLevel() || cs->getBidiClass() == BN); cs = cs->prev()) michael@0: { michael@0: re = cs; michael@0: t = cs->next(); cs->next(cs->prev()); cs->prev(t); michael@0: } michael@0: r->next(re); michael@0: re->prev(r); michael@0: r = re; michael@0: } michael@0: else michael@0: { michael@0: for (int l = r->getBidiLevel(); cs && (l == cs->getBidiLevel() || cs->getBidiClass() == BN); cs = cs->next()) michael@0: re = cs; michael@0: r->prev(re); michael@0: re->next(r); michael@0: } michael@0: if (cs) cs->prev(0); michael@0: return r; michael@0: } michael@0: michael@0: inline int getlevel(const Slot *cs, const int level) michael@0: { michael@0: while (cs && cs->getBidiClass() == BN) michael@0: { cs = cs->next(); } michael@0: if (cs) michael@0: return cs->getBidiLevel(); michael@0: else michael@0: return level; michael@0: } michael@0: michael@0: Slot *resolveOrder(Slot * & cs, const bool reordered, const int level) michael@0: { michael@0: Slot * r = 0; michael@0: int ls; michael@0: while (cs && level <= (ls = getlevel(cs, level) - reordered)) michael@0: { michael@0: r = join(level, r, level < ls michael@0: ? resolveOrder(/* updates */cs, reordered, level+1) // find span of heighest level michael@0: : span(/* updates */cs, level & 1)); michael@0: } michael@0: return r; michael@0: }