michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ michael@0: michael@0: /* This file is modified from JPNIC's mDNKit, it is under both MPL and michael@0: * JPNIC's license. michael@0: */ michael@0: michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * Copyright (c) 2000,2002 Japan Network Information Center. michael@0: * All rights reserved. michael@0: * michael@0: * By using this file, you agree to the terms and conditions set forth bellow. michael@0: * michael@0: * LICENSE TERMS AND CONDITIONS michael@0: * michael@0: * The following License Terms and Conditions apply, unless a different michael@0: * license is obtained from Japan Network Information Center ("JPNIC"), michael@0: * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, michael@0: * Chiyoda-ku, Tokyo 101-0047, Japan. michael@0: * michael@0: * 1. Use, Modification and Redistribution (including distribution of any michael@0: * modified or derived work) in source and/or binary forms is permitted michael@0: * under this License Terms and Conditions. michael@0: * michael@0: * 2. Redistribution of source code must retain the copyright notices as they michael@0: * appear in each source code file, this License Terms and Conditions. michael@0: * michael@0: * 3. Redistribution in binary form must reproduce the Copyright Notice, michael@0: * this License Terms and Conditions, in the documentation and/or other michael@0: * materials provided with the distribution. For the purposes of binary michael@0: * distribution the "Copyright Notice" refers to the following language: michael@0: * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." michael@0: * michael@0: * 4. The name of JPNIC may not be used to endorse or promote products michael@0: * derived from this Software without specific prior written approval of michael@0: * JPNIC. michael@0: * michael@0: * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC michael@0: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A michael@0: * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE michael@0: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR michael@0: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF michael@0: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR michael@0: * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, michael@0: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR michael@0: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF michael@0: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. michael@0: */ michael@0: michael@0: #include michael@0: michael@0: #include "nsMemory.h" michael@0: #include "nsUnicodeNormalizer.h" michael@0: #include "nsString.h" michael@0: michael@0: NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer) michael@0: michael@0: michael@0: nsUnicodeNormalizer::nsUnicodeNormalizer() michael@0: { michael@0: } michael@0: michael@0: nsUnicodeNormalizer::~nsUnicodeNormalizer() michael@0: { michael@0: } michael@0: michael@0: michael@0: michael@0: #define END_BIT 0x80000000 michael@0: michael@0: michael@0: /* michael@0: * Some constants for Hangul decomposition/composition. michael@0: * These things were taken from unicode book. michael@0: */ michael@0: #define SBase 0xac00 michael@0: #define LBase 0x1100 michael@0: #define VBase 0x1161 michael@0: #define TBase 0x11a7 michael@0: #define LCount 19 michael@0: #define VCount 21 michael@0: #define TCount 28 michael@0: #define SLast (SBase + LCount * VCount * TCount) michael@0: michael@0: struct composition { michael@0: uint32_t c2; /* 2nd character */ michael@0: uint32_t comp; /* composed character */ michael@0: }; michael@0: michael@0: michael@0: #include "normalization_data.h" michael@0: michael@0: /* michael@0: * Macro for multi-level index table. michael@0: */ michael@0: #define LOOKUPTBL(vprefix, mprefix, v) \ michael@0: DMAP(vprefix)[\ michael@0: IMAP(vprefix)[\ michael@0: IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\ michael@0: ]\ michael@0: ].tbl[IDX2(mprefix, v)] michael@0: michael@0: #define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix)) michael@0: #define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix)) michael@0: #define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix)) michael@0: michael@0: #define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2))) michael@0: #define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1)) michael@0: #define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1)) michael@0: michael@0: #define BITS1(mprefix) mprefix ## _BITS_1 michael@0: #define BITS2(mprefix) mprefix ## _BITS_2 michael@0: michael@0: #define IMAP(vprefix) vprefix ## _imap michael@0: #define DMAP(vprefix) vprefix ## _table michael@0: #define SEQ(vprefix) vprefix ## _seq michael@0: michael@0: static int32_t michael@0: canonclass(uint32_t c) { michael@0: /* Look up canonicalclass table. */ michael@0: return (LOOKUPTBL(canon_class, CANON_CLASS, c)); michael@0: } michael@0: michael@0: static int32_t michael@0: decompose_char(uint32_t c, const uint32_t **seqp) michael@0: { michael@0: /* Look up decomposition table. */ michael@0: int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c); michael@0: *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT); michael@0: return (seqidx); michael@0: } michael@0: michael@0: static int32_t michael@0: compose_char(uint32_t c, michael@0: const struct composition **compp) michael@0: { michael@0: /* Look up composition table. */ michael@0: int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c); michael@0: *compp = SEQ(compose) + (seqidx & 0xffff); michael@0: return (seqidx >> 16); michael@0: } michael@0: michael@0: static nsresult michael@0: mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen, michael@0: uint32_t c, int32_t *decomp_lenp) michael@0: { michael@0: uint32_t *vorg = v; michael@0: int32_t seqidx; michael@0: const uint32_t *seq; michael@0: michael@0: //assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr); michael@0: michael@0: /* michael@0: * First, check for Hangul. michael@0: */ michael@0: if (SBase <= c && c < SLast) { michael@0: int32_t idx, t_offset, v_offset, l_offset; michael@0: michael@0: idx = c - SBase; michael@0: t_offset = idx % TCount; michael@0: idx /= TCount; michael@0: v_offset = idx % VCount; michael@0: l_offset = idx / VCount; michael@0: if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3)) michael@0: return (NS_ERROR_UNORM_MOREOUTPUT); michael@0: *v++ = LBase + l_offset; michael@0: *v++ = VBase + v_offset; michael@0: if (t_offset > 0) michael@0: *v++ = TBase + t_offset; michael@0: *decomp_lenp = v - vorg; michael@0: return (NS_OK); michael@0: } michael@0: michael@0: /* michael@0: * Look up decomposition table. If no decomposition is defined michael@0: * or if it is a compatibility decomosition when canonical michael@0: * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'. michael@0: */ michael@0: seqidx = decompose_char(c, &seq); michael@0: if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0)) michael@0: return (NS_SUCCESS_UNORM_NOTFOUND); michael@0: michael@0: /* michael@0: * Copy the decomposed sequence. The end of the sequence are michael@0: * marked with END_BIT. michael@0: */ michael@0: do { michael@0: uint32_t c; michael@0: int32_t dlen; michael@0: nsresult r; michael@0: michael@0: c = *seq & ~END_BIT; michael@0: michael@0: /* Decompose recursively. */ michael@0: r = mdn__unicode_decompose(compat, v, vlen, c, &dlen); michael@0: if (r == NS_OK) { michael@0: v += dlen; michael@0: vlen -= dlen; michael@0: } else if (r == NS_SUCCESS_UNORM_NOTFOUND) { michael@0: if (vlen < 1) michael@0: return (NS_ERROR_UNORM_MOREOUTPUT); michael@0: *v++ = c; michael@0: vlen--; michael@0: } else { michael@0: return (r); michael@0: } michael@0: michael@0: } while ((*seq++ & END_BIT) == 0); michael@0: michael@0: *decomp_lenp = v - vorg; michael@0: michael@0: return (NS_OK); michael@0: } michael@0: michael@0: static int32_t michael@0: mdn__unicode_iscompositecandidate(uint32_t c) michael@0: { michael@0: const struct composition *dummy; michael@0: michael@0: /* Check for Hangul */ michael@0: if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast)) michael@0: return (1); michael@0: michael@0: /* michael@0: * Look up composition table. If there are no composition michael@0: * that begins with the given character, it is not a michael@0: * composition candidate. michael@0: */ michael@0: if (compose_char(c, &dummy) == 0) michael@0: return (0); michael@0: else michael@0: return (1); michael@0: } michael@0: michael@0: static nsresult michael@0: mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp) michael@0: { michael@0: int32_t n; michael@0: int32_t lo, hi; michael@0: const struct composition *cseq; michael@0: michael@0: //assert(compp != nullptr); michael@0: michael@0: /* michael@0: * Check for Hangul. michael@0: */ michael@0: if (LBase <= c1 && c1 < LBase + LCount && michael@0: VBase <= c2 && c2 < VBase + VCount) { michael@0: /* michael@0: * Hangul L and V. michael@0: */ michael@0: *compp = SBase + michael@0: ((c1 - LBase) * VCount + (c2 - VBase)) * TCount; michael@0: return (NS_OK); michael@0: } else if (SBase <= c1 && c1 < SLast && michael@0: TBase <= c2 && c2 < TBase + TCount && michael@0: (c1 - SBase) % TCount == 0) { michael@0: /* michael@0: * Hangul LV and T. michael@0: */ michael@0: *compp = c1 + (c2 - TBase); michael@0: return (NS_OK); michael@0: } michael@0: michael@0: /* michael@0: * Look up composition table. If the result is 0, no composition michael@0: * is defined. Otherwise, upper 16bits of the result contains michael@0: * the number of composition that begins with 'c1', and the lower michael@0: * 16bits is the offset in 'compose_seq'. michael@0: */ michael@0: if ((n = compose_char(c1, &cseq)) == 0) michael@0: return (NS_SUCCESS_UNORM_NOTFOUND); michael@0: michael@0: /* michael@0: * The composite sequences are sorted by the 2nd character 'c2'. michael@0: * So we can use binary search. michael@0: */ michael@0: lo = 0; michael@0: hi = n - 1; michael@0: while (lo <= hi) { michael@0: int32_t mid = (lo + hi) / 2; michael@0: michael@0: if (cseq[mid].c2 < c2) { michael@0: lo = mid + 1; michael@0: } else if (cseq[mid].c2 > c2) { michael@0: hi = mid - 1; michael@0: } else { michael@0: *compp = cseq[mid].comp; michael@0: return (NS_OK); michael@0: } michael@0: } michael@0: return (NS_SUCCESS_UNORM_NOTFOUND); michael@0: } michael@0: michael@0: michael@0: #define WORKBUF_SIZE 128 michael@0: #define WORKBUF_SIZE_MAX 10000 michael@0: michael@0: typedef struct { michael@0: int32_t cur; /* pointing now processing character */ michael@0: int32_t last; /* pointing just after the last character */ michael@0: int32_t size; /* size of UCS and CLASS array */ michael@0: uint32_t *ucs; /* UCS-4 characters */ michael@0: int32_t *cclass; /* and their canonical classes */ michael@0: uint32_t ucs_buf[WORKBUF_SIZE]; /* local buffer */ michael@0: int32_t class_buf[WORKBUF_SIZE]; /* ditto */ michael@0: } workbuf_t; michael@0: michael@0: static nsresult decompose(workbuf_t *wb, uint32_t c, int32_t compat); michael@0: static void get_class(workbuf_t *wb); michael@0: static void reorder(workbuf_t *wb); michael@0: static void compose(workbuf_t *wb); michael@0: static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr); michael@0: static void workbuf_init(workbuf_t *wb); michael@0: static void workbuf_free(workbuf_t *wb); michael@0: static nsresult workbuf_extend(workbuf_t *wb); michael@0: static nsresult workbuf_append(workbuf_t *wb, uint32_t c); michael@0: static void workbuf_shift(workbuf_t *wb, int32_t shift); michael@0: static void workbuf_removevoid(workbuf_t *wb); michael@0: michael@0: michael@0: static nsresult michael@0: mdn_normalize(bool do_composition, bool compat, michael@0: const nsAString& aSrcStr, nsAString& aToStr) michael@0: { michael@0: workbuf_t wb; michael@0: nsresult r = NS_OK; michael@0: /* michael@0: * Initialize working buffer. michael@0: */ michael@0: workbuf_init(&wb); michael@0: michael@0: nsAString::const_iterator start, end; michael@0: aSrcStr.BeginReading(start); michael@0: aSrcStr.EndReading(end); michael@0: michael@0: while (start != end) { michael@0: uint32_t c; michael@0: char16_t curChar; michael@0: michael@0: //assert(wb.cur == wb.last); michael@0: michael@0: /* michael@0: * Get one character from 'from'. michael@0: */ michael@0: curChar= *start++; michael@0: michael@0: if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) { michael@0: c = SURROGATE_TO_UCS4(curChar, *start); michael@0: ++start; michael@0: } else { michael@0: c = curChar; michael@0: } michael@0: michael@0: /* michael@0: * Decompose it. michael@0: */ michael@0: if ((r = decompose(&wb, c, compat)) != NS_OK) michael@0: break; michael@0: michael@0: /* michael@0: * Get canonical class. michael@0: */ michael@0: get_class(&wb); michael@0: michael@0: /* michael@0: * Reorder & compose. michael@0: */ michael@0: for (; wb.cur < wb.last; wb.cur++) { michael@0: if (wb.cur == 0) { michael@0: continue; michael@0: } else if (wb.cclass[wb.cur] > 0) { michael@0: /* michael@0: * This is not a starter. Try reordering. michael@0: * Note that characters up to it are michael@0: * already in canonical order. michael@0: */ michael@0: reorder(&wb); michael@0: continue; michael@0: } michael@0: michael@0: /* michael@0: * This is a starter character, and there are michael@0: * some characters before it. Those characters michael@0: * have been reordered properly, and michael@0: * ready for composition. michael@0: */ michael@0: if (do_composition && wb.cclass[0] == 0) michael@0: compose(&wb); michael@0: michael@0: /* michael@0: * If CUR points to a starter character, michael@0: * then process of characters before CUR are michael@0: * already finished, because any further michael@0: * reordering/composition for them are blocked michael@0: * by the starter CUR points. michael@0: */ michael@0: if (wb.cur > 0 && wb.cclass[wb.cur] == 0) { michael@0: /* Flush everything before CUR. */ michael@0: r = flush_before_cur(&wb, aToStr); michael@0: if (r != NS_OK) michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (r == NS_OK) { michael@0: if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) { michael@0: /* michael@0: * There is some characters left in WB. michael@0: * They are ordered, but not composed yet. michael@0: * Now CUR points just after the last character in WB, michael@0: * and since compose() tries to compose characters michael@0: * between top and CUR inclusive, we must make CUR michael@0: * one character back during compose(). michael@0: */ michael@0: wb.cur--; michael@0: compose(&wb); michael@0: wb.cur++; michael@0: } michael@0: /* michael@0: * Call this even when WB.CUR == 0, to make TO michael@0: * NUL-terminated. michael@0: */ michael@0: r = flush_before_cur(&wb, aToStr); michael@0: } michael@0: michael@0: workbuf_free(&wb); michael@0: michael@0: return (r); michael@0: } michael@0: michael@0: static nsresult michael@0: decompose(workbuf_t *wb, uint32_t c, int32_t compat) { michael@0: nsresult r; michael@0: int32_t dec_len; michael@0: michael@0: again: michael@0: r = mdn__unicode_decompose(compat, wb->ucs + wb->last, michael@0: wb->size - wb->last, c, &dec_len); michael@0: switch (r) { michael@0: case NS_OK: michael@0: wb->last += dec_len; michael@0: return (NS_OK); michael@0: case NS_SUCCESS_UNORM_NOTFOUND: michael@0: return (workbuf_append(wb, c)); michael@0: case NS_ERROR_UNORM_MOREOUTPUT: michael@0: if ((r = workbuf_extend(wb)) != NS_OK) michael@0: return (r); michael@0: if (wb->size > WORKBUF_SIZE_MAX) { michael@0: // "mdn__unormalize_form*: " "working buffer too large\n" michael@0: return (NS_ERROR_FAILURE); michael@0: } michael@0: goto again; michael@0: default: michael@0: return (r); michael@0: } michael@0: /* NOTREACHED */ michael@0: } michael@0: michael@0: static void michael@0: get_class(workbuf_t *wb) { michael@0: int32_t i; michael@0: michael@0: for (i = wb->cur; i < wb->last; i++) michael@0: wb->cclass[i] = canonclass(wb->ucs[i]); michael@0: } michael@0: michael@0: static void michael@0: reorder(workbuf_t *wb) { michael@0: uint32_t c; michael@0: int32_t i; michael@0: int32_t cclass; michael@0: michael@0: //assert(wb != nullptr); michael@0: michael@0: i = wb->cur; michael@0: c = wb->ucs[i]; michael@0: cclass = wb->cclass[i]; michael@0: michael@0: while (i > 0 && wb->cclass[i - 1] > cclass) { michael@0: wb->ucs[i] = wb->ucs[i - 1]; michael@0: wb->cclass[i] =wb->cclass[i - 1]; michael@0: i--; michael@0: wb->ucs[i] = c; michael@0: wb->cclass[i] = cclass; michael@0: } michael@0: } michael@0: michael@0: static void michael@0: compose(workbuf_t *wb) { michael@0: int32_t cur; michael@0: uint32_t *ucs; michael@0: int32_t *cclass; michael@0: int32_t last_class; michael@0: int32_t nvoids; michael@0: int32_t i; michael@0: michael@0: //assert(wb != nullptr && wb->cclass[0] == 0); michael@0: michael@0: cur = wb->cur; michael@0: ucs = wb->ucs; michael@0: cclass = wb->cclass; michael@0: michael@0: /* michael@0: * If there are no decomposition sequence that begins with michael@0: * the top character, composition is impossible. michael@0: */ michael@0: if (!mdn__unicode_iscompositecandidate(ucs[0])) michael@0: return; michael@0: michael@0: last_class = 0; michael@0: nvoids = 0; michael@0: for (i = 1; i <= cur; i++) { michael@0: uint32_t c; michael@0: int32_t cl = cclass[i]; michael@0: michael@0: if ((last_class < cl || cl == 0) && michael@0: mdn__unicode_compose(ucs[0], ucs[i], michael@0: &c) == NS_OK) { michael@0: /* michael@0: * Replace the top character with the composed one. michael@0: */ michael@0: ucs[0] = c; michael@0: cclass[0] = canonclass(c); michael@0: michael@0: cclass[i] = -1; /* void this character */ michael@0: nvoids++; michael@0: } else { michael@0: last_class = cl; michael@0: } michael@0: } michael@0: michael@0: /* Purge void characters, if any. */ michael@0: if (nvoids > 0) michael@0: workbuf_removevoid(wb); michael@0: } michael@0: michael@0: static nsresult michael@0: flush_before_cur(workbuf_t *wb, nsAString& aToStr) michael@0: { michael@0: int32_t i; michael@0: michael@0: for (i = 0; i < wb->cur; i++) { michael@0: if (!IS_IN_BMP(wb->ucs[i])) { michael@0: aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i])); michael@0: aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i])); michael@0: } else { michael@0: aToStr.Append((char16_t)(wb->ucs[i])); michael@0: } michael@0: } michael@0: michael@0: workbuf_shift(wb, wb->cur); michael@0: michael@0: return (NS_OK); michael@0: } michael@0: michael@0: static void michael@0: workbuf_init(workbuf_t *wb) { michael@0: wb->cur = 0; michael@0: wb->last = 0; michael@0: wb->size = WORKBUF_SIZE; michael@0: wb->ucs = wb->ucs_buf; michael@0: wb->cclass = wb->class_buf; michael@0: } michael@0: michael@0: static void michael@0: workbuf_free(workbuf_t *wb) { michael@0: if (wb->ucs != wb->ucs_buf) { michael@0: nsMemory::Free(wb->ucs); michael@0: nsMemory::Free(wb->cclass); michael@0: } michael@0: } michael@0: michael@0: static nsresult michael@0: workbuf_extend(workbuf_t *wb) { michael@0: int32_t newsize = wb->size * 3; michael@0: michael@0: if (wb->ucs == wb->ucs_buf) { michael@0: wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize); michael@0: if (!wb->ucs) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize); michael@0: if (!wb->cclass) { michael@0: nsMemory::Free(wb->ucs); michael@0: wb->ucs = nullptr; michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: } else { michael@0: void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize); michael@0: if (!buf) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: wb->ucs = (uint32_t*)buf; michael@0: buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize); michael@0: if (!buf) michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: wb->cclass = (int32_t*)buf; michael@0: } michael@0: return (NS_OK); michael@0: } michael@0: michael@0: static nsresult michael@0: workbuf_append(workbuf_t *wb, uint32_t c) { michael@0: nsresult r; michael@0: michael@0: if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK) michael@0: return (r); michael@0: wb->ucs[wb->last++] = c; michael@0: return (NS_OK); michael@0: } michael@0: michael@0: static void michael@0: workbuf_shift(workbuf_t *wb, int32_t shift) { michael@0: int32_t nmove; michael@0: michael@0: //assert(wb != nullptr && wb->cur >= shift); michael@0: michael@0: nmove = wb->last - shift; michael@0: memmove(&wb->ucs[0], &wb->ucs[shift], michael@0: nmove * sizeof(wb->ucs[0])); michael@0: memmove(&wb->cclass[0], &wb->cclass[shift], michael@0: nmove * sizeof(wb->cclass[0])); michael@0: wb->cur -= shift; michael@0: wb->last -= shift; michael@0: } michael@0: michael@0: static void michael@0: workbuf_removevoid(workbuf_t *wb) { michael@0: int32_t i, j; michael@0: int32_t last = wb->last; michael@0: michael@0: for (i = j = 0; i < last; i++) { michael@0: if (wb->cclass[i] >= 0) { michael@0: if (j < i) { michael@0: wb->ucs[j] = wb->ucs[i]; michael@0: wb->cclass[j] = wb->cclass[i]; michael@0: } michael@0: j++; michael@0: } michael@0: } michael@0: wb->cur -= last - j; michael@0: wb->last = j; michael@0: } michael@0: michael@0: nsresult michael@0: nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest) michael@0: { michael@0: return mdn_normalize(false, false, aSrc, aDest); michael@0: } michael@0: michael@0: nsresult michael@0: nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest) michael@0: { michael@0: return mdn_normalize(true, false, aSrc, aDest); michael@0: } michael@0: michael@0: nsresult michael@0: nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest) michael@0: { michael@0: return mdn_normalize(false, true, aSrc, aDest); michael@0: } michael@0: michael@0: nsresult michael@0: nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest) michael@0: { michael@0: return mdn_normalize(true, true, aSrc, aDest); michael@0: } michael@0: michael@0: bool michael@0: nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab) michael@0: { michael@0: return mdn__unicode_compose(a, b, ab) == NS_OK; michael@0: } michael@0: michael@0: bool michael@0: nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2) michael@0: { michael@0: // We can't use mdn__unicode_decompose here, because that does a recursive michael@0: // decomposition that may yield more than two characters, but the harfbuzz michael@0: // callback wants just a single-step decomp that is guaranteed to produce michael@0: // no more than two characters. So we do a low-level lookup in the table michael@0: // of decomp sequences. michael@0: const uint32_t *seq; michael@0: uint32_t seqidx = decompose_char(c, &seq); michael@0: if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) { michael@0: return false; michael@0: } michael@0: *c1 = *seq & ~END_BIT; michael@0: if (*seq & END_BIT) { michael@0: *c2 = 0; michael@0: } else { michael@0: *c2 = *++seq & ~END_BIT; michael@0: } michael@0: return true; michael@0: }