1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/unicharutil/src/nsUnicodeNormalizer.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,699 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 + 1.6 +/* This file is modified from JPNIC's mDNKit, it is under both MPL and 1.7 + * JPNIC's license. 1.8 + */ 1.9 + 1.10 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.11 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.12 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.13 + 1.14 +/* 1.15 + * Copyright (c) 2000,2002 Japan Network Information Center. 1.16 + * All rights reserved. 1.17 + * 1.18 + * By using this file, you agree to the terms and conditions set forth bellow. 1.19 + * 1.20 + * LICENSE TERMS AND CONDITIONS 1.21 + * 1.22 + * The following License Terms and Conditions apply, unless a different 1.23 + * license is obtained from Japan Network Information Center ("JPNIC"), 1.24 + * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 1.25 + * Chiyoda-ku, Tokyo 101-0047, Japan. 1.26 + * 1.27 + * 1. Use, Modification and Redistribution (including distribution of any 1.28 + * modified or derived work) in source and/or binary forms is permitted 1.29 + * under this License Terms and Conditions. 1.30 + * 1.31 + * 2. Redistribution of source code must retain the copyright notices as they 1.32 + * appear in each source code file, this License Terms and Conditions. 1.33 + * 1.34 + * 3. Redistribution in binary form must reproduce the Copyright Notice, 1.35 + * this License Terms and Conditions, in the documentation and/or other 1.36 + * materials provided with the distribution. For the purposes of binary 1.37 + * distribution the "Copyright Notice" refers to the following language: 1.38 + * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 1.39 + * 1.40 + * 4. The name of JPNIC may not be used to endorse or promote products 1.41 + * derived from this Software without specific prior written approval of 1.42 + * JPNIC. 1.43 + * 1.44 + * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 1.45 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.46 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 1.47 + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 1.48 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 1.49 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 1.50 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 1.51 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 1.52 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 1.53 + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 1.54 + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 1.55 + */ 1.56 + 1.57 +#include <string.h> 1.58 + 1.59 +#include "nsMemory.h" 1.60 +#include "nsUnicodeNormalizer.h" 1.61 +#include "nsString.h" 1.62 + 1.63 +NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer) 1.64 + 1.65 + 1.66 +nsUnicodeNormalizer::nsUnicodeNormalizer() 1.67 +{ 1.68 +} 1.69 + 1.70 +nsUnicodeNormalizer::~nsUnicodeNormalizer() 1.71 +{ 1.72 +} 1.73 + 1.74 + 1.75 + 1.76 +#define END_BIT 0x80000000 1.77 + 1.78 + 1.79 +/* 1.80 + * Some constants for Hangul decomposition/composition. 1.81 + * These things were taken from unicode book. 1.82 + */ 1.83 +#define SBase 0xac00 1.84 +#define LBase 0x1100 1.85 +#define VBase 0x1161 1.86 +#define TBase 0x11a7 1.87 +#define LCount 19 1.88 +#define VCount 21 1.89 +#define TCount 28 1.90 +#define SLast (SBase + LCount * VCount * TCount) 1.91 + 1.92 +struct composition { 1.93 + uint32_t c2; /* 2nd character */ 1.94 + uint32_t comp; /* composed character */ 1.95 +}; 1.96 + 1.97 + 1.98 +#include "normalization_data.h" 1.99 + 1.100 +/* 1.101 + * Macro for multi-level index table. 1.102 + */ 1.103 +#define LOOKUPTBL(vprefix, mprefix, v) \ 1.104 + DMAP(vprefix)[\ 1.105 + IMAP(vprefix)[\ 1.106 + IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\ 1.107 + ]\ 1.108 + ].tbl[IDX2(mprefix, v)] 1.109 + 1.110 +#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix)) 1.111 +#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix)) 1.112 +#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix)) 1.113 + 1.114 +#define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2))) 1.115 +#define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1)) 1.116 +#define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1)) 1.117 + 1.118 +#define BITS1(mprefix) mprefix ## _BITS_1 1.119 +#define BITS2(mprefix) mprefix ## _BITS_2 1.120 + 1.121 +#define IMAP(vprefix) vprefix ## _imap 1.122 +#define DMAP(vprefix) vprefix ## _table 1.123 +#define SEQ(vprefix) vprefix ## _seq 1.124 + 1.125 +static int32_t 1.126 +canonclass(uint32_t c) { 1.127 + /* Look up canonicalclass table. */ 1.128 + return (LOOKUPTBL(canon_class, CANON_CLASS, c)); 1.129 +} 1.130 + 1.131 +static int32_t 1.132 +decompose_char(uint32_t c, const uint32_t **seqp) 1.133 +{ 1.134 + /* Look up decomposition table. */ 1.135 + int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c); 1.136 + *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT); 1.137 + return (seqidx); 1.138 +} 1.139 + 1.140 +static int32_t 1.141 +compose_char(uint32_t c, 1.142 + const struct composition **compp) 1.143 +{ 1.144 + /* Look up composition table. */ 1.145 + int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c); 1.146 + *compp = SEQ(compose) + (seqidx & 0xffff); 1.147 + return (seqidx >> 16); 1.148 +} 1.149 + 1.150 +static nsresult 1.151 +mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen, 1.152 + uint32_t c, int32_t *decomp_lenp) 1.153 +{ 1.154 + uint32_t *vorg = v; 1.155 + int32_t seqidx; 1.156 + const uint32_t *seq; 1.157 + 1.158 + //assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr); 1.159 + 1.160 + /* 1.161 + * First, check for Hangul. 1.162 + */ 1.163 + if (SBase <= c && c < SLast) { 1.164 + int32_t idx, t_offset, v_offset, l_offset; 1.165 + 1.166 + idx = c - SBase; 1.167 + t_offset = idx % TCount; 1.168 + idx /= TCount; 1.169 + v_offset = idx % VCount; 1.170 + l_offset = idx / VCount; 1.171 + if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3)) 1.172 + return (NS_ERROR_UNORM_MOREOUTPUT); 1.173 + *v++ = LBase + l_offset; 1.174 + *v++ = VBase + v_offset; 1.175 + if (t_offset > 0) 1.176 + *v++ = TBase + t_offset; 1.177 + *decomp_lenp = v - vorg; 1.178 + return (NS_OK); 1.179 + } 1.180 + 1.181 + /* 1.182 + * Look up decomposition table. If no decomposition is defined 1.183 + * or if it is a compatibility decomosition when canonical 1.184 + * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'. 1.185 + */ 1.186 + seqidx = decompose_char(c, &seq); 1.187 + if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0)) 1.188 + return (NS_SUCCESS_UNORM_NOTFOUND); 1.189 + 1.190 + /* 1.191 + * Copy the decomposed sequence. The end of the sequence are 1.192 + * marked with END_BIT. 1.193 + */ 1.194 + do { 1.195 + uint32_t c; 1.196 + int32_t dlen; 1.197 + nsresult r; 1.198 + 1.199 + c = *seq & ~END_BIT; 1.200 + 1.201 + /* Decompose recursively. */ 1.202 + r = mdn__unicode_decompose(compat, v, vlen, c, &dlen); 1.203 + if (r == NS_OK) { 1.204 + v += dlen; 1.205 + vlen -= dlen; 1.206 + } else if (r == NS_SUCCESS_UNORM_NOTFOUND) { 1.207 + if (vlen < 1) 1.208 + return (NS_ERROR_UNORM_MOREOUTPUT); 1.209 + *v++ = c; 1.210 + vlen--; 1.211 + } else { 1.212 + return (r); 1.213 + } 1.214 + 1.215 + } while ((*seq++ & END_BIT) == 0); 1.216 + 1.217 + *decomp_lenp = v - vorg; 1.218 + 1.219 + return (NS_OK); 1.220 +} 1.221 + 1.222 +static int32_t 1.223 +mdn__unicode_iscompositecandidate(uint32_t c) 1.224 +{ 1.225 + const struct composition *dummy; 1.226 + 1.227 + /* Check for Hangul */ 1.228 + if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast)) 1.229 + return (1); 1.230 + 1.231 + /* 1.232 + * Look up composition table. If there are no composition 1.233 + * that begins with the given character, it is not a 1.234 + * composition candidate. 1.235 + */ 1.236 + if (compose_char(c, &dummy) == 0) 1.237 + return (0); 1.238 + else 1.239 + return (1); 1.240 +} 1.241 + 1.242 +static nsresult 1.243 +mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp) 1.244 +{ 1.245 + int32_t n; 1.246 + int32_t lo, hi; 1.247 + const struct composition *cseq; 1.248 + 1.249 + //assert(compp != nullptr); 1.250 + 1.251 + /* 1.252 + * Check for Hangul. 1.253 + */ 1.254 + if (LBase <= c1 && c1 < LBase + LCount && 1.255 + VBase <= c2 && c2 < VBase + VCount) { 1.256 + /* 1.257 + * Hangul L and V. 1.258 + */ 1.259 + *compp = SBase + 1.260 + ((c1 - LBase) * VCount + (c2 - VBase)) * TCount; 1.261 + return (NS_OK); 1.262 + } else if (SBase <= c1 && c1 < SLast && 1.263 + TBase <= c2 && c2 < TBase + TCount && 1.264 + (c1 - SBase) % TCount == 0) { 1.265 + /* 1.266 + * Hangul LV and T. 1.267 + */ 1.268 + *compp = c1 + (c2 - TBase); 1.269 + return (NS_OK); 1.270 + } 1.271 + 1.272 + /* 1.273 + * Look up composition table. If the result is 0, no composition 1.274 + * is defined. Otherwise, upper 16bits of the result contains 1.275 + * the number of composition that begins with 'c1', and the lower 1.276 + * 16bits is the offset in 'compose_seq'. 1.277 + */ 1.278 + if ((n = compose_char(c1, &cseq)) == 0) 1.279 + return (NS_SUCCESS_UNORM_NOTFOUND); 1.280 + 1.281 + /* 1.282 + * The composite sequences are sorted by the 2nd character 'c2'. 1.283 + * So we can use binary search. 1.284 + */ 1.285 + lo = 0; 1.286 + hi = n - 1; 1.287 + while (lo <= hi) { 1.288 + int32_t mid = (lo + hi) / 2; 1.289 + 1.290 + if (cseq[mid].c2 < c2) { 1.291 + lo = mid + 1; 1.292 + } else if (cseq[mid].c2 > c2) { 1.293 + hi = mid - 1; 1.294 + } else { 1.295 + *compp = cseq[mid].comp; 1.296 + return (NS_OK); 1.297 + } 1.298 + } 1.299 + return (NS_SUCCESS_UNORM_NOTFOUND); 1.300 +} 1.301 + 1.302 + 1.303 +#define WORKBUF_SIZE 128 1.304 +#define WORKBUF_SIZE_MAX 10000 1.305 + 1.306 +typedef struct { 1.307 + int32_t cur; /* pointing now processing character */ 1.308 + int32_t last; /* pointing just after the last character */ 1.309 + int32_t size; /* size of UCS and CLASS array */ 1.310 + uint32_t *ucs; /* UCS-4 characters */ 1.311 + int32_t *cclass; /* and their canonical classes */ 1.312 + uint32_t ucs_buf[WORKBUF_SIZE]; /* local buffer */ 1.313 + int32_t class_buf[WORKBUF_SIZE]; /* ditto */ 1.314 +} workbuf_t; 1.315 + 1.316 +static nsresult decompose(workbuf_t *wb, uint32_t c, int32_t compat); 1.317 +static void get_class(workbuf_t *wb); 1.318 +static void reorder(workbuf_t *wb); 1.319 +static void compose(workbuf_t *wb); 1.320 +static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr); 1.321 +static void workbuf_init(workbuf_t *wb); 1.322 +static void workbuf_free(workbuf_t *wb); 1.323 +static nsresult workbuf_extend(workbuf_t *wb); 1.324 +static nsresult workbuf_append(workbuf_t *wb, uint32_t c); 1.325 +static void workbuf_shift(workbuf_t *wb, int32_t shift); 1.326 +static void workbuf_removevoid(workbuf_t *wb); 1.327 + 1.328 + 1.329 +static nsresult 1.330 +mdn_normalize(bool do_composition, bool compat, 1.331 + const nsAString& aSrcStr, nsAString& aToStr) 1.332 +{ 1.333 + workbuf_t wb; 1.334 + nsresult r = NS_OK; 1.335 + /* 1.336 + * Initialize working buffer. 1.337 + */ 1.338 + workbuf_init(&wb); 1.339 + 1.340 + nsAString::const_iterator start, end; 1.341 + aSrcStr.BeginReading(start); 1.342 + aSrcStr.EndReading(end); 1.343 + 1.344 + while (start != end) { 1.345 + uint32_t c; 1.346 + char16_t curChar; 1.347 + 1.348 + //assert(wb.cur == wb.last); 1.349 + 1.350 + /* 1.351 + * Get one character from 'from'. 1.352 + */ 1.353 + curChar= *start++; 1.354 + 1.355 + if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) { 1.356 + c = SURROGATE_TO_UCS4(curChar, *start); 1.357 + ++start; 1.358 + } else { 1.359 + c = curChar; 1.360 + } 1.361 + 1.362 + /* 1.363 + * Decompose it. 1.364 + */ 1.365 + if ((r = decompose(&wb, c, compat)) != NS_OK) 1.366 + break; 1.367 + 1.368 + /* 1.369 + * Get canonical class. 1.370 + */ 1.371 + get_class(&wb); 1.372 + 1.373 + /* 1.374 + * Reorder & compose. 1.375 + */ 1.376 + for (; wb.cur < wb.last; wb.cur++) { 1.377 + if (wb.cur == 0) { 1.378 + continue; 1.379 + } else if (wb.cclass[wb.cur] > 0) { 1.380 + /* 1.381 + * This is not a starter. Try reordering. 1.382 + * Note that characters up to it are 1.383 + * already in canonical order. 1.384 + */ 1.385 + reorder(&wb); 1.386 + continue; 1.387 + } 1.388 + 1.389 + /* 1.390 + * This is a starter character, and there are 1.391 + * some characters before it. Those characters 1.392 + * have been reordered properly, and 1.393 + * ready for composition. 1.394 + */ 1.395 + if (do_composition && wb.cclass[0] == 0) 1.396 + compose(&wb); 1.397 + 1.398 + /* 1.399 + * If CUR points to a starter character, 1.400 + * then process of characters before CUR are 1.401 + * already finished, because any further 1.402 + * reordering/composition for them are blocked 1.403 + * by the starter CUR points. 1.404 + */ 1.405 + if (wb.cur > 0 && wb.cclass[wb.cur] == 0) { 1.406 + /* Flush everything before CUR. */ 1.407 + r = flush_before_cur(&wb, aToStr); 1.408 + if (r != NS_OK) 1.409 + break; 1.410 + } 1.411 + } 1.412 + } 1.413 + 1.414 + if (r == NS_OK) { 1.415 + if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) { 1.416 + /* 1.417 + * There is some characters left in WB. 1.418 + * They are ordered, but not composed yet. 1.419 + * Now CUR points just after the last character in WB, 1.420 + * and since compose() tries to compose characters 1.421 + * between top and CUR inclusive, we must make CUR 1.422 + * one character back during compose(). 1.423 + */ 1.424 + wb.cur--; 1.425 + compose(&wb); 1.426 + wb.cur++; 1.427 + } 1.428 + /* 1.429 + * Call this even when WB.CUR == 0, to make TO 1.430 + * NUL-terminated. 1.431 + */ 1.432 + r = flush_before_cur(&wb, aToStr); 1.433 + } 1.434 + 1.435 + workbuf_free(&wb); 1.436 + 1.437 + return (r); 1.438 +} 1.439 + 1.440 +static nsresult 1.441 +decompose(workbuf_t *wb, uint32_t c, int32_t compat) { 1.442 + nsresult r; 1.443 + int32_t dec_len; 1.444 + 1.445 +again: 1.446 + r = mdn__unicode_decompose(compat, wb->ucs + wb->last, 1.447 + wb->size - wb->last, c, &dec_len); 1.448 + switch (r) { 1.449 + case NS_OK: 1.450 + wb->last += dec_len; 1.451 + return (NS_OK); 1.452 + case NS_SUCCESS_UNORM_NOTFOUND: 1.453 + return (workbuf_append(wb, c)); 1.454 + case NS_ERROR_UNORM_MOREOUTPUT: 1.455 + if ((r = workbuf_extend(wb)) != NS_OK) 1.456 + return (r); 1.457 + if (wb->size > WORKBUF_SIZE_MAX) { 1.458 + // "mdn__unormalize_form*: " "working buffer too large\n" 1.459 + return (NS_ERROR_FAILURE); 1.460 + } 1.461 + goto again; 1.462 + default: 1.463 + return (r); 1.464 + } 1.465 + /* NOTREACHED */ 1.466 +} 1.467 + 1.468 +static void 1.469 +get_class(workbuf_t *wb) { 1.470 + int32_t i; 1.471 + 1.472 + for (i = wb->cur; i < wb->last; i++) 1.473 + wb->cclass[i] = canonclass(wb->ucs[i]); 1.474 +} 1.475 + 1.476 +static void 1.477 +reorder(workbuf_t *wb) { 1.478 + uint32_t c; 1.479 + int32_t i; 1.480 + int32_t cclass; 1.481 + 1.482 + //assert(wb != nullptr); 1.483 + 1.484 + i = wb->cur; 1.485 + c = wb->ucs[i]; 1.486 + cclass = wb->cclass[i]; 1.487 + 1.488 + while (i > 0 && wb->cclass[i - 1] > cclass) { 1.489 + wb->ucs[i] = wb->ucs[i - 1]; 1.490 + wb->cclass[i] =wb->cclass[i - 1]; 1.491 + i--; 1.492 + wb->ucs[i] = c; 1.493 + wb->cclass[i] = cclass; 1.494 + } 1.495 +} 1.496 + 1.497 +static void 1.498 +compose(workbuf_t *wb) { 1.499 + int32_t cur; 1.500 + uint32_t *ucs; 1.501 + int32_t *cclass; 1.502 + int32_t last_class; 1.503 + int32_t nvoids; 1.504 + int32_t i; 1.505 + 1.506 + //assert(wb != nullptr && wb->cclass[0] == 0); 1.507 + 1.508 + cur = wb->cur; 1.509 + ucs = wb->ucs; 1.510 + cclass = wb->cclass; 1.511 + 1.512 + /* 1.513 + * If there are no decomposition sequence that begins with 1.514 + * the top character, composition is impossible. 1.515 + */ 1.516 + if (!mdn__unicode_iscompositecandidate(ucs[0])) 1.517 + return; 1.518 + 1.519 + last_class = 0; 1.520 + nvoids = 0; 1.521 + for (i = 1; i <= cur; i++) { 1.522 + uint32_t c; 1.523 + int32_t cl = cclass[i]; 1.524 + 1.525 + if ((last_class < cl || cl == 0) && 1.526 + mdn__unicode_compose(ucs[0], ucs[i], 1.527 + &c) == NS_OK) { 1.528 + /* 1.529 + * Replace the top character with the composed one. 1.530 + */ 1.531 + ucs[0] = c; 1.532 + cclass[0] = canonclass(c); 1.533 + 1.534 + cclass[i] = -1; /* void this character */ 1.535 + nvoids++; 1.536 + } else { 1.537 + last_class = cl; 1.538 + } 1.539 + } 1.540 + 1.541 + /* Purge void characters, if any. */ 1.542 + if (nvoids > 0) 1.543 + workbuf_removevoid(wb); 1.544 +} 1.545 + 1.546 +static nsresult 1.547 +flush_before_cur(workbuf_t *wb, nsAString& aToStr) 1.548 +{ 1.549 + int32_t i; 1.550 + 1.551 + for (i = 0; i < wb->cur; i++) { 1.552 + if (!IS_IN_BMP(wb->ucs[i])) { 1.553 + aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i])); 1.554 + aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i])); 1.555 + } else { 1.556 + aToStr.Append((char16_t)(wb->ucs[i])); 1.557 + } 1.558 + } 1.559 + 1.560 + workbuf_shift(wb, wb->cur); 1.561 + 1.562 + return (NS_OK); 1.563 +} 1.564 + 1.565 +static void 1.566 +workbuf_init(workbuf_t *wb) { 1.567 + wb->cur = 0; 1.568 + wb->last = 0; 1.569 + wb->size = WORKBUF_SIZE; 1.570 + wb->ucs = wb->ucs_buf; 1.571 + wb->cclass = wb->class_buf; 1.572 +} 1.573 + 1.574 +static void 1.575 +workbuf_free(workbuf_t *wb) { 1.576 + if (wb->ucs != wb->ucs_buf) { 1.577 + nsMemory::Free(wb->ucs); 1.578 + nsMemory::Free(wb->cclass); 1.579 + } 1.580 +} 1.581 + 1.582 +static nsresult 1.583 +workbuf_extend(workbuf_t *wb) { 1.584 + int32_t newsize = wb->size * 3; 1.585 + 1.586 + if (wb->ucs == wb->ucs_buf) { 1.587 + wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize); 1.588 + if (!wb->ucs) 1.589 + return NS_ERROR_OUT_OF_MEMORY; 1.590 + wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize); 1.591 + if (!wb->cclass) { 1.592 + nsMemory::Free(wb->ucs); 1.593 + wb->ucs = nullptr; 1.594 + return NS_ERROR_OUT_OF_MEMORY; 1.595 + } 1.596 + } else { 1.597 + void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize); 1.598 + if (!buf) 1.599 + return NS_ERROR_OUT_OF_MEMORY; 1.600 + wb->ucs = (uint32_t*)buf; 1.601 + buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize); 1.602 + if (!buf) 1.603 + return NS_ERROR_OUT_OF_MEMORY; 1.604 + wb->cclass = (int32_t*)buf; 1.605 + } 1.606 + return (NS_OK); 1.607 +} 1.608 + 1.609 +static nsresult 1.610 +workbuf_append(workbuf_t *wb, uint32_t c) { 1.611 + nsresult r; 1.612 + 1.613 + if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK) 1.614 + return (r); 1.615 + wb->ucs[wb->last++] = c; 1.616 + return (NS_OK); 1.617 +} 1.618 + 1.619 +static void 1.620 +workbuf_shift(workbuf_t *wb, int32_t shift) { 1.621 + int32_t nmove; 1.622 + 1.623 + //assert(wb != nullptr && wb->cur >= shift); 1.624 + 1.625 + nmove = wb->last - shift; 1.626 + memmove(&wb->ucs[0], &wb->ucs[shift], 1.627 + nmove * sizeof(wb->ucs[0])); 1.628 + memmove(&wb->cclass[0], &wb->cclass[shift], 1.629 + nmove * sizeof(wb->cclass[0])); 1.630 + wb->cur -= shift; 1.631 + wb->last -= shift; 1.632 +} 1.633 + 1.634 +static void 1.635 +workbuf_removevoid(workbuf_t *wb) { 1.636 + int32_t i, j; 1.637 + int32_t last = wb->last; 1.638 + 1.639 + for (i = j = 0; i < last; i++) { 1.640 + if (wb->cclass[i] >= 0) { 1.641 + if (j < i) { 1.642 + wb->ucs[j] = wb->ucs[i]; 1.643 + wb->cclass[j] = wb->cclass[i]; 1.644 + } 1.645 + j++; 1.646 + } 1.647 + } 1.648 + wb->cur -= last - j; 1.649 + wb->last = j; 1.650 +} 1.651 + 1.652 +nsresult 1.653 +nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest) 1.654 +{ 1.655 + return mdn_normalize(false, false, aSrc, aDest); 1.656 +} 1.657 + 1.658 +nsresult 1.659 +nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest) 1.660 +{ 1.661 + return mdn_normalize(true, false, aSrc, aDest); 1.662 +} 1.663 + 1.664 +nsresult 1.665 +nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest) 1.666 +{ 1.667 + return mdn_normalize(false, true, aSrc, aDest); 1.668 +} 1.669 + 1.670 +nsresult 1.671 +nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest) 1.672 +{ 1.673 + return mdn_normalize(true, true, aSrc, aDest); 1.674 +} 1.675 + 1.676 +bool 1.677 +nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab) 1.678 +{ 1.679 + return mdn__unicode_compose(a, b, ab) == NS_OK; 1.680 +} 1.681 + 1.682 +bool 1.683 +nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2) 1.684 +{ 1.685 + // We can't use mdn__unicode_decompose here, because that does a recursive 1.686 + // decomposition that may yield more than two characters, but the harfbuzz 1.687 + // callback wants just a single-step decomp that is guaranteed to produce 1.688 + // no more than two characters. So we do a low-level lookup in the table 1.689 + // of decomp sequences. 1.690 + const uint32_t *seq; 1.691 + uint32_t seqidx = decompose_char(c, &seq); 1.692 + if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) { 1.693 + return false; 1.694 + } 1.695 + *c1 = *seq & ~END_BIT; 1.696 + if (*seq & END_BIT) { 1.697 + *c2 = 0; 1.698 + } else { 1.699 + *c2 = *++seq & ~END_BIT; 1.700 + } 1.701 + return true; 1.702 +}