The Tor Browser: intl/unicharutil/src/nsUnicodeNormalizer.cpp@fc2d59ddac77

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

     3 /* This file is modified from JPNIC's mDNKit, it is under both MPL and

     4  * JPNIC's license.

     5  */

     7 /* This Source Code Form is subject to the terms of the Mozilla Public

     8  * License, v. 2.0. If a copy of the MPL was not distributed with this

     9  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

    11 /*

    12  * Copyright (c) 2000,2002 Japan Network Information Center.

    13  * All rights reserved.

    14  *

    15  * By using this file, you agree to the terms and conditions set forth bellow.

    16  *

    17  * 			LICENSE TERMS AND CONDITIONS

    18  *

    19  * The following License Terms and Conditions apply, unless a different

    20  * license is obtained from Japan Network Information Center ("JPNIC"),

    21  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,

    22  * Chiyoda-ku, Tokyo 101-0047, Japan.

    23  *

    24  * 1. Use, Modification and Redistribution (including distribution of any

    25  *    modified or derived work) in source and/or binary forms is permitted

    26  *    under this License Terms and Conditions.

    27  *

    28  * 2. Redistribution of source code must retain the copyright notices as they

    29  *    appear in each source code file, this License Terms and Conditions.

    30  *

    31  * 3. Redistribution in binary form must reproduce the Copyright Notice,

    32  *    this License Terms and Conditions, in the documentation and/or other

    33  *    materials provided with the distribution.  For the purposes of binary

    34  *    distribution the "Copyright Notice" refers to the following language:

    35  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."

    36  *

    37  * 4. The name of JPNIC may not be used to endorse or promote products

    38  *    derived from this Software without specific prior written approval of

    39  *    JPNIC.

    40  *

    41  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC

    42  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

    43  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A

    44  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE

    45  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

    46  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

    47  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR

    48  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

    49  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

    50  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF

    51  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

    52  */

    54 #include <string.h>

    56 #include "nsMemory.h"

    57 #include "nsUnicodeNormalizer.h"

    58 #include "nsString.h"

    60 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)

    63 nsUnicodeNormalizer::nsUnicodeNormalizer()

    64 {

    65 }

    67 nsUnicodeNormalizer::~nsUnicodeNormalizer()

    68 {

    69 }

    73 #define END_BIT		0x80000000

    76 /*

    77  * Some constants for Hangul decomposition/composition.

    78  * These things were taken from unicode book.

    79  */

    80 #define SBase		0xac00

    81 #define LBase		0x1100

    82 #define VBase		0x1161

    83 #define TBase		0x11a7

    84 #define LCount		19

    85 #define VCount		21

    86 #define TCount		28

    87 #define SLast		(SBase + LCount * VCount * TCount)

    89 struct composition {

    90 	uint32_t c2;	/* 2nd character */

    91 	uint32_t comp;	/* composed character */

    92 };

    95 #include "normalization_data.h"

    97 /*

    98  * Macro for multi-level index table.

    99  */

   100 #define LOOKUPTBL(vprefix, mprefix, v) \

   101 	DMAP(vprefix)[\

   102 		IMAP(vprefix)[\

   103 			IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\

   104 		]\

   105 	].tbl[IDX2(mprefix, v)]

   107 #define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))

   108 #define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))

   109 #define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))

   111 #define IDX_0(v, bits1, bits2)	((v) >> ((bits1) + (bits2)))

   112 #define IDX_1(v, bits1, bits2)	(((v) >> (bits2)) & ((1 << (bits1)) - 1))

   113 #define IDX_2(v, bits1, bits2)	((v) & ((1 << (bits2)) - 1))

   115 #define BITS1(mprefix)	mprefix ## _BITS_1

   116 #define BITS2(mprefix)	mprefix ## _BITS_2

   118 #define IMAP(vprefix)	vprefix ## _imap

   119 #define DMAP(vprefix)	vprefix ## _table

   120 #define SEQ(vprefix)	vprefix ## _seq

   122 static int32_t

   123 canonclass(uint32_t c) {

   124 	/* Look up canonicalclass table. */

   125 	return (LOOKUPTBL(canon_class, CANON_CLASS, c));

   126 }

   128 static int32_t

   129 decompose_char(uint32_t c, const uint32_t **seqp)

   130 {

   131 	/* Look up decomposition table. */

   132 	int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c);

   133 	*seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);

   134 	return (seqidx);

   135 }

   137 static int32_t

   138 compose_char(uint32_t c,

   139 				const struct composition **compp)

   140 {

   141 	/* Look up composition table. */

   142 	int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);

   143 	*compp = SEQ(compose) + (seqidx & 0xffff);

   144 	return (seqidx >> 16);

   145 }

   147 static nsresult

   148 mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen,

   149 		       uint32_t c, int32_t *decomp_lenp)

   150 {

   151 	uint32_t *vorg = v;

   152 	int32_t seqidx;

   153 	const uint32_t *seq;

   155 	//assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr);

   157 	/*

   158 	 * First, check for Hangul.

   159 	 */

   160 	if (SBase <= c && c < SLast) {

   161 		int32_t idx, t_offset, v_offset, l_offset;

   163 		idx = c - SBase;

   164 		t_offset = idx % TCount;

   165 		idx /= TCount;

   166 		v_offset = idx % VCount;

   167 		l_offset = idx / VCount;

   168 		if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))

   169 			return (NS_ERROR_UNORM_MOREOUTPUT);

   170 		*v++ = LBase + l_offset;

   171 		*v++ = VBase + v_offset;

   172 		if (t_offset > 0)

   173 			*v++ = TBase + t_offset;

   174 		*decomp_lenp = v - vorg;

   175 		return (NS_OK);

   176 	}

   178 	/*

   179 	 * Look up decomposition table.  If no decomposition is defined

   180 	 * or if it is a compatibility decomosition when canonical

   181 	 * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.

   182 	 */

   183 	seqidx = decompose_char(c, &seq);

   184 	if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))

   185 		return (NS_SUCCESS_UNORM_NOTFOUND);

   187 	/*

   188 	 * Copy the decomposed sequence.  The end of the sequence are

   189 	 * marked with END_BIT.

   190 	 */

   191 	do {

   192 		uint32_t c;

   193 		int32_t dlen;

   194 		nsresult r;

   196 		c = *seq & ~END_BIT;

   198 		/* Decompose recursively. */

   199 		r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);

   200 		if (r == NS_OK) {

   201 			v += dlen;

   202 			vlen -= dlen;

   203 		} else if (r == NS_SUCCESS_UNORM_NOTFOUND) {

   204 			if (vlen < 1)

   205 				return (NS_ERROR_UNORM_MOREOUTPUT);

   206 			*v++ = c;

   207 			vlen--;

   208 		} else {

   209 			return (r);

   210 		}

   212 	} while ((*seq++ & END_BIT) == 0);

   214 	*decomp_lenp = v - vorg;

   216 	return (NS_OK);

   217 }

   219 static int32_t

   220 mdn__unicode_iscompositecandidate(uint32_t c)

   221 {

   222 	const struct composition *dummy;

   224 	/* Check for Hangul */

   225 	if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))

   226 		return (1);

   228 	/*

   229 	 * Look up composition table.  If there are no composition

   230 	 * that begins with the given character, it is not a

   231 	 * composition candidate.

   232 	 */

   233 	if (compose_char(c, &dummy) == 0)

   234 		return (0);

   235 	else

   236 		return (1);

   237 }

   239 static nsresult

   240 mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp)

   241 {

   242 	int32_t n;

   243 	int32_t lo, hi;

   244 	const struct composition *cseq;

   246 	//assert(compp != nullptr);

   248 	/*

   249 	 * Check for Hangul.

   250 	 */

   251 	if (LBase <= c1 && c1 < LBase + LCount &&

   252 	    VBase <= c2 && c2 < VBase + VCount) {

   253 		/*

   254 		 * Hangul L and V.

   255 		 */

   256 		*compp = SBase +

   257 			((c1 - LBase) * VCount + (c2 - VBase)) * TCount;

   258 		return (NS_OK);

   259 	} else if (SBase <= c1 && c1 < SLast &&

   260 		   TBase <= c2 && c2 < TBase + TCount &&

   261 		   (c1 - SBase) % TCount == 0) {

   262 		/*

   263 		 * Hangul LV and T.

   264 		 */

   265 		*compp = c1 + (c2 - TBase);

   266 		return (NS_OK);

   267 	}

   269 	/*

   270 	 * Look up composition table.  If the result is 0, no composition

   271 	 * is defined.  Otherwise, upper 16bits of the result contains

   272 	 * the number of composition that begins with 'c1', and the lower

   273 	 * 16bits is the offset in 'compose_seq'.

   274 	 */

   275 	if ((n = compose_char(c1, &cseq)) == 0)

   276 		return (NS_SUCCESS_UNORM_NOTFOUND);

   278 	/*

   279 	 * The composite sequences are sorted by the 2nd character 'c2'.

   280 	 * So we can use binary search.

   281 	 */

   282 	lo = 0;

   283 	hi = n - 1;

   284 	while (lo <= hi) {

   285 		int32_t mid = (lo + hi) / 2;

   287 		if (cseq[mid].c2 < c2) {

   288 			lo = mid + 1;

   289 		} else if (cseq[mid].c2 > c2) {

   290 			hi = mid - 1;

   291 		} else {

   292 			*compp = cseq[mid].comp;

   293 			return (NS_OK);

   294 		}

   295 	}

   296 	return (NS_SUCCESS_UNORM_NOTFOUND);

   297 }

   300 #define WORKBUF_SIZE		128

   301 #define WORKBUF_SIZE_MAX	10000

   303 typedef struct {

   304 	int32_t cur;		/* pointing now processing character */

   305 	int32_t last;		/* pointing just after the last character */

   306 	int32_t size;		/* size of UCS and CLASS array */

   307 	uint32_t *ucs;	/* UCS-4 characters */

   308 	int32_t *cclass;		/* and their canonical classes */

   309 	uint32_t ucs_buf[WORKBUF_SIZE];	/* local buffer */

   310 	int32_t class_buf[WORKBUF_SIZE];		/* ditto */

   311 } workbuf_t;

   313 static nsresult	decompose(workbuf_t *wb, uint32_t c, int32_t compat);

   314 static void		get_class(workbuf_t *wb);

   315 static void		reorder(workbuf_t *wb);

   316 static void		compose(workbuf_t *wb);

   317 static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);

   318 static void		workbuf_init(workbuf_t *wb);

   319 static void		workbuf_free(workbuf_t *wb);

   320 static nsresult	workbuf_extend(workbuf_t *wb);

   321 static nsresult	workbuf_append(workbuf_t *wb, uint32_t c);

   322 static void		workbuf_shift(workbuf_t *wb, int32_t shift);

   323 static void		workbuf_removevoid(workbuf_t *wb);

   326 static nsresult

   327 mdn_normalize(bool do_composition, bool compat,

   328 	  const nsAString& aSrcStr, nsAString& aToStr)

   329 {

   330 	workbuf_t wb;

   331 	nsresult r = NS_OK;

   332 	/*

   333 	 * Initialize working buffer.

   334 	 */

   335 	workbuf_init(&wb);

   337 	nsAString::const_iterator start, end;

   338 	aSrcStr.BeginReading(start);

   339 	aSrcStr.EndReading(end);

   341 	while (start != end) {

   342 		uint32_t c;

   343 		char16_t curChar;

   345 		//assert(wb.cur == wb.last);

   347 		/*

   348 		 * Get one character from 'from'.

   349 		 */

   350 		curChar= *start++;

   352 		if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {

   353 			c = SURROGATE_TO_UCS4(curChar, *start);

   354 			++start;

   355 		} else {

   356 			c = curChar;

   357 		}

   359 		/*

   360 		 * Decompose it.

   361 		 */

   362 		if ((r = decompose(&wb, c, compat)) != NS_OK)

   363 			break;

   365 		/*

   366 		 * Get canonical class.

   367 		 */

   368 		get_class(&wb);

   370 		/*

   371 		 * Reorder & compose.

   372 		 */

   373 		for (; wb.cur < wb.last; wb.cur++) {

   374 			if (wb.cur == 0) {

   375 				continue;

   376 			} else if (wb.cclass[wb.cur] > 0) {

   377 				/*

   378 				 * This is not a starter. Try reordering.

   379 				 * Note that characters up to it are

   380 				 * already in canonical order.

   381 				 */

   382 				reorder(&wb);

   383 				continue;

   384 			}

   386 			/*

   387 			 * This is a starter character, and there are

   388 			 * some characters before it.  Those characters

   389 			 * have been reordered properly, and

   390 			 * ready for composition.

   391 			 */

   392 			if (do_composition && wb.cclass[0] == 0)

   393 				compose(&wb);

   395 			/*

   396 			 * If CUR points to a starter character,

   397 			 * then process of characters before CUR are

   398 			 * already finished, because any further

   399 			 * reordering/composition for them are blocked

   400 			 * by the starter CUR points.

   401 			 */

   402 			if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {

   403 				/* Flush everything before CUR. */

   404 				r = flush_before_cur(&wb, aToStr);

   405 				if (r != NS_OK)

   406 					break;

   407 			}

   408 		}

   409 	}

   411 	if (r == NS_OK) {

   412 		if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {

   413 			/*

   414 			 * There is some characters left in WB.

   415 			 * They are ordered, but not composed yet.

   416 			 * Now CUR points just after the last character in WB,

   417 			 * and since compose() tries to compose characters

   418 			 * between top and CUR inclusive, we must make CUR

   419 			 * one character back during compose().

   420 			 */

   421 			wb.cur--;

   422 			compose(&wb);

   423 			wb.cur++;

   424 		}

   425 		/*

   426 		 * Call this even when WB.CUR == 0, to make TO

   427 		 * NUL-terminated.

   428 		 */

   429 		r = flush_before_cur(&wb, aToStr);

   430 	}

   432 	workbuf_free(&wb);

   434 	return (r);

   435 }

   437 static nsresult

   438 decompose(workbuf_t *wb, uint32_t c, int32_t compat) {

   439 	nsresult r;

   440 	int32_t dec_len;

   442 again:

   443 	r = mdn__unicode_decompose(compat, wb->ucs + wb->last,

   444 				   wb->size - wb->last, c, &dec_len);

   445 	switch (r) {

   446 	case NS_OK:

   447 		wb->last += dec_len;

   448 		return (NS_OK);

   449 	case NS_SUCCESS_UNORM_NOTFOUND:

   450 		return (workbuf_append(wb, c));

   451 	case NS_ERROR_UNORM_MOREOUTPUT:

   452 		if ((r = workbuf_extend(wb)) != NS_OK)

   453 			return (r);

   454 		if (wb->size > WORKBUF_SIZE_MAX) {

   455 			// "mdn__unormalize_form*: " "working buffer too large\n"

   456 			return (NS_ERROR_FAILURE);

   457 		}

   458 		goto again;

   459 	default:

   460 		return (r);

   461 	}

   462 	/* NOTREACHED */

   463 }

   465 static void

   466 get_class(workbuf_t *wb) {

   467 	int32_t i;

   469 	for (i = wb->cur; i < wb->last; i++)

   470 		wb->cclass[i] = canonclass(wb->ucs[i]);

   471 }

   473 static void

   474 reorder(workbuf_t *wb) {

   475 	uint32_t c;

   476 	int32_t i;

   477 	int32_t cclass;

   479 	//assert(wb != nullptr);

   481 	i = wb->cur;

   482 	c = wb->ucs[i];

   483 	cclass = wb->cclass[i];

   485 	while (i > 0 && wb->cclass[i - 1] > cclass) {

   486 		wb->ucs[i] = wb->ucs[i - 1];

   487 		wb->cclass[i] =wb->cclass[i - 1];

   488 		i--;

   489 		wb->ucs[i] = c;

   490 		wb->cclass[i] = cclass;

   491 	}

   492 }

   494 static void

   495 compose(workbuf_t *wb) {

   496 	int32_t cur;

   497 	uint32_t *ucs;

   498 	int32_t *cclass;

   499 	int32_t last_class;

   500 	int32_t nvoids;

   501 	int32_t i;

   503 	//assert(wb != nullptr && wb->cclass[0] == 0);

   505 	cur = wb->cur;

   506 	ucs = wb->ucs;

   507 	cclass = wb->cclass;

   509 	/*

   510 	 * If there are no decomposition sequence that begins with

   511 	 * the top character, composition is impossible.

   512 	 */

   513 	if (!mdn__unicode_iscompositecandidate(ucs[0]))

   514 		return;

   516 	last_class = 0;

   517 	nvoids = 0;

   518 	for (i = 1; i <= cur; i++) {

   519 		uint32_t c;

   520 		int32_t cl = cclass[i];

   522 		if ((last_class < cl || cl == 0) &&

   523 		    mdn__unicode_compose(ucs[0], ucs[i],

   524 					 &c) == NS_OK) {

   525 			/*

   526 			 * Replace the top character with the composed one.

   527 			 */

   528 			ucs[0] = c;

   529 			cclass[0] = canonclass(c);

   531 			cclass[i] = -1;	/* void this character */

   532 			nvoids++;

   533 		} else {

   534 			last_class = cl;

   535 		}

   536 	}

   538 	/* Purge void characters, if any. */

   539 	if (nvoids > 0)

   540 		workbuf_removevoid(wb);

   541 }

   543 static nsresult

   544 flush_before_cur(workbuf_t *wb, nsAString& aToStr)

   545 {

   546 	int32_t i;

   548 	for (i = 0; i < wb->cur; i++) {

   549 		if (!IS_IN_BMP(wb->ucs[i])) {

   550 			aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i]));

   551 			aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i]));

   552 		} else {

   553 			aToStr.Append((char16_t)(wb->ucs[i]));

   554 		}

   555 	}

   557 	workbuf_shift(wb, wb->cur);

   559 	return (NS_OK);

   560 }

   562 static void

   563 workbuf_init(workbuf_t *wb) {

   564 	wb->cur = 0;

   565 	wb->last = 0;

   566 	wb->size = WORKBUF_SIZE;

   567 	wb->ucs = wb->ucs_buf;

   568 	wb->cclass = wb->class_buf;

   569 }

   571 static void

   572 workbuf_free(workbuf_t *wb) {

   573 	if (wb->ucs != wb->ucs_buf) {

   574 		nsMemory::Free(wb->ucs);

   575 		nsMemory::Free(wb->cclass);

   576 	}

   577 }

   579 static nsresult

   580 workbuf_extend(workbuf_t *wb) {

   581 	int32_t newsize = wb->size * 3;

   583 	if (wb->ucs == wb->ucs_buf) {

   584 		wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);

   585 		if (!wb->ucs)

   586 			return NS_ERROR_OUT_OF_MEMORY;

   587 		wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);

   588 		if (!wb->cclass) {

   589 			nsMemory::Free(wb->ucs);

   590 			wb->ucs = nullptr;

   591 			return NS_ERROR_OUT_OF_MEMORY;

   592 		}

   593 	} else {

   594 		void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);

   595 		if (!buf)

   596 			return NS_ERROR_OUT_OF_MEMORY;

   597 		wb->ucs = (uint32_t*)buf;

   598 		buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);

   599 		if (!buf)

   600 			return NS_ERROR_OUT_OF_MEMORY;

   601 		wb->cclass = (int32_t*)buf;

   602 	}

   603 	return (NS_OK);

   604 }

   606 static nsresult

   607 workbuf_append(workbuf_t *wb, uint32_t c) {

   608 	nsresult r;

   610 	if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)

   611 		return (r);

   612 	wb->ucs[wb->last++] = c;

   613 	return (NS_OK);

   614 }

   616 static void

   617 workbuf_shift(workbuf_t *wb, int32_t shift) {

   618 	int32_t nmove;

   620 	//assert(wb != nullptr && wb->cur >= shift);

   622 	nmove = wb->last - shift;

   623 	memmove(&wb->ucs[0], &wb->ucs[shift],

   624 		      nmove * sizeof(wb->ucs[0]));

   625 	memmove(&wb->cclass[0], &wb->cclass[shift],

   626 		      nmove * sizeof(wb->cclass[0]));

   627 	wb->cur -= shift;

   628 	wb->last -= shift;

   629 }

   631 static void

   632 workbuf_removevoid(workbuf_t *wb) {

   633 	int32_t i, j;

   634 	int32_t last = wb->last;

   636 	for (i = j = 0; i < last; i++) {

   637 		if (wb->cclass[i] >= 0) {

   638 			if (j < i) {

   639 				wb->ucs[j] = wb->ucs[i];

   640 				wb->cclass[j] = wb->cclass[i];

   641 			}

   642 			j++;

   643 		}

   644 	}

   645 	wb->cur -= last - j;

   646 	wb->last = j;

   647 }

   649 nsresult

   650 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)

   651 {

   652   return mdn_normalize(false, false, aSrc, aDest);

   653 }

   655 nsresult

   656 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)

   657 {

   658   return mdn_normalize(true, false, aSrc, aDest);

   659 }

   661 nsresult

   662 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)

   663 {

   664   return mdn_normalize(false, true, aSrc, aDest);

   665 }

   667 nsresult

   668 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)

   669 {

   670   return mdn_normalize(true, true, aSrc, aDest);

   671 }

   673 bool

   674 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)

   675 {

   676   return mdn__unicode_compose(a, b, ab) == NS_OK;

   677 }

   679 bool

   680 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)

   681 {

   682   // We can't use mdn__unicode_decompose here, because that does a recursive

   683   // decomposition that may yield more than two characters, but the harfbuzz

   684   // callback wants just a single-step decomp that is guaranteed to produce

   685   // no more than two characters. So we do a low-level lookup in the table

   686   // of decomp sequences.

   687   const uint32_t *seq;

   688   uint32_t seqidx = decompose_char(c, &seq);

   689   if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {

   690     return false;

   691   }

   692   *c1 = *seq & ~END_BIT;

   693   if (*seq & END_BIT) {

   694     *c2 = 0;

   695   } else {

   696     *c2 = *++seq & ~END_BIT;

   697   }

   698   return true;

   699 }

The Tor Browser / file revision

intl/unicharutil/src/nsUnicodeNormalizer.cpp@fc2d59ddac77

intl/unicharutil/src/nsUnicodeNormalizer.cpp