intl/unicharutil/src/nsUnicodeNormalizer.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     3 /* This file is modified from JPNIC's mDNKit, it is under both MPL and 
     4  * JPNIC's license.
     5  */
     7 /* This Source Code Form is subject to the terms of the Mozilla Public
     8  * License, v. 2.0. If a copy of the MPL was not distributed with this
     9  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    11 /*
    12  * Copyright (c) 2000,2002 Japan Network Information Center.
    13  * All rights reserved.
    14  *  
    15  * By using this file, you agree to the terms and conditions set forth bellow.
    16  * 
    17  * 			LICENSE TERMS AND CONDITIONS 
    18  * 
    19  * The following License Terms and Conditions apply, unless a different
    20  * license is obtained from Japan Network Information Center ("JPNIC"),
    21  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
    22  * Chiyoda-ku, Tokyo 101-0047, Japan.
    23  * 
    24  * 1. Use, Modification and Redistribution (including distribution of any
    25  *    modified or derived work) in source and/or binary forms is permitted
    26  *    under this License Terms and Conditions.
    27  * 
    28  * 2. Redistribution of source code must retain the copyright notices as they
    29  *    appear in each source code file, this License Terms and Conditions.
    30  * 
    31  * 3. Redistribution in binary form must reproduce the Copyright Notice,
    32  *    this License Terms and Conditions, in the documentation and/or other
    33  *    materials provided with the distribution.  For the purposes of binary
    34  *    distribution the "Copyright Notice" refers to the following language:
    35  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
    36  * 
    37  * 4. The name of JPNIC may not be used to endorse or promote products
    38  *    derived from this Software without specific prior written approval of
    39  *    JPNIC.
    40  * 
    41  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
    42  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    43  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
    44  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
    45  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    46  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    47  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
    48  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    49  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
    50  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
    51  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
    52  */
    54 #include <string.h>
    56 #include "nsMemory.h"
    57 #include "nsUnicodeNormalizer.h"
    58 #include "nsString.h"
    60 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
    63 nsUnicodeNormalizer::nsUnicodeNormalizer()
    64 {
    65 }
    67 nsUnicodeNormalizer::~nsUnicodeNormalizer()
    68 {
    69 }
    73 #define END_BIT		0x80000000
    76 /*
    77  * Some constants for Hangul decomposition/composition.
    78  * These things were taken from unicode book. 
    79  */
    80 #define SBase		0xac00
    81 #define LBase		0x1100
    82 #define VBase		0x1161
    83 #define TBase		0x11a7
    84 #define LCount		19
    85 #define VCount		21
    86 #define TCount		28
    87 #define SLast		(SBase + LCount * VCount * TCount)
    89 struct composition {
    90 	uint32_t c2;	/* 2nd character */
    91 	uint32_t comp;	/* composed character */
    92 };
    95 #include "normalization_data.h"
    97 /*
    98  * Macro for multi-level index table.
    99  */
   100 #define LOOKUPTBL(vprefix, mprefix, v) \
   101 	DMAP(vprefix)[\
   102 		IMAP(vprefix)[\
   103 			IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\
   104 		]\
   105 	].tbl[IDX2(mprefix, v)]
   107 #define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
   108 #define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
   109 #define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))
   111 #define IDX_0(v, bits1, bits2)	((v) >> ((bits1) + (bits2)))
   112 #define IDX_1(v, bits1, bits2)	(((v) >> (bits2)) & ((1 << (bits1)) - 1))
   113 #define IDX_2(v, bits1, bits2)	((v) & ((1 << (bits2)) - 1))
   115 #define BITS1(mprefix)	mprefix ## _BITS_1
   116 #define BITS2(mprefix)	mprefix ## _BITS_2
   118 #define IMAP(vprefix)	vprefix ## _imap
   119 #define DMAP(vprefix)	vprefix ## _table
   120 #define SEQ(vprefix)	vprefix ## _seq
   122 static int32_t
   123 canonclass(uint32_t c) {
   124 	/* Look up canonicalclass table. */
   125 	return (LOOKUPTBL(canon_class, CANON_CLASS, c));
   126 }
   128 static int32_t
   129 decompose_char(uint32_t c, const uint32_t **seqp)
   130 {
   131 	/* Look up decomposition table. */
   132 	int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c);
   133 	*seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);
   134 	return (seqidx);
   135 }
   137 static int32_t
   138 compose_char(uint32_t c,
   139 				const struct composition **compp)
   140 {
   141 	/* Look up composition table. */
   142 	int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
   143 	*compp = SEQ(compose) + (seqidx & 0xffff);
   144 	return (seqidx >> 16);
   145 }
   147 static nsresult
   148 mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen,
   149 		       uint32_t c, int32_t *decomp_lenp)
   150 {
   151 	uint32_t *vorg = v;
   152 	int32_t seqidx;
   153 	const uint32_t *seq;
   155 	//assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr);
   157 	/*
   158 	 * First, check for Hangul.
   159 	 */
   160 	if (SBase <= c && c < SLast) {
   161 		int32_t idx, t_offset, v_offset, l_offset;
   163 		idx = c - SBase;
   164 		t_offset = idx % TCount;
   165 		idx /= TCount;
   166 		v_offset = idx % VCount;
   167 		l_offset = idx / VCount;
   168 		if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
   169 			return (NS_ERROR_UNORM_MOREOUTPUT);
   170 		*v++ = LBase + l_offset;
   171 		*v++ = VBase + v_offset;
   172 		if (t_offset > 0)
   173 			*v++ = TBase + t_offset;
   174 		*decomp_lenp = v - vorg;
   175 		return (NS_OK);
   176 	}
   178 	/*
   179 	 * Look up decomposition table.  If no decomposition is defined
   180 	 * or if it is a compatibility decomosition when canonical
   181 	 * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
   182 	 */
   183 	seqidx = decompose_char(c, &seq);
   184 	if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
   185 		return (NS_SUCCESS_UNORM_NOTFOUND);
   187 	/*
   188 	 * Copy the decomposed sequence.  The end of the sequence are
   189 	 * marked with END_BIT.
   190 	 */
   191 	do {
   192 		uint32_t c;
   193 		int32_t dlen;
   194 		nsresult r;
   196 		c = *seq & ~END_BIT;
   198 		/* Decompose recursively. */
   199 		r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
   200 		if (r == NS_OK) {
   201 			v += dlen;
   202 			vlen -= dlen;
   203 		} else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
   204 			if (vlen < 1)
   205 				return (NS_ERROR_UNORM_MOREOUTPUT);
   206 			*v++ = c;
   207 			vlen--;
   208 		} else {
   209 			return (r);
   210 		}
   212 	} while ((*seq++ & END_BIT) == 0);
   214 	*decomp_lenp = v - vorg;
   216 	return (NS_OK);
   217 }
   219 static int32_t
   220 mdn__unicode_iscompositecandidate(uint32_t c)
   221 {
   222 	const struct composition *dummy;
   224 	/* Check for Hangul */
   225 	if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
   226 		return (1);
   228 	/*
   229 	 * Look up composition table.  If there are no composition
   230 	 * that begins with the given character, it is not a
   231 	 * composition candidate.
   232 	 */
   233 	if (compose_char(c, &dummy) == 0)
   234 		return (0);
   235 	else
   236 		return (1);
   237 }
   239 static nsresult
   240 mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp)
   241 {
   242 	int32_t n;
   243 	int32_t lo, hi;
   244 	const struct composition *cseq;
   246 	//assert(compp != nullptr);
   248 	/*
   249 	 * Check for Hangul.
   250 	 */
   251 	if (LBase <= c1 && c1 < LBase + LCount &&
   252 	    VBase <= c2 && c2 < VBase + VCount) {
   253 		/*
   254 		 * Hangul L and V.
   255 		 */
   256 		*compp = SBase +
   257 			((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
   258 		return (NS_OK);
   259 	} else if (SBase <= c1 && c1 < SLast &&
   260 		   TBase <= c2 && c2 < TBase + TCount &&
   261 		   (c1 - SBase) % TCount == 0) {
   262 		/*
   263 		 * Hangul LV and T.
   264 		 */
   265 		*compp = c1 + (c2 - TBase);
   266 		return (NS_OK);
   267 	}
   269 	/*
   270 	 * Look up composition table.  If the result is 0, no composition
   271 	 * is defined.  Otherwise, upper 16bits of the result contains
   272 	 * the number of composition that begins with 'c1', and the lower
   273 	 * 16bits is the offset in 'compose_seq'.
   274 	 */
   275 	if ((n = compose_char(c1, &cseq)) == 0)
   276 		return (NS_SUCCESS_UNORM_NOTFOUND);
   278 	/*
   279 	 * The composite sequences are sorted by the 2nd character 'c2'.
   280 	 * So we can use binary search.
   281 	 */
   282 	lo = 0;
   283 	hi = n - 1;
   284 	while (lo <= hi) {
   285 		int32_t mid = (lo + hi) / 2;
   287 		if (cseq[mid].c2 < c2) {
   288 			lo = mid + 1;
   289 		} else if (cseq[mid].c2 > c2) {
   290 			hi = mid - 1;
   291 		} else {
   292 			*compp = cseq[mid].comp;
   293 			return (NS_OK);
   294 		}
   295 	}
   296 	return (NS_SUCCESS_UNORM_NOTFOUND);
   297 }
   300 #define WORKBUF_SIZE		128
   301 #define WORKBUF_SIZE_MAX	10000
   303 typedef struct {
   304 	int32_t cur;		/* pointing now processing character */
   305 	int32_t last;		/* pointing just after the last character */
   306 	int32_t size;		/* size of UCS and CLASS array */
   307 	uint32_t *ucs;	/* UCS-4 characters */
   308 	int32_t *cclass;		/* and their canonical classes */
   309 	uint32_t ucs_buf[WORKBUF_SIZE];	/* local buffer */
   310 	int32_t class_buf[WORKBUF_SIZE];		/* ditto */
   311 } workbuf_t;
   313 static nsresult	decompose(workbuf_t *wb, uint32_t c, int32_t compat);
   314 static void		get_class(workbuf_t *wb);
   315 static void		reorder(workbuf_t *wb);
   316 static void		compose(workbuf_t *wb);
   317 static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
   318 static void		workbuf_init(workbuf_t *wb);
   319 static void		workbuf_free(workbuf_t *wb);
   320 static nsresult	workbuf_extend(workbuf_t *wb);
   321 static nsresult	workbuf_append(workbuf_t *wb, uint32_t c);
   322 static void		workbuf_shift(workbuf_t *wb, int32_t shift);
   323 static void		workbuf_removevoid(workbuf_t *wb);
   326 static nsresult
   327 mdn_normalize(bool do_composition, bool compat,
   328 	  const nsAString& aSrcStr, nsAString& aToStr)
   329 {
   330 	workbuf_t wb;
   331 	nsresult r = NS_OK;
   332 	/*
   333 	 * Initialize working buffer.
   334 	 */
   335 	workbuf_init(&wb);
   337 	nsAString::const_iterator start, end;
   338 	aSrcStr.BeginReading(start); 
   339 	aSrcStr.EndReading(end); 
   341 	while (start != end) {
   342 		uint32_t c;
   343 		char16_t curChar;
   345 		//assert(wb.cur == wb.last);
   347 		/*
   348 		 * Get one character from 'from'.
   349 		 */
   350 		curChar= *start++;
   352 		if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
   353 			c = SURROGATE_TO_UCS4(curChar, *start);
   354 			++start;
   355 		} else {
   356 			c = curChar;
   357 		}
   359 		/*
   360 		 * Decompose it.
   361 		 */
   362 		if ((r = decompose(&wb, c, compat)) != NS_OK)
   363 			break;
   365 		/*
   366 		 * Get canonical class.
   367 		 */
   368 		get_class(&wb);
   370 		/*
   371 		 * Reorder & compose.
   372 		 */
   373 		for (; wb.cur < wb.last; wb.cur++) {
   374 			if (wb.cur == 0) {
   375 				continue;
   376 			} else if (wb.cclass[wb.cur] > 0) {
   377 				/*
   378 				 * This is not a starter. Try reordering.
   379 				 * Note that characters up to it are
   380 				 * already in canonical order.
   381 				 */
   382 				reorder(&wb);
   383 				continue;
   384 			}
   386 			/*
   387 			 * This is a starter character, and there are
   388 			 * some characters before it.  Those characters
   389 			 * have been reordered properly, and
   390 			 * ready for composition.
   391 			 */
   392 			if (do_composition && wb.cclass[0] == 0)
   393 				compose(&wb);
   395 			/*
   396 			 * If CUR points to a starter character,
   397 			 * then process of characters before CUR are
   398 			 * already finished, because any further
   399 			 * reordering/composition for them are blocked
   400 			 * by the starter CUR points.
   401 			 */
   402 			if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
   403 				/* Flush everything before CUR. */
   404 				r = flush_before_cur(&wb, aToStr);
   405 				if (r != NS_OK)
   406 					break;
   407 			}
   408 		}
   409 	}
   411 	if (r == NS_OK) {
   412 		if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
   413 			/*
   414 			 * There is some characters left in WB.
   415 			 * They are ordered, but not composed yet.
   416 			 * Now CUR points just after the last character in WB,
   417 			 * and since compose() tries to compose characters
   418 			 * between top and CUR inclusive, we must make CUR
   419 			 * one character back during compose().
   420 			 */
   421 			wb.cur--;
   422 			compose(&wb);
   423 			wb.cur++;
   424 		}
   425 		/*
   426 		 * Call this even when WB.CUR == 0, to make TO
   427 		 * NUL-terminated.
   428 		 */
   429 		r = flush_before_cur(&wb, aToStr);
   430 	}
   432 	workbuf_free(&wb);
   434 	return (r);
   435 }
   437 static nsresult
   438 decompose(workbuf_t *wb, uint32_t c, int32_t compat) {
   439 	nsresult r;
   440 	int32_t dec_len;
   442 again:
   443 	r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
   444 				   wb->size - wb->last, c, &dec_len);
   445 	switch (r) {
   446 	case NS_OK:
   447 		wb->last += dec_len;
   448 		return (NS_OK);
   449 	case NS_SUCCESS_UNORM_NOTFOUND:
   450 		return (workbuf_append(wb, c));
   451 	case NS_ERROR_UNORM_MOREOUTPUT:
   452 		if ((r = workbuf_extend(wb)) != NS_OK)
   453 			return (r);
   454 		if (wb->size > WORKBUF_SIZE_MAX) {
   455 			// "mdn__unormalize_form*: " "working buffer too large\n"
   456 			return (NS_ERROR_FAILURE);
   457 		}
   458 		goto again;
   459 	default:
   460 		return (r);
   461 	}
   462 	/* NOTREACHED */
   463 }
   465 static void		
   466 get_class(workbuf_t *wb) {
   467 	int32_t i;
   469 	for (i = wb->cur; i < wb->last; i++)
   470 		wb->cclass[i] = canonclass(wb->ucs[i]);
   471 }
   473 static void
   474 reorder(workbuf_t *wb) {
   475 	uint32_t c;
   476 	int32_t i;
   477 	int32_t cclass;
   479 	//assert(wb != nullptr);
   481 	i = wb->cur;
   482 	c = wb->ucs[i];
   483 	cclass = wb->cclass[i];
   485 	while (i > 0 && wb->cclass[i - 1] > cclass) {
   486 		wb->ucs[i] = wb->ucs[i - 1];
   487 		wb->cclass[i] =wb->cclass[i - 1];
   488 		i--;
   489 		wb->ucs[i] = c;
   490 		wb->cclass[i] = cclass;
   491 	}
   492 }
   494 static void
   495 compose(workbuf_t *wb) {
   496 	int32_t cur;
   497 	uint32_t *ucs;
   498 	int32_t *cclass;
   499 	int32_t last_class;
   500 	int32_t nvoids;
   501 	int32_t i;
   503 	//assert(wb != nullptr && wb->cclass[0] == 0);
   505 	cur = wb->cur;
   506 	ucs = wb->ucs;
   507 	cclass = wb->cclass;
   509 	/*
   510 	 * If there are no decomposition sequence that begins with
   511 	 * the top character, composition is impossible.
   512 	 */
   513 	if (!mdn__unicode_iscompositecandidate(ucs[0]))
   514 		return;
   516 	last_class = 0;
   517 	nvoids = 0;
   518 	for (i = 1; i <= cur; i++) {
   519 		uint32_t c;
   520 		int32_t cl = cclass[i];
   522 		if ((last_class < cl || cl == 0) &&
   523 		    mdn__unicode_compose(ucs[0], ucs[i],
   524 					 &c) == NS_OK) {
   525 			/*
   526 			 * Replace the top character with the composed one.
   527 			 */
   528 			ucs[0] = c;
   529 			cclass[0] = canonclass(c);
   531 			cclass[i] = -1;	/* void this character */
   532 			nvoids++;
   533 		} else {
   534 			last_class = cl;
   535 		}
   536 	}
   538 	/* Purge void characters, if any. */
   539 	if (nvoids > 0)
   540 		workbuf_removevoid(wb);
   541 }
   543 static nsresult
   544 flush_before_cur(workbuf_t *wb, nsAString& aToStr) 
   545 {
   546 	int32_t i;
   548 	for (i = 0; i < wb->cur; i++) {
   549 		if (!IS_IN_BMP(wb->ucs[i])) {
   550 			aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i]));
   551 			aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i]));
   552 		} else {
   553 			aToStr.Append((char16_t)(wb->ucs[i]));
   554 		}
   555 	}
   557 	workbuf_shift(wb, wb->cur);
   559 	return (NS_OK);
   560 }
   562 static void
   563 workbuf_init(workbuf_t *wb) {
   564 	wb->cur = 0;
   565 	wb->last = 0;
   566 	wb->size = WORKBUF_SIZE;
   567 	wb->ucs = wb->ucs_buf;
   568 	wb->cclass = wb->class_buf;
   569 }
   571 static void
   572 workbuf_free(workbuf_t *wb) {
   573 	if (wb->ucs != wb->ucs_buf) {
   574 		nsMemory::Free(wb->ucs);
   575 		nsMemory::Free(wb->cclass);
   576 	}
   577 }
   579 static nsresult
   580 workbuf_extend(workbuf_t *wb) {
   581 	int32_t newsize = wb->size * 3;
   583 	if (wb->ucs == wb->ucs_buf) {
   584 		wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);
   585 		if (!wb->ucs)
   586 			return NS_ERROR_OUT_OF_MEMORY;
   587 		wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);
   588 		if (!wb->cclass) {
   589 			nsMemory::Free(wb->ucs);
   590 			wb->ucs = nullptr;
   591 			return NS_ERROR_OUT_OF_MEMORY;
   592 		}
   593 	} else {
   594 		void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
   595 		if (!buf)
   596 			return NS_ERROR_OUT_OF_MEMORY;
   597 		wb->ucs = (uint32_t*)buf;
   598 		buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
   599 		if (!buf)
   600 			return NS_ERROR_OUT_OF_MEMORY;
   601 		wb->cclass = (int32_t*)buf;
   602 	}
   603 	return (NS_OK);
   604 }
   606 static nsresult
   607 workbuf_append(workbuf_t *wb, uint32_t c) {
   608 	nsresult r;
   610 	if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
   611 		return (r);
   612 	wb->ucs[wb->last++] = c;
   613 	return (NS_OK);
   614 }
   616 static void
   617 workbuf_shift(workbuf_t *wb, int32_t shift) {
   618 	int32_t nmove;
   620 	//assert(wb != nullptr && wb->cur >= shift);
   622 	nmove = wb->last - shift;
   623 	memmove(&wb->ucs[0], &wb->ucs[shift],
   624 		      nmove * sizeof(wb->ucs[0]));
   625 	memmove(&wb->cclass[0], &wb->cclass[shift],
   626 		      nmove * sizeof(wb->cclass[0]));
   627 	wb->cur -= shift;
   628 	wb->last -= shift;
   629 }
   631 static void
   632 workbuf_removevoid(workbuf_t *wb) {
   633 	int32_t i, j;
   634 	int32_t last = wb->last;
   636 	for (i = j = 0; i < last; i++) {
   637 		if (wb->cclass[i] >= 0) {
   638 			if (j < i) {
   639 				wb->ucs[j] = wb->ucs[i];
   640 				wb->cclass[j] = wb->cclass[i];
   641 			}
   642 			j++;
   643 		}
   644 	}
   645 	wb->cur -= last - j;
   646 	wb->last = j;
   647 }
   649 nsresult  
   650 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
   651 {
   652   return mdn_normalize(false, false, aSrc, aDest);
   653 }
   655 nsresult  
   656 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
   657 {
   658   return mdn_normalize(true, false, aSrc, aDest);
   659 }
   661 nsresult  
   662 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
   663 {
   664   return mdn_normalize(false, true, aSrc, aDest);
   665 }
   667 nsresult  
   668 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
   669 {
   670   return mdn_normalize(true, true, aSrc, aDest);
   671 }
   673 bool
   674 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)
   675 {
   676   return mdn__unicode_compose(a, b, ab) == NS_OK;
   677 }
   679 bool
   680 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)
   681 {
   682   // We can't use mdn__unicode_decompose here, because that does a recursive
   683   // decomposition that may yield more than two characters, but the harfbuzz
   684   // callback wants just a single-step decomp that is guaranteed to produce
   685   // no more than two characters. So we do a low-level lookup in the table
   686   // of decomp sequences.
   687   const uint32_t *seq;
   688   uint32_t seqidx = decompose_char(c, &seq);
   689   if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
   690     return false;
   691   }
   692   *c1 = *seq & ~END_BIT;
   693   if (*seq & END_BIT) {
   694     *c2 = 0;
   695   } else {
   696     *c2 = *++seq & ~END_BIT;
   697   }
   698   return true;
   699 }

mercurial