|
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 |
|
3 /* This file is modified from JPNIC's mDNKit, it is under both MPL and |
|
4 * JPNIC's license. |
|
5 */ |
|
6 |
|
7 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
8 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
10 |
|
11 /* |
|
12 * Copyright (c) 2000,2002 Japan Network Information Center. |
|
13 * All rights reserved. |
|
14 * |
|
15 * By using this file, you agree to the terms and conditions set forth bellow. |
|
16 * |
|
17 * LICENSE TERMS AND CONDITIONS |
|
18 * |
|
19 * The following License Terms and Conditions apply, unless a different |
|
20 * license is obtained from Japan Network Information Center ("JPNIC"), |
|
21 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, |
|
22 * Chiyoda-ku, Tokyo 101-0047, Japan. |
|
23 * |
|
24 * 1. Use, Modification and Redistribution (including distribution of any |
|
25 * modified or derived work) in source and/or binary forms is permitted |
|
26 * under this License Terms and Conditions. |
|
27 * |
|
28 * 2. Redistribution of source code must retain the copyright notices as they |
|
29 * appear in each source code file, this License Terms and Conditions. |
|
30 * |
|
31 * 3. Redistribution in binary form must reproduce the Copyright Notice, |
|
32 * this License Terms and Conditions, in the documentation and/or other |
|
33 * materials provided with the distribution. For the purposes of binary |
|
34 * distribution the "Copyright Notice" refers to the following language: |
|
35 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." |
|
36 * |
|
37 * 4. The name of JPNIC may not be used to endorse or promote products |
|
38 * derived from this Software without specific prior written approval of |
|
39 * JPNIC. |
|
40 * |
|
41 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC |
|
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
|
44 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE |
|
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
46 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
47 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
|
48 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|
49 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|
50 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|
51 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. |
|
52 */ |
|
53 |
|
54 #include <string.h> |
|
55 |
|
56 #include "nsMemory.h" |
|
57 #include "nsUnicodeNormalizer.h" |
|
58 #include "nsString.h" |
|
59 |
|
60 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer) |
|
61 |
|
62 |
|
63 nsUnicodeNormalizer::nsUnicodeNormalizer() |
|
64 { |
|
65 } |
|
66 |
|
67 nsUnicodeNormalizer::~nsUnicodeNormalizer() |
|
68 { |
|
69 } |
|
70 |
|
71 |
|
72 |
|
/*
 * Flag bit carried by the final element of a decomposition sequence;
 * the remaining bits of each element hold the code point.
 */
#define END_BIT 0x80000000

/*
 * Constants for arithmetic Hangul decomposition/composition,
 * taken from the Unicode book.
 */
#define SBase   0xAC00  /* first precomposed Hangul syllable */
#define LBase   0x1100  /* first leading consonant */
#define VBase   0x1161  /* first vowel */
#define TBase   0x11A7  /* trailing consonant base; offset 0 means "none" */
#define LCount  19
#define VCount  21
#define TCount  28
/* One past the last precomposed Hangul syllable. */
#define SLast   (SBase + (LCount * VCount * TCount))
|
88 |
|
/*
 * One entry of a composition sequence: combining the sequence's first
 * character with `c2` yields `comp`.
 */
struct composition {
    uint32_t c2;    /* second character of the pair */
    uint32_t comp;  /* resulting composed character */
};
|
93 |
|
94 |
|
95 #include "normalization_data.h" |
|
96 |
|
/*
 * Macros for the multi-level index tables.
 *
 * A table family is named by a variable prefix (vprefix), which selects
 * the <vprefix>_imap / <vprefix>_table / <vprefix>_seq arrays, and a
 * macro prefix (mprefix), which selects the <mprefix>_BITS_1 and
 * <mprefix>_BITS_2 bit widths used to split a value into three indices.
 */

/* Name builders. */
#define IMAP(vprefix) vprefix ## _imap
#define DMAP(vprefix) vprefix ## _table
#define SEQ(vprefix) vprefix ## _seq

#define BITS1(mprefix) mprefix ## _BITS_1
#define BITS2(mprefix) mprefix ## _BITS_2

/* Split `v` into top bits, middle `bits1` bits and low `bits2` bits. */
#define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2)))
#define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1))
#define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1))

#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))

/*
 * Full lookup: the index map is consulted twice (first with the top
 * index, then with that result plus the middle index) to pick a data
 * block, and the low index selects the entry inside the block.
 */
#define LOOKUPTBL(vprefix, mprefix, v) \
    DMAP(vprefix)[ \
        IMAP(vprefix)[IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)] \
    ].tbl[IDX2(mprefix, v)]
|
121 |
|
122 static int32_t |
|
123 canonclass(uint32_t c) { |
|
124 /* Look up canonicalclass table. */ |
|
125 return (LOOKUPTBL(canon_class, CANON_CLASS, c)); |
|
126 } |
|
127 |
|
128 static int32_t |
|
129 decompose_char(uint32_t c, const uint32_t **seqp) |
|
130 { |
|
131 /* Look up decomposition table. */ |
|
132 int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c); |
|
133 *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT); |
|
134 return (seqidx); |
|
135 } |
|
136 |
|
137 static int32_t |
|
138 compose_char(uint32_t c, |
|
139 const struct composition **compp) |
|
140 { |
|
141 /* Look up composition table. */ |
|
142 int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c); |
|
143 *compp = SEQ(compose) + (seqidx & 0xffff); |
|
144 return (seqidx >> 16); |
|
145 } |
|
146 |
|
147 static nsresult |
|
148 mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen, |
|
149 uint32_t c, int32_t *decomp_lenp) |
|
150 { |
|
151 uint32_t *vorg = v; |
|
152 int32_t seqidx; |
|
153 const uint32_t *seq; |
|
154 |
|
155 //assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr); |
|
156 |
|
157 /* |
|
158 * First, check for Hangul. |
|
159 */ |
|
160 if (SBase <= c && c < SLast) { |
|
161 int32_t idx, t_offset, v_offset, l_offset; |
|
162 |
|
163 idx = c - SBase; |
|
164 t_offset = idx % TCount; |
|
165 idx /= TCount; |
|
166 v_offset = idx % VCount; |
|
167 l_offset = idx / VCount; |
|
168 if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3)) |
|
169 return (NS_ERROR_UNORM_MOREOUTPUT); |
|
170 *v++ = LBase + l_offset; |
|
171 *v++ = VBase + v_offset; |
|
172 if (t_offset > 0) |
|
173 *v++ = TBase + t_offset; |
|
174 *decomp_lenp = v - vorg; |
|
175 return (NS_OK); |
|
176 } |
|
177 |
|
178 /* |
|
179 * Look up decomposition table. If no decomposition is defined |
|
180 * or if it is a compatibility decomosition when canonical |
|
181 * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'. |
|
182 */ |
|
183 seqidx = decompose_char(c, &seq); |
|
184 if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0)) |
|
185 return (NS_SUCCESS_UNORM_NOTFOUND); |
|
186 |
|
187 /* |
|
188 * Copy the decomposed sequence. The end of the sequence are |
|
189 * marked with END_BIT. |
|
190 */ |
|
191 do { |
|
192 uint32_t c; |
|
193 int32_t dlen; |
|
194 nsresult r; |
|
195 |
|
196 c = *seq & ~END_BIT; |
|
197 |
|
198 /* Decompose recursively. */ |
|
199 r = mdn__unicode_decompose(compat, v, vlen, c, &dlen); |
|
200 if (r == NS_OK) { |
|
201 v += dlen; |
|
202 vlen -= dlen; |
|
203 } else if (r == NS_SUCCESS_UNORM_NOTFOUND) { |
|
204 if (vlen < 1) |
|
205 return (NS_ERROR_UNORM_MOREOUTPUT); |
|
206 *v++ = c; |
|
207 vlen--; |
|
208 } else { |
|
209 return (r); |
|
210 } |
|
211 |
|
212 } while ((*seq++ & END_BIT) == 0); |
|
213 |
|
214 *decomp_lenp = v - vorg; |
|
215 |
|
216 return (NS_OK); |
|
217 } |
|
218 |
|
219 static int32_t |
|
220 mdn__unicode_iscompositecandidate(uint32_t c) |
|
221 { |
|
222 const struct composition *dummy; |
|
223 |
|
224 /* Check for Hangul */ |
|
225 if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast)) |
|
226 return (1); |
|
227 |
|
228 /* |
|
229 * Look up composition table. If there are no composition |
|
230 * that begins with the given character, it is not a |
|
231 * composition candidate. |
|
232 */ |
|
233 if (compose_char(c, &dummy) == 0) |
|
234 return (0); |
|
235 else |
|
236 return (1); |
|
237 } |
|
238 |
|
239 static nsresult |
|
240 mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp) |
|
241 { |
|
242 int32_t n; |
|
243 int32_t lo, hi; |
|
244 const struct composition *cseq; |
|
245 |
|
246 //assert(compp != nullptr); |
|
247 |
|
248 /* |
|
249 * Check for Hangul. |
|
250 */ |
|
251 if (LBase <= c1 && c1 < LBase + LCount && |
|
252 VBase <= c2 && c2 < VBase + VCount) { |
|
253 /* |
|
254 * Hangul L and V. |
|
255 */ |
|
256 *compp = SBase + |
|
257 ((c1 - LBase) * VCount + (c2 - VBase)) * TCount; |
|
258 return (NS_OK); |
|
259 } else if (SBase <= c1 && c1 < SLast && |
|
260 TBase <= c2 && c2 < TBase + TCount && |
|
261 (c1 - SBase) % TCount == 0) { |
|
262 /* |
|
263 * Hangul LV and T. |
|
264 */ |
|
265 *compp = c1 + (c2 - TBase); |
|
266 return (NS_OK); |
|
267 } |
|
268 |
|
269 /* |
|
270 * Look up composition table. If the result is 0, no composition |
|
271 * is defined. Otherwise, upper 16bits of the result contains |
|
272 * the number of composition that begins with 'c1', and the lower |
|
273 * 16bits is the offset in 'compose_seq'. |
|
274 */ |
|
275 if ((n = compose_char(c1, &cseq)) == 0) |
|
276 return (NS_SUCCESS_UNORM_NOTFOUND); |
|
277 |
|
278 /* |
|
279 * The composite sequences are sorted by the 2nd character 'c2'. |
|
280 * So we can use binary search. |
|
281 */ |
|
282 lo = 0; |
|
283 hi = n - 1; |
|
284 while (lo <= hi) { |
|
285 int32_t mid = (lo + hi) / 2; |
|
286 |
|
287 if (cseq[mid].c2 < c2) { |
|
288 lo = mid + 1; |
|
289 } else if (cseq[mid].c2 > c2) { |
|
290 hi = mid - 1; |
|
291 } else { |
|
292 *compp = cseq[mid].comp; |
|
293 return (NS_OK); |
|
294 } |
|
295 } |
|
296 return (NS_SUCCESS_UNORM_NOTFOUND); |
|
297 } |
|
298 |
|
299 |
|
#define WORKBUF_SIZE        128     /* capacity of the embedded buffers */
#define WORKBUF_SIZE_MAX    10000   /* growth limit checked by decompose() */

/*
 * Working buffer holding the characters currently being normalized
 * together with their canonical classes.  `ucs` and `cclass` start out
 * pointing at the embedded arrays.
 */
struct workbuf_t {
    int32_t cur;                        /* index of the character now being processed */
    int32_t last;                       /* index just after the last character */
    int32_t size;                       /* capacity of the ucs and cclass arrays */
    uint32_t *ucs;                      /* UCS-4 characters */
    int32_t *cclass;                    /* canonical class of each character */
    uint32_t ucs_buf[WORKBUF_SIZE];     /* embedded storage for ucs */
    int32_t class_buf[WORKBUF_SIZE];    /* embedded storage for cclass */
};
|
312 |
|
313 static nsresult decompose(workbuf_t *wb, uint32_t c, int32_t compat); |
|
314 static void get_class(workbuf_t *wb); |
|
315 static void reorder(workbuf_t *wb); |
|
316 static void compose(workbuf_t *wb); |
|
317 static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr); |
|
318 static void workbuf_init(workbuf_t *wb); |
|
319 static void workbuf_free(workbuf_t *wb); |
|
320 static nsresult workbuf_extend(workbuf_t *wb); |
|
321 static nsresult workbuf_append(workbuf_t *wb, uint32_t c); |
|
322 static void workbuf_shift(workbuf_t *wb, int32_t shift); |
|
323 static void workbuf_removevoid(workbuf_t *wb); |
|
324 |
|
325 |
|
326 static nsresult |
|
327 mdn_normalize(bool do_composition, bool compat, |
|
328 const nsAString& aSrcStr, nsAString& aToStr) |
|
329 { |
|
330 workbuf_t wb; |
|
331 nsresult r = NS_OK; |
|
332 /* |
|
333 * Initialize working buffer. |
|
334 */ |
|
335 workbuf_init(&wb); |
|
336 |
|
337 nsAString::const_iterator start, end; |
|
338 aSrcStr.BeginReading(start); |
|
339 aSrcStr.EndReading(end); |
|
340 |
|
341 while (start != end) { |
|
342 uint32_t c; |
|
343 char16_t curChar; |
|
344 |
|
345 //assert(wb.cur == wb.last); |
|
346 |
|
347 /* |
|
348 * Get one character from 'from'. |
|
349 */ |
|
350 curChar= *start++; |
|
351 |
|
352 if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) { |
|
353 c = SURROGATE_TO_UCS4(curChar, *start); |
|
354 ++start; |
|
355 } else { |
|
356 c = curChar; |
|
357 } |
|
358 |
|
359 /* |
|
360 * Decompose it. |
|
361 */ |
|
362 if ((r = decompose(&wb, c, compat)) != NS_OK) |
|
363 break; |
|
364 |
|
365 /* |
|
366 * Get canonical class. |
|
367 */ |
|
368 get_class(&wb); |
|
369 |
|
370 /* |
|
371 * Reorder & compose. |
|
372 */ |
|
373 for (; wb.cur < wb.last; wb.cur++) { |
|
374 if (wb.cur == 0) { |
|
375 continue; |
|
376 } else if (wb.cclass[wb.cur] > 0) { |
|
377 /* |
|
378 * This is not a starter. Try reordering. |
|
379 * Note that characters up to it are |
|
380 * already in canonical order. |
|
381 */ |
|
382 reorder(&wb); |
|
383 continue; |
|
384 } |
|
385 |
|
386 /* |
|
387 * This is a starter character, and there are |
|
388 * some characters before it. Those characters |
|
389 * have been reordered properly, and |
|
390 * ready for composition. |
|
391 */ |
|
392 if (do_composition && wb.cclass[0] == 0) |
|
393 compose(&wb); |
|
394 |
|
395 /* |
|
396 * If CUR points to a starter character, |
|
397 * then process of characters before CUR are |
|
398 * already finished, because any further |
|
399 * reordering/composition for them are blocked |
|
400 * by the starter CUR points. |
|
401 */ |
|
402 if (wb.cur > 0 && wb.cclass[wb.cur] == 0) { |
|
403 /* Flush everything before CUR. */ |
|
404 r = flush_before_cur(&wb, aToStr); |
|
405 if (r != NS_OK) |
|
406 break; |
|
407 } |
|
408 } |
|
409 } |
|
410 |
|
411 if (r == NS_OK) { |
|
412 if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) { |
|
413 /* |
|
414 * There is some characters left in WB. |
|
415 * They are ordered, but not composed yet. |
|
416 * Now CUR points just after the last character in WB, |
|
417 * and since compose() tries to compose characters |
|
418 * between top and CUR inclusive, we must make CUR |
|
419 * one character back during compose(). |
|
420 */ |
|
421 wb.cur--; |
|
422 compose(&wb); |
|
423 wb.cur++; |
|
424 } |
|
425 /* |
|
426 * Call this even when WB.CUR == 0, to make TO |
|
427 * NUL-terminated. |
|
428 */ |
|
429 r = flush_before_cur(&wb, aToStr); |
|
430 } |
|
431 |
|
432 workbuf_free(&wb); |
|
433 |
|
434 return (r); |
|
435 } |
|
436 |
|
437 static nsresult |
|
438 decompose(workbuf_t *wb, uint32_t c, int32_t compat) { |
|
439 nsresult r; |
|
440 int32_t dec_len; |
|
441 |
|
442 again: |
|
443 r = mdn__unicode_decompose(compat, wb->ucs + wb->last, |
|
444 wb->size - wb->last, c, &dec_len); |
|
445 switch (r) { |
|
446 case NS_OK: |
|
447 wb->last += dec_len; |
|
448 return (NS_OK); |
|
449 case NS_SUCCESS_UNORM_NOTFOUND: |
|
450 return (workbuf_append(wb, c)); |
|
451 case NS_ERROR_UNORM_MOREOUTPUT: |
|
452 if ((r = workbuf_extend(wb)) != NS_OK) |
|
453 return (r); |
|
454 if (wb->size > WORKBUF_SIZE_MAX) { |
|
455 // "mdn__unormalize_form*: " "working buffer too large\n" |
|
456 return (NS_ERROR_FAILURE); |
|
457 } |
|
458 goto again; |
|
459 default: |
|
460 return (r); |
|
461 } |
|
462 /* NOTREACHED */ |
|
463 } |
|
464 |
|
465 static void |
|
466 get_class(workbuf_t *wb) { |
|
467 int32_t i; |
|
468 |
|
469 for (i = wb->cur; i < wb->last; i++) |
|
470 wb->cclass[i] = canonclass(wb->ucs[i]); |
|
471 } |
|
472 |
|
473 static void |
|
474 reorder(workbuf_t *wb) { |
|
475 uint32_t c; |
|
476 int32_t i; |
|
477 int32_t cclass; |
|
478 |
|
479 //assert(wb != nullptr); |
|
480 |
|
481 i = wb->cur; |
|
482 c = wb->ucs[i]; |
|
483 cclass = wb->cclass[i]; |
|
484 |
|
485 while (i > 0 && wb->cclass[i - 1] > cclass) { |
|
486 wb->ucs[i] = wb->ucs[i - 1]; |
|
487 wb->cclass[i] =wb->cclass[i - 1]; |
|
488 i--; |
|
489 wb->ucs[i] = c; |
|
490 wb->cclass[i] = cclass; |
|
491 } |
|
492 } |
|
493 |
|
494 static void |
|
495 compose(workbuf_t *wb) { |
|
496 int32_t cur; |
|
497 uint32_t *ucs; |
|
498 int32_t *cclass; |
|
499 int32_t last_class; |
|
500 int32_t nvoids; |
|
501 int32_t i; |
|
502 |
|
503 //assert(wb != nullptr && wb->cclass[0] == 0); |
|
504 |
|
505 cur = wb->cur; |
|
506 ucs = wb->ucs; |
|
507 cclass = wb->cclass; |
|
508 |
|
509 /* |
|
510 * If there are no decomposition sequence that begins with |
|
511 * the top character, composition is impossible. |
|
512 */ |
|
513 if (!mdn__unicode_iscompositecandidate(ucs[0])) |
|
514 return; |
|
515 |
|
516 last_class = 0; |
|
517 nvoids = 0; |
|
518 for (i = 1; i <= cur; i++) { |
|
519 uint32_t c; |
|
520 int32_t cl = cclass[i]; |
|
521 |
|
522 if ((last_class < cl || cl == 0) && |
|
523 mdn__unicode_compose(ucs[0], ucs[i], |
|
524 &c) == NS_OK) { |
|
525 /* |
|
526 * Replace the top character with the composed one. |
|
527 */ |
|
528 ucs[0] = c; |
|
529 cclass[0] = canonclass(c); |
|
530 |
|
531 cclass[i] = -1; /* void this character */ |
|
532 nvoids++; |
|
533 } else { |
|
534 last_class = cl; |
|
535 } |
|
536 } |
|
537 |
|
538 /* Purge void characters, if any. */ |
|
539 if (nvoids > 0) |
|
540 workbuf_removevoid(wb); |
|
541 } |
|
542 |
|
543 static nsresult |
|
544 flush_before_cur(workbuf_t *wb, nsAString& aToStr) |
|
545 { |
|
546 int32_t i; |
|
547 |
|
548 for (i = 0; i < wb->cur; i++) { |
|
549 if (!IS_IN_BMP(wb->ucs[i])) { |
|
550 aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i])); |
|
551 aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i])); |
|
552 } else { |
|
553 aToStr.Append((char16_t)(wb->ucs[i])); |
|
554 } |
|
555 } |
|
556 |
|
557 workbuf_shift(wb, wb->cur); |
|
558 |
|
559 return (NS_OK); |
|
560 } |
|
561 |
|
562 static void |
|
563 workbuf_init(workbuf_t *wb) { |
|
564 wb->cur = 0; |
|
565 wb->last = 0; |
|
566 wb->size = WORKBUF_SIZE; |
|
567 wb->ucs = wb->ucs_buf; |
|
568 wb->cclass = wb->class_buf; |
|
569 } |
|
570 |
|
571 static void |
|
572 workbuf_free(workbuf_t *wb) { |
|
573 if (wb->ucs != wb->ucs_buf) { |
|
574 nsMemory::Free(wb->ucs); |
|
575 nsMemory::Free(wb->cclass); |
|
576 } |
|
577 } |
|
578 |
|
579 static nsresult |
|
580 workbuf_extend(workbuf_t *wb) { |
|
581 int32_t newsize = wb->size * 3; |
|
582 |
|
583 if (wb->ucs == wb->ucs_buf) { |
|
584 wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize); |
|
585 if (!wb->ucs) |
|
586 return NS_ERROR_OUT_OF_MEMORY; |
|
587 wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize); |
|
588 if (!wb->cclass) { |
|
589 nsMemory::Free(wb->ucs); |
|
590 wb->ucs = nullptr; |
|
591 return NS_ERROR_OUT_OF_MEMORY; |
|
592 } |
|
593 } else { |
|
594 void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize); |
|
595 if (!buf) |
|
596 return NS_ERROR_OUT_OF_MEMORY; |
|
597 wb->ucs = (uint32_t*)buf; |
|
598 buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize); |
|
599 if (!buf) |
|
600 return NS_ERROR_OUT_OF_MEMORY; |
|
601 wb->cclass = (int32_t*)buf; |
|
602 } |
|
603 return (NS_OK); |
|
604 } |
|
605 |
|
606 static nsresult |
|
607 workbuf_append(workbuf_t *wb, uint32_t c) { |
|
608 nsresult r; |
|
609 |
|
610 if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK) |
|
611 return (r); |
|
612 wb->ucs[wb->last++] = c; |
|
613 return (NS_OK); |
|
614 } |
|
615 |
|
616 static void |
|
617 workbuf_shift(workbuf_t *wb, int32_t shift) { |
|
618 int32_t nmove; |
|
619 |
|
620 //assert(wb != nullptr && wb->cur >= shift); |
|
621 |
|
622 nmove = wb->last - shift; |
|
623 memmove(&wb->ucs[0], &wb->ucs[shift], |
|
624 nmove * sizeof(wb->ucs[0])); |
|
625 memmove(&wb->cclass[0], &wb->cclass[shift], |
|
626 nmove * sizeof(wb->cclass[0])); |
|
627 wb->cur -= shift; |
|
628 wb->last -= shift; |
|
629 } |
|
630 |
|
631 static void |
|
632 workbuf_removevoid(workbuf_t *wb) { |
|
633 int32_t i, j; |
|
634 int32_t last = wb->last; |
|
635 |
|
636 for (i = j = 0; i < last; i++) { |
|
637 if (wb->cclass[i] >= 0) { |
|
638 if (j < i) { |
|
639 wb->ucs[j] = wb->ucs[i]; |
|
640 wb->cclass[j] = wb->cclass[i]; |
|
641 } |
|
642 j++; |
|
643 } |
|
644 } |
|
645 wb->cur -= last - j; |
|
646 wb->last = j; |
|
647 } |
|
648 |
|
649 nsresult |
|
650 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest) |
|
651 { |
|
652 return mdn_normalize(false, false, aSrc, aDest); |
|
653 } |
|
654 |
|
655 nsresult |
|
656 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest) |
|
657 { |
|
658 return mdn_normalize(true, false, aSrc, aDest); |
|
659 } |
|
660 |
|
661 nsresult |
|
662 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest) |
|
663 { |
|
664 return mdn_normalize(false, true, aSrc, aDest); |
|
665 } |
|
666 |
|
667 nsresult |
|
668 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest) |
|
669 { |
|
670 return mdn_normalize(true, true, aSrc, aDest); |
|
671 } |
|
672 |
|
673 bool |
|
674 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab) |
|
675 { |
|
676 return mdn__unicode_compose(a, b, ab) == NS_OK; |
|
677 } |
|
678 |
|
679 bool |
|
680 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2) |
|
681 { |
|
682 // We can't use mdn__unicode_decompose here, because that does a recursive |
|
683 // decomposition that may yield more than two characters, but the harfbuzz |
|
684 // callback wants just a single-step decomp that is guaranteed to produce |
|
685 // no more than two characters. So we do a low-level lookup in the table |
|
686 // of decomp sequences. |
|
687 const uint32_t *seq; |
|
688 uint32_t seqidx = decompose_char(c, &seq); |
|
689 if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) { |
|
690 return false; |
|
691 } |
|
692 *c1 = *seq & ~END_BIT; |
|
693 if (*seq & END_BIT) { |
|
694 *c2 = 0; |
|
695 } else { |
|
696 *c2 = *++seq & ~END_BIT; |
|
697 } |
|
698 return true; |
|
699 } |