Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 /* This file is modified from JPNIC's mDNKit, it is under both MPL and
4 * JPNIC's license.
5 */
7 /* This Source Code Form is subject to the terms of the Mozilla Public
8 * License, v. 2.0. If a copy of the MPL was not distributed with this
9 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11 /*
12 * Copyright (c) 2000,2002 Japan Network Information Center.
13 * All rights reserved.
14 *
15 * By using this file, you agree to the terms and conditions set forth bellow.
16 *
17 * LICENSE TERMS AND CONDITIONS
18 *
19 * The following License Terms and Conditions apply, unless a different
20 * license is obtained from Japan Network Information Center ("JPNIC"),
21 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
22 * Chiyoda-ku, Tokyo 101-0047, Japan.
23 *
24 * 1. Use, Modification and Redistribution (including distribution of any
25 * modified or derived work) in source and/or binary forms is permitted
26 * under this License Terms and Conditions.
27 *
28 * 2. Redistribution of source code must retain the copyright notices as they
29 * appear in each source code file, this License Terms and Conditions.
30 *
31 * 3. Redistribution in binary form must reproduce the Copyright Notice,
32 * this License Terms and Conditions, in the documentation and/or other
33 * materials provided with the distribution. For the purposes of binary
34 * distribution the "Copyright Notice" refers to the following language:
35 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
36 *
37 * 4. The name of JPNIC may not be used to endorse or promote products
38 * derived from this Software without specific prior written approval of
39 * JPNIC.
40 *
41 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
42 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
43 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
44 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
46 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
47 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
48 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
49 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
50 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
51 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
52 */
54 #include <string.h>
56 #include "nsMemory.h"
57 #include "nsUnicodeNormalizer.h"
58 #include "nsString.h"
// Instantiate the NS_IMPL_ISUPPORTS boilerplate for nsUnicodeNormalizer
// with nsIUnicodeNormalizer as the listed interface.
NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
// Constructor. The body is empty: the normalizer keeps no per-instance
// state in this file.
nsUnicodeNormalizer::nsUnicodeNormalizer()
{
}
// Destructor. The body is empty: there is nothing to release here.
nsUnicodeNormalizer::~nsUnicodeNormalizer()
{
}
/*
 * Flag bit carried by the final element of a decomposition sequence.
 * Readers mask it off (`& ~END_BIT`) to obtain the code point and test it
 * to know when the sequence is finished.
 */
#define END_BIT 0x80000000
/*
 * Some constants for Hangul decomposition/composition.
 * These things were taken from unicode book.
 *
 * A syllable c in [SBase, SLast) is split arithmetically into a leading
 * consonant (LBase + l), a vowel (VBase + v) and, when the trailing index
 * t is non-zero, a trailing consonant (TBase + t).
 */
#define SBase 0xac00    /* first precomposed syllable */
#define LBase 0x1100    /* base of the leading consonants */
#define VBase 0x1161    /* base of the vowels */
#define TBase 0x11a7    /* base of the trailing consonants; index 0 means "none" */
#define LCount 19       /* number of leading consonants */
#define VCount 21       /* number of vowels */
#define TCount 28       /* number of trailing indices, including "none" */
#define SLast (SBase + LCount * VCount * TCount)    /* one past the last syllable */
/*
 * One entry of the canonical composition table: for a given first
 * character, `c2` is the second character of the pair and `comp` is the
 * resulting composed character. Entries sharing a first character are
 * searched by `c2` with a binary search in mdn__unicode_compose().
 */
struct composition {
    uint32_t c2; /* 2nd character */
    uint32_t comp; /* composed character */
};
95 #include "normalization_data.h"
/*
 * Macro for multi-level index table.
 *
 * LOOKUPTBL(vprefix, mprefix, v) splits the value v into three bit
 * fields (IDX0 / IDX1 / IDX2, sized by <mprefix>_BITS_1 and
 * <mprefix>_BITS_2). The <vprefix>_imap array is indexed twice (first
 * with IDX0, then with that result plus IDX1); the value obtained selects
 * an element of <vprefix>_table whose `tbl` member is indexed with IDX2.
 */
#define LOOKUPTBL(vprefix, mprefix, v) \
    DMAP(vprefix)[\
        IMAP(vprefix)[\
            IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\
        ]\
    ].tbl[IDX2(mprefix, v)]

/* Bit-field extractors bound to a particular table's bit widths. */
#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))

/* Generic extractors: top bits, middle `bits1` bits, low `bits2` bits. */
#define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2)))
#define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1))
#define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1))

/* Names of the per-table bit-width constants. */
#define BITS1(mprefix) mprefix ## _BITS_1
#define BITS2(mprefix) mprefix ## _BITS_2

/* Names of the per-table index map, data table and sequence array. */
#define IMAP(vprefix) vprefix ## _imap
#define DMAP(vprefix) vprefix ## _table
#define SEQ(vprefix) vprefix ## _seq
122 static int32_t
123 canonclass(uint32_t c) {
124 /* Look up canonicalclass table. */
125 return (LOOKUPTBL(canon_class, CANON_CLASS, c));
126 }
128 static int32_t
129 decompose_char(uint32_t c, const uint32_t **seqp)
130 {
131 /* Look up decomposition table. */
132 int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c);
133 *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);
134 return (seqidx);
135 }
137 static int32_t
138 compose_char(uint32_t c,
139 const struct composition **compp)
140 {
141 /* Look up composition table. */
142 int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
143 *compp = SEQ(compose) + (seqidx & 0xffff);
144 return (seqidx >> 16);
145 }
147 static nsresult
148 mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen,
149 uint32_t c, int32_t *decomp_lenp)
150 {
151 uint32_t *vorg = v;
152 int32_t seqidx;
153 const uint32_t *seq;
155 //assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr);
157 /*
158 * First, check for Hangul.
159 */
160 if (SBase <= c && c < SLast) {
161 int32_t idx, t_offset, v_offset, l_offset;
163 idx = c - SBase;
164 t_offset = idx % TCount;
165 idx /= TCount;
166 v_offset = idx % VCount;
167 l_offset = idx / VCount;
168 if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
169 return (NS_ERROR_UNORM_MOREOUTPUT);
170 *v++ = LBase + l_offset;
171 *v++ = VBase + v_offset;
172 if (t_offset > 0)
173 *v++ = TBase + t_offset;
174 *decomp_lenp = v - vorg;
175 return (NS_OK);
176 }
178 /*
179 * Look up decomposition table. If no decomposition is defined
180 * or if it is a compatibility decomosition when canonical
181 * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
182 */
183 seqidx = decompose_char(c, &seq);
184 if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
185 return (NS_SUCCESS_UNORM_NOTFOUND);
187 /*
188 * Copy the decomposed sequence. The end of the sequence are
189 * marked with END_BIT.
190 */
191 do {
192 uint32_t c;
193 int32_t dlen;
194 nsresult r;
196 c = *seq & ~END_BIT;
198 /* Decompose recursively. */
199 r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
200 if (r == NS_OK) {
201 v += dlen;
202 vlen -= dlen;
203 } else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
204 if (vlen < 1)
205 return (NS_ERROR_UNORM_MOREOUTPUT);
206 *v++ = c;
207 vlen--;
208 } else {
209 return (r);
210 }
212 } while ((*seq++ & END_BIT) == 0);
214 *decomp_lenp = v - vorg;
216 return (NS_OK);
217 }
219 static int32_t
220 mdn__unicode_iscompositecandidate(uint32_t c)
221 {
222 const struct composition *dummy;
224 /* Check for Hangul */
225 if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
226 return (1);
228 /*
229 * Look up composition table. If there are no composition
230 * that begins with the given character, it is not a
231 * composition candidate.
232 */
233 if (compose_char(c, &dummy) == 0)
234 return (0);
235 else
236 return (1);
237 }
239 static nsresult
240 mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp)
241 {
242 int32_t n;
243 int32_t lo, hi;
244 const struct composition *cseq;
246 //assert(compp != nullptr);
248 /*
249 * Check for Hangul.
250 */
251 if (LBase <= c1 && c1 < LBase + LCount &&
252 VBase <= c2 && c2 < VBase + VCount) {
253 /*
254 * Hangul L and V.
255 */
256 *compp = SBase +
257 ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
258 return (NS_OK);
259 } else if (SBase <= c1 && c1 < SLast &&
260 TBase <= c2 && c2 < TBase + TCount &&
261 (c1 - SBase) % TCount == 0) {
262 /*
263 * Hangul LV and T.
264 */
265 *compp = c1 + (c2 - TBase);
266 return (NS_OK);
267 }
269 /*
270 * Look up composition table. If the result is 0, no composition
271 * is defined. Otherwise, upper 16bits of the result contains
272 * the number of composition that begins with 'c1', and the lower
273 * 16bits is the offset in 'compose_seq'.
274 */
275 if ((n = compose_char(c1, &cseq)) == 0)
276 return (NS_SUCCESS_UNORM_NOTFOUND);
278 /*
279 * The composite sequences are sorted by the 2nd character 'c2'.
280 * So we can use binary search.
281 */
282 lo = 0;
283 hi = n - 1;
284 while (lo <= hi) {
285 int32_t mid = (lo + hi) / 2;
287 if (cseq[mid].c2 < c2) {
288 lo = mid + 1;
289 } else if (cseq[mid].c2 > c2) {
290 hi = mid - 1;
291 } else {
292 *compp = cseq[mid].comp;
293 return (NS_OK);
294 }
295 }
296 return (NS_SUCCESS_UNORM_NOTFOUND);
297 }
/*
 * WORKBUF_SIZE is the element count of the buffers embedded in workbuf_t.
 * WORKBUF_SIZE_MAX is the limit decompose() checks wb->size against after
 * growing the buffer; exceeding it makes decompose() fail.
 */
#define WORKBUF_SIZE 128
#define WORKBUF_SIZE_MAX 10000
/*
 * Working buffer used while normalizing: two parallel arrays holding the
 * pending characters and their canonical classes. `ucs` and `cclass`
 * initially point at the embedded arrays (see workbuf_init) and are
 * switched to heap storage by workbuf_extend when more room is needed.
 * A class value of -1 marks an entry voided by compose().
 */
typedef struct {
    int32_t cur; /* pointing now processing character */
    int32_t last; /* pointing just after the last character */
    int32_t size; /* size of UCS and CLASS array */
    uint32_t *ucs; /* UCS-4 characters */
    int32_t *cclass; /* and their canonical classes */
    uint32_t ucs_buf[WORKBUF_SIZE]; /* local buffer */
    int32_t class_buf[WORKBUF_SIZE]; /* ditto */
} workbuf_t;
/* Forward declarations of the normalization steps and working-buffer
 * helpers that are defined further down in this file. */
static nsresult decompose(workbuf_t *wb, uint32_t c, int32_t compat);
static void get_class(workbuf_t *wb);
static void reorder(workbuf_t *wb);
static void compose(workbuf_t *wb);
static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
static void workbuf_init(workbuf_t *wb);
static void workbuf_free(workbuf_t *wb);
static nsresult workbuf_extend(workbuf_t *wb);
static nsresult workbuf_append(workbuf_t *wb, uint32_t c);
static void workbuf_shift(workbuf_t *wb, int32_t shift);
static void workbuf_removevoid(workbuf_t *wb);
326 static nsresult
327 mdn_normalize(bool do_composition, bool compat,
328 const nsAString& aSrcStr, nsAString& aToStr)
329 {
330 workbuf_t wb;
331 nsresult r = NS_OK;
332 /*
333 * Initialize working buffer.
334 */
335 workbuf_init(&wb);
337 nsAString::const_iterator start, end;
338 aSrcStr.BeginReading(start);
339 aSrcStr.EndReading(end);
341 while (start != end) {
342 uint32_t c;
343 char16_t curChar;
345 //assert(wb.cur == wb.last);
347 /*
348 * Get one character from 'from'.
349 */
350 curChar= *start++;
352 if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
353 c = SURROGATE_TO_UCS4(curChar, *start);
354 ++start;
355 } else {
356 c = curChar;
357 }
359 /*
360 * Decompose it.
361 */
362 if ((r = decompose(&wb, c, compat)) != NS_OK)
363 break;
365 /*
366 * Get canonical class.
367 */
368 get_class(&wb);
370 /*
371 * Reorder & compose.
372 */
373 for (; wb.cur < wb.last; wb.cur++) {
374 if (wb.cur == 0) {
375 continue;
376 } else if (wb.cclass[wb.cur] > 0) {
377 /*
378 * This is not a starter. Try reordering.
379 * Note that characters up to it are
380 * already in canonical order.
381 */
382 reorder(&wb);
383 continue;
384 }
386 /*
387 * This is a starter character, and there are
388 * some characters before it. Those characters
389 * have been reordered properly, and
390 * ready for composition.
391 */
392 if (do_composition && wb.cclass[0] == 0)
393 compose(&wb);
395 /*
396 * If CUR points to a starter character,
397 * then process of characters before CUR are
398 * already finished, because any further
399 * reordering/composition for them are blocked
400 * by the starter CUR points.
401 */
402 if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
403 /* Flush everything before CUR. */
404 r = flush_before_cur(&wb, aToStr);
405 if (r != NS_OK)
406 break;
407 }
408 }
409 }
411 if (r == NS_OK) {
412 if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
413 /*
414 * There is some characters left in WB.
415 * They are ordered, but not composed yet.
416 * Now CUR points just after the last character in WB,
417 * and since compose() tries to compose characters
418 * between top and CUR inclusive, we must make CUR
419 * one character back during compose().
420 */
421 wb.cur--;
422 compose(&wb);
423 wb.cur++;
424 }
425 /*
426 * Call this even when WB.CUR == 0, to make TO
427 * NUL-terminated.
428 */
429 r = flush_before_cur(&wb, aToStr);
430 }
432 workbuf_free(&wb);
434 return (r);
435 }
437 static nsresult
438 decompose(workbuf_t *wb, uint32_t c, int32_t compat) {
439 nsresult r;
440 int32_t dec_len;
442 again:
443 r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
444 wb->size - wb->last, c, &dec_len);
445 switch (r) {
446 case NS_OK:
447 wb->last += dec_len;
448 return (NS_OK);
449 case NS_SUCCESS_UNORM_NOTFOUND:
450 return (workbuf_append(wb, c));
451 case NS_ERROR_UNORM_MOREOUTPUT:
452 if ((r = workbuf_extend(wb)) != NS_OK)
453 return (r);
454 if (wb->size > WORKBUF_SIZE_MAX) {
455 // "mdn__unormalize_form*: " "working buffer too large\n"
456 return (NS_ERROR_FAILURE);
457 }
458 goto again;
459 default:
460 return (r);
461 }
462 /* NOTREACHED */
463 }
465 static void
466 get_class(workbuf_t *wb) {
467 int32_t i;
469 for (i = wb->cur; i < wb->last; i++)
470 wb->cclass[i] = canonclass(wb->ucs[i]);
471 }
473 static void
474 reorder(workbuf_t *wb) {
475 uint32_t c;
476 int32_t i;
477 int32_t cclass;
479 //assert(wb != nullptr);
481 i = wb->cur;
482 c = wb->ucs[i];
483 cclass = wb->cclass[i];
485 while (i > 0 && wb->cclass[i - 1] > cclass) {
486 wb->ucs[i] = wb->ucs[i - 1];
487 wb->cclass[i] =wb->cclass[i - 1];
488 i--;
489 wb->ucs[i] = c;
490 wb->cclass[i] = cclass;
491 }
492 }
494 static void
495 compose(workbuf_t *wb) {
496 int32_t cur;
497 uint32_t *ucs;
498 int32_t *cclass;
499 int32_t last_class;
500 int32_t nvoids;
501 int32_t i;
503 //assert(wb != nullptr && wb->cclass[0] == 0);
505 cur = wb->cur;
506 ucs = wb->ucs;
507 cclass = wb->cclass;
509 /*
510 * If there are no decomposition sequence that begins with
511 * the top character, composition is impossible.
512 */
513 if (!mdn__unicode_iscompositecandidate(ucs[0]))
514 return;
516 last_class = 0;
517 nvoids = 0;
518 for (i = 1; i <= cur; i++) {
519 uint32_t c;
520 int32_t cl = cclass[i];
522 if ((last_class < cl || cl == 0) &&
523 mdn__unicode_compose(ucs[0], ucs[i],
524 &c) == NS_OK) {
525 /*
526 * Replace the top character with the composed one.
527 */
528 ucs[0] = c;
529 cclass[0] = canonclass(c);
531 cclass[i] = -1; /* void this character */
532 nvoids++;
533 } else {
534 last_class = cl;
535 }
536 }
538 /* Purge void characters, if any. */
539 if (nvoids > 0)
540 workbuf_removevoid(wb);
541 }
543 static nsresult
544 flush_before_cur(workbuf_t *wb, nsAString& aToStr)
545 {
546 int32_t i;
548 for (i = 0; i < wb->cur; i++) {
549 if (!IS_IN_BMP(wb->ucs[i])) {
550 aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i]));
551 aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i]));
552 } else {
553 aToStr.Append((char16_t)(wb->ucs[i]));
554 }
555 }
557 workbuf_shift(wb, wb->cur);
559 return (NS_OK);
560 }
562 static void
563 workbuf_init(workbuf_t *wb) {
564 wb->cur = 0;
565 wb->last = 0;
566 wb->size = WORKBUF_SIZE;
567 wb->ucs = wb->ucs_buf;
568 wb->cclass = wb->class_buf;
569 }
571 static void
572 workbuf_free(workbuf_t *wb) {
573 if (wb->ucs != wb->ucs_buf) {
574 nsMemory::Free(wb->ucs);
575 nsMemory::Free(wb->cclass);
576 }
577 }
579 static nsresult
580 workbuf_extend(workbuf_t *wb) {
581 int32_t newsize = wb->size * 3;
583 if (wb->ucs == wb->ucs_buf) {
584 wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);
585 if (!wb->ucs)
586 return NS_ERROR_OUT_OF_MEMORY;
587 wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);
588 if (!wb->cclass) {
589 nsMemory::Free(wb->ucs);
590 wb->ucs = nullptr;
591 return NS_ERROR_OUT_OF_MEMORY;
592 }
593 } else {
594 void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
595 if (!buf)
596 return NS_ERROR_OUT_OF_MEMORY;
597 wb->ucs = (uint32_t*)buf;
598 buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
599 if (!buf)
600 return NS_ERROR_OUT_OF_MEMORY;
601 wb->cclass = (int32_t*)buf;
602 }
603 return (NS_OK);
604 }
606 static nsresult
607 workbuf_append(workbuf_t *wb, uint32_t c) {
608 nsresult r;
610 if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
611 return (r);
612 wb->ucs[wb->last++] = c;
613 return (NS_OK);
614 }
616 static void
617 workbuf_shift(workbuf_t *wb, int32_t shift) {
618 int32_t nmove;
620 //assert(wb != nullptr && wb->cur >= shift);
622 nmove = wb->last - shift;
623 memmove(&wb->ucs[0], &wb->ucs[shift],
624 nmove * sizeof(wb->ucs[0]));
625 memmove(&wb->cclass[0], &wb->cclass[shift],
626 nmove * sizeof(wb->cclass[0]));
627 wb->cur -= shift;
628 wb->last -= shift;
629 }
631 static void
632 workbuf_removevoid(workbuf_t *wb) {
633 int32_t i, j;
634 int32_t last = wb->last;
636 for (i = j = 0; i < last; i++) {
637 if (wb->cclass[i] >= 0) {
638 if (j < i) {
639 wb->ucs[j] = wb->ucs[i];
640 wb->cclass[j] = wb->cclass[i];
641 }
642 j++;
643 }
644 }
645 wb->cur -= last - j;
646 wb->last = j;
647 }
649 nsresult
650 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
651 {
652 return mdn_normalize(false, false, aSrc, aDest);
653 }
655 nsresult
656 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
657 {
658 return mdn_normalize(true, false, aSrc, aDest);
659 }
661 nsresult
662 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
663 {
664 return mdn_normalize(false, true, aSrc, aDest);
665 }
667 nsresult
668 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
669 {
670 return mdn_normalize(true, true, aSrc, aDest);
671 }
673 bool
674 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)
675 {
676 return mdn__unicode_compose(a, b, ab) == NS_OK;
677 }
679 bool
680 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)
681 {
682 // We can't use mdn__unicode_decompose here, because that does a recursive
683 // decomposition that may yield more than two characters, but the harfbuzz
684 // callback wants just a single-step decomp that is guaranteed to produce
685 // no more than two characters. So we do a low-level lookup in the table
686 // of decomp sequences.
687 const uint32_t *seq;
688 uint32_t seqidx = decompose_char(c, &seq);
689 if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
690 return false;
691 }
692 *c1 = *seq & ~END_BIT;
693 if (*seq & END_BIT) {
694 *c2 = 0;
695 } else {
696 *c2 = *++seq & ~END_BIT;
697 }
698 return true;
699 }