Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright (C) 2012 Apple Inc. All rights reserved. |
michael@0 | 3 | * |
michael@0 | 4 | * Redistribution and use in source and binary forms, with or without |
michael@0 | 5 | * modification, are permitted provided that the following conditions |
michael@0 | 6 | * are met: |
michael@0 | 7 | * 1. Redistributions of source code must retain the above copyright |
michael@0 | 8 | * notice, this list of conditions and the following disclaimer. |
michael@0 | 9 | * 2. Redistributions in binary form must reproduce the above copyright |
michael@0 | 10 | * notice, this list of conditions and the following disclaimer in the |
michael@0 | 11 | * documentation and/or other materials provided with the distribution. |
michael@0 | 12 | * |
michael@0 | 13 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
michael@0 | 14 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
michael@0 | 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
michael@0 | 16 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
michael@0 | 17 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
michael@0 | 18 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
michael@0 | 19 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
michael@0 | 20 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
michael@0 | 21 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 24 | */ |
michael@0 | 25 | |
// Canonicalize a single character code for case-insensitive comparison.
// See ES 5.1, 15.10.2.8.
//
// ch: a character code (number).
// Returns the code of the uppercased character, except that ch itself is
// returned when the uppercase form is more than one character long, or when
// a code >= 128 would map to a code < 128.
function canonicalize(ch)
{
    var upper = String.fromCharCode(ch).toUpperCase();

    if (upper.length <= 1) {
        var upperCode = upper.charCodeAt(0);
        // A non-ASCII character must never canonicalize into the ASCII range.
        var fallsIntoAscii = ch >= 128 && upperCode < 128;
        if (!fallsIntoAscii)
            return upperCode;
    }

    // Multi-character uppercase form, or a non-ASCII to ASCII mapping:
    // the character stands for itself.
    return ch;
}
michael@0 | 37 | |
// Upper bounds of the two character ranges handled by this script.
var MAX_UCS2 = 0xFFFF;
var MAX_LATIN = 0xFF;

// Pass 1: build groupedCanonically, a sparse array indexed by canonical value.
// Each populated slot holds the list of character codes that canonicalize to
// that index, in the order they were visited (ascending).
var groupedCanonically = [];
var i = 0;
while (i <= MAX_UCS2) {
    var ch = canonicalize(i);
    // Create the group the first time this canonical value is seen.
    (groupedCanonically[ch] || (groupedCanonically[ch] = [])).push(i);
    ++i;
}
michael@0 | 50 | |
var typeInfo = [];
var latinTypeInfo = [];
var characterSetInfo = [];
// Pass 2: populate typeInfo, latinTypeInfo & characterSetInfo. Every character
// code receives a "TypeName:payload" string in typeInfo and another in
// latinTypeInfo; groups of more than two characters are additionally recorded
// in characterSetInfo.
// NOTE(review): the type names are presumably enumerators declared in
// yarr/YarrCanonicalizeUCS2.h (the header the generated file includes) - confirm.
//
// groupedCanonically is sparse, so for-in is used deliberately to visit only
// the populated slots. The loop variable is declared here: assigning to an
// undeclared name creates an implicit global and is a ReferenceError in
// strict mode.
for (var cu in groupedCanonically) {
    // The set of characters that canonicalize to cu.
    var characters = groupedCanonically[cu];
    var k;

    // If there is only one, it is unique.
    if (characters.length === 1) {
        typeInfo[characters[0]] = "CanonicalizeUnique:0";
        latinTypeInfo[characters[0]] = characters[0] <= MAX_LATIN ? "CanonicalizeLatinSelf:0" : "CanonicalizeLatinInvalid:0";
        continue;
    }

    // Sort numerically so characters[0] is the lowest code in the group.
    characters.sort(function(x, y) { return x - y; });

    // If there are more than two characters, create an entry in characterSetInfo.
    if (characters.length > 2) {
        // The payload is the index this group is about to take in characterSetInfo.
        for (k = 0; k < characters.length; ++k)
            typeInfo[characters[k]] = "CanonicalizeSet:" + characterSetInfo.length;
        characterSetInfo.push(characters);

        if (characters[1] <= MAX_LATIN)
            throw new Error("sets with more than one latin character not supported!");
        if (characters[0] <= MAX_LATIN) {
            // Exactly one latin member: the others point at it, and it maps to itself.
            for (k = 0; k < characters.length; ++k)
                latinTypeInfo[characters[k]] = "CanonicalizeLatinOther:" + characters[0];
            latinTypeInfo[characters[0]] = "CanonicalizeLatinSelf:0";
        } else {
            for (k = 0; k < characters.length; ++k)
                latinTypeInfo[characters[k]] = "CanonicalizeLatinInvalid:0";
        }

        continue;
    }

    // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner.
    var lo = characters[0];
    var hi = characters[1];
    var delta = hi - lo;
    if (delta === 1) {
        // Adjacent pair: the type records whether the low partner is even (aligned) or odd.
        var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0";
        typeInfo[lo] = type;
        typeInfo[hi] = type;
    } else {
        typeInfo[lo] = "CanonicalizeRangeLo:" + delta;
        typeInfo[hi] = "CanonicalizeRangeHi:" + delta;
    }

    if (lo > MAX_LATIN) {
        latinTypeInfo[lo] = "CanonicalizeLatinInvalid:0";
        latinTypeInfo[hi] = "CanonicalizeLatinInvalid:0";
    } else if (hi > MAX_LATIN) {
        latinTypeInfo[lo] = "CanonicalizeLatinSelf:0";
        latinTypeInfo[hi] = "CanonicalizeLatinOther:" + lo;
    } else {
        // Both partners are latin: only pairs that differ by exactly the 0x20 bit are handled.
        if (delta !== 0x20 || (lo & 0x20))
            throw new Error("pairs of latin characters that don't mask with 0x20 not supported!");
        latinTypeInfo[lo] = "CanonicalizeLatinMask0x20:0";
        latinTypeInfo[hi] = "CanonicalizeLatinMask0x20:0";
    }
}
michael@0 | 116 | |
// Coalesce a per-character type table into maximal runs of identical type.
//
// types: array indexed by character code (0..MAX_UCS2) of type strings.
// Returns an array of {begin, end, type} records with inclusive bounds,
// ordered by begin and together covering 0..MAX_UCS2.
function coalesceRanges(types)
{
    var ranges = [];
    for (var end = 0; end <= MAX_UCS2; ++end) {
        var begin = end;
        var type = types[end];
        // Extend the run while the next character has the same type.
        while (end < MAX_UCS2 && types[end + 1] === type)
            ++end;
        ranges.push({begin: begin, end: end, type: type});
    }
    return ranges;
}

// Pass 3: coalesce types into ranges.
var rangeInfo = coalesceRanges(typeInfo);

// Pass 4: coalesce latin-1 types into ranges.
var latinRangeInfo = coalesceRanges(latinTypeInfo);
michael@0 | 136 | |
michael@0 | 137 | |
// Helper function to convert a number to a fixed width hex representation of a C uint16_t.
//
// x: a number, or a numeric string (converted with Number()).
// Returns a literal of the form "0x" + four lowercase hex digits + "u".
// Throws RangeError when the converted value is not an integer in 0..0xFFFF,
// since such a value cannot be written as a uint16_t literal and would
// otherwise end up as malformed text in the generated file.
function hex(x)
{
    var value = Number(x);
    // The negated range test also rejects NaN.
    if (!(value >= 0 && value <= 0xFFFF) || Math.floor(value) !== value)
        throw new RangeError("hex: value is not a valid uint16_t: " + String(x));

    var digits = value.toString(16);
    // Left-pad with zeros to exactly four digits.
    while (digits.length < 4)
        digits = "0" + digits;
    return "0x" + digits + "u";
}
michael@0 | 146 | |
// License header written at the top of the generated file. The lines are
// joined with "\n"; there is no newline after the closing " */".
var copyright = [
    "/*",
    " * Copyright (C) 2012 Apple Inc. All rights reserved.",
    " *",
    " * Redistribution and use in source and binary forms, with or without",
    " * modification, are permitted provided that the following conditions",
    " * are met:",
    " * 1. Redistributions of source code must retain the above copyright",
    " * notice, this list of conditions and the following disclaimer.",
    " * 2. Redistributions in binary form must reproduce the above copyright",
    " * notice, this list of conditions and the following disclaimer in the",
    " * documentation and/or other materials provided with the distribution.",
    " *",
    " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY",
    " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE",
    " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR",
    " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR",
    " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,",
    " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,",
    " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR",
    " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY",
    " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT",
    " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE",
    " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ",
    " */"
].join("\n");
michael@0 | 172 | |
// Emit the preamble of the generated file: license header, do-not-edit
// notice, includes and the opening of the JSC::Yarr namespace. A null entry
// stands for a bare print() call, i.e. a separator line.
// NOTE(review): print() is not defined in this file; presumably it is
// supplied by the shell that runs this script - confirm.
var preambleLines = [
    copyright,
    null,
    "// DO NOT EDIT! - this file autogenerated by YarrCanonicalizeUCS2.js",
    null,
    '#include "yarr/YarrCanonicalizeUCS2.h"',
    null,
    '#include <stddef.h>',
    null,
    "namespace JSC { namespace Yarr {",
    null
];
for (var lineIndex = 0; lineIndex < preambleLines.length; ++lineIndex) {
    if (preambleLines[lineIndex] === null)
        print();
    else
        print(preambleLines[lineIndex]);
}
michael@0 | 183 | |
// Print one initializer row per range record: begin, end, payload, type name.
//
// ranges: array of {begin, end, type} records, where type is a
// "TypeName:payload" string.
function printRangeEntries(ranges)
{
    for (var r = 0; r < ranges.length; ++r) {
        var info = ranges[r];
        // typeAndValue[0] is the type name, typeAndValue[1] the payload.
        var typeAndValue = info.type.split(':');
        print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
    }
}

// Emit one array per canonicalization set; a trailing 0 is appended after
// the members of each set.
for (var setIndex = 0; setIndex < characterSetInfo.length; ++setIndex) {
    var members = "";
    var set = characterSetInfo[setIndex];
    for (var j = 0; j < set.length; ++j)
        members += hex(set[j]) + ", ";
    print("uint16_t ucs2CharacterSet" + setIndex + "[] = { " + members + "0 };");
}
print();

// Emit the table of pointers to the per-set arrays, in set index order.
print("static const size_t UCS2_CANONICALIZATION_SETS = " + characterSetInfo.length + ";");
print("uint16_t* characterSetInfo[UCS2_CANONICALIZATION_SETS] = {");
for (var s = 0; s < characterSetInfo.length; ++s)
    print(" ucs2CharacterSet" + s + ",");
print("};");
print();

// Emit the UCS2 range table.
print("const size_t UCS2_CANONICALIZATION_RANGES = " + rangeInfo.length + ";");
print("UCS2CanonicalizationRange rangeInfo[UCS2_CANONICALIZATION_RANGES] = {");
printRangeEntries(rangeInfo);
print("};");
print();

// Emit the latin-1 range table.
print("const size_t LATIN_CANONICALIZATION_RANGES = " + latinRangeInfo.length + ";");
print("LatinCanonicalizationRange latinRangeInfo[LATIN_CANONICALIZATION_RANGES] = {");
printRangeEntries(latinRangeInfo);
print("};");
print();

// Close the namespaces opened in the preamble.
print("} } // JSC::Yarr");
print();
michael@0 | 218 |