Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Open question: how to get FindCookie callers to test the condition and
pass a nullptr when double-key logic is disabled.
michael@0 | 1 | #!/usr/local/bin/perl |
michael@0 | 2 | # -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- |
michael@0 | 3 | # |
michael@0 | 4 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 5 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 6 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
# Lookup tables shared by the subs in this script.  All keys and values are
# hex strings without the "0x" prefix.
#
# They are initialised with the empty list "()".  Assigning "{}" to a hash
# stores a stringified hash reference as a bogus key, which the print*table
# subs would then write into the generated files.
%gb18030tounicode = ();      # GB18030 code -> Unicode   (filled by readgb18030)
%unicodetogb18030 = ();      # Unicode -> GB18030 code   (filled by readgb18030)
%unicodetocp936 = ();        # Unicode -> CP936 code     (readcp936, addeudc)
%cp936tounicode = ();        # CP936 code -> Unicode     (filled by readcp936)
%tounicodecommon = ();       # codes on which both tables agree  (splittable)
%gb18030tounicodeuniq = ();  # GB18030-only codes, up to 4 hex digits
%gb180304btounicode = ();    # GB18030-only codes, more than 4 hex digits
%cp936tounicodeuniq = ();    # CP936-only codes

%map = ();                   # not referenced by any sub in this script
# Row width used by cp936tonum: 0xff-0x80 = 127 plus 0x7f-0x40 = 63, i.e. 190.
$rowwidth = ((0xff - 0x80)+(0x7f - 0x40));
# Convert a two-byte CP936 code into its linear position in the code table.
#
# Argument: a string whose characters 2-3 are the lead byte and characters
#           4-5 the trail byte, both in hex (i.e. the form "0xHHLL").
# Returns:  (lead - 0x81) * $rowwidth plus the trail-byte offset.  Trail
#           bytes 0x40.. count from 0; trail bytes 0x80.. continue from
#           0x7f-0x40, so the unused value 0x7F takes no slot.
#
# Reads the file-level global $rowwidth.  The sub is declared without a
# prototype: it takes one argument, and an empty "()" prototype would make a
# plain cp936tonum($code) call a compile-time error.
sub cp936tonum
{
  my($cp936) = @_;
  my $first  = hex(substr($cp936, 2, 2));
  my $second = hex(substr($cp936, 4, 2));
  my $jnum   = ($first - 0x81) * $rowwidth;
  if ($second >= 0x80)
  {
    $jnum += $second - 0x80 + (0x7f - 0x40);
  }
  else
  {
    $jnum += $second - 0x40;
  }
  return $jnum;
}
# Fill %unicodetocp936 with mappings for consecutive code points starting at
# U+E000 (the sub name suggests these are end-user-defined characters).
#
# Three CP936 ranges are walked in this order, each lead byte covering all of
# its trail bytes before moving to the next lead byte:
#   AAA1-AFFE, F8A1-FEFE, A140-A7A0 (trail byte 7F is skipped).
# Existing entries for these code points are overwritten unconditionally.
#
# The running code point is kept in a lexical, so the package global $u used
# by other subs is left untouched.
sub addeudc()
{
  # Each entry: [first lead byte, last lead byte, [trail bytes]].
  my @ranges = (
    [0xAA, 0xAF, [0xA1 .. 0xFE]],                 # For AAA1-AFFE
    [0xF8, 0xFE, [0xA1 .. 0xFE]],                 # For F8A1-FEFE
    [0xA1, 0xA7, [0x40 .. 0x7E, 0x80 .. 0xA0]],   # For A140-A7A0, skip 7F
  );

  my $u = 0xE000;
  foreach my $range (@ranges)
  {
    my($firstlead, $lastlead, $trails) = @$range;
    foreach my $h ($firstlead .. $lastlead)
    {
      foreach my $l (@$trails)
      {
        $unicodetocp936{sprintf("%04X", $u)} = sprintf("%02X%02X", $h, $l);
        $u++;
      }
    }
  }
}
michael@0 | 83 | |
# Read CP936.txt from the current directory into %cp936tounicode and
# %unicodetocp936.
#
# Lines starting with "#" are skipped.  Other lines are split on tabs into a
# CP936 code and a Unicode value; lines whose second field does not start
# with "0x" are ignored.  The "0x" prefix is stripped and up to four hex
# digits are kept from each field.
#
# The first CP936 code seen for a Unicode value wins; later duplicates leave
# %unicodetocp936 unchanged and produce a warning on STDERR.  The emptiness
# test uses string comparison: a numeric "==" would treat every code that
# starts with a letter (e.g. "A1A1") as 0 and therefore as empty.
#
# Dies if the file cannot be opened.
sub readcp936()
{
  open(my $fh, "<", "CP936.txt") || die "Cannot open CP936 file: $!";
  while (my $line = <$fh>)
  {
    next if $line =~ /^#/;
    $line =~ s/\r?\n\z//;   # strip the line terminator only, never data
    my($gb, $u) = split(/\t/, $line);
    next unless defined($u) && $u =~ /^0x/;

    my $u1  = substr($u, 2, 4);
    my $gb1 = substr($gb, 2, 4);
    $cp936tounicode{$gb1} = $u1;
    if (!defined($unicodetocp936{$u1}) || $unicodetocp936{$u1} eq "") {
      $unicodetocp936{$u1} = $gb1;
    } else {
      warn "WARNING: Unicode " . $u1 . " already map to CP936 " .
           $unicodetocp936{$u1} . " when we try to map to " . $gb1 . "\n";
    }
  }
  close($fh);
}
# Read the file "GB18030" from the current directory into %gb18030tounicode
# and %unicodetogb18030.
#
# Only lines starting with an upper-case hex digit are used.  Each is split
# on whitespace into a Unicode value followed by a GB18030 code; lines with
# fewer than two fields are ignored.
#
# The first GB18030 code seen for a Unicode value wins; later duplicates
# leave %unicodetogb18030 unchanged and produce a warning on STDERR.  The
# emptiness test uses string comparison: a numeric "==" would treat every
# code that starts with a letter as 0 and therefore as empty.
#
# Dies if the file cannot be opened.
sub readgb18030()
{
  open(my $fh, "<", "GB18030") || die "Cannot open GB18030 file: $!";
  while (my $line = <$fh>)
  {
    next unless $line =~ /^[0-9A-F]/;
    # split ' ' splits on runs of whitespace and drops the line terminator.
    my($u, $gb) = split(' ', $line);
    next unless defined($gb);

    $gb18030tounicode{$gb} = $u;
    if (!defined($unicodetogb18030{$u}) || $unicodetogb18030{$u} eq "") {
      $unicodetogb18030{$u} = $gb;
    } else {
      warn "WARNING: Unicode " . $u . " already map to GB18030 " .
           $unicodetogb18030{$u} . " when we try to map to " . $gb . "\n";
    }
  }
  close($fh);
}
# Partition the Unicode -> code tables into the four output tables.
#
# For every code point 0000-FFFF (as a 4-digit upper-case hex key):
#   - if %unicodetogb18030 and %unicodetocp936 hold the same non-empty code,
#     the code goes into %tounicodecommon;
#   - otherwise a non-empty GB18030 code goes into %gb180304btounicode when
#     it is longer than 4 characters and into %gb18030tounicodeuniq if not,
#     and a non-empty CP936 code goes into %cp936tounicodeuniq.
# Each output table maps the code back to the Unicode key.
#
# The length test must call length() as a function.  "$s.length" is string
# concatenation in Perl, not a method call, and would turn the test into a
# numeric comparison of the code itself.
sub splittable()
{
  for (my $i = 0; $i < 0x10000; $i++) {
    my $u  = sprintf("%04X", $i);
    my $gb = defined($unicodetogb18030{$u}) ? $unicodetogb18030{$u} : "";
    my $cp = defined($unicodetocp936{$u})   ? $unicodetocp936{$u}   : "";

    if ($gb eq $cp) {
      $tounicodecommon{$gb} = $u if $gb ne "";
      next;
    }

    if ($gb ne "") {
      if (length($gb) > 4) {
        $gb180304btounicode{$gb} = $u;
      } else {
        $gb18030tounicodeuniq{$gb} = $u;
      }
    }
    if ($cp ne "") {
      $cp936tounicodeuniq{$cp} = $u;
    }
  }
}
# Convert a four-byte GB18030 code into a linear index.
#
# Argument: eight hex digits, two per byte (no "0x" prefix).
# Returns:  the index formatted with "%04X".  Bytes 1 and 3 are taken
#           relative to 0x81 with a radix of 126, bytes 2 and 4 relative to
#           0x30 with a radix of 10:
#             (((b1 * 10) + b2) * 126 + b3) * 10 + b4
#
# Declared without a prototype: the sub takes one argument, and an empty
# "()" prototype would make a plain gb4bytestoidx($gb) call a compile-time
# error.
sub gb4bytestoidx
{
  my($gb) = @_;
  my $b1 = hex(substr($gb, 0, 2)) - 0x81;
  my $b2 = hex(substr($gb, 2, 2)) - 0x30;
  my $b3 = hex(substr($gb, 4, 2)) - 0x81;
  my $b4 = hex(substr($gb, 6, 2)) - 0x30;
  return sprintf("%04X", ((($b1 * 10) + $b2) * 126 + $b3) * 10 + $b4);
}
# Write %tounicodecommon to gbkcommon.txt in the current directory.
#
# One line per entry, "0x<code>\t0x<unicode>", ordered by a string sort of
# the codes.  Dies if the file cannot be opened, or if close reports an
# error (buffered write failures only surface there).
sub printcommontable()
{
  open(my $out, ">", "gbkcommon.txt") || die "cannot open gbkcommon.txt: $!";
  foreach my $gb (sort(keys %tounicodecommon)) {
    print $out "0x" . $gb . "\t0x" . $tounicodecommon{$gb} . "\n";
  }
  close($out) || die "cannot close gbkcommon.txt: $!";
}
# Write %cp936tounicodeuniq to cp936uniq.txt in the current directory.
#
# One line per entry, "0x<code>\t0x<unicode>", ordered by a string sort of
# the codes.  Dies if the file cannot be opened, or if close reports an
# error (buffered write failures only surface there).
sub printcp936table()
{
  open(my $out, ">", "cp936uniq.txt") || die "cannot open cp936uniq.txt: $!";
  foreach my $gb (sort(keys %cp936tounicodeuniq)) {
    print $out "0x" . $gb . "\t0x" . $cp936tounicodeuniq{$gb} . "\n";
  }
  close($out) || die "cannot close cp936uniq.txt: $!";
}
# Write %gb180304btounicode to gb180304b.txt in the current directory.
#
# Entries whose Unicode value is "FFFF" are omitted.  Every other entry
# becomes one line "0x<index>\t0x<unicode>\t# 0x<code>", where <index> is
# what gb4bytestoidx returns for the code.  Lines are ordered by a string
# sort of the codes.  Dies if the file cannot be opened, or if close reports
# an error (buffered write failures only surface there).
sub printgb180304btable()
{
  open(my $out, ">", "gb180304b.txt") || die "cannot open gb180304b.txt: $!";
  foreach my $gb (sort(keys %gb180304btounicode)) {
    next if $gb180304btounicode{$gb} eq "FFFF";
    # The & call form works whatever prototype gb4bytestoidx is declared with.
    print $out "0x" . &gb4bytestoidx($gb) . "\t0x" . $gb180304btounicode{$gb} . "\t# 0x" . $gb . "\n";
  }
  close($out) || die "cannot close gb180304b.txt: $!";
}
# Write %gb18030tounicodeuniq to gb18030uniq.txt in the current directory.
#
# One line per entry, "0x<code>\t0x<unicode>", ordered by a string sort of
# the codes.  Dies if the file cannot be opened, or if close reports an
# error (buffered write failures only surface there).
sub printgb18030table()
{
  open(my $out, ">", "gb18030uniq.txt") || die "cannot open gb18030uniq.txt: $!";
  foreach my $gb (sort(keys %gb18030tounicodeuniq)) {
    print $out "0x" . $gb . "\t0x" . $gb18030tounicodeuniq{$gb} . "\n";
  }
  close($out) || die "cannot close gb18030uniq.txt: $!";
}
michael@0 | 195 | |
# Run the external commands that post-process the generated .txt tables.
#
# Each command line is echoed to STDOUT and then handed to the shell via
# system().  A command that cannot be started or exits non-zero produces a
# warning on STDERR; the remaining commands are still run, so one failure
# does not hide the others.
sub genufut()
{
  my @commands = (
    "umaptable -uf < gb18030uniq.txt > gb18030uniq2b.uf",
    "umaptable -ut < gb18030uniq.txt > gb18030uniq2b.ut",
    "umaptable -uf < cp936uniq.txt > gbkuniq2b.uf",
    "umaptable -ut < cp936uniq.txt > gbkuniq2b.ut",
    "umaptable -uf < gb180304b.txt > gb180304bytes.uf",
    "umaptable -ut < gb180304b.txt > gb180304bytes.ut",
    "perl cp936tocdx.pl > cp936map.h",
  );

  foreach my $command (@commands) {
    print($command . "\n");
    if (system($command) != 0) {
      warn "WARNING: command failed (status $?): $command\n";
    }
  }
}
michael@0 | 219 | |
# Driver: run every stage once, in order.
#   - readgb18030 and readcp936 load the two source tables;
#   - addeudc runs after readcp936 and assigns unconditionally, so its
#     entries replace any that readcp936 stored for the same code points;
#   - splittable partitions the loaded tables;
#   - the print*table subs write the partitions to .txt files;
#   - genufut runs the external commands that read those .txt files.
foreach my $stage (\&readgb18030,
                   \&readcp936,
                   \&addeudc,
                   \&splittable,
                   \&printcommontable,
                   \&printgb180304btable,
                   \&printgb18030table,
                   \&printcp936table,
                   \&genufut)
{
  $stage->();
}