Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 #!/usr/local/bin/perl
2 # This Source Code Form is subject to the terms of the Mozilla Public
3 # License, v. 2.0. If a copy of the MPL was not distributed with this
4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 use strict;
7 require "genverifier.pm";
8 use genverifier;
11 my @utf8_cls; # byte-range -> class table, filled in further down
12 my @utf8_st; # flattened state-transition table, filled in further down
13 my $utf8_ver; # receives the result of genverifier::GenVerifier, printed at the end of the script
15 #
16 #
17 # UTF8 encode the UCS4 into 1 to 4 bytes
18 #
19 # 1 byte 00 00 00 00 00 00 00 7f
20 # 2 bytes 00 00 00 80 00 00 07 ff
21 # 3 bytes 00 00 08 00 00 00 ff ff
22 # 4 bytes 00 01 00 00 00 10 ff ff
23 #
24 # However, since code points in the surrogate area (d800-dfff) must not be
25 # encoded into UTF8, we can remove the surrogate area from the 3 bytes range
26 #
27 # 1 byte 00 00 00 00 00 00 00 7f
28 # 2 bytes 00 00 00 80 00 00 07 ff
29 # 3 bytes 00 00 08 00 00 00 d7 ff
30 # 00 00 e0 00 00 00 ff ff
31 # 4 bytes 00 01 00 00 00 10 ff ff
32 #
33 # Now we break them into 6-bit groups for 2-4 bytes UTF8
34 #
35 # 1 byte 00 7f
36 # 2 bytes 02 00 1f 3f
37 # 3 bytes 00 20 00 0d 1f 3f
38 # 0e 00 00 0f 3f 3f
39 # 4 bytes 00 10 00 00 04 0f 3f 3f
40 #
41 # Break down more
42 #
43 # 1 byte 00 7f
44 # 2 bytes 02 00 1f 3f
45 # 3 bytes 00 20 00 00 3f 3f
46 # 01 00 00 0c 3f 3f
47 # 0d 00 00 0d 1f 3f
48 # 0e 00 00 0f 3f 3f
49 # 4 bytes 00 10 00 00 00 3f 3f 3f
50 # 01 00 00 00 03 3f 3f 3f
51 # 04 00 00 00 04 0f 3f 3f
52 #
53 # Now, add
54 # c0 to the lead byte of 2 bytes UTF8
55 # e0 to the lead byte of 3 bytes UTF8
56 # f0 to the lead byte of 4 bytes UTF8
57 # 80 to the trail bytes
58 #
59 # 1 byte 00 7f
60 # 2 bytes c2 80 df bf
61 # 3 bytes e0 a0 80 e0 bf bf
62 # e1 80 80 ec bf bf
63 # ed 80 80 ed 9f bf
64 # ee 80 80 ef bf bf
65 # 4 bytes f0 90 80 80 f0 bf bf bf
66 # f1 80 80 80 f3 bf bf bf
67 # f4 80 80 80 f4 8f bf bf
68 #
69 #
70 # Now we can construct our state diagram
71 #
72 # 0:0x0e,0x0f,0x1b->Error
73 # 0:[0-0x7f]->0
74 # 0:[c2-df]->3
75 # 0:e0->4
76 # 0:[e1-ec, ee-ef]->5
77 # 0:ed->6
78 # 0:f0->7
79 # 0:[f1-f3]->8
80 # 0:f4->9
81 # 0:*->Error
82 # 3:[80-bf]->0
83 # 3:*->Error
84 # 4:[a0-bf]->3
85 # 4:*->Error
86 # 5:[80-bf]->3
87 # 5:*->Error
88 # 6:[80-9f]->3
89 # 6:*->Error
90 # 7:[90-bf]->5
91 # 7:*->Error
92 # 8:[80-bf]->5
93 # 8:*->Error
94 # 9:[80-8f]->5
95 # 9:*->Error
96 #
97 # Now, we classify the bytes into classes
98 #
99 # 0e,0f,1b:k0
100 # 00-0d,10-1a,1c-7f:k1
101 # 80-8f:k2
102 # 90-9f:k3
103 # a0-bf:k4
104 # c0-c1:k0
105 # c2-df:k5
106 # e0:k6
107 # e1-ec:k7
108 # ed:k8
109 # ee-ef:k7
110 # f0:k9
111 # f1-f3:k10
112 # f4:k11
113 # f5-ff:k0
114 #
115 # Now, let's put them into array form
117 @utf8_cls = ( # byte-class table: each entry is [ first byte , last byte , class number ]
118 [ 0x00 , 0x00 , 1 ], # NUL is given class 1, the same class as the other plain ASCII bytes
119 [ 0x0e , 0x0f , 0 ], # class 0: bytes that go to Error from the start state
120 [ 0x1b , 0x1b , 0 ],
121 [ 0x01 , 0x0d , 1 ], # class 1: single-byte characters, start state stays in Start
122 [ 0x10 , 0x1a , 1 ],
123 [ 0x1c , 0x7f , 1 ],
124 [ 0x80 , 0x8f , 2 ], # classes 2-4: the three trail-byte sub-ranges of 80-bf
125 [ 0x90 , 0x9f , 3 ],
126 [ 0xa0 , 0xbf , 4 ],
127 [ 0xc0 , 0xc1 , 0 ], # c0-c1 never start a sequence in the ranges derived above (2-byte leads begin at c2)
128 [ 0xc2 , 0xdf , 5 ], # class 5: lead byte of a 2-byte sequence
129 [ 0xe0 , 0xe0 , 6 ], # classes 6-8: lead bytes of 3-byte sequences
130 [ 0xe1 , 0xec , 7 ],
131 [ 0xed , 0xed , 8 ], # ed gets its own class because its second byte is limited to 80-9f
132 [ 0xee , 0xef , 7 ],
133 [ 0xf0 , 0xf0 , 9 ], # classes 9-11: lead bytes of 4-byte sequences
134 [ 0xf1 , 0xf3 , 10 ],
135 [ 0xf4 , 0xf4 , 11 ],
136 [ 0xf5 , 0xff , 0 ], # f5-ff are above the f4 upper bound of the 4-byte range
137 );
138 #
139 # Now, we write the state diagram in class
140 #
141 # 0:k0->Error
142 # 0:k1->0
143 # 0:k5->3
144 # 0:k6->4
145 # 0:k7->5
146 # 0:k8->6
147 # 0:k9->7
148 # 0:k10->8
149 # 0:k11->9
150 # 0:*->Error
151 # 3:k2,k3,k4->0
152 # 3:*->Error
153 # 4:k4->3
154 # 4:*->Error
155 # 5:k2,k3,k4->3
156 # 5:*->Error
157 # 6:k2,k3->3
158 # 6:*->Error
159 # 7:k3,k4->5
160 # 7:*->Error
161 # 8:k2,k3,k4->5
162 # 8:*->Error
163 # 9:k2->5
164 # 9:*->Error
165 #
166 # Now, let's put them into array
167 #
168 package genverifier;
169 @utf8_st = ( # flattened transition table: one row per state, one column per byte class k0..k11, value = next state
170 # 0 1 2 3 4 5 6 7 8 9 10 11
171 1, 0, 1, 1, 1, 3, 4, 5, 6, 7, 8, 9, # state 0 Start
172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # state 1 Error
173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # state 2 ItsMe
174 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, # state 3: last trail byte, k2-k4 (80-bf) -> Start
175 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, # state 4: after e0, only k4 (a0-bf) -> 3
176 1, 1, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, # state 5: two trail bytes left, k2-k4 (80-bf) -> 3
177 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, # state 6: after ed, only k2,k3 (80-9f) -> 3
178 1, 1, 1, 5, 5, 1, 1, 1, 1, 1, 1, 1, # state 7: after f0, only k3,k4 (90-bf) -> 5; k2 (80-8f) would be below f0 90 80 80 and must be Error
179 1, 1, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, # state 8: after f1-f3, k2-k4 (80-bf) -> 5
180 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, # state 9: after f4, only k2 (80-8f) -> 5
181 );
185 $utf8_ver = genverifier::GenVerifier('UTF8', 'UTF-8', \@utf8_cls, 12, \@utf8_st); # 12 is presumably the number of byte classes (columns 0..11 of @utf8_st) - confirm against genverifier.pm
186 print($utf8_ver);