intl/unicharutil/tests/NormalizationTest.cpp

branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
equal deleted inserted replaced
-1:000000000000 0:272c07612522
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
#include <stdio.h>
#include <string.h>
#include "nsXPCOM.h"
#include "nsIUnicodeNormalizer.h"
#include "nsStringAPI.h"
#include "nsCharTraits.h"
#include "nsServiceManagerUtils.h"
12
/*
 * One row of normalization test data.
 *
 * c1..c5 are the five NUL-terminated columns compared by TestInvariants():
 * c1 is the source string, and c2, c3, c4 and c5 are the expected NFC, NFD,
 * NFKC and NFKD results respectively.  description is the label printed by
 * showError() when a comparison on this row fails.
 *
 * The pointers are const-qualified because the rows are only ever read in
 * this file, and pointing a non-const pointer at a string literal is not
 * valid C++11.
 */
struct testcaseLine {
  const wchar_t* c1;
  const wchar_t* c2;
  const wchar_t* c3;
  const wchar_t* c4;
  const wchar_t* c5;
  const char* description;
};
21
/*
 * Debug tracing helpers.
 *
 * DEBUG_NAMED_TESTCASE(t, s) prints the string literal t followed by every
 * code unit of s in hex, but only when DEBUG_smontagu is defined; otherwise
 * it does nothing.  Both variants are wrapped in do { } while (0) so that an
 * invocation followed by ';' is exactly one statement and stays correct
 * under an unbraced if/else.
 *
 * DEBUG_TESTCASE(x) is the same, using the spelling of x as the label.
 */
#ifdef DEBUG_smontagu
#define DEBUG_NAMED_TESTCASE(t, s) \
  do { \
    printf(t ": "); \
    for (uint32_t i = 0; i < (s).Length(); ++i) \
      printf("%x ", (s).CharAt(i)); \
    printf("\n"); \
  } while (0)
#else
#define DEBUG_NAMED_TESTCASE(t, s) do { } while (0)
#endif

#define DEBUG_TESTCASE(x) DEBUG_NAMED_TESTCASE(#x, x)
33
/*
 * NORMALIZE_AND_COMPARE(base, comparison, form, description)
 *
 * Normalizes `comparison` with normalizer->NormalizeUnicode<form>() and
 * checks that the result equals `base`.  On a mismatch it clears `rv` and
 * reports through showError(), labelling the message with the spelling of
 * the arguments (e.g. "c2 != NFC(c1)").
 *
 * The macro relies on names from the invoking scope: an nsAString-like
 * `normalized` scratch string, a bool `rv`, and the global `normalizer`.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is a single statement.
 */
#define NORMALIZE_AND_COMPARE(base, comparison, form, description) \
  do { \
    normalized.Truncate(); \
    normalizer->NormalizeUnicode##form(comparison, normalized); \
    DEBUG_NAMED_TESTCASE(#form "(" #comparison ")", normalized); \
    if (!(base).Equals(normalized)) { \
      rv = false; \
      showError(description, #base " != " #form "(" #comparison ")\n"); \
    } \
  } while (0)
42
// Class ID passed to CallGetService() in main() to obtain the normalizer.
NS_DEFINE_CID(kUnicodeNormalizerCID, NS_UNICODE_NORMALIZER_CID);

// Normalizer service under test.  Set in main() and used by every
// NORMALIZE_AND_COMPARE invocation; released again at the end of main().
nsIUnicodeNormalizer *normalizer;
// When true, showError() prints a line for each failed comparison.
// Enabled by the "-v" command-line option in main().
bool verboseMode = false;
47
48 #include "NormalizationData.h"
49
50 void showError(const char* description, const char* errorText)
51 {
52 if (verboseMode)
53 printf("%s failed: %s", description, errorText);
54 }
55
56 bool TestInvariants(testcaseLine* testLine)
57 {
58 nsAutoString c1, c2, c3, c4, c5, normalized;
59 c1 = nsDependentString((char16_t*)testLine->c1);
60 c2 = nsDependentString((char16_t*)testLine->c2);
61 c3 = nsDependentString((char16_t*)testLine->c3);
62 c4 = nsDependentString((char16_t*)testLine->c4);
63 c5 = nsDependentString((char16_t*)testLine->c5);
64 bool rv = true;
65
66 /*
67 1. The following invariants must be true for all conformant implementations
68
69 NFC
70 c2 == NFC(c1) == NFC(c2) == NFC(c3)
71 */
72 DEBUG_TESTCASE(c2);
73 NORMALIZE_AND_COMPARE(c2, c1, NFC, testLine->description);
74 NORMALIZE_AND_COMPARE(c2, c2, NFC, testLine->description);
75 NORMALIZE_AND_COMPARE(c2, c3, NFC, testLine->description);
76
77 /*
78 c4 == NFC(c4) == NFC(c5)
79 */
80 DEBUG_TESTCASE(c4);
81 NORMALIZE_AND_COMPARE(c4, c4, NFC, testLine->description);
82 NORMALIZE_AND_COMPARE(c4, c5, NFC, testLine->description);
83
84 /*
85 NFD
86 c3 == NFD(c1) == NFD(c2) == NFD(c3)
87 */
88 DEBUG_TESTCASE(c3);
89 NORMALIZE_AND_COMPARE(c3, c1, NFD, testLine->description);
90 NORMALIZE_AND_COMPARE(c3, c2, NFD, testLine->description);
91 NORMALIZE_AND_COMPARE(c3, c3, NFD, testLine->description);
92 /*
93 c5 == NFD(c4) == NFD(c5)
94 */
95 DEBUG_TESTCASE(c5);
96 NORMALIZE_AND_COMPARE(c5, c4, NFD, testLine->description);
97 NORMALIZE_AND_COMPARE(c5, c5, NFD, testLine->description);
98
99 /*
100 NFKC
101 c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
102 */
103 DEBUG_TESTCASE(c4);
104 NORMALIZE_AND_COMPARE(c4, c1, NFKC, testLine->description);
105 NORMALIZE_AND_COMPARE(c4, c2, NFKC, testLine->description);
106 NORMALIZE_AND_COMPARE(c4, c3, NFKC, testLine->description);
107 NORMALIZE_AND_COMPARE(c4, c4, NFKC, testLine->description);
108 NORMALIZE_AND_COMPARE(c4, c5, NFKC, testLine->description);
109
110 /*
111 NFKD
112 c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
113 */
114 DEBUG_TESTCASE(c5);
115 NORMALIZE_AND_COMPARE(c5, c1, NFKD, testLine->description);
116 NORMALIZE_AND_COMPARE(c5, c2, NFKD, testLine->description);
117 NORMALIZE_AND_COMPARE(c5, c3, NFKD, testLine->description);
118 NORMALIZE_AND_COMPARE(c5, c4, NFKD, testLine->description);
119 NORMALIZE_AND_COMPARE(c5, c5, NFKD, testLine->description);
120
121 return rv;
122 }
123
124 uint32_t UTF32CodepointFromTestcase(testcaseLine* testLine)
125 {
126 if (!IS_SURROGATE(testLine->c1[0]))
127 return testLine->c1[0];
128
129 NS_ASSERTION(NS_IS_HIGH_SURROGATE(testLine->c1[0]) &&
130 NS_IS_LOW_SURROGATE(testLine->c1[1]),
131 "Test data neither in BMP nor legal surrogate pair");
132 return SURROGATE_TO_UCS4(testLine->c1[0], testLine->c1[1]);
133 }
134
135 bool TestUnspecifiedCodepoint(uint32_t codepoint)
136 {
137 bool rv = true;
138 char16_t unicharArray[3];
139 nsAutoString X, normalized;
140 char description[9];
141
142 if (IS_IN_BMP(codepoint)) {
143 unicharArray[0] = codepoint;
144 unicharArray[1] = 0;
145 X = nsDependentString(unicharArray);
146 }
147 else {
148 unicharArray[0] = H_SURROGATE(codepoint);
149 unicharArray[1] = L_SURROGATE(codepoint);
150 unicharArray[2] = 0;
151 X = nsDependentString(unicharArray);
152 }
153
154 /*
155 2. For every code point X assigned in this version of Unicode that is not specifically
156 listed in Part 1, the following invariants must be true for all conformant
157 implementations:
158
159 X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
160 */
161 DEBUG_TESTCASE(X);
162 sprintf(description, "U+%04X", codepoint);
163 NORMALIZE_AND_COMPARE(X, X, NFC, description);
164 NORMALIZE_AND_COMPARE(X, X, NFD, description);
165 NORMALIZE_AND_COMPARE(X, X, NFKC, description);
166 NORMALIZE_AND_COMPARE(X, X, NFKD, description);
167 return rv;
168 }
169
170 void TestPart0()
171 {
172 printf("Test Part0: Specific cases\n");
173
174 uint32_t i = 0;
175 uint32_t numFailed = 0;
176 uint32_t numPassed = 0;
177
178 while (Part0TestData[i].c1[0] != 0) {
179 if (TestInvariants(&Part0TestData[i++]))
180 ++numPassed;
181 else
182 ++numFailed;
183 }
184 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
185 }
186
187 void TestPart1()
188 {
189 printf("Test Part1: Character by character test\n");
190
191 uint32_t i = 0;
192 uint32_t numFailed = 0;
193 uint32_t numPassed = 0;
194 uint32_t codepoint;
195 uint32_t testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[i]);
196
197 for (codepoint = 1; codepoint < 0x110000; ++codepoint) {
198 if (testDataCodepoint == codepoint) {
199 if (TestInvariants(&Part1TestData[i]))
200 ++numPassed;
201 else
202 ++numFailed;
203 testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[++i]);
204 } else {
205 if (TestUnspecifiedCodepoint(codepoint))
206 ++numPassed;
207 else
208 ++numFailed;
209 }
210 }
211 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
212 }
213
214 void TestPart2()
215 {
216 printf("Test Part2: Canonical Order Test\n");
217
218 uint32_t i = 0;
219 uint32_t numFailed = 0;
220 uint32_t numPassed = 0;
221
222 while (Part2TestData[i].c1[0] != 0) {
223 if (TestInvariants(&Part2TestData[i++]))
224 ++numPassed;
225 else
226 ++numFailed;
227 }
228 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
229 }
230
231 void TestPart3()
232 {
233 printf("Test Part3: PRI #29 Test\n");
234
235 uint32_t i = 0;
236 uint32_t numFailed = 0;
237 uint32_t numPassed = 0;
238
239 while (Part3TestData[i].c1[0] != 0) {
240 if (TestInvariants(&Part3TestData[i++]))
241 ++numPassed;
242 else
243 ++numFailed;
244 }
245 printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
246 }
247
248 int main(int argc, char** argv) {
249 if (sizeof(wchar_t) != 2) {
250 printf("This test can only be run where sizeof(wchar_t) == 2\n");
251 return 1;
252 }
253 if (strlen(versionText) == 0) {
254 printf("No testcases: to run the tests generate the header file using\n");
255 printf(" perl genNormalizationData.pl\n");
256 printf("in intl/unichar/tools and rebuild\n");
257 return 1;
258 }
259
260 printf("NormalizationTest: test nsIUnicodeNormalizer. UCD version: %s\n",
261 versionText);
262 if (argc <= 1)
263 verboseMode = false;
264 else if ((argc == 2) && (!strcmp(argv[1], "-v")))
265 verboseMode = true;
266 else {
267 printf(" Usage: NormalizationTest [OPTION]..\n");
268 printf("Options:\n");
269 printf(" -v Verbose mode\n");
270 return 1;
271 }
272
273 nsresult rv = NS_InitXPCOM2(nullptr, nullptr, nullptr);
274 if (NS_FAILED(rv)) {
275 printf("NS_InitXPCOM2 failed\n");
276 return 1;
277 }
278
279 normalizer = nullptr;
280 nsresult res;
281 res = CallGetService(kUnicodeNormalizerCID, &normalizer);
282
283 if(NS_FAILED(res) || !normalizer) {
284 printf("GetService failed\n");
285 return 1;
286 }
287
288 TestPart0();
289 TestPart1();
290 TestPart2();
291 TestPart3();
292
293 NS_RELEASE(normalizer);
294
295 printf("Test finished \n");
296 return 0;
297 }

mercurial