Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include <stdio.h> |
michael@0 | 7 | #include "nsXPCOM.h" |
michael@0 | 8 | #include "nsIEntityConverter.h" |
michael@0 | 9 | #include "nsISaveAsCharset.h" |
michael@0 | 10 | #include "nsCOMPtr.h" |
michael@0 | 11 | #include "nsIUnicodeNormalizer.h" |
michael@0 | 12 | #include "nsStringAPI.h" |
michael@0 | 13 | #include "nsUnicharUtils.h" |
michael@0 | 14 | #include "nsMemory.h" |
michael@0 | 15 | #include "nsComponentManagerUtils.h" |
michael@0 | 16 | #include "nsServiceManagerUtils.h" |
michael@0 | 17 | |
michael@0 | 18 | NS_DEFINE_CID(kEntityConverterCID, NS_ENTITYCONVERTER_CID); |
michael@0 | 19 | NS_DEFINE_CID(kSaveAsCharsetCID, NS_SAVEASCHARSET_CID); |
michael@0 | 20 | NS_DEFINE_CID(kUnicodeNormalizerCID, NS_UNICODE_NORMALIZER_CID); |
michael@0 | 21 | |
// Number of entries checked in each case-conversion table. The per-table
// aliases all resolve to the same shared length.
#define TESTLEN 32
#define T2LEN   TESTLEN
#define T3LEN   TESTLEN
#define T4LEN   TESTLEN
michael@0 | 26 | |
michael@0 | 27 | // test data for ToUpper |
michael@0 | 28 | static char16_t t2data [T2LEN+1] = { |
michael@0 | 29 | 0x0031 , // 0 |
michael@0 | 30 | 0x0019 , // 1 |
michael@0 | 31 | 0x0043 , // 2 |
michael@0 | 32 | 0x0067 , // 3 |
michael@0 | 33 | 0x00C8 , // 4 |
michael@0 | 34 | 0x00E9 , // 5 |
michael@0 | 35 | 0x0147 , // 6 |
michael@0 | 36 | 0x01C4 , // 7 |
michael@0 | 37 | 0x01C6 , // 8 |
michael@0 | 38 | 0x01C5 , // 9 |
michael@0 | 39 | 0x03C0 , // 10 |
michael@0 | 40 | 0x03B2 , // 11 |
michael@0 | 41 | 0x0438 , // 12 |
michael@0 | 42 | 0x04A5 , // 13 |
michael@0 | 43 | 0x05D0 , // 14 |
michael@0 | 44 | 0x0A20 , // 15 |
michael@0 | 45 | 0x30B0 , // 16 |
michael@0 | 46 | 0x5185 , // 17 |
michael@0 | 47 | 0xC021 , // 18 |
michael@0 | 48 | 0xFF48 , // 19 |
michael@0 | 49 | 0x01C7 , // 20 |
michael@0 | 50 | 0x01C8 , // 21 |
michael@0 | 51 | 0x01C9 , // 22 |
michael@0 | 52 | 0x01CA , // 23 |
michael@0 | 53 | 0x01CB , // 24 |
michael@0 | 54 | 0x01CC , // 25 |
michael@0 | 55 | 0x01F1 , // 26 |
michael@0 | 56 | 0x01F2 , // 27 |
michael@0 | 57 | 0x01F3 , // 28 |
michael@0 | 58 | 0x0250 , // 29 |
michael@0 | 59 | 0x0271 , // 30 |
michael@0 | 60 | 0xA641 , // 31 |
michael@0 | 61 | 0x00 |
michael@0 | 62 | }; |
michael@0 | 63 | // expected result for ToUpper |
michael@0 | 64 | static char16_t t2result[T2LEN+1] = { |
michael@0 | 65 | 0x0031 , // 0 |
michael@0 | 66 | 0x0019 , // 1 |
michael@0 | 67 | 0x0043 , // 2 |
michael@0 | 68 | 0x0047 , // 3 |
michael@0 | 69 | 0x00C8 , // 4 |
michael@0 | 70 | 0x00C9 , // 5 |
michael@0 | 71 | 0x0147 , // 6 |
michael@0 | 72 | 0x01C4 , // 7 |
michael@0 | 73 | 0x01C4 , // 8 |
michael@0 | 74 | 0x01C4 , // 9 |
michael@0 | 75 | 0x03A0 , // 10 |
michael@0 | 76 | 0x0392 , // 11 |
michael@0 | 77 | 0x0418 , // 12 |
michael@0 | 78 | 0x04A4 , // 13 |
michael@0 | 79 | 0x05D0 , // 14 |
michael@0 | 80 | 0x0A20 , // 15 |
michael@0 | 81 | 0x30B0 , // 16 |
michael@0 | 82 | 0x5185 , // 17 |
michael@0 | 83 | 0xC021 , // 18 |
michael@0 | 84 | 0xFF28 , // 19 |
michael@0 | 85 | 0x01C7 , // 20 |
michael@0 | 86 | 0x01C7 , // 21 |
michael@0 | 87 | 0x01C7 , // 22 |
michael@0 | 88 | 0x01CA , // 23 |
michael@0 | 89 | 0x01CA , // 24 |
michael@0 | 90 | 0x01CA , // 25 |
michael@0 | 91 | 0x01F1 , // 26 |
michael@0 | 92 | 0x01F1 , // 27 |
michael@0 | 93 | 0x01F1 , // 28 |
michael@0 | 94 | 0x2C6F , // 29 |
michael@0 | 95 | 0x2C6E , // 30 |
michael@0 | 96 | 0xA640 , // 31 |
michael@0 | 97 | 0x00 |
michael@0 | 98 | }; |
michael@0 | 99 | // test data for ToLower |
michael@0 | 100 | static char16_t t3data [T3LEN+1] = { |
michael@0 | 101 | 0x0031 , // 0 |
michael@0 | 102 | 0x0019 , // 1 |
michael@0 | 103 | 0x0043 , // 2 |
michael@0 | 104 | 0x0067 , // 3 |
michael@0 | 105 | 0x00C8 , // 4 |
michael@0 | 106 | 0x00E9 , // 5 |
michael@0 | 107 | 0x0147 , // 6 |
michael@0 | 108 | 0x01C4 , // 7 |
michael@0 | 109 | 0x01C6 , // 8 |
michael@0 | 110 | 0x01C5 , // 9 |
michael@0 | 111 | 0x03A0 , // 10 |
michael@0 | 112 | 0x0392 , // 11 |
michael@0 | 113 | 0x0418 , // 12 |
michael@0 | 114 | 0x04A4 , // 13 |
michael@0 | 115 | 0x05D0 , // 14 |
michael@0 | 116 | 0x0A20 , // 15 |
michael@0 | 117 | 0x30B0 , // 16 |
michael@0 | 118 | 0x5187 , // 17 |
michael@0 | 119 | 0xC023 , // 18 |
michael@0 | 120 | 0xFF28 , // 19 |
michael@0 | 121 | 0x01C7 , // 20 |
michael@0 | 122 | 0x01C8 , // 21 |
michael@0 | 123 | 0x01C9 , // 22 |
michael@0 | 124 | 0x01CA , // 23 |
michael@0 | 125 | 0x01CB , // 24 |
michael@0 | 126 | 0x01CC , // 25 |
michael@0 | 127 | 0x01F1 , // 26 |
michael@0 | 128 | 0x01F2 , // 27 |
michael@0 | 129 | 0x01F3 , // 28 |
michael@0 | 130 | 0x2C6F , // 29 |
michael@0 | 131 | 0x2C6E , // 30 |
michael@0 | 132 | 0xA640 , // 31 |
michael@0 | 133 | 0x00 |
michael@0 | 134 | }; |
michael@0 | 135 | // expected result for ToLower |
michael@0 | 136 | static char16_t t3result[T3LEN+1] = { |
michael@0 | 137 | 0x0031 , // 0 |
michael@0 | 138 | 0x0019 , // 1 |
michael@0 | 139 | 0x0063 , // 2 |
michael@0 | 140 | 0x0067 , // 3 |
michael@0 | 141 | 0x00E8 , // 4 |
michael@0 | 142 | 0x00E9 , // 5 |
michael@0 | 143 | 0x0148 , // 6 |
michael@0 | 144 | 0x01C6 , // 7 |
michael@0 | 145 | 0x01C6 , // 8 |
michael@0 | 146 | 0x01C6 , // 9 |
michael@0 | 147 | 0x03C0 , // 10 |
michael@0 | 148 | 0x03B2 , // 11 |
michael@0 | 149 | 0x0438 , // 12 |
michael@0 | 150 | 0x04A5 , // 13 |
michael@0 | 151 | 0x05D0 , // 14 |
michael@0 | 152 | 0x0A20 , // 15 |
michael@0 | 153 | 0x30B0 , // 16 |
michael@0 | 154 | 0x5187 , // 17 |
michael@0 | 155 | 0xC023 , // 18 |
michael@0 | 156 | 0xFF48 , // 19 |
michael@0 | 157 | 0x01C9 , // 20 |
michael@0 | 158 | 0x01C9 , // 21 |
michael@0 | 159 | 0x01C9 , // 22 |
michael@0 | 160 | 0x01CC , // 23 |
michael@0 | 161 | 0x01CC , // 24 |
michael@0 | 162 | 0x01CC , // 25 |
michael@0 | 163 | 0x01F3 , // 26 |
michael@0 | 164 | 0x01F3 , // 27 |
michael@0 | 165 | 0x01F3 , // 28 |
michael@0 | 166 | 0x0250 , // 29 |
michael@0 | 167 | 0x0271 , // 30 |
michael@0 | 168 | 0xA641 , // 31 |
michael@0 | 169 | 0x00 |
michael@0 | 170 | }; |
michael@0 | 171 | // test data for ToTitle |
michael@0 | 172 | static char16_t t4data [T4LEN+2] = { |
michael@0 | 173 | 0x0031 , // 0 |
michael@0 | 174 | 0x0019 , // 1 |
michael@0 | 175 | 0x0043 , // 2 |
michael@0 | 176 | 0x0067 , // 3 |
michael@0 | 177 | 0x00C8 , // 4 |
michael@0 | 178 | 0x00E9 , // 5 |
michael@0 | 179 | 0x0147 , // 6 |
michael@0 | 180 | 0x01C4 , // 7 |
michael@0 | 181 | 0x01C6 , // 8 |
michael@0 | 182 | 0x01C5 , // 9 |
michael@0 | 183 | 0x03C0 , // 10 |
michael@0 | 184 | 0x03B2 , // 11 |
michael@0 | 185 | 0x0438 , // 12 |
michael@0 | 186 | 0x04A5 , // 13 |
michael@0 | 187 | 0x05D0 , // 14 |
michael@0 | 188 | 0x0A20 , // 15 |
michael@0 | 189 | 0x30B0 , // 16 |
michael@0 | 190 | 0x5189 , // 17 |
michael@0 | 191 | 0xC013 , // 18 |
michael@0 | 192 | 0xFF52 , // 19 |
michael@0 | 193 | 0x01C7 , // 20 |
michael@0 | 194 | 0x01C8 , // 21 |
michael@0 | 195 | 0x01C9 , // 22 |
michael@0 | 196 | 0x01CA , // 23 |
michael@0 | 197 | 0x01CB , // 24 |
michael@0 | 198 | 0x01CC , // 25 |
michael@0 | 199 | 0x01F1 , // 26 |
michael@0 | 200 | 0x01F2 , // 27 |
michael@0 | 201 | 0x01F3 , // 28 |
michael@0 | 202 | 0x0250 , // 29 |
michael@0 | 203 | 0x0271 , // 30 |
michael@0 | 204 | 0xA641 , // 31 |
michael@0 | 205 | 0x0041 , // Dummy entry to prevent overflow |
michael@0 | 206 | 0x00 |
michael@0 | 207 | }; |
michael@0 | 208 | // expected result for ToTitle |
michael@0 | 209 | static char16_t t4result[T4LEN+2] = { |
michael@0 | 210 | 0x0031 , // 0 |
michael@0 | 211 | 0x0019 , // 1 |
michael@0 | 212 | 0x0043 , // 2 |
michael@0 | 213 | 0x0047 , // 3 |
michael@0 | 214 | 0x00C8 , // 4 |
michael@0 | 215 | 0x00C9 , // 5 |
michael@0 | 216 | 0x0147 , // 6 |
michael@0 | 217 | 0x01C4 , // 7 |
michael@0 | 218 | 0x01C5 , // 8 |
michael@0 | 219 | 0x01C5 , // 9 |
michael@0 | 220 | 0x03A0 , // 10 |
michael@0 | 221 | 0x0392 , // 11 |
michael@0 | 222 | 0x0418 , // 12 |
michael@0 | 223 | 0x04A4 , // 13 |
michael@0 | 224 | 0x05D0 , // 14 |
michael@0 | 225 | 0x0A20 , // 15 |
michael@0 | 226 | 0x30B0 , // 16 |
michael@0 | 227 | 0x5189 , // 17 |
michael@0 | 228 | 0xC013 , // 18 |
michael@0 | 229 | 0xFF32 , // 19 |
michael@0 | 230 | 0x01C7 , // 20 |
michael@0 | 231 | 0x01C8 , // 21 |
michael@0 | 232 | 0x01C8 , // 22 |
michael@0 | 233 | 0x01CA , // 23 |
michael@0 | 234 | 0x01CB , // 24 |
michael@0 | 235 | 0x01CB , // 25 |
michael@0 | 236 | 0x01F1 , // 26 |
michael@0 | 237 | 0x01F2 , // 27 |
michael@0 | 238 | 0x01F2 , // 28 |
michael@0 | 239 | 0x2C6F , // 29 |
michael@0 | 240 | 0x2C6E , // 30 |
michael@0 | 241 | 0xA640 , // 31 |
michael@0 | 242 | 0x0041 , // Dummy entry to prevent overflow |
michael@0 | 243 | 0x00 |
michael@0 | 244 | }; |
michael@0 | 245 | |
// Left-hand operand of the UTF-8 CaseInsensitiveCompare test (Test 6).
// Each group of bytes is one encoded character; the trailing comments give
// the character indices covered by the line.
static unsigned char t6lhs[] = {
  0x31, 0x19, 0x43, 0x67,                //  0 -  3
  0xC3, 0x88,  0xC3, 0xA9,  0xC5, 0x87,  //  4 -  6
  0xC7, 0x84,  0xC7, 0x86,  0xC7, 0x85,  //  7 -  9
  0xCF, 0x80,  0xCE, 0xB2,  0xD0, 0xB8,  // 10 - 12
  0xD2, 0xA5,  0xD7, 0x90,               // 13 - 14
  0xE0, 0xA8, 0xA0,  0xE3, 0x82, 0xB0,   // 15 - 16
  0xE5, 0x86, 0x85,  0xEC, 0x80, 0xA1,   // 17 - 18
  0xEF, 0xBD, 0x88,                      // 19
  0xC7, 0x87,  0xC7, 0x88,  0xC7, 0x89,  // 20 - 22
  0xC7, 0x8A,  0xC7, 0x8B,  0xC7, 0x8C,  // 23 - 25
  0xC7, 0xB1,  0xC7, 0xB2,  0xC7, 0xB3,  // 26 - 28
  0xC9, 0x90,  0xC9, 0xB1,               // 29 - 30
  0xEA, 0x99, 0x81,                      // 31
  0x00                                   // terminator
};
michael@0 | 281 | |
// Right-hand operand of the UTF-8 CaseInsensitiveCompare test (Test 6).
// Each group of bytes is one encoded character; the trailing comments give
// the character indices covered by the line.
static unsigned char t6rhs[] = {
  0x31, 0x19, 0x43, 0x47,                //  0 -  3
  0xC3, 0x88,  0xC3, 0x89,  0xC5, 0x87,  //  4 -  6
  0xC7, 0x84,  0xC7, 0x84,  0xC7, 0x84,  //  7 -  9
  0xCE, 0xA0,  0xCE, 0x92,  0xD0, 0x98,  // 10 - 12
  0xD2, 0xA4,  0xD7, 0x90,               // 13 - 14
  0xE0, 0xA8, 0xA0,  0xE3, 0x82, 0xB0,   // 15 - 16
  0xE5, 0x86, 0x85,  0xEC, 0x80, 0xA1,   // 17 - 18
  0xEF, 0xBC, 0xA8,                      // 19
  0xC7, 0x87,  0xC7, 0x87,  0xC7, 0x87,  // 20 - 22
  0xC7, 0x8A,  0xC7, 0x8A,  0xC7, 0x8A,  // 23 - 25
  0xC7, 0xB1,  0xC7, 0xB1,  0xC7, 0xB1,  // 26 - 28
  0xE2, 0xB1, 0xAF,  0xE2, 0xB1, 0xAE,   // 29 - 30
  0xEA, 0x99, 0x80,                      // 31
  0x00                                   // terminator
};
michael@0 | 317 | |
// Test 7 operands: the same ASCII text with different letter casing.
static const char* t7lhs = "aBcDeFGHIJKL1!!2!!a!uuuu";
static const char* t7rhs = "AbCdEFghijkL1!!2!!A!UUuU";

// Test 8a/8b operands: strings of different length that diverge at the
// second character.
static const char* t8lhs = "aazzz";
static const char* t8rhs = "aBa";

// Test 9 operands: '@' (0x40) and '`' (0x60) differ only in bit 0x20 but
// are not letters.
static const char* t9lhs = "@a";
static const char* t9rhs = "`a";
michael@0 | 326 | |
michael@0 | 327 | bool CharByCharCompareEqual(const char *a, const char *b, |
michael@0 | 328 | uint32_t aLen, uint32_t bLen) |
michael@0 | 329 | { |
michael@0 | 330 | // Do basically a CaseInsensitiveCompare(), but using |
michael@0 | 331 | // CaseInsensitiveUTF8CharsEqual(). |
michael@0 | 332 | |
michael@0 | 333 | const char *aEnd = a + aLen; |
michael@0 | 334 | const char *bEnd = b + bLen; |
michael@0 | 335 | while (a < aEnd && b < bEnd) { |
michael@0 | 336 | bool err; |
michael@0 | 337 | if (!CaseInsensitiveUTF8CharsEqual(a, b, aEnd, bEnd, &a, &b, &err) || err) |
michael@0 | 338 | return false; |
michael@0 | 339 | } |
michael@0 | 340 | return true; |
michael@0 | 341 | } |
michael@0 | 342 | |
michael@0 | 343 | void TestCaseConversion() |
michael@0 | 344 | { |
michael@0 | 345 | printf("==========================\n"); |
michael@0 | 346 | printf("Start case conversion test\n"); |
michael@0 | 347 | printf("==========================\n"); |
michael@0 | 348 | |
michael@0 | 349 | int i; |
michael@0 | 350 | char16_t buf[256]; |
michael@0 | 351 | |
michael@0 | 352 | printf("Test 1 - ToUpper(char16_t, char16_t*):\n"); |
michael@0 | 353 | for(i=0;i < T2LEN ; i++) |
michael@0 | 354 | { |
michael@0 | 355 | char16_t ch = ToUpperCase(t2data[i]); |
michael@0 | 356 | if(ch != t2result[i]) |
michael@0 | 357 | printf("\tFailed!! result unexpected %d\n", i); |
michael@0 | 358 | } |
michael@0 | 359 | |
michael@0 | 360 | |
michael@0 | 361 | printf("Test 2 - ToLower(char16_t, char16_t*):\n"); |
michael@0 | 362 | for(i=0;i < T3LEN; i++) |
michael@0 | 363 | { |
michael@0 | 364 | char16_t ch = ToLowerCase(t3data[i]); |
michael@0 | 365 | if(ch != t3result[i]) |
michael@0 | 366 | printf("\tFailed!! result unexpected %d\n", i); |
michael@0 | 367 | } |
michael@0 | 368 | |
michael@0 | 369 | printf("Test 3 - ToTitle(char16_t, char16_t*):\n"); |
michael@0 | 370 | for(i=0;i < T4LEN; i++) |
michael@0 | 371 | { |
michael@0 | 372 | char16_t ch = ToTitleCase(t4data[i]); |
michael@0 | 373 | if(ch != t4result[i]) |
michael@0 | 374 | printf("\tFailed!! result unexpected %d\n", i); |
michael@0 | 375 | } |
michael@0 | 376 | |
michael@0 | 377 | printf("Test 4 - ToUpper(char16_t*, char16_t*, uint32_t):\n"); |
michael@0 | 378 | ToUpperCase(t2data, buf, T2LEN); |
michael@0 | 379 | for(i = 0; i < T2LEN; i++) |
michael@0 | 380 | { |
michael@0 | 381 | if(buf[i] != t2result[i]) |
michael@0 | 382 | { |
michael@0 | 383 | printf("\tFailed!! result unexpected %d\n", i); |
michael@0 | 384 | break; |
michael@0 | 385 | } |
michael@0 | 386 | } |
michael@0 | 387 | |
michael@0 | 388 | printf("Test 5 - ToLower(char16_t*, char16_t*, uint32_t):\n"); |
michael@0 | 389 | ToLowerCase(t3data, buf, T3LEN); |
michael@0 | 390 | for(i = 0; i < T3LEN; i++) |
michael@0 | 391 | { |
michael@0 | 392 | if(buf[i] != t3result[i]) |
michael@0 | 393 | { |
michael@0 | 394 | printf("\tFailed!! result unexpected %d\n", i); |
michael@0 | 395 | break; |
michael@0 | 396 | } |
michael@0 | 397 | } |
michael@0 | 398 | |
michael@0 | 399 | printf("Test 6 - CaseInsensitiveCompare UTF-8 (1):\n"); |
michael@0 | 400 | if (CaseInsensitiveCompare((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs))) |
michael@0 | 401 | printf("\tFailed!\n"); |
michael@0 | 402 | if (!CharByCharCompareEqual((char*)t6lhs, (char*)t6rhs, sizeof(t6lhs), sizeof(t6rhs))) |
michael@0 | 403 | printf("\tFailed character-by-character comparison!\n"); |
michael@0 | 404 | |
michael@0 | 405 | printf("Test 7 - CaseInsensitiveCompare UTF-8 (2):\n"); |
michael@0 | 406 | if (CaseInsensitiveCompare(t7lhs, t7rhs, strlen(t7lhs), strlen(t7rhs))) |
michael@0 | 407 | printf("\tFailed!\n"); |
michael@0 | 408 | if (!CharByCharCompareEqual(t7lhs, t7rhs, sizeof(t7lhs), sizeof(t7rhs))) |
michael@0 | 409 | printf("\tFailed character-by-character comparison!\n"); |
michael@0 | 410 | |
michael@0 | 411 | printf("Test 8a - CaseInsensitiveCompare UTF-8 (3):\n"); |
michael@0 | 412 | if (CaseInsensitiveCompare(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs)) != -1) |
michael@0 | 413 | printf("\tFailed!\n"); |
michael@0 | 414 | if (CharByCharCompareEqual(t8lhs, t8rhs, strlen(t8lhs), strlen(t8rhs))) |
michael@0 | 415 | printf("\tFailed character-by-character comparison!\n"); |
michael@0 | 416 | |
michael@0 | 417 | printf("Test 8b - CaseInsensitiveCompare UTF-8 (4):\n"); |
michael@0 | 418 | if (CaseInsensitiveCompare(t8rhs, t8lhs, strlen(t8rhs), strlen(t8lhs)) != 1) |
michael@0 | 419 | printf("\tFailed!\n"); |
michael@0 | 420 | |
michael@0 | 421 | // This test may seem a bit strange. But it's actually an easy bug to make |
michael@0 | 422 | // if we tried to be clever and say that two ASCII characters x and y are |
michael@0 | 423 | // case-insensitively equal if (x & ~0x20) == (y & ~0x20). |
michael@0 | 424 | printf("Test 9 - CaseInsensitiveCompare UTF-8 (5):\n"); |
michael@0 | 425 | if (CaseInsensitiveCompare(t9rhs, t9lhs, strlen(t9lhs), strlen(t9rhs)) != 1) |
michael@0 | 426 | printf("\tFailed!\n"); |
michael@0 | 427 | if (CharByCharCompareEqual(t9lhs, t9rhs, strlen(t9lhs), strlen(t9rhs))) |
michael@0 | 428 | printf("\tFailed character-by-character comparison!\n"); |
michael@0 | 429 | |
michael@0 | 430 | printf("===========================\n"); |
michael@0 | 431 | printf("Finish case conversion test\n"); |
michael@0 | 432 | printf("===========================\n"); |
michael@0 | 433 | } |
michael@0 | 434 | |
michael@0 | 435 | static void FuzzOneInvalidCaseConversion() |
michael@0 | 436 | { |
michael@0 | 437 | uint32_t aLen = rand() % 32; |
michael@0 | 438 | uint32_t bLen = rand() % 32; |
michael@0 | 439 | |
michael@0 | 440 | // We could use a static length-32 buffer for these, but then Valgrind |
michael@0 | 441 | // wouldn't be able to detect errors. |
michael@0 | 442 | unsigned char *aBuf = (unsigned char*)malloc(aLen * sizeof(unsigned char)); |
michael@0 | 443 | unsigned char *bBuf = (unsigned char*)malloc(bLen * sizeof(unsigned char)); |
michael@0 | 444 | |
michael@0 | 445 | for (uint32_t i = 0; i < aLen; i++) { |
michael@0 | 446 | aBuf[i] = rand() & 0xff; |
michael@0 | 447 | } |
michael@0 | 448 | |
michael@0 | 449 | for (uint32_t i = 0; i < bLen; i++) { |
michael@0 | 450 | bBuf[i] = rand() & 0xff; |
michael@0 | 451 | } |
michael@0 | 452 | |
michael@0 | 453 | if (!CaseInsensitiveCompare((char*)aBuf, (char*)bBuf, aLen, bLen)) |
michael@0 | 454 | printf("\tSurprise, two random strings compared insensitively as equal!\n"); |
michael@0 | 455 | if (CharByCharCompareEqual((char*)aBuf, (char*)bBuf, aLen, bLen)) |
michael@0 | 456 | printf("\tSurprise, two random strings compared as exactly equal!\n"); |
michael@0 | 457 | |
michael@0 | 458 | free(aBuf); |
michael@0 | 459 | free(bBuf); |
michael@0 | 460 | } |
michael@0 | 461 | |
michael@0 | 462 | static void FuzzCaseConversion() |
michael@0 | 463 | { |
michael@0 | 464 | printf("==========================\n"); |
michael@0 | 465 | printf("Start fuzz case conversion\n"); |
michael@0 | 466 | printf("==========================\n"); |
michael@0 | 467 | |
michael@0 | 468 | srand(0); |
michael@0 | 469 | |
michael@0 | 470 | printf("Fuzzing invalid UTF8 data...\n"); |
michael@0 | 471 | for (uint32_t i = 0; i < 100000; i++) { |
michael@0 | 472 | FuzzOneInvalidCaseConversion(); |
michael@0 | 473 | } |
michael@0 | 474 | |
michael@0 | 475 | printf("===========================\n"); |
michael@0 | 476 | printf("Finish fuzz case conversion\n"); |
michael@0 | 477 | printf("===========================\n"); |
michael@0 | 478 | } |
michael@0 | 479 | |
michael@0 | 480 | static void TestEntityConversion(uint32_t version) |
michael@0 | 481 | { |
michael@0 | 482 | printf("==============================\n"); |
michael@0 | 483 | printf("Start nsIEntityConverter Test \n"); |
michael@0 | 484 | printf("==============================\n"); |
michael@0 | 485 | |
michael@0 | 486 | uint32_t i; |
michael@0 | 487 | nsString inString; |
michael@0 | 488 | char16_t uChar; |
michael@0 | 489 | nsresult res; |
michael@0 | 490 | |
michael@0 | 491 | |
michael@0 | 492 | inString.Assign(NS_ConvertASCIItoUTF16("\xA0\xA1\xA2\xA3")); |
michael@0 | 493 | uChar = (char16_t) 8364; //euro |
michael@0 | 494 | inString.Append(&uChar, 1); |
michael@0 | 495 | uChar = (char16_t) 9830; // |
michael@0 | 496 | inString.Append(&uChar, 1); |
michael@0 | 497 | |
michael@0 | 498 | nsCOMPtr <nsIEntityConverter> entityConv = do_CreateInstance(kEntityConverterCID, &res);; |
michael@0 | 499 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n"); return;} |
michael@0 | 500 | |
michael@0 | 501 | const char16_t *data; |
michael@0 | 502 | uint32_t length = NS_StringGetData(inString, &data); |
michael@0 | 503 | |
michael@0 | 504 | // convert char by char |
michael@0 | 505 | for (i = 0; i < length; i++) { |
michael@0 | 506 | char *entity = nullptr; |
michael@0 | 507 | res = entityConv->ConvertToEntity(data[i], version, &entity); |
michael@0 | 508 | if (NS_SUCCEEDED(res) && entity) { |
michael@0 | 509 | printf("%c %s\n", data[i], entity); |
michael@0 | 510 | nsMemory::Free(entity); |
michael@0 | 511 | } |
michael@0 | 512 | } |
michael@0 | 513 | |
michael@0 | 514 | // convert at once as a string |
michael@0 | 515 | char16_t *entities; |
michael@0 | 516 | res = entityConv->ConvertToEntities(inString.get(), version, &entities); |
michael@0 | 517 | if (NS_SUCCEEDED(res) && entities) { |
michael@0 | 518 | for (char16_t *centity = entities; *centity; ++centity) { |
michael@0 | 519 | printf("%c", (char) *centity); |
michael@0 | 520 | if (';' == (char) *centity) |
michael@0 | 521 | printf("\n"); |
michael@0 | 522 | } |
michael@0 | 523 | nsMemory::Free(entities); |
michael@0 | 524 | } |
michael@0 | 525 | |
michael@0 | 526 | printf("==============================\n"); |
michael@0 | 527 | printf("Finish nsIEntityConverter Test \n"); |
michael@0 | 528 | printf("==============================\n\n"); |
michael@0 | 529 | } |
michael@0 | 530 | |
michael@0 | 531 | static void TestSaveAsCharset() |
michael@0 | 532 | { |
michael@0 | 533 | printf("==============================\n"); |
michael@0 | 534 | printf("Start nsISaveAsCharset Test \n"); |
michael@0 | 535 | printf("==============================\n"); |
michael@0 | 536 | |
michael@0 | 537 | nsresult res; |
michael@0 | 538 | |
michael@0 | 539 | nsString inString; |
michael@0 | 540 | inString.Assign(NS_ConvertASCIItoUTF16("\x61\x62\x80\xA0\x63")); |
michael@0 | 541 | char *outString; |
michael@0 | 542 | |
michael@0 | 543 | const char16_t *data; |
michael@0 | 544 | uint32_t length = NS_StringGetData(inString, &data); |
michael@0 | 545 | |
michael@0 | 546 | // first, dump input string |
michael@0 | 547 | for (uint32_t i = 0; i < length; i++) { |
michael@0 | 548 | printf("%c ", data[i]); |
michael@0 | 549 | } |
michael@0 | 550 | printf("\n"); |
michael@0 | 551 | |
michael@0 | 552 | nsCOMPtr <nsISaveAsCharset> saveAsCharset = do_CreateInstance(kSaveAsCharsetCID, &res); |
michael@0 | 553 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 554 | |
michael@0 | 555 | printf("ISO-8859-1 attr_plainTextDefault entityNone\n"); |
michael@0 | 556 | res = saveAsCharset->Init("ISO-8859-1", |
michael@0 | 557 | nsISaveAsCharset::attr_plainTextDefault, |
michael@0 | 558 | nsIEntityConverter::entityNone); |
michael@0 | 559 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 560 | res = saveAsCharset->Convert(inString.get(), &outString); |
michael@0 | 561 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 562 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 563 | else {printf("%s\n", outString); nsMemory::Free(outString);} |
michael@0 | 564 | |
michael@0 | 565 | printf("ISO-2022-JP attr_plainTextDefault entityNone\n"); |
michael@0 | 566 | res = saveAsCharset->Init("ISO-2022-JP", |
michael@0 | 567 | nsISaveAsCharset::attr_plainTextDefault, |
michael@0 | 568 | nsIEntityConverter::entityNone); |
michael@0 | 569 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 570 | res = saveAsCharset->Convert(inString.get(), &outString); |
michael@0 | 571 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 572 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 573 | else {printf("%s\n", outString); nsMemory::Free(outString);} |
michael@0 | 574 | if (NS_ERROR_UENC_NOMAPPING == res) { |
michael@0 | 575 | outString = ToNewUTF8String(inString); |
michael@0 | 576 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 577 | else {printf("Fall back to UTF-8: %s\n", outString); nsMemory::Free(outString);} |
michael@0 | 578 | } |
michael@0 | 579 | |
michael@0 | 580 | printf("ISO-2022-JP attr_FallbackQuestionMark entityNone\n"); |
michael@0 | 581 | res = saveAsCharset->Init("ISO-2022-JP", |
michael@0 | 582 | nsISaveAsCharset::attr_FallbackQuestionMark, |
michael@0 | 583 | nsIEntityConverter::entityNone); |
michael@0 | 584 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 585 | res = saveAsCharset->Convert(inString.get(), &outString); |
michael@0 | 586 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 587 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 588 | else {printf("%s\n", outString); nsMemory::Free(outString);} |
michael@0 | 589 | |
michael@0 | 590 | printf("ISO-2022-JP attr_FallbackEscapeU entityNone\n"); |
michael@0 | 591 | res = saveAsCharset->Init("ISO-2022-JP", |
michael@0 | 592 | nsISaveAsCharset::attr_FallbackEscapeU, |
michael@0 | 593 | nsIEntityConverter::entityNone); |
michael@0 | 594 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 595 | res = saveAsCharset->Convert(inString.get(), &outString); |
michael@0 | 596 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 597 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 598 | else {printf("%s\n", outString); nsMemory::Free(outString);} |
michael@0 | 599 | |
michael@0 | 600 | printf("ISO-8859-1 attr_htmlTextDefault html40Latin1\n"); |
michael@0 | 601 | res = saveAsCharset->Init("ISO-8859-1", |
michael@0 | 602 | nsISaveAsCharset::attr_htmlTextDefault, |
michael@0 | 603 | nsIEntityConverter::html40Latin1); |
michael@0 | 604 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 605 | res = saveAsCharset->Convert(inString.get(), &outString); |
michael@0 | 606 | if (NS_ERROR_UENC_NOMAPPING != res && NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 607 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 608 | else {printf("%s\n", outString); nsMemory::Free(outString);} |
michael@0 | 609 | |
michael@0 | 610 | printf("ISO-8859-1 attr_FallbackHexNCR+attr_EntityAfterCharsetConv html40Latin1 \n"); |
michael@0 | 611 | res = saveAsCharset->Init("ISO-8859-1", |
michael@0 | 612 | nsISaveAsCharset::attr_FallbackHexNCR + |
michael@0 | 613 | nsISaveAsCharset::attr_EntityAfterCharsetConv, |
michael@0 | 614 | nsIEntityConverter::html40Latin1); |
michael@0 | 615 | if (NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 616 | res = saveAsCharset->Convert(inString.get(), &outString); |
michael@0 | 617 | if (NS_ERROR_UENC_NOMAPPING != res && NS_FAILED(res)) {printf("\tFailed!! return value != NS_OK\n");} |
michael@0 | 618 | if (!outString) {printf("\tFailed!! output null\n");} |
michael@0 | 619 | else {printf("%s\n", outString); nsMemory::Free(outString);} |
michael@0 | 620 | |
michael@0 | 621 | |
michael@0 | 622 | printf("==============================\n"); |
michael@0 | 623 | printf("Finish nsISaveAsCharset Test \n"); |
michael@0 | 624 | printf("==============================\n\n"); |
michael@0 | 625 | } |
michael@0 | 626 | |
// Input for the NFD normalization test: a precomposed letter followed by
// the same letter as base + U+0301, twice over, then a 0 terminator.
static char16_t normStr[] = {
  0x00E1,
  0x0061, 0x0301,
  0x0107,
  0x0063, 0x0301,
  0x0000
};
michael@0 | 637 | |
// Expected result of NFD-normalizing normStr: four base + U+0301 pairs
// followed by a 0 terminator.
static char16_t nfdForm[] = {
  0x0061, 0x0301,
  0x0061, 0x0301,
  0x0063, 0x0301,
  0x0063, 0x0301,
  0x0000
};
michael@0 | 650 | |
michael@0 | 651 | void TestNormalization() |
michael@0 | 652 | { |
michael@0 | 653 | printf("==============================\n"); |
michael@0 | 654 | printf("Start nsIUnicodeNormalizer Test \n"); |
michael@0 | 655 | printf("==============================\n"); |
michael@0 | 656 | nsIUnicodeNormalizer *t = nullptr; |
michael@0 | 657 | nsresult res; |
michael@0 | 658 | res = CallGetService(kUnicodeNormalizerCID, &t); |
michael@0 | 659 | |
michael@0 | 660 | printf("Test 1 - GetService():\n"); |
michael@0 | 661 | if(NS_FAILED(res) || !t) { |
michael@0 | 662 | printf("\t1st Norm GetService failed\n"); |
michael@0 | 663 | } else { |
michael@0 | 664 | NS_RELEASE(t); |
michael@0 | 665 | } |
michael@0 | 666 | |
michael@0 | 667 | res = CallGetService(kUnicodeNormalizerCID, &t); |
michael@0 | 668 | |
michael@0 | 669 | if(NS_FAILED(res) || !t) { |
michael@0 | 670 | printf("\t2nd GetService failed\n"); |
michael@0 | 671 | } else { |
michael@0 | 672 | printf("Test 2 - NormalizeUnicode(uint32_t, const nsAString&, nsAString&):\n"); |
michael@0 | 673 | nsAutoString resultStr; |
michael@0 | 674 | res = t->NormalizeUnicodeNFD(nsDependentString(normStr), resultStr); |
michael@0 | 675 | if (resultStr.Equals(nsDependentString(nfdForm))) { |
michael@0 | 676 | printf(" Succeeded in NFD UnicodeNormalizer test. \n"); |
michael@0 | 677 | } else { |
michael@0 | 678 | printf(" Failed in NFD UnicodeNormalizer test. \n"); |
michael@0 | 679 | } |
michael@0 | 680 | |
michael@0 | 681 | NS_RELEASE(t); |
michael@0 | 682 | } |
michael@0 | 683 | printf("==============================\n"); |
michael@0 | 684 | printf("Finish nsIUnicodeNormalizer Test \n"); |
michael@0 | 685 | printf("==============================\n"); |
michael@0 | 686 | |
michael@0 | 687 | } |
michael@0 | 688 | |
michael@0 | 689 | |
michael@0 | 690 | int main(int argc, char** argv) { |
michael@0 | 691 | |
michael@0 | 692 | nsresult rv = NS_InitXPCOM2(nullptr, nullptr, nullptr); |
michael@0 | 693 | if (NS_FAILED(rv)) { |
michael@0 | 694 | printf("NS_InitXPCOM2 failed\n"); |
michael@0 | 695 | return 1; |
michael@0 | 696 | } |
michael@0 | 697 | |
michael@0 | 698 | // -------------------------------------------- |
michael@0 | 699 | |
michael@0 | 700 | TestCaseConversion(); |
michael@0 | 701 | |
michael@0 | 702 | // -------------------------------------------- |
michael@0 | 703 | |
michael@0 | 704 | FuzzCaseConversion(); |
michael@0 | 705 | |
michael@0 | 706 | // -------------------------------------------- |
michael@0 | 707 | |
michael@0 | 708 | TestEntityConversion(nsIEntityConverter::html40); |
michael@0 | 709 | |
michael@0 | 710 | // -------------------------------------------- |
michael@0 | 711 | |
michael@0 | 712 | TestSaveAsCharset(); |
michael@0 | 713 | |
michael@0 | 714 | // -------------------------------------------- |
michael@0 | 715 | |
michael@0 | 716 | TestNormalization(); |
michael@0 | 717 | |
michael@0 | 718 | // -------------------------------------------- |
michael@0 | 719 | printf("Finish All The Test Cases\n"); |
michael@0 | 720 | |
michael@0 | 721 | return 0; |
michael@0 | 722 | } |