Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #ifndef nsCharTraits_h___ |
michael@0 | 7 | #define nsCharTraits_h___ |
michael@0 | 8 | |
michael@0 | 9 | #include <ctype.h> // for |EOF|, |WEOF| |
michael@0 | 10 | #include <string.h> // for |memcpy|, et al |
michael@0 | 11 | |
michael@0 | 12 | #include "nscore.h" // for |char16_t| |
michael@0 | 13 | |
michael@0 | 14 | // This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in |
michael@0 | 15 | // particular the standalone software updater. In that case stub out |
michael@0 | 16 | // the macros provided by nsDebug.h which are only usable when linking XPCOM |
michael@0 | 17 | |
michael@0 | 18 | #ifdef NS_NO_XPCOM |
michael@0 | 19 | #define NS_WARNING(msg) |
michael@0 | 20 | #define NS_ASSERTION(cond, msg) |
michael@0 | 21 | #define NS_ERROR(msg) |
michael@0 | 22 | #else |
michael@0 | 23 | #include "nsDebug.h" // for NS_ASSERTION |
michael@0 | 24 | #endif |
michael@0 | 25 | |
/*
 * Macros for converting char16_t (UTF-16 code units) to and from Unicode
 * scalar values.
 *
 * UTF-16 represents the Unicode scalar values U+10000 through U+10FFFF with
 * "surrogate pairs".  A pair consists of a high surrogate, i.e. a code point
 * in the range U+D800 - U+DBFF, followed by a low surrogate, i.e. a code
 * point in the range U+DC00 - U+DFFF, like this:
 *
 *   U+D800 U+DC00 =  U+10000
 *   U+D800 U+DC01 =  U+10001
 *   ...
 *   U+DBFF U+DFFE = U+10FFFE
 *   U+DBFF U+DFFF = U+10FFFF
 *
 * The surrogate code points U+D800 - U+DFFF are not themselves valid Unicode
 * scalar values, and they are not well-formed UTF-16 except as
 * high-surrogate / low-surrogate pairs.
 *
 * All of these are function-like macros: arguments that appear more than
 * once in an expansion (e.g. in IS_VALID_CHAR / ENSURE_VALID_CHAR) are
 * evaluated more than once.
 */

// First code point outside the Basic Multilingual Plane.
#define PLANE1_BASE uint32_t(0x00010000)

// High surrogates are in the range 0xD800 -- 0xDBFF
#define NS_IS_HIGH_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xD800)
// Low surrogates are in the range 0xDC00 -- 0xDFFF
#define NS_IS_LOW_SURROGATE(u)  ((uint32_t(u) & 0xFFFFFC00) == 0xDC00)
// Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE
#define IS_SURROGATE(u)         ((uint32_t(u) & 0xFFFFF800) == 0xD800)

// Everything else is not a surrogate: 0x0000 -- 0xD7FF, 0xE000 -- 0xFFFF

// Combine a surrogate pair into a scalar value:
//   N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00)
// Nothing here checks that h really is a high surrogate and l really is a
// low surrogate; only the low ten bits of each are used.
#define SURROGATE_TO_UCS4(h, l) (((uint32_t(h) & 0x03FF) << 10) + \
                                 (uint32_t(l) & 0x03FF) + PLANE1_BASE)

// Extract surrogates from a UCS4 char
// Reference: the Unicode standard 4.0, section 3.9
//
// High half.  Since (c - 0x10000) >> 10 == (c >> 10) - 0x0080 and
// 0xD7C0 == 0xD800 - 0x0080, the expression ((c - 0x10000) >> 10) + 0xD800
// simplifies to the form below.  Note that 0xD7C0 is added, not
// bitwise-OR'd.
#define H_SURROGATE(c) char16_t(char16_t(uint32_t(c) >> 10) + \
                                char16_t(0xD7C0))

// Low half.  Since 0x10000 & 0x03FF == 0,
// (c - 0x10000) & 0x03FF == c & 0x03FF, so
// ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to the form below.
#define L_SURROGATE(c) char16_t(char16_t(uint32_t(c) & uint32_t(0x03FF)) | \
                                char16_t(0xDC00))

// True when ucs lies below PLANE1_BASE, i.e. fits in one UTF-16 code unit.
#define IS_IN_BMP(ucs) (uint32_t(ucs) < PLANE1_BASE)
#define UCS2_REPLACEMENT_CHAR char16_t(0xFFFD)

// One past the largest code point, U+10FFFF.
#define UCS_END uint32_t(0x00110000)
// A valid char is below UCS_END and is not a surrogate code point.
#define IS_VALID_CHAR(c) ((uint32_t(c) < UCS_END) && !IS_SURROGATE(c))
// Yields c unchanged when it is valid, otherwise UCS2_REPLACEMENT_CHAR.
#define ENSURE_VALID_CHAR(c) (IS_VALID_CHAR(c) ? (c) : UCS2_REPLACEMENT_CHAR)
michael@0 | 84 | |
// Primary template: deliberately empty.  The usable members are supplied by
// the explicit specializations of nsCharTraits for concrete character types.
template <class CharT>
struct nsCharTraits
{
};
michael@0 | 86 | |
michael@0 | 87 | template <> |
michael@0 | 88 | struct nsCharTraits<char16_t> |
michael@0 | 89 | { |
michael@0 | 90 | typedef char16_t char_type; |
michael@0 | 91 | typedef uint16_t unsigned_char_type; |
michael@0 | 92 | typedef char incompatible_char_type; |
michael@0 | 93 | |
michael@0 | 94 | static char_type* const sEmptyBuffer; |
michael@0 | 95 | |
michael@0 | 96 | static |
michael@0 | 97 | void |
michael@0 | 98 | assign( char_type& lhs, char_type rhs ) |
michael@0 | 99 | { |
michael@0 | 100 | lhs = rhs; |
michael@0 | 101 | } |
michael@0 | 102 | |
michael@0 | 103 | |
michael@0 | 104 | // integer representation of characters: |
michael@0 | 105 | typedef int int_type; |
michael@0 | 106 | |
michael@0 | 107 | static |
michael@0 | 108 | char_type |
michael@0 | 109 | to_char_type( int_type c ) |
michael@0 | 110 | { |
michael@0 | 111 | return char_type(c); |
michael@0 | 112 | } |
michael@0 | 113 | |
michael@0 | 114 | static |
michael@0 | 115 | int_type |
michael@0 | 116 | to_int_type( char_type c ) |
michael@0 | 117 | { |
michael@0 | 118 | return int_type( static_cast<unsigned_char_type>(c) ); |
michael@0 | 119 | } |
michael@0 | 120 | |
michael@0 | 121 | static |
michael@0 | 122 | bool |
michael@0 | 123 | eq_int_type( int_type lhs, int_type rhs ) |
michael@0 | 124 | { |
michael@0 | 125 | return lhs == rhs; |
michael@0 | 126 | } |
michael@0 | 127 | |
michael@0 | 128 | |
michael@0 | 129 | // |char_type| comparisons: |
michael@0 | 130 | |
michael@0 | 131 | static |
michael@0 | 132 | bool |
michael@0 | 133 | eq( char_type lhs, char_type rhs ) |
michael@0 | 134 | { |
michael@0 | 135 | return lhs == rhs; |
michael@0 | 136 | } |
michael@0 | 137 | |
michael@0 | 138 | static |
michael@0 | 139 | bool |
michael@0 | 140 | lt( char_type lhs, char_type rhs ) |
michael@0 | 141 | { |
michael@0 | 142 | return lhs < rhs; |
michael@0 | 143 | } |
michael@0 | 144 | |
michael@0 | 145 | |
michael@0 | 146 | // operations on s[n] arrays: |
michael@0 | 147 | |
michael@0 | 148 | static |
michael@0 | 149 | char_type* |
michael@0 | 150 | move( char_type* s1, const char_type* s2, size_t n ) |
michael@0 | 151 | { |
michael@0 | 152 | return static_cast<char_type*>(memmove(s1, s2, n * sizeof(char_type))); |
michael@0 | 153 | } |
michael@0 | 154 | |
michael@0 | 155 | static |
michael@0 | 156 | char_type* |
michael@0 | 157 | copy( char_type* s1, const char_type* s2, size_t n ) |
michael@0 | 158 | { |
michael@0 | 159 | return static_cast<char_type*>(memcpy(s1, s2, n * sizeof(char_type))); |
michael@0 | 160 | } |
michael@0 | 161 | |
michael@0 | 162 | static |
michael@0 | 163 | char_type* |
michael@0 | 164 | copyASCII( char_type* s1, const char* s2, size_t n ) |
michael@0 | 165 | { |
michael@0 | 166 | for (char_type* s = s1; n--; ++s, ++s2) { |
michael@0 | 167 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 168 | *s = *s2; |
michael@0 | 169 | } |
michael@0 | 170 | return s1; |
michael@0 | 171 | } |
michael@0 | 172 | |
michael@0 | 173 | static |
michael@0 | 174 | char_type* |
michael@0 | 175 | assign( char_type* s, size_t n, char_type c ) |
michael@0 | 176 | { |
michael@0 | 177 | char_type* result = s; |
michael@0 | 178 | while ( n-- ) |
michael@0 | 179 | assign(*s++, c); |
michael@0 | 180 | return result; |
michael@0 | 181 | } |
michael@0 | 182 | |
michael@0 | 183 | static |
michael@0 | 184 | int |
michael@0 | 185 | compare( const char_type* s1, const char_type* s2, size_t n ) |
michael@0 | 186 | { |
michael@0 | 187 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 188 | { |
michael@0 | 189 | if ( !eq(*s1, *s2) ) |
michael@0 | 190 | return to_int_type(*s1) - to_int_type(*s2); |
michael@0 | 191 | } |
michael@0 | 192 | |
michael@0 | 193 | return 0; |
michael@0 | 194 | } |
michael@0 | 195 | |
michael@0 | 196 | static |
michael@0 | 197 | int |
michael@0 | 198 | compareASCII( const char_type* s1, const char* s2, size_t n ) |
michael@0 | 199 | { |
michael@0 | 200 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 201 | { |
michael@0 | 202 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 203 | if ( !eq_int_type(to_int_type(*s1), to_int_type(*s2)) ) |
michael@0 | 204 | return to_int_type(*s1) - to_int_type(*s2); |
michael@0 | 205 | } |
michael@0 | 206 | |
michael@0 | 207 | return 0; |
michael@0 | 208 | } |
michael@0 | 209 | |
michael@0 | 210 | // this version assumes that s2 is null-terminated and s1 has length n. |
michael@0 | 211 | // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, |
michael@0 | 212 | // we return 1. |
michael@0 | 213 | static |
michael@0 | 214 | int |
michael@0 | 215 | compareASCIINullTerminated( const char_type* s1, size_t n, const char* s2 ) |
michael@0 | 216 | { |
michael@0 | 217 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 218 | { |
michael@0 | 219 | if ( !*s2 ) |
michael@0 | 220 | return 1; |
michael@0 | 221 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 222 | if ( !eq_int_type(to_int_type(*s1), to_int_type(*s2)) ) |
michael@0 | 223 | return to_int_type(*s1) - to_int_type(*s2); |
michael@0 | 224 | } |
michael@0 | 225 | |
michael@0 | 226 | if ( *s2 ) |
michael@0 | 227 | return -1; |
michael@0 | 228 | |
michael@0 | 229 | return 0; |
michael@0 | 230 | } |
michael@0 | 231 | |
michael@0 | 232 | /** |
michael@0 | 233 | * Convert c to its lower-case form, but only if c is in the ASCII |
michael@0 | 234 | * range. Otherwise leave it alone. |
michael@0 | 235 | */ |
michael@0 | 236 | static |
michael@0 | 237 | char_type |
michael@0 | 238 | ASCIIToLower( char_type c ) |
michael@0 | 239 | { |
michael@0 | 240 | if (c >= 'A' && c <= 'Z') |
michael@0 | 241 | return char_type(c + ('a' - 'A')); |
michael@0 | 242 | |
michael@0 | 243 | return c; |
michael@0 | 244 | } |
michael@0 | 245 | |
michael@0 | 246 | static |
michael@0 | 247 | int |
michael@0 | 248 | compareLowerCaseToASCII( const char_type* s1, const char* s2, size_t n ) |
michael@0 | 249 | { |
michael@0 | 250 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 251 | { |
michael@0 | 252 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 253 | NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'), |
michael@0 | 254 | "Unexpected uppercase character"); |
michael@0 | 255 | char_type lower_s1 = ASCIIToLower(*s1); |
michael@0 | 256 | if ( lower_s1 != to_char_type(*s2) ) |
michael@0 | 257 | return to_int_type(lower_s1) - to_int_type(*s2); |
michael@0 | 258 | } |
michael@0 | 259 | |
michael@0 | 260 | return 0; |
michael@0 | 261 | } |
michael@0 | 262 | |
michael@0 | 263 | // this version assumes that s2 is null-terminated and s1 has length n. |
michael@0 | 264 | // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, |
michael@0 | 265 | // we return 1. |
michael@0 | 266 | static |
michael@0 | 267 | int |
michael@0 | 268 | compareLowerCaseToASCIINullTerminated( const char_type* s1, size_t n, const char* s2 ) |
michael@0 | 269 | { |
michael@0 | 270 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 271 | { |
michael@0 | 272 | if ( !*s2 ) |
michael@0 | 273 | return 1; |
michael@0 | 274 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 275 | NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'), |
michael@0 | 276 | "Unexpected uppercase character"); |
michael@0 | 277 | char_type lower_s1 = ASCIIToLower(*s1); |
michael@0 | 278 | if ( lower_s1 != to_char_type(*s2) ) |
michael@0 | 279 | return to_int_type(lower_s1) - to_int_type(*s2); |
michael@0 | 280 | } |
michael@0 | 281 | |
michael@0 | 282 | if ( *s2 ) |
michael@0 | 283 | return -1; |
michael@0 | 284 | |
michael@0 | 285 | return 0; |
michael@0 | 286 | } |
michael@0 | 287 | |
michael@0 | 288 | static |
michael@0 | 289 | size_t |
michael@0 | 290 | length( const char_type* s ) |
michael@0 | 291 | { |
michael@0 | 292 | size_t result = 0; |
michael@0 | 293 | while ( !eq(*s++, char_type(0)) ) |
michael@0 | 294 | ++result; |
michael@0 | 295 | return result; |
michael@0 | 296 | } |
michael@0 | 297 | |
michael@0 | 298 | static |
michael@0 | 299 | const char_type* |
michael@0 | 300 | find( const char_type* s, size_t n, char_type c ) |
michael@0 | 301 | { |
michael@0 | 302 | while ( n-- ) |
michael@0 | 303 | { |
michael@0 | 304 | if ( eq(*s, c) ) |
michael@0 | 305 | return s; |
michael@0 | 306 | ++s; |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | return 0; |
michael@0 | 310 | } |
michael@0 | 311 | }; |
michael@0 | 312 | |
michael@0 | 313 | template <> |
michael@0 | 314 | struct nsCharTraits<char> |
michael@0 | 315 | { |
michael@0 | 316 | typedef char char_type; |
michael@0 | 317 | typedef unsigned char unsigned_char_type; |
michael@0 | 318 | typedef char16_t incompatible_char_type; |
michael@0 | 319 | |
michael@0 | 320 | static char_type* const sEmptyBuffer; |
michael@0 | 321 | |
michael@0 | 322 | static |
michael@0 | 323 | void |
michael@0 | 324 | assign( char_type& lhs, char_type rhs ) |
michael@0 | 325 | { |
michael@0 | 326 | lhs = rhs; |
michael@0 | 327 | } |
michael@0 | 328 | |
michael@0 | 329 | |
michael@0 | 330 | // integer representation of characters: |
michael@0 | 331 | |
michael@0 | 332 | typedef int int_type; |
michael@0 | 333 | |
michael@0 | 334 | static |
michael@0 | 335 | char_type |
michael@0 | 336 | to_char_type( int_type c ) |
michael@0 | 337 | { |
michael@0 | 338 | return char_type(c); |
michael@0 | 339 | } |
michael@0 | 340 | |
michael@0 | 341 | static |
michael@0 | 342 | int_type |
michael@0 | 343 | to_int_type( char_type c ) |
michael@0 | 344 | { |
michael@0 | 345 | return int_type( static_cast<unsigned_char_type>(c) ); |
michael@0 | 346 | } |
michael@0 | 347 | |
michael@0 | 348 | static |
michael@0 | 349 | bool |
michael@0 | 350 | eq_int_type( int_type lhs, int_type rhs ) |
michael@0 | 351 | { |
michael@0 | 352 | return lhs == rhs; |
michael@0 | 353 | } |
michael@0 | 354 | |
michael@0 | 355 | |
michael@0 | 356 | // |char_type| comparisons: |
michael@0 | 357 | |
michael@0 | 358 | static |
michael@0 | 359 | bool |
michael@0 | 360 | eq( char_type lhs, char_type rhs ) |
michael@0 | 361 | { |
michael@0 | 362 | return lhs == rhs; |
michael@0 | 363 | } |
michael@0 | 364 | |
michael@0 | 365 | static |
michael@0 | 366 | bool |
michael@0 | 367 | lt( char_type lhs, char_type rhs ) |
michael@0 | 368 | { |
michael@0 | 369 | return lhs < rhs; |
michael@0 | 370 | } |
michael@0 | 371 | |
michael@0 | 372 | |
michael@0 | 373 | // operations on s[n] arrays: |
michael@0 | 374 | |
michael@0 | 375 | static |
michael@0 | 376 | char_type* |
michael@0 | 377 | move( char_type* s1, const char_type* s2, size_t n ) |
michael@0 | 378 | { |
michael@0 | 379 | return static_cast<char_type*>(memmove(s1, s2, n * sizeof(char_type))); |
michael@0 | 380 | } |
michael@0 | 381 | |
michael@0 | 382 | static |
michael@0 | 383 | char_type* |
michael@0 | 384 | copy( char_type* s1, const char_type* s2, size_t n ) |
michael@0 | 385 | { |
michael@0 | 386 | return static_cast<char_type*>(memcpy(s1, s2, n * sizeof(char_type))); |
michael@0 | 387 | } |
michael@0 | 388 | |
michael@0 | 389 | static |
michael@0 | 390 | char_type* |
michael@0 | 391 | copyASCII( char_type* s1, const char* s2, size_t n ) |
michael@0 | 392 | { |
michael@0 | 393 | return copy(s1, s2, n); |
michael@0 | 394 | } |
michael@0 | 395 | |
michael@0 | 396 | static |
michael@0 | 397 | char_type* |
michael@0 | 398 | assign( char_type* s, size_t n, char_type c ) |
michael@0 | 399 | { |
michael@0 | 400 | return static_cast<char_type*>(memset(s, to_int_type(c), n)); |
michael@0 | 401 | } |
michael@0 | 402 | |
michael@0 | 403 | static |
michael@0 | 404 | int |
michael@0 | 405 | compare( const char_type* s1, const char_type* s2, size_t n ) |
michael@0 | 406 | { |
michael@0 | 407 | return memcmp(s1, s2, n); |
michael@0 | 408 | } |
michael@0 | 409 | |
michael@0 | 410 | static |
michael@0 | 411 | int |
michael@0 | 412 | compareASCII( const char_type* s1, const char* s2, size_t n ) |
michael@0 | 413 | { |
michael@0 | 414 | #ifdef DEBUG |
michael@0 | 415 | for (size_t i = 0; i < n; ++i) |
michael@0 | 416 | { |
michael@0 | 417 | NS_ASSERTION(!(s2[i] & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 418 | } |
michael@0 | 419 | #endif |
michael@0 | 420 | return compare(s1, s2, n); |
michael@0 | 421 | } |
michael@0 | 422 | |
michael@0 | 423 | // this version assumes that s2 is null-terminated and s1 has length n. |
michael@0 | 424 | // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, |
michael@0 | 425 | // we return 1. |
michael@0 | 426 | static |
michael@0 | 427 | int |
michael@0 | 428 | compareASCIINullTerminated( const char_type* s1, size_t n, const char* s2 ) |
michael@0 | 429 | { |
michael@0 | 430 | // can't use strcmp here because we don't want to stop when s1 |
michael@0 | 431 | // contains a null |
michael@0 | 432 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 433 | { |
michael@0 | 434 | if ( !*s2 ) |
michael@0 | 435 | return 1; |
michael@0 | 436 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 437 | if ( *s1 != *s2 ) |
michael@0 | 438 | return to_int_type(*s1) - to_int_type(*s2); |
michael@0 | 439 | } |
michael@0 | 440 | |
michael@0 | 441 | if ( *s2 ) |
michael@0 | 442 | return -1; |
michael@0 | 443 | |
michael@0 | 444 | return 0; |
michael@0 | 445 | } |
michael@0 | 446 | |
michael@0 | 447 | /** |
michael@0 | 448 | * Convert c to its lower-case form, but only if c is ASCII. |
michael@0 | 449 | */ |
michael@0 | 450 | static |
michael@0 | 451 | char_type |
michael@0 | 452 | ASCIIToLower( char_type c ) |
michael@0 | 453 | { |
michael@0 | 454 | if (c >= 'A' && c <= 'Z') |
michael@0 | 455 | return char_type(c + ('a' - 'A')); |
michael@0 | 456 | |
michael@0 | 457 | return c; |
michael@0 | 458 | } |
michael@0 | 459 | |
michael@0 | 460 | static |
michael@0 | 461 | int |
michael@0 | 462 | compareLowerCaseToASCII( const char_type* s1, const char* s2, size_t n ) |
michael@0 | 463 | { |
michael@0 | 464 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 465 | { |
michael@0 | 466 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 467 | NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'), |
michael@0 | 468 | "Unexpected uppercase character"); |
michael@0 | 469 | char_type lower_s1 = ASCIIToLower(*s1); |
michael@0 | 470 | if ( lower_s1 != *s2 ) |
michael@0 | 471 | return to_int_type(lower_s1) - to_int_type(*s2); |
michael@0 | 472 | } |
michael@0 | 473 | return 0; |
michael@0 | 474 | } |
michael@0 | 475 | |
michael@0 | 476 | // this version assumes that s2 is null-terminated and s1 has length n. |
michael@0 | 477 | // if s1 is shorter than s2 then we return -1; if s1 is longer than s2, |
michael@0 | 478 | // we return 1. |
michael@0 | 479 | static |
michael@0 | 480 | int |
michael@0 | 481 | compareLowerCaseToASCIINullTerminated( const char_type* s1, size_t n, const char* s2 ) |
michael@0 | 482 | { |
michael@0 | 483 | for ( ; n--; ++s1, ++s2 ) |
michael@0 | 484 | { |
michael@0 | 485 | if ( !*s2 ) |
michael@0 | 486 | return 1; |
michael@0 | 487 | NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character"); |
michael@0 | 488 | NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'), |
michael@0 | 489 | "Unexpected uppercase character"); |
michael@0 | 490 | char_type lower_s1 = ASCIIToLower(*s1); |
michael@0 | 491 | if ( lower_s1 != *s2 ) |
michael@0 | 492 | return to_int_type(lower_s1) - to_int_type(*s2); |
michael@0 | 493 | } |
michael@0 | 494 | |
michael@0 | 495 | if ( *s2 ) |
michael@0 | 496 | return -1; |
michael@0 | 497 | |
michael@0 | 498 | return 0; |
michael@0 | 499 | } |
michael@0 | 500 | |
michael@0 | 501 | static |
michael@0 | 502 | size_t |
michael@0 | 503 | length( const char_type* s ) |
michael@0 | 504 | { |
michael@0 | 505 | return strlen(s); |
michael@0 | 506 | } |
michael@0 | 507 | |
michael@0 | 508 | static |
michael@0 | 509 | const char_type* |
michael@0 | 510 | find( const char_type* s, size_t n, char_type c ) |
michael@0 | 511 | { |
michael@0 | 512 | return reinterpret_cast<const char_type*>(memchr(s, to_int_type(c), n)); |
michael@0 | 513 | } |
michael@0 | 514 | }; |
michael@0 | 515 | |
/**
 * Read-side adapter for iterator-like character sources.
 *
 * InputIterator must provide the nested types |difference_type| and
 * |value_type|, a |get()| member whose result converts to
 * |const value_type*| and supports subtraction, and an |advance(n)| member.
 */
template <class InputIterator>
struct nsCharSourceTraits
{
  typedef typename InputIterator::difference_type difference_type;

  // Distance between the positions reported by |first| and |last|,
  // converted to uint32_t.
  static uint32_t readable_distance(const InputIterator& first,
                                    const InputIterator& last)
  {
    // assumes single fragment
    return uint32_t(last.get() - first.get());
  }

  // Pointer to the data at the iterator's current position.
  static const typename InputIterator::value_type* read(const InputIterator& iter)
  {
    return iter.get();
  }

  // Step |s| by |n| positions using the iterator's own advance().
  static void advance(InputIterator& s, difference_type n)
  {
    s.advance(n);
  }
};
michael@0 | 543 | |
michael@0 | 544 | template <class CharT> |
michael@0 | 545 | struct nsCharSourceTraits<CharT*> |
michael@0 | 546 | { |
michael@0 | 547 | typedef ptrdiff_t difference_type; |
michael@0 | 548 | |
michael@0 | 549 | static |
michael@0 | 550 | uint32_t |
michael@0 | 551 | readable_distance( CharT* s ) |
michael@0 | 552 | { |
michael@0 | 553 | return uint32_t(nsCharTraits<CharT>::length(s)); |
michael@0 | 554 | // return numeric_limits<uint32_t>::max(); |
michael@0 | 555 | } |
michael@0 | 556 | |
michael@0 | 557 | static |
michael@0 | 558 | uint32_t |
michael@0 | 559 | readable_distance( CharT* first, CharT* last ) |
michael@0 | 560 | { |
michael@0 | 561 | return uint32_t(last-first); |
michael@0 | 562 | } |
michael@0 | 563 | |
michael@0 | 564 | static |
michael@0 | 565 | const CharT* |
michael@0 | 566 | read( CharT* s ) |
michael@0 | 567 | { |
michael@0 | 568 | return s; |
michael@0 | 569 | } |
michael@0 | 570 | |
michael@0 | 571 | static |
michael@0 | 572 | void |
michael@0 | 573 | advance( CharT*& s, difference_type n ) |
michael@0 | 574 | { |
michael@0 | 575 | s += n; |
michael@0 | 576 | } |
michael@0 | 577 | }; |
michael@0 | 578 | |
/**
 * Write-side adapter for iterator-like character sinks.
 *
 * OutputIterator must provide a nested |value_type| and a |write(s, n)|
 * member; this simply forwards to it.
 */
template <class OutputIterator>
struct nsCharSinkTraits
{
  // Hand |n| characters starting at |s| to the iterator's own write().
  static void write(OutputIterator& iter,
                    const typename OutputIterator::value_type* s, uint32_t n)
  {
    iter.write(s, n);
  }
};
michael@0 | 589 | |
michael@0 | 590 | template <class CharT> |
michael@0 | 591 | struct nsCharSinkTraits<CharT*> |
michael@0 | 592 | { |
michael@0 | 593 | static |
michael@0 | 594 | void |
michael@0 | 595 | write( CharT*& iter, const CharT* s, uint32_t n ) |
michael@0 | 596 | { |
michael@0 | 597 | nsCharTraits<CharT>::move(iter, s, n); |
michael@0 | 598 | iter += n; |
michael@0 | 599 | } |
michael@0 | 600 | }; |
michael@0 | 601 | |
michael@0 | 602 | #endif // !defined(nsCharTraits_h___) |