xpcom/string/public/nsCharTraits.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #ifndef nsCharTraits_h___
     7 #define nsCharTraits_h___
     9 #include <ctype.h> // for |EOF|, |WEOF|
    10 #include <string.h> // for |memcpy|, et al
    12 #include "nscore.h" // for |char16_t|
    14 // This file may be used (through nsUTF8Utils.h) from non-XPCOM code, in
    15 // particular the standalone software updater. In that case stub out
    16 // the macros provided by nsDebug.h which are only usable when linking XPCOM
    18 #ifdef NS_NO_XPCOM
    19 #define NS_WARNING(msg)
    20 #define NS_ASSERTION(cond, msg)
    21 #define NS_ERROR(msg)
    22 #else
    23 #include "nsDebug.h"  // for NS_ASSERTION
    24 #endif
    26 /*
    27  * Some macros for converting char16_t (UTF-16) to and from Unicode scalar
    28  * values.
    29  *
    30  * Note that UTF-16 represents all Unicode scalar values up to U+10FFFF by
    31  * using "surrogate pairs". These consist of a high surrogate, i.e. a code
    32  * point in the range U+D800 - U+DBFF, and a low surrogate, i.e. a code point
    33  * in the range U+DC00 - U+DFFF, like this:
    34  *
    35  *  U+D800 U+DC00 =  U+10000
    36  *  U+D800 U+DC01 =  U+10001
    37  *  ...
    38  *  U+DBFF U+DFFE = U+10FFFE
    39  *  U+DBFF U+DFFF = U+10FFFF
    40  *
    41  * These surrogate code points U+D800 - U+DFFF are not themselves valid Unicode
    42  * scalar values and are not well-formed UTF-16 except as high-surrogate /
    43  * low-surrogate pairs.
    44  */
       // First code point above the Basic Multilingual Plane (see IS_IN_BMP).
    46 #define PLANE1_BASE          uint32_t(0x00010000)
    47 // High surrogates are in the range 0xD800 -- 0xDBFF
    48 #define NS_IS_HIGH_SURROGATE(u) ((uint32_t(u) & 0xFFFFFC00) == 0xD800)
    49 // Low surrogates are in the range 0xDC00 -- 0xDFFF
    50 #define NS_IS_LOW_SURROGATE(u)  ((uint32_t(u) & 0xFFFFFC00) == 0xDC00)
    51 // Faster than testing NS_IS_HIGH_SURROGATE || NS_IS_LOW_SURROGATE
    52 #define IS_SURROGATE(u)      ((uint32_t(u) & 0xFFFFF800) == 0xD800)
    54 // Everything else is not a surrogate: 0x0000 -- 0xD7FF, 0xE000 -- 0xFFFF
    56 // N = (H - 0xD800) * 0x400 + 0x10000 + (L - 0xDC00)
    57 // I wonder whether we could somehow assert that H is a high surrogate
    58 // and L is a low surrogate
    59 #define SURROGATE_TO_UCS4(h, l) (((uint32_t(h) & 0x03FF) << 10) + \
    60                                  (uint32_t(l) & 0x03FF) + PLANE1_BASE)
    62 // Extract surrogates from a UCS4 char
    63 // Reference: the Unicode standard 4.0, section 3.9
    64 // Since (c - 0x10000) >> 10 == (c >> 10) - 0x0040 and
    65 // 0xD7C0 == 0xD800 - 0x0040,
    66 // ((c - 0x10000) >> 10) + 0xD800 can be simplified to
    67 #define H_SURROGATE(c) char16_t(char16_t(uint32_t(c) >> 10) + \
    68                                  char16_t(0xD7C0)) 
    69 // where it's to be noted that 0xD7C0 is not bitwise-OR'd
    70 // but added.
    72 // Since 0x10000 & 0x03FF == 0, 
    73 // (c - 0x10000) & 0x03FF == c & 0x03FF so that
    74 // ((c - 0x10000) & 0x03FF) | 0xDC00 is equivalent to
    75 #define L_SURROGATE(c) char16_t(char16_t(uint32_t(c) & uint32_t(0x03FF)) | \
    76                                  char16_t(0xDC00))
       // True when |ucs| fits in a single 16-bit unit, i.e. is below PLANE1_BASE.
    78 #define IS_IN_BMP(ucs) (uint32_t(ucs) < PLANE1_BASE)
       // U+FFFD, the value ENSURE_VALID_CHAR substitutes for invalid input.
    79 #define UCS2_REPLACEMENT_CHAR char16_t(0xFFFD)
       // One past the largest code point (U+10FFFF).
    81 #define UCS_END uint32_t(0x00110000)
       // A valid char is below UCS_END and is not a surrogate code point.
       // Note: |c| is expanded twice here, so avoid arguments with side effects.
    82 #define IS_VALID_CHAR(c) ((uint32_t(c) < UCS_END) && !IS_SURROGATE(c))
       // Yields |c| unchanged when valid, otherwise UCS2_REPLACEMENT_CHAR.
       // Note: |c| is expanded up to three times (twice in the test, once as
       // the result), so avoid arguments with side effects.
    83 #define ENSURE_VALID_CHAR(c) (IS_VALID_CHAR(c) ? (c) : UCS2_REPLACEMENT_CHAR)
    85 template <class CharT> struct nsCharTraits {};
    87 template <>
    88 struct nsCharTraits<char16_t>
    89   {
    90     typedef char16_t char_type;
    91     typedef uint16_t  unsigned_char_type;
    92     typedef char      incompatible_char_type;
    94     static char_type* const sEmptyBuffer;
    96     static
    97     void
    98     assign( char_type& lhs, char_type rhs )
    99       {
   100         lhs = rhs;
   101       }
   104       // integer representation of characters:
   105     typedef int int_type;
   107     static
   108     char_type
   109     to_char_type( int_type c )
   110       {
   111         return char_type(c);
   112       }
   114     static
   115     int_type
   116     to_int_type( char_type c )
   117       {
   118         return int_type( static_cast<unsigned_char_type>(c) );
   119       }
   121     static
   122     bool
   123     eq_int_type( int_type lhs, int_type rhs )
   124       {
   125         return lhs == rhs;
   126       }
   129       // |char_type| comparisons:
   131     static
   132     bool
   133     eq( char_type lhs, char_type rhs )
   134       {
   135         return lhs == rhs;
   136       }
   138     static
   139     bool
   140     lt( char_type lhs, char_type rhs )
   141       {
   142         return lhs < rhs;
   143       }
   146       // operations on s[n] arrays:
   148     static
   149     char_type*
   150     move( char_type* s1, const char_type* s2, size_t n )
   151       {
   152         return static_cast<char_type*>(memmove(s1, s2, n * sizeof(char_type)));
   153       }
   155     static
   156     char_type*
   157     copy( char_type* s1, const char_type* s2, size_t n )
   158       {
   159         return static_cast<char_type*>(memcpy(s1, s2, n * sizeof(char_type)));
   160       }
   162     static
   163     char_type*
   164     copyASCII( char_type* s1, const char* s2, size_t n )
   165       {
   166         for (char_type* s = s1; n--; ++s, ++s2) {
   167           NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   168           *s = *s2;
   169         }
   170         return s1;
   171       }
   173     static
   174     char_type*
   175     assign( char_type* s, size_t n, char_type c )
   176       {
   177         char_type* result = s;
   178         while ( n-- )
   179           assign(*s++, c);
   180         return result;
   181       }
   183     static
   184     int
   185     compare( const char_type* s1, const char_type* s2, size_t n )
   186       {
   187         for ( ; n--; ++s1, ++s2 )
   188           {
   189             if ( !eq(*s1, *s2) )
   190               return to_int_type(*s1) - to_int_type(*s2);
   191           }
   193         return 0;
   194       }
   196     static
   197     int
   198     compareASCII( const char_type* s1, const char* s2, size_t n )
   199       {
   200         for ( ; n--; ++s1, ++s2 )
   201           {
   202             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   203             if ( !eq_int_type(to_int_type(*s1), to_int_type(*s2)) )
   204               return to_int_type(*s1) - to_int_type(*s2);
   205           }
   207         return 0;
   208       }
   210     // this version assumes that s2 is null-terminated and s1 has length n.
   211     // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
   212     // we return 1.
   213     static
   214     int
   215     compareASCIINullTerminated( const char_type* s1, size_t n, const char* s2 )
   216       {
   217         for ( ; n--; ++s1, ++s2 )
   218           {
   219             if ( !*s2 )
   220               return 1;
   221             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   222             if ( !eq_int_type(to_int_type(*s1), to_int_type(*s2)) )
   223               return to_int_type(*s1) - to_int_type(*s2);
   224           }
   226         if ( *s2 )
   227           return -1;
   229         return 0;
   230       }
   232     /**
   233      * Convert c to its lower-case form, but only if c is in the ASCII
   234      * range. Otherwise leave it alone.
   235      */
   236     static
   237     char_type
   238     ASCIIToLower( char_type c )
   239       {
   240         if (c >= 'A' && c <= 'Z')
   241           return char_type(c + ('a' - 'A'));
   243         return c;
   244       }
   246     static
   247     int
   248     compareLowerCaseToASCII( const char_type* s1, const char* s2, size_t n )
   249       {
   250         for ( ; n--; ++s1, ++s2 )
   251           {
   252             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   253             NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'),
   254                          "Unexpected uppercase character");
   255             char_type lower_s1 = ASCIIToLower(*s1);
   256             if ( lower_s1 != to_char_type(*s2) )
   257               return to_int_type(lower_s1) - to_int_type(*s2);
   258           }
   260         return 0;
   261       }
   263     // this version assumes that s2 is null-terminated and s1 has length n.
   264     // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
   265     // we return 1.
   266     static
   267     int
   268     compareLowerCaseToASCIINullTerminated( const char_type* s1, size_t n, const char* s2 )
   269       {
   270         for ( ; n--; ++s1, ++s2 )
   271           {
   272             if ( !*s2 )
   273               return 1;
   274             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   275             NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'),
   276                          "Unexpected uppercase character");
   277             char_type lower_s1 = ASCIIToLower(*s1);
   278             if ( lower_s1 != to_char_type(*s2) )
   279               return to_int_type(lower_s1) - to_int_type(*s2);
   280           }
   282         if ( *s2 )
   283           return -1;
   285         return 0;
   286       }
   288     static
   289     size_t
   290     length( const char_type* s )
   291       {
   292         size_t result = 0;
   293         while ( !eq(*s++, char_type(0)) )
   294           ++result;
   295         return result;
   296       }
   298     static
   299     const char_type*
   300     find( const char_type* s, size_t n, char_type c )
   301       {
   302         while ( n-- )
   303           {
   304             if ( eq(*s, c) )
   305               return s;
   306             ++s;
   307           }
   309         return 0;
   310       }
   311   };
   313 template <>
   314 struct nsCharTraits<char>
   315   {
   316     typedef char           char_type;
   317     typedef unsigned char  unsigned_char_type;
   318     typedef char16_t      incompatible_char_type;
   320     static char_type* const sEmptyBuffer;
   322     static
   323     void
   324     assign( char_type& lhs, char_type rhs )
   325       {
   326         lhs = rhs;
   327       }
   330       // integer representation of characters:
   332     typedef int int_type;
   334     static
   335     char_type
   336     to_char_type( int_type c )
   337       {
   338         return char_type(c);
   339       }
   341     static
   342     int_type
   343     to_int_type( char_type c )
   344       {
   345         return int_type( static_cast<unsigned_char_type>(c) );
   346       }
   348     static
   349     bool
   350     eq_int_type( int_type lhs, int_type rhs )
   351       {
   352         return lhs == rhs;
   353       }
   356       // |char_type| comparisons:
   358     static
   359     bool
   360     eq( char_type lhs, char_type rhs )
   361       {
   362         return lhs == rhs;
   363       }
   365     static
   366     bool
   367     lt( char_type lhs, char_type rhs )
   368       {
   369         return lhs < rhs;
   370       }
   373       // operations on s[n] arrays:
   375     static
   376     char_type*
   377     move( char_type* s1, const char_type* s2, size_t n )
   378       {
   379         return static_cast<char_type*>(memmove(s1, s2, n * sizeof(char_type)));
   380       }
   382     static
   383     char_type*
   384     copy( char_type* s1, const char_type* s2, size_t n )
   385       {
   386         return static_cast<char_type*>(memcpy(s1, s2, n * sizeof(char_type)));
   387       }
   389     static
   390     char_type*
   391     copyASCII( char_type* s1, const char* s2, size_t n )
   392       {
   393         return copy(s1, s2, n);
   394       }
   396     static
   397     char_type*
   398     assign( char_type* s, size_t n, char_type c )
   399       {
   400         return static_cast<char_type*>(memset(s, to_int_type(c), n));
   401       }
   403     static
   404     int
   405     compare( const char_type* s1, const char_type* s2, size_t n )
   406       {
   407         return memcmp(s1, s2, n);
   408       }
   410     static
   411     int
   412     compareASCII( const char_type* s1, const char* s2, size_t n )
   413       {
   414 #ifdef DEBUG
   415         for (size_t i = 0; i < n; ++i)
   416           {
   417             NS_ASSERTION(!(s2[i] & ~0x7F), "Unexpected non-ASCII character");
   418           }
   419 #endif
   420         return compare(s1, s2, n);
   421       }
   423     // this version assumes that s2 is null-terminated and s1 has length n.
   424     // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
   425     // we return 1.
   426     static
   427     int
   428     compareASCIINullTerminated( const char_type* s1, size_t n, const char* s2 )
   429       {
   430         // can't use strcmp here because we don't want to stop when s1
   431         // contains a null
   432         for ( ; n--; ++s1, ++s2 )
   433           {
   434             if ( !*s2 )
   435               return 1;
   436             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   437             if ( *s1 != *s2 )
   438               return to_int_type(*s1) - to_int_type(*s2);
   439           }
   441         if ( *s2 )
   442           return -1;
   444         return 0;
   445       }
   447     /**
   448      * Convert c to its lower-case form, but only if c is ASCII.
   449      */
   450     static
   451     char_type
   452     ASCIIToLower( char_type c )
   453       {
   454         if (c >= 'A' && c <= 'Z')
   455           return char_type(c + ('a' - 'A'));
   457         return c;
   458       }
   460     static
   461     int
   462     compareLowerCaseToASCII( const char_type* s1, const char* s2, size_t n )
   463       {
   464         for ( ; n--; ++s1, ++s2 )
   465           {
   466             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   467             NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'),
   468                          "Unexpected uppercase character");
   469             char_type lower_s1 = ASCIIToLower(*s1);
   470             if ( lower_s1 != *s2 )
   471               return to_int_type(lower_s1) - to_int_type(*s2);
   472           }
   473         return 0;
   474       }
   476     // this version assumes that s2 is null-terminated and s1 has length n.
   477     // if s1 is shorter than s2 then we return -1; if s1 is longer than s2,
   478     // we return 1.
   479     static
   480     int
   481     compareLowerCaseToASCIINullTerminated( const char_type* s1, size_t n, const char* s2 )
   482       {
   483         for ( ; n--; ++s1, ++s2 )
   484           {
   485             if ( !*s2 )
   486               return 1;
   487             NS_ASSERTION(!(*s2 & ~0x7F), "Unexpected non-ASCII character");
   488             NS_ASSERTION(!(*s2 >= 'A' && *s2 <= 'Z'),
   489                          "Unexpected uppercase character");
   490             char_type lower_s1 = ASCIIToLower(*s1);
   491             if ( lower_s1 != *s2 )
   492               return to_int_type(lower_s1) - to_int_type(*s2);
   493           }
   495         if ( *s2 )
   496           return -1;
   498         return 0;
   499       }
   501     static
   502     size_t
   503     length( const char_type* s )
   504       {
   505         return strlen(s);
   506       }
   508     static
   509     const char_type*
   510     find( const char_type* s, size_t n, char_type c )
   511       {
   512         return reinterpret_cast<const char_type*>(memchr(s, to_int_type(c), n));
   513       }
   514   };
   516 template <class InputIterator>
   517 struct nsCharSourceTraits
   518   {
   519     typedef typename InputIterator::difference_type difference_type;
   521     static
   522     uint32_t
   523     readable_distance( const InputIterator& first, const InputIterator& last )
   524       {
   525         // assumes single fragment
   526         return uint32_t(last.get() - first.get());
   527       }
   529     static
   530     const typename InputIterator::value_type*
   531     read( const InputIterator& iter )
   532       {
   533         return iter.get();
   534       }
   536     static
   537     void
   538     advance( InputIterator& s, difference_type n )
   539       {
   540         s.advance(n);
   541       }
   542   };
   544 template <class CharT>
   545 struct nsCharSourceTraits<CharT*>
   546   {
   547     typedef ptrdiff_t difference_type;
   549     static
   550     uint32_t
   551     readable_distance( CharT* s )
   552       {
   553         return uint32_t(nsCharTraits<CharT>::length(s));
   554 //      return numeric_limits<uint32_t>::max();
   555       }
   557     static
   558     uint32_t
   559     readable_distance( CharT* first, CharT* last )
   560       {
   561         return uint32_t(last-first);
   562       }
   564     static
   565     const CharT*
   566     read( CharT* s )
   567       {
   568         return s;
   569       }
   571     static
   572     void
   573     advance( CharT*& s, difference_type n )
   574       {
   575         s += n;
   576       }
   577   };
   579 template <class OutputIterator>
   580 struct nsCharSinkTraits
   581   {
   582     static
   583     void
   584     write( OutputIterator& iter, const typename OutputIterator::value_type* s, uint32_t n )
   585       {
   586         iter.write(s, n);
   587       }
   588   };
   590 template <class CharT>
   591 struct nsCharSinkTraits<CharT*>
   592   {
   593     static
   594     void
   595     write( CharT*& iter, const CharT* s, uint32_t n )
   596       {
   597         nsCharTraits<CharT>::move(iter, s, n);
   598         iter += n;
   599       }
   600   };
   602 #endif // !defined(nsCharTraits_h___)

mercurial