mfbt/Char16.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/mfbt/Char16.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,202 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +/* Implements a UTF-16 character type. */
    1.11 +
    1.12 +#ifndef mozilla_Char16_h
    1.13 +#define mozilla_Char16_h
    1.14 +
    1.15 +#ifdef __cplusplus
    1.16 +
    1.17 +/*
    1.18 + * C++11 introduces a char16_t type and support for UTF-16 string and character
    1.19 + * literals. C++11's char16_t is a distinct builtin type. Technically, char16_t
    1.20 + * is a 16-bit code unit of a Unicode code point, not a "character".
    1.21 + */
    1.22 +
    1.23 +#ifdef _MSC_VER
    1.24 +   /*
    1.25 +    * C++11 says char16_t is a distinct builtin type, but Windows's yvals.h
    1.26 +    * typedefs char16_t as an unsigned short. We would like to alias char16_t
    1.27 +    * to Windows's 16-bit wchar_t so we can declare UTF-16 literals as constant
    1.28 +    * expressions (and pass char16_t pointers to Windows APIs). We #define
    1.29 +    * _CHAR16T here in order to prevent yvals.h from overriding our char16_t
    1.30 +    * typedef, which we set to wchar_t for C++ code.
    1.31 +    *
    1.32 +    * In addition, #defining _CHAR16T will prevent yvals.h from defining a
    1.33 +    * char32_t type, so we have to undo that damage here and provide our own,
    1.34 +    * which is identical to the yvals.h type.
    1.35 +    */
    1.36 +#  define MOZ_UTF16_HELPER(s) L##s
    1.37 +#  define _CHAR16T
    1.38 +   typedef wchar_t char16_t;
    1.39 +   typedef unsigned int char32_t;
    1.40 +#else
    1.41 +   /* C++11 has a builtin char16_t type, so only the u"" literal prefix is needed. */
    1.42 +#  define MOZ_UTF16_HELPER(s) u##s
    1.43 +   /**
    1.44 +    * Defined when char16_t is a builtin type distinct from wchar_t, rather than
    1.45 +    * a typedef of wchar_t as in the MSVC branch above.
    1.46 +    */
    1.47 +#  define MOZ_CHAR16_IS_NOT_WCHAR
    1.48 +#  ifdef WIN32 /* WIN32 without _MSC_VER: enable the char16ptr_t wrapper class. */
    1.49 +#    define MOZ_USE_CHAR16_WRAPPER
    1.50 +#  endif
    1.51 +#endif
    1.52 +
    1.53 +#ifdef MOZ_USE_CHAR16_WRAPPER
    1.54 +# include <string>
    1.55 +  /**
    1.56 +   * The Win32 API uses wchar_t extensively. Per the C++11 spec, wchar_t and
    1.57 +   * char16_t are distinct builtin types; MSVC does not treat them that way,
    1.58 +   * but GCC follows the spec. We want to mix wchar_t and char16_t on Windows
    1.59 +   * builds, and this class is meant to make that easier. It stores a const
    1.60 +   * char16_t pointer, but provides implicit conversions for const wchar_t
    1.61 +   * pointers as well. On other platforms, we simply use |typedef const char16_t*
    1.62 +   * char16ptr_t|. Here, we want the class to behave as much like that typedef
    1.63 +   * as possible, including providing some casts that the typedef allows.
    1.64 +   */
    1.65 +class char16ptr_t
    1.66 +{
    1.67 +  private:
    1.68 +    const char16_t* ptr; /* The wrapped pointer; this class never frees it. */
    1.69 +    static_assert(sizeof(char16_t) == sizeof(wchar_t), "char16_t and wchar_t sizes differ");
    1.70 +
    1.71 +  public:
    1.72 +    char16ptr_t(const char16_t* p) : ptr(p) {}
    1.73 +    char16ptr_t(const wchar_t* p) : ptr(reinterpret_cast<const char16_t*>(p)) {}
    1.74 +
    1.75 +    /* Without this, nullptr would be ambiguous between the two constructors above. */
    1.76 +    constexpr char16ptr_t(decltype(nullptr)) : ptr(nullptr) {}
    1.77 +
    1.78 +    operator const char16_t*() const {
    1.79 +      return ptr;
    1.80 +    }
    1.81 +    operator const wchar_t*() const {
    1.82 +      return reinterpret_cast<const wchar_t*>(ptr);
    1.83 +    }
    1.84 +    operator const void*() const {
    1.85 +      return ptr;
    1.86 +    }
    1.87 +    operator bool() const {
    1.88 +      return ptr != nullptr;
    1.89 +    }
    1.90 +    operator std::wstring() const {
    1.91 +      return std::wstring(static_cast<const wchar_t*>(*this));
    1.92 +    }
    1.93 +
    1.94 +    /* Explicit casts, e.g. (char16_t*)str (drops const) or (long)str (address). */
    1.95 +    explicit operator char16_t*() const {
    1.96 +      return const_cast<char16_t*>(ptr);
    1.97 +    }
    1.98 +    explicit operator wchar_t*() const {
    1.99 +      return const_cast<wchar_t*>(static_cast<const wchar_t*>(*this));
   1.100 +    }
   1.101 +    explicit operator int() const {
   1.102 +      return reinterpret_cast<intptr_t>(ptr);
   1.103 +    }
   1.104 +    explicit operator unsigned int() const {
   1.105 +      return reinterpret_cast<uintptr_t>(ptr);
   1.106 +    }
   1.107 +    explicit operator long() const {
   1.108 +      return reinterpret_cast<intptr_t>(ptr);
   1.109 +    }
   1.110 +    explicit operator unsigned long() const {
   1.111 +      return reinterpret_cast<uintptr_t>(ptr);
   1.112 +    }
   1.113 +    explicit operator long long() const {
   1.114 +      return reinterpret_cast<intptr_t>(ptr);
   1.115 +    }
   1.116 +    explicit operator unsigned long long() const {
   1.117 +      return reinterpret_cast<uintptr_t>(ptr);
   1.118 +    }
   1.119 +
   1.120 +    /**
   1.121 +     * Some Windows API calls take a BYTE* parameter but require that the data
   1.122 +     * actually be WCHAR*. The explicit operators below allow the explicit casts
   1.123 +     * needed to pass a char16ptr_t to such calls.
   1.124 +     */
   1.125 +    explicit operator const char*() const {
   1.126 +      return reinterpret_cast<const char*>(ptr);
   1.127 +    }
   1.128 +    explicit operator const unsigned char*() const {
   1.129 +      return reinterpret_cast<const unsigned char*>(ptr);
   1.130 +    }
   1.131 +    explicit operator unsigned char*() const {
   1.132 +      return const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(ptr));
   1.133 +    }
   1.134 +    explicit operator void*() const {
   1.135 +      return const_cast<char16_t*>(ptr);
   1.136 +    }
   1.137 +
   1.138 +    /* Pointer-like operators: indexing, (in)equality, addition, difference. */
   1.139 +    char16_t operator[](size_t i) const {
   1.140 +      return ptr[i];
   1.141 +    }
   1.142 +    bool operator==(const char16ptr_t &x) const {
   1.143 +      return ptr == x.ptr;
   1.144 +    }
   1.145 +    bool operator==(decltype(nullptr)) const {
   1.146 +      return ptr == nullptr;
   1.147 +    }
   1.148 +    bool operator!=(const char16ptr_t &x) const {
   1.149 +      return ptr != x.ptr;
   1.150 +    }
   1.151 +    bool operator!=(decltype(nullptr)) const {
   1.152 +      return ptr != nullptr;
   1.153 +    }
   1.154 +    char16ptr_t operator+(int aValue) const {
   1.155 +      return char16ptr_t(ptr + aValue);
   1.156 +    }
   1.157 +    char16ptr_t operator+(unsigned int aValue) const {
   1.158 +      return char16ptr_t(ptr + aValue);
   1.159 +    }
   1.160 +    char16ptr_t operator+(long aValue) const {
   1.161 +      return char16ptr_t(ptr + aValue);
   1.162 +    }
   1.163 +    char16ptr_t operator+(unsigned long aValue) const {
   1.164 +      return char16ptr_t(ptr + aValue);
   1.165 +    }
   1.166 +    char16ptr_t operator+(long long aValue) const {
   1.167 +      return char16ptr_t(ptr + aValue);
   1.168 +    }
   1.169 +    char16ptr_t operator+(unsigned long long aValue) const {
   1.170 +      return char16ptr_t(ptr + aValue);
   1.171 +    }
   1.172 +    ptrdiff_t operator-(const char16ptr_t &other) const {
   1.173 +      return ptr - other.ptr;
   1.174 +    }
   1.175 +};
   1.176 +/* |const char16_t*| minus |char16ptr_t|, yielding a pointer difference. */
   1.177 +inline decltype((char*)0-(char*)0)
   1.178 +operator-(const char16_t* x, const char16ptr_t y) {
   1.179 +  return x - static_cast<const char16_t*>(y);
   1.180 +}
   1.181 +
   1.182 +#else
   1.183 +/* Without MOZ_USE_CHAR16_WRAPPER, char16ptr_t is a plain pointer typedef. */
   1.184 +typedef const char16_t* char16ptr_t;
   1.185 +
   1.186 +#endif
   1.187 +
   1.188 +/*
   1.189 + * MOZ_UTF16(s) turns a character or string literal into a char16_t literal.
   1.190 + * Macro arguments used in concatenation or stringification won't be expanded,
   1.191 + * so for |MOZ_UTF16(FOO)| to work as expected (which is to expand |FOO| first)
   1.192 + * a helper macro, |MOZ_UTF16_HELPER|, needs to be inserted in between to allow
   1.193 + * the macro argument to expand. See "3.10.6 Separate Expansion of Macro
   1.194 + * Arguments" of the CPP manual for a more accurate and precise explanation.
   1.195 + */
   1.196 +#define MOZ_UTF16(s) MOZ_UTF16_HELPER(s)
   1.197 +
   1.198 +static_assert(sizeof(char16_t) == 2, "Is char16_t type 16 bits?");
   1.199 +static_assert(char16_t(-1) > char16_t(0), "Is char16_t type unsigned?");
   1.200 +static_assert(sizeof(MOZ_UTF16('A')) == 2, "Is char literal 16 bits?");
   1.201 +static_assert(sizeof(MOZ_UTF16("")[0]) == 2, "Is string char 16 bits?");
   1.202 +
   1.203 +#endif
   1.204 +
   1.205 +#endif /* mozilla_Char16_h */

mercurial