/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Implements a UTF-16 character type. */

#ifndef mozilla_Char16_h
#define mozilla_Char16_h

#ifdef __cplusplus

/*
 * C++11 introduces a char16_t type and support for UTF-16 string and character
 * literals. C++11's char16_t is a distinct builtin type. Technically, char16_t
 * is a 16-bit code unit of a Unicode code point, not a "character".
 */

#ifdef _MSC_VER
   /*
    * C++11 says char16_t is a distinct builtin type, but Windows's yvals.h
    * typedefs char16_t as an unsigned short. We would like to alias char16_t
    * to Windows's 16-bit wchar_t so we can declare UTF-16 literals as constant
    * expressions (and pass char16_t pointers to Windows APIs). We #define
    * _CHAR16T here in order to prevent yvals.h from overriding our char16_t
    * typedefs, which we set to wchar_t for C++ code.
    *
    * In addition, #defining _CHAR16T will prevent yvals.h from defining a
    * char32_t type, so we have to undo that damage here and provide our own,
    * which is identical to the yvals.h type.
    */
#  define MOZ_UTF16_HELPER(s) L##s
#  define _CHAR16T
   typedef wchar_t char16_t;
   typedef unsigned int char32_t;
#else
   /* C++11 has a builtin char16_t type. */
#  define MOZ_UTF16_HELPER(s) u##s
   /**
    * This macro is used to distinguish when char16_t would be a distinct
    * typedef from wchar_t.
    */
#  define MOZ_CHAR16_IS_NOT_WCHAR
#  ifdef WIN32
#    define MOZ_USE_CHAR16_WRAPPER
#  endif
#endif

#ifdef MOZ_USE_CHAR16_WRAPPER
   /*
    * The wrapper below names size_t, ptrdiff_t, intptr_t and uintptr_t without
    * qualification, so pull in the headers that declare them in the global
    * namespace instead of relying on <string> to include them transitively.
    */
#  include <stddef.h>
#  include <stdint.h>
#  include <string>
  /**
   * The Win32 API uses wchar_t extensively, which per spec is a builtin type
   * separate from char16_t. That is not the case for MSVC, but GCC follows the
   * spec. We want to mix wchar_t and char16_t on Windows builds, and this class
   * is supposed to make that easier. It stores a const char16_t pointer, but
   * provides implicit casts for wchar_t as well. On other platforms, we simply
   * use |typedef const char16_t* char16ptr_t|. Here, we want to make the class
   * as similar as possible to that typedef, including providing some casts that
   * are allowed by the typedef.
   */
class char16ptr_t
{
private:
  const char16_t* ptr;
  static_assert(sizeof(char16_t) == sizeof(wchar_t), "char16_t and wchar_t sizes differ");

public:
  /* Implicit construction from either 16-bit character pointer type. */
  char16ptr_t(const char16_t* p) : ptr(p) {}
  char16ptr_t(const wchar_t* p) : ptr(reinterpret_cast<const char16_t*>(p)) {}

  /* Without this, nullptr assignment would be ambiguous. */
  constexpr char16ptr_t(decltype(nullptr)) : ptr(nullptr) {}

  /* Implicit conversions. */
  operator const char16_t*() const {
    return ptr;
  }
  operator const wchar_t*() const {
    return reinterpret_cast<const wchar_t*>(ptr);
  }
  operator const void*() const {
    return ptr;
  }
  operator bool() const {
    return ptr != nullptr;
  }
  /* Builds a std::wstring from the pointed-to data viewed as const wchar_t*. */
  operator std::wstring() const {
    return std::wstring(static_cast<const wchar_t*>(*this));
  }

  /* Explicit cast operators to allow things like (char16_t*)str. */
  explicit operator char16_t*() const {
    return const_cast<char16_t*>(ptr);
  }
  explicit operator wchar_t*() const {
    return const_cast<wchar_t*>(static_cast<const wchar_t*>(*this));
  }

  /*
   * Explicit casts to integral types. Each reinterprets the stored pointer as
   * intptr_t (signed targets) or uintptr_t (unsigned targets) and then converts
   * that value to the requested type.
   */
  explicit operator int() const {
    return reinterpret_cast<intptr_t>(ptr);
  }
  explicit operator unsigned int() const {
    return reinterpret_cast<uintptr_t>(ptr);
  }
  explicit operator long() const {
    return reinterpret_cast<intptr_t>(ptr);
  }
  explicit operator unsigned long() const {
    return reinterpret_cast<uintptr_t>(ptr);
  }
  explicit operator long long() const {
    return reinterpret_cast<intptr_t>(ptr);
  }
  explicit operator unsigned long long() const {
    return reinterpret_cast<uintptr_t>(ptr);
  }

  /**
   * Some Windows API calls accept BYTE* but require that data actually be
   * WCHAR*. Supporting this requires explicit operators to support the
   * requisite explicit casts.
   */
  explicit operator const char*() const {
    return reinterpret_cast<const char*>(ptr);
  }
  explicit operator const unsigned char*() const {
    return reinterpret_cast<const unsigned char*>(ptr);
  }
  explicit operator unsigned char*() const {
    return const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(ptr));
  }
  explicit operator void*() const {
    return const_cast<char16_t*>(ptr);
  }

  /* Some operators used on pointers. */
  char16_t operator[](size_t i) const {
    return ptr[i];
  }
  bool operator==(const char16ptr_t &x) const {
    return ptr == x.ptr;
  }
  bool operator==(decltype(nullptr)) const {
    return ptr == nullptr;
  }
  bool operator!=(const char16ptr_t &x) const {
    return ptr != x.ptr;
  }
  bool operator!=(decltype(nullptr)) const {
    return ptr != nullptr;
  }

  /* Pointer arithmetic: one overload per integral offset type. */
  char16ptr_t operator+(int aValue) const {
    return char16ptr_t(ptr + aValue);
  }
  char16ptr_t operator+(unsigned int aValue) const {
    return char16ptr_t(ptr + aValue);
  }
  char16ptr_t operator+(long aValue) const {
    return char16ptr_t(ptr + aValue);
  }
  char16ptr_t operator+(unsigned long aValue) const {
    return char16ptr_t(ptr + aValue);
  }
  char16ptr_t operator+(long long aValue) const {
    return char16ptr_t(ptr + aValue);
  }
  char16ptr_t operator+(unsigned long long aValue) const {
    return char16ptr_t(ptr + aValue);
  }
  ptrdiff_t operator-(const char16ptr_t &other) const {
    return ptr - other.ptr;
  }
};

/* Difference between a raw char16_t pointer and a wrapped pointer. */
inline ptrdiff_t
operator-(const char16_t* x, const char16ptr_t y) {
  return x - static_cast<const char16_t*>(y);
}

#else

typedef const char16_t* char16ptr_t;

#endif

/*
 * Macro arguments used in concatenation or stringification won't be expanded.
 * Therefore, in order for |MOZ_UTF16(FOO)| to work as expected (which is to
 * expand |FOO| before doing whatever |MOZ_UTF16| needs to do to it) a helper
 * macro, |MOZ_UTF16_HELPER| needs to be inserted in between to allow the macro
 * argument to expand. See "3.10.6 Separate Expansion of Macro Arguments" of the
 * CPP manual for a more accurate and precise explanation.
 */
#define MOZ_UTF16(s) MOZ_UTF16_HELPER(s)

static_assert(sizeof(char16_t) == 2, "Is char16_t type 16 bits?");
static_assert(char16_t(-1) > char16_t(0), "Is char16_t type unsigned?");
static_assert(sizeof(MOZ_UTF16('A')) == 2, "Is char literal 16 bits?");
static_assert(sizeof(MOZ_UTF16("")[0]) == 2, "Is string char 16 bits?");

#endif

#endif /* mozilla_Char16_h */