michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0:  * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0:  *
michael@0:  * ***** BEGIN LICENSE BLOCK *****
michael@0:  * Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved.
michael@0:  *
michael@0:  * Redistribution and use in source and binary forms, with or without
michael@0:  * modification, are permitted provided that the following conditions
michael@0:  * are met:
michael@0:  *
michael@0:  * 1.  Redistributions of source code must retain the above copyright
michael@0:  *     notice, this list of conditions and the following disclaimer.
michael@0:  * 2.  Redistributions in binary form must reproduce the above copyright
michael@0:  *     notice, this list of conditions and the following disclaimer in the
michael@0:  *     documentation and/or other materials provided with the distribution.
michael@0:  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
michael@0:  *     its contributors may be used to endorse or promote products derived
michael@0:  *     from this software without specific prior written permission.
michael@0:  *
michael@0:  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
michael@0:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
michael@0:  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
michael@0:  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
michael@0:  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
michael@0:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
michael@0:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
michael@0:  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
michael@0:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0:  *
michael@0:  * ***** END LICENSE BLOCK ***** */
michael@0: 
michael@0: #ifndef yarr_ASCIICType_h
michael@0: #define yarr_ASCIICType_h
michael@0: 
michael@0: #include "assembler/wtf/Assertions.h"
michael@0: 
// The behavior of many of the functions in the <ctype.h> header depends on the
// current locale. In the WebKit project, however, every use of those functions is
// in code that processes data that is not locale-specific. The equivalents below
// are named more explicitly and do not depend on the C library locale; they can
// also be optimized as needed.
michael@0: 
// Every predicate returns false, and every case conversion returns the character
// unchanged, when passed a character outside the range 0x00-0x7F. They can therefore
// be used on Unicode strings or characters when the intent is to process only ASCII.
michael@0: 
michael@0: namespace WTF {
michael@0: 
michael@0:     inline bool isASCII(char c) { return !(c & ~0x7F); }
michael@0:     inline bool isASCII(unsigned short c) { return !(c & ~0x7F); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCII(wchar_t c) { return !(c & ~0x7F); }
michael@0: #endif
michael@0:     inline bool isASCII(int c) { return !(c & ~0x7F); }
michael@0:     inline bool isASCII(unsigned c) { return !(c & ~0x7F); }
michael@0: 
michael@0:     inline bool isASCIIAlpha(char c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
michael@0:     inline bool isASCIIAlpha(unsigned short c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIAlpha(wchar_t c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
michael@0: #endif
michael@0:     inline bool isASCIIAlpha(int c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
michael@0:     inline bool isASCIIAlpha(unsigned c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
michael@0: 
michael@0:     inline bool isASCIIAlphanumeric(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
michael@0:     inline bool isASCIIAlphanumeric(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIAlphanumeric(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
michael@0: #endif
michael@0:     inline bool isASCIIAlphanumeric(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
michael@0:     inline bool isASCIIAlphanumeric(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
michael@0: 
michael@0:     inline bool isASCIIDigit(char c) { return (c >= '0') & (c <= '9'); }
michael@0:     inline bool isASCIIDigit(unsigned short c) { return (c >= '0') & (c <= '9'); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIDigit(wchar_t c) { return (c >= '0') & (c <= '9'); }
michael@0: #endif
michael@0:     inline bool isASCIIDigit(int c) { return (c >= '0') & (c <= '9'); }
michael@0:     inline bool isASCIIDigit(unsigned c) { return (c >= '0') & (c <= '9'); }
michael@0: 
michael@0:     inline bool isASCIIHexDigit(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
michael@0:     inline bool isASCIIHexDigit(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIHexDigit(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
michael@0: #endif
michael@0:     inline bool isASCIIHexDigit(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
michael@0:     inline bool isASCIIHexDigit(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
michael@0: 
michael@0:     inline bool isASCIIOctalDigit(char c) { return (c >= '0') & (c <= '7'); }
michael@0:     inline bool isASCIIOctalDigit(unsigned short c) { return (c >= '0') & (c <= '7'); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIOctalDigit(wchar_t c) { return (c >= '0') & (c <= '7'); }
michael@0: #endif
michael@0:     inline bool isASCIIOctalDigit(int c) { return (c >= '0') & (c <= '7'); }
michael@0:     inline bool isASCIIOctalDigit(unsigned c) { return (c >= '0') & (c <= '7'); }
michael@0: 
michael@0:     inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; }
michael@0:     inline bool isASCIILower(unsigned short c) { return c >= 'a' && c <= 'z'; }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIILower(wchar_t c) { return c >= 'a' && c <= 'z'; }
michael@0: #endif
michael@0:     inline bool isASCIILower(int c) { return c >= 'a' && c <= 'z'; }
michael@0:     inline bool isASCIILower(unsigned c) { return c >= 'a' && c <= 'z'; }
michael@0: 
michael@0:     inline bool isASCIIUpper(char c) { return c >= 'A' && c <= 'Z'; }
michael@0:     inline bool isASCIIUpper(unsigned short c) { return c >= 'A' && c <= 'Z'; }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIUpper(wchar_t c) { return c >= 'A' && c <= 'Z'; }
michael@0: #endif
michael@0:     inline bool isASCIIUpper(int c) { return c >= 'A' && c <= 'Z'; }
michael@0:     inline bool isASCIIUpper(unsigned c) { return c >= 'A' && c <= 'Z'; }
michael@0: 
michael@0:     /*
michael@0:         Statistics from a run of Apple's page load test for callers of isASCIISpace:
michael@0: 
michael@0:             character          count
michael@0:             ---------          -----
michael@0:             non-spaces         689383
michael@0:         20  space              294720
michael@0:         0A  \n                 89059
michael@0:         09  \t                 28320
michael@0:         0D  \r                 0
michael@0:         0C  \f                 0
michael@0:         0B  \v                 0
michael@0:     */
michael@0:     inline bool isASCIISpace(char c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
michael@0:     inline bool isASCIISpace(unsigned short c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIISpace(wchar_t c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
michael@0: #endif
michael@0:     inline bool isASCIISpace(int c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
michael@0:     inline bool isASCIISpace(unsigned c) { return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); }
michael@0: 
michael@0:     inline char toASCIILower(char c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
michael@0:     inline unsigned short toASCIILower(unsigned short c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline wchar_t toASCIILower(wchar_t c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
michael@0: #endif
michael@0:     inline int toASCIILower(int c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
michael@0:     inline unsigned toASCIILower(unsigned c) { return c | ((c >= 'A' && c <= 'Z') << 5); }
michael@0: 
michael@0:     // FIXME: Why do these need static_cast?
michael@0:     inline char toASCIIUpper(char c) { return static_cast<char>(c & ~((c >= 'a' && c <= 'z') << 5)); }
michael@0:     inline unsigned short toASCIIUpper(unsigned short c) { return static_cast<unsigned short>(c & ~((c >= 'a' && c <= 'z') << 5)); }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline wchar_t toASCIIUpper(wchar_t c) { return static_cast<wchar_t>(c & ~((c >= 'a' && c <= 'z') << 5)); }
michael@0: #endif
michael@0:     inline int toASCIIUpper(int c) { return static_cast<int>(c & ~((c >= 'a' && c <= 'z') << 5)); }
michael@0:     inline unsigned toASCIIUpper(unsigned c) { return static_cast<unsigned>(c & ~((c >= 'a' && c <= 'z') << 5)); }
michael@0: 
michael@0:     inline int toASCIIHexValue(char c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
michael@0:     inline int toASCIIHexValue(unsigned short c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline int toASCIIHexValue(wchar_t c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
michael@0: #endif
michael@0:     inline int toASCIIHexValue(int c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
michael@0:     inline int toASCIIHexValue(unsigned c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
michael@0: 
michael@0:     inline bool isASCIIPrintable(char c) { return c >= ' ' && c <= '~'; }
michael@0:     inline bool isASCIIPrintable(unsigned short c) { return c >= ' ' && c <= '~'; }
michael@0: #if !WTF_COMPILER_MSVC || defined(_NATIVE_WCHAR_T_DEFINED)
michael@0:     inline bool isASCIIPrintable(wchar_t c) { return c >= ' ' && c <= '~'; }
michael@0: #endif
michael@0:     inline bool isASCIIPrintable(int c) { return c >= ' ' && c <= '~'; }
michael@0:     inline bool isASCIIPrintable(unsigned c) { return c >= ' ' && c <= '~'; }
michael@0: }
michael@0: 
michael@0: using WTF::isASCII;
michael@0: using WTF::isASCIIAlpha;
michael@0: using WTF::isASCIIAlphanumeric;
michael@0: using WTF::isASCIIDigit;
michael@0: using WTF::isASCIIHexDigit;
michael@0: using WTF::isASCIILower;
michael@0: using WTF::isASCIIOctalDigit;
michael@0: using WTF::isASCIIPrintable;
michael@0: using WTF::isASCIISpace;
michael@0: using WTF::isASCIIUpper;
michael@0: using WTF::toASCIIHexValue;
michael@0: using WTF::toASCIILower;
michael@0: using WTF::toASCIIUpper;
michael@0: 
michael@0: #endif /* yarr_ASCIICType_h */