michael@0: 
michael@0: /*
michael@0:  * Copyright 2006 The Android Open Source Project
michael@0:  *
michael@0:  * Use of this source code is governed by a BSD-style license that can be
michael@0:  * found in the LICENSE file.
michael@0:  */
michael@0: 
michael@0: 
michael@0: #ifndef SkMath_DEFINED
michael@0: #define SkMath_DEFINED
michael@0: 
michael@0: #include "SkTypes.h"
michael@0: 
/**
 *  Computes numer1 * numer2 / denom in full 64-bit intermediate precision.
 *  It is an error for denom to be 0. There is no special handling if
 *  the result overflows 32 bits.
 */
michael@0: int32_t SkMulDiv(int32_t numer1, int32_t numer2, int32_t denom);
michael@0: 
/**
 *  Computes (numer << shift) / denom in full 64-bit intermediate precision.
 *  It is an error for denom to be 0. There is no special handling if
 *  the result overflows 32 bits.
 */
michael@0: int32_t SkDivBits(int32_t numer, int32_t denom, int shift);
michael@0: 
michael@0: /**
michael@0:  *  Return the integer square root of value, with a bias of bitBias
michael@0:  */
michael@0: int32_t SkSqrtBits(int32_t value, int bitBias);
michael@0: 
michael@0: /** Return the integer square root of n, treated as a SkFixed (16.16)
michael@0:  */
michael@0: #define SkSqrt32(n)         SkSqrtBits(n, 15)
michael@0: 
michael@0: // 64bit -> 32bit utilities
michael@0: 
/**
 *  Reports whether a 64bit value can be represented exactly in signed 32bits.
 *  The value is narrowed to int32_t and compared against the original; the
 *  two agree only when nothing was lost by the narrowing.
 */
static inline bool sk_64_isS32(int64_t value) {
    const int32_t narrowed = (int32_t)value;
    return value == narrowed;
}
michael@0: 
michael@0: /**
michael@0:  *  Return the 64bit argument as signed 32bits, asserting in debug that the arg
michael@0:  *  exactly fits in signed 32bits. In the release build, no checks are preformed
michael@0:  *  and the return value if the arg does not fit is undefined.
michael@0:  */
michael@0: static inline int32_t sk_64_asS32(int64_t value) {
michael@0:     SkASSERT(sk_64_isS32(value));
michael@0:     return (int32_t)value;
michael@0: }
michael@0: 
// Multiplies two values at 64bit width. Because both parameters are int64_t,
// int arguments are widened at the call site before the multiply happens, so
// the caller does not have to remember to write (int64_t)a * b.
static inline int64_t sk_64_mul(int64_t a, int64_t b) {
    const int64_t product = a * b;
    return product;
}
michael@0: 
michael@0: ///////////////////////////////////////////////////////////////////////////////
michael@0: 
//! Returns the number of leading zero bits (0...32)
int SkCLZ_portable(uint32_t);

/**
 *  SkCLZ(x) returns the number of leading zero bits in a 32bit value
 *  (0...32); every implementation below returns 32 for an input of 0.
 *  If SkCLZ has already been defined as a macro it is left alone. Otherwise a
 *  compiler intrinsic is used where one is known, and SkCLZ_portable is the
 *  fallback.
 */
#ifndef SkCLZ
    #if defined(_MSC_VER) && _MSC_VER >= 1400
        #include <intrin.h>

        static inline int SkCLZ(uint32_t mask) {
            if (mask) {
                // _BitScanReverse is declared to take an unsigned long*.
                // Using that type directly (instead of DWORD) means this
                // header does not rely on <windows.h> being included first.
                unsigned long index = 0;
                _BitScanReverse(&index, mask);
                // index is the position of the highest set bit (0...31), so
                // index ^ 0x1F == 31 - index, i.e. the leading zero count.
                return (int)(index ^ 0x1F);
            } else {
                // There is no set bit to find, so handle zero explicitly.
                return 32;
            }
        }
    #elif defined(SK_CPU_ARM) || defined(__GNUC__) || defined(__clang__)
        static inline int SkCLZ(uint32_t mask) {
            // __builtin_clz(0) is undefined, so we have to detect that case.
            return mask ? __builtin_clz(mask) : 32;
        }
    #else
        #define SkCLZ(x)    SkCLZ_portable(x)
    #endif
#endif
michael@0: 
/**
 *  Returns (value < 0 ? 0 : value).
 *
 *  This is written as a plain conditional rather than the mask trick
 *  value & ~(value >> 31). The mask form assumes int is exactly 32 bits wide
 *  and that right-shifting a negative int replicates the sign bit, which the
 *  language leaves implementation-defined. The conditional gives the same
 *  result without either assumption.
 *
 *  @param value    Any int
 *  @return 0 if value is negative, else value unchanged
 */
static inline int SkClampPos(int value) {
    return value < 0 ? 0 : value;
}
michael@0: 
michael@0: /** Given an integer and a positive (max) integer, return the value
michael@0:  *  pinned against 0 and max, inclusive.
michael@0:  *  @param value    The value we want returned pinned between [0...max]
michael@0:  *  @param max      The positive max value
michael@0:  *  @return 0 if value < 0, max if value > max, else value
michael@0:  */
michael@0: static inline int SkClampMax(int value, int max) {
michael@0:     // ensure that max is positive
michael@0:     SkASSERT(max >= 0);
michael@0:     if (value < 0) {
michael@0:         value = 0;
michael@0:     }
michael@0:     if (value > max) {
michael@0:         value = max;
michael@0:     }
michael@0:     return value;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Returns the smallest power-of-2 that is >= the specified value. If value
michael@0:  *  is already a power of 2, then it is returned unchanged. It is undefined
michael@0:  *  if value is <= 0.
michael@0:  */
michael@0: static inline int SkNextPow2(int value) {
michael@0:     SkASSERT(value > 0);
michael@0:     return 1 << (32 - SkCLZ(value - 1));
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Returns the log2 of the specified value, were that value to be rounded up
michael@0:  *  to the next power of 2. It is undefined to pass 0. Examples:
michael@0:  *  SkNextLog2(1) -> 0
michael@0:  *  SkNextLog2(2) -> 1
michael@0:  *  SkNextLog2(3) -> 2
michael@0:  *  SkNextLog2(4) -> 2
michael@0:  *  SkNextLog2(5) -> 3
michael@0:  */
michael@0: static inline int SkNextLog2(uint32_t value) {
michael@0:     SkASSERT(value != 0);
michael@0:     return 32 - SkCLZ(value - 1);
michael@0: }
michael@0: 
/**
 *  Returns true if value is a power of 2, i.e. if clearing its lowest set bit
 *  leaves nothing behind. There is no explicit check for value <= 0, so an
 *  input of 0 also yields true.
 */
static inline bool SkIsPow2(int value) {
    const int lowestBitCleared = value & (value - 1);
    return 0 == lowestBitCleared;
}
michael@0: 
michael@0: ///////////////////////////////////////////////////////////////////////////////
michael@0: 
/**
 *  SkMulS16(a, b) multiplies a * b, but requires that both operands fit in
 *  int16_t. Given that restriction, some architectures can use a faster
 *  multiply instruction.
 *
 *  Three variants are selected at compile time:
 *    - SK_ARM_HAS_EDSP: a function wrapping the smulbb instruction
 *    - SK_DEBUG:        a function that asserts the int16_t requirement
 *    - otherwise:       a plain macro expanding to (x) * (y)
 */
#if defined(SK_ARM_HAS_EDSP)
    static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
        SkASSERT((int16_t)x == x);
        SkASSERT((int16_t)y == y);
        int32_t result;
        asm("smulbb %0, %1, %2 \n"
            : "=r"(result)
            : "r"(x), "r"(y)
            );
        return result;
    }
#elif defined(SK_DEBUG)
    static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
        SkASSERT((int16_t)x == x);
        SkASSERT((int16_t)y == y);
        return x * y;
    }
#else
    #define SkMulS16(x, y)  ((x) * (y))
#endif
michael@0: 
michael@0: /**
michael@0:  *  Return a*b/((1 << shift) - 1), rounding any fractional bits.
michael@0:  *  Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8
michael@0:  */
michael@0: static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) {
michael@0:     SkASSERT(a <= 32767);
michael@0:     SkASSERT(b <= 32767);
michael@0:     SkASSERT(shift > 0 && shift <= 8);
michael@0:     unsigned prod = SkMulS16(a, b) + (1 << (shift - 1));
michael@0:     return (prod + (prod >> shift)) >> shift;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  *  Return a*b/255, rounding any fractional bits.
michael@0:  *  Only valid if a and b are unsigned and <= 32767.
michael@0:  */
michael@0: static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) {
michael@0:     SkASSERT(a <= 32767);
michael@0:     SkASSERT(b <= 32767);
michael@0:     unsigned prod = SkMulS16(a, b) + 128;
michael@0:     return (prod + (prod >> 8)) >> 8;
michael@0: }
michael@0: 
/**
 *  Computes the quotient numer/denom and the remainder numer%denom in one
 *  call, writing them through div and mod respectively after converting each
 *  to Out.
 */
template <typename In, typename Out>
inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {
#if !defined(SK_CPU_ARM)
    // On x86 this will just be a single idiv.
    *div = static_cast<Out>(numer/denom);
    *mod = static_cast<Out>(numer%denom);
#else
    // Spelling this with / and % as in the other branch stops GCC from fusing
    // the pair into a single divmod call; it emits a div call followed by a
    // divmod call instead. Deriving the remainder from the quotient is just as
    // fast as calling __aeabi_[u]idivmod by hand, and reads better.
    //
    // This benches as around 2x faster than the / and % form.
    const In quotient = numer/denom;
    *div = static_cast<Out>(quotient);
    *mod = static_cast<Out>(numer-quotient*denom);
#endif  // !SK_CPU_ARM
}
michael@0: 
michael@0: #endif