michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: ///
michael@0: /// General FIR digital filter routines with MMX optimization. 
michael@0: ///
michael@0: /// Note : MMX optimized functions reside in a separate, platform-specific file, 
michael@0: /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
michael@0: ///
michael@0: /// Author        : Copyright (c) Olli Parviainen
michael@0: /// Author e-mail : oparviai 'at' iki.fi
michael@0: /// SoundTouch WWW: http://www.surina.net/soundtouch
michael@0: ///
michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: //
michael@0: // Last changed  : $Date: 2013-06-12 10:24:44 -0500 (Wed, 12 Jun 2013) $
michael@0: // File revision : $Revision: 4 $
michael@0: //
michael@0: // $Id: FIRFilter.cpp 171 2013-06-12 15:24:44Z oparviai $
michael@0: //
michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: //
michael@0: // License :
michael@0: //
michael@0: //  SoundTouch audio processing library
michael@0: //  Copyright (c) Olli Parviainen
michael@0: //
michael@0: //  This library is free software; you can redistribute it and/or
michael@0: //  modify it under the terms of the GNU Lesser General Public
michael@0: //  License as published by the Free Software Foundation; either
michael@0: //  version 2.1 of the License, or (at your option) any later version.
michael@0: //
michael@0: //  This library is distributed in the hope that it will be useful,
michael@0: //  but WITHOUT ANY WARRANTY; without even the implied warranty of
michael@0: //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
michael@0: //  Lesser General Public License for more details.
michael@0: //
michael@0: //  You should have received a copy of the GNU Lesser General Public
michael@0: //  License along with this library; if not, write to the Free Software
michael@0: //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
michael@0: //
michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: 
michael@0: #include <memory.h>
michael@0: #include <assert.h>
michael@0: #include <math.h>
michael@0: #include <stdlib.h>
michael@0: #include "FIRFilter.h"
michael@0: #include "cpu_detect.h"
michael@0: 
michael@0: #ifdef _MSC_VER
michael@0: #include <malloc.h>
michael@0: #define alloca _alloca
michael@0: #endif
michael@0: 
michael@0: using namespace soundtouch;
michael@0: 
michael@0: /*****************************************************************************
michael@0:  *
michael@0:  * Implementation of the class 'FIRFilter'
michael@0:  *
michael@0:  *****************************************************************************/
michael@0: 
michael@0: FIRFilter::FIRFilter()
michael@0: {
michael@0:     resultDivFactor = 0;
michael@0:     resultDivider = 0;
michael@0:     length = 0;
michael@0:     lengthDiv8 = 0;
michael@0:     filterCoeffs = NULL;
michael@0: }
michael@0: 
michael@0: 
michael@0: FIRFilter::~FIRFilter()
michael@0: {
michael@0:     delete[] filterCoeffs;
michael@0: }
michael@0: 
michael@0: // Usual C-version of the filter routine for stereo sound
michael@0: uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
michael@0: {
michael@0:     uint i, j, end;
michael@0:     LONG_SAMPLETYPE suml, sumr;
michael@0: #ifdef SOUNDTOUCH_FLOAT_SAMPLES
michael@0:     // when using floating point samples, use a scaler instead of a divider
michael@0:     // because division is much slower operation than multiplying.
michael@0:     double dScaler = 1.0 / (double)resultDivider;
michael@0: #endif
michael@0: 
michael@0:     assert(length != 0);
michael@0:     assert(src != NULL);
michael@0:     assert(dest != NULL);
michael@0:     assert(filterCoeffs != NULL);
michael@0: 
michael@0:     end = 2 * (numSamples - length);
michael@0: 
michael@0:     for (j = 0; j < end; j += 2) 
michael@0:     {
michael@0:         const SAMPLETYPE *ptr;
michael@0: 
michael@0:         suml = sumr = 0;
michael@0:         ptr = src + j;
michael@0: 
michael@0:         for (i = 0; i < length; i += 4) 
michael@0:         {
michael@0:             // loop is unrolled by factor of 4 here for efficiency
michael@0:             suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
michael@0:                     ptr[2 * i + 2] * filterCoeffs[i + 1] +
michael@0:                     ptr[2 * i + 4] * filterCoeffs[i + 2] +
michael@0:                     ptr[2 * i + 6] * filterCoeffs[i + 3];
michael@0:             sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
michael@0:                     ptr[2 * i + 3] * filterCoeffs[i + 1] +
michael@0:                     ptr[2 * i + 5] * filterCoeffs[i + 2] +
michael@0:                     ptr[2 * i + 7] * filterCoeffs[i + 3];
michael@0:         }
michael@0: 
michael@0: #ifdef SOUNDTOUCH_INTEGER_SAMPLES
michael@0:         suml >>= resultDivFactor;
michael@0:         sumr >>= resultDivFactor;
michael@0:         // saturate to 16 bit integer limits
michael@0:         suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
michael@0:         // saturate to 16 bit integer limits
michael@0:         sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
michael@0: #else
michael@0:         suml *= dScaler;
michael@0:         sumr *= dScaler;
michael@0: #endif // SOUNDTOUCH_INTEGER_SAMPLES
michael@0:         dest[j] = (SAMPLETYPE)suml;
michael@0:         dest[j + 1] = (SAMPLETYPE)sumr;
michael@0:     }
michael@0:     return numSamples - length;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: 
michael@0: // Usual C-version of the filter routine for mono sound
michael@0: uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
michael@0: {
michael@0:     uint i, j, end;
michael@0:     LONG_SAMPLETYPE sum;
michael@0: #ifdef SOUNDTOUCH_FLOAT_SAMPLES
michael@0:     // when using floating point samples, use a scaler instead of a divider
michael@0:     // because division is much slower operation than multiplying.
michael@0:     double dScaler = 1.0 / (double)resultDivider;
michael@0: #endif
michael@0: 
michael@0: 
michael@0:     assert(length != 0);
michael@0: 
michael@0:     end = numSamples - length;
michael@0:     for (j = 0; j < end; j ++) 
michael@0:     {
michael@0:         sum = 0;
michael@0:         for (i = 0; i < length; i += 4) 
michael@0:         {
michael@0:             // loop is unrolled by factor of 4 here for efficiency
michael@0:             sum += src[i + 0] * filterCoeffs[i + 0] + 
michael@0:                    src[i + 1] * filterCoeffs[i + 1] + 
michael@0:                    src[i + 2] * filterCoeffs[i + 2] + 
michael@0:                    src[i + 3] * filterCoeffs[i + 3];
michael@0:         }
michael@0: #ifdef SOUNDTOUCH_INTEGER_SAMPLES
michael@0:         sum >>= resultDivFactor;
michael@0:         // saturate to 16 bit integer limits
michael@0:         sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
michael@0: #else
michael@0:         sum *= dScaler;
michael@0: #endif // SOUNDTOUCH_INTEGER_SAMPLES
michael@0:         dest[j] = (SAMPLETYPE)sum;
michael@0:         src ++;
michael@0:     }
michael@0:     return end;
michael@0: }
michael@0: 
michael@0: 
michael@0: uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
michael@0: {
michael@0:     uint i, j, end, c;
michael@0:     LONG_SAMPLETYPE *sum=(LONG_SAMPLETYPE*)alloca(numChannels*sizeof(*sum));
michael@0: #ifdef SOUNDTOUCH_FLOAT_SAMPLES
michael@0:     // when using floating point samples, use a scaler instead of a divider
michael@0:     // because division is much slower operation than multiplying.
michael@0:     double dScaler = 1.0 / (double)resultDivider;
michael@0: #endif
michael@0: 
michael@0:     assert(length != 0);
michael@0:     assert(src != NULL);
michael@0:     assert(dest != NULL);
michael@0:     assert(filterCoeffs != NULL);
michael@0: 
michael@0:     end = numChannels * (numSamples - length);
michael@0: 
michael@0:     for (c = 0; c < numChannels; c ++)
michael@0:     {
michael@0:         sum[c] = 0;
michael@0:     }
michael@0: 
michael@0:     for (j = 0; j < end; j += numChannels)
michael@0:     {
michael@0:         const SAMPLETYPE *ptr;
michael@0: 
michael@0:         ptr = src + j;
michael@0: 
michael@0:         for (i = 0; i < length; i ++)
michael@0:         {
michael@0:             SAMPLETYPE coef=filterCoeffs[i];
michael@0:             for (c = 0; c < numChannels; c ++)
michael@0:             {
michael@0:                 sum[c] += ptr[0] * coef;
michael@0:                 ptr ++;
michael@0:             }
michael@0:         }
michael@0:         
michael@0:         for (c = 0; c < numChannels; c ++)
michael@0:         {
michael@0: #ifdef SOUNDTOUCH_INTEGER_SAMPLES
michael@0:             sum[c] >>= resultDivFactor;
michael@0: #else
michael@0:             sum[c] *= dScaler;
michael@0: #endif // SOUNDTOUCH_INTEGER_SAMPLES
michael@0:             *dest = (SAMPLETYPE)sum[c];
michael@0:             dest++;
michael@0:             sum[c] = 0;
michael@0:         }
michael@0:     }
michael@0:     return numSamples - length;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Set filter coeffiecients and length.
michael@0: //
michael@0: // Throws an exception if filter length isn't divisible by 8
michael@0: void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
michael@0: {
michael@0:     assert(newLength > 0);
michael@0:     if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
michael@0: 
michael@0:     lengthDiv8 = newLength / 8;
michael@0:     length = lengthDiv8 * 8;
michael@0:     assert(length == newLength);
michael@0: 
michael@0:     resultDivFactor = uResultDivFactor;
michael@0:     resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);
michael@0: 
michael@0:     delete[] filterCoeffs;
michael@0:     filterCoeffs = new SAMPLETYPE[length];
michael@0:     memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
michael@0: }
michael@0: 
michael@0: 
michael@0: uint FIRFilter::getLength() const
michael@0: {
michael@0:     return length;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Applies the filter to the given sequence of samples. 
michael@0: //
michael@0: // Note : The amount of outputted samples is by value of 'filter_length' 
michael@0: // smaller than the amount of input samples.
michael@0: uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
michael@0: {
michael@0:     assert(length > 0);
michael@0:     assert(lengthDiv8 * 8 == length);
michael@0: 
michael@0:     if (numSamples < length) return 0;
michael@0: 
michael@0: #ifndef USE_MULTICH_ALWAYS
michael@0:     if (numChannels == 1)
michael@0:     {
michael@0:         return evaluateFilterMono(dest, src, numSamples);
michael@0:     } 
michael@0:     else if (numChannels == 2)
michael@0:     {
michael@0:         return evaluateFilterStereo(dest, src, numSamples);
michael@0:     }
michael@0:     else
michael@0: #endif // USE_MULTICH_ALWAYS
michael@0:     {
michael@0:         assert(numChannels > 0);
michael@0:         return evaluateFilterMulti(dest, src, numSamples, numChannels);
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Operator 'new' is overloaded so that it automatically creates a suitable instance 
michael@0: // depending on if we've a MMX-capable CPU available or not.
michael@0: void * FIRFilter::operator new(size_t s)
michael@0: {
michael@0:     // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
michael@0:     ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
michael@0:     return newInstance();
michael@0: }
michael@0: 
michael@0: 
michael@0: FIRFilter * FIRFilter::newInstance()
michael@0: {
michael@0: #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
michael@0:     uint uExtensions;
michael@0: 
michael@0:     uExtensions = detectCPUextensions();
michael@0: #endif
michael@0: 
michael@0:     // Check if MMX/SSE instruction set extensions supported by CPU
michael@0: 
michael@0: #ifdef SOUNDTOUCH_ALLOW_MMX
michael@0:     // MMX routines available only with integer sample types
michael@0:     if (uExtensions & SUPPORT_MMX)
michael@0:     {
michael@0:         return ::new FIRFilterMMX;
michael@0:     }
michael@0:     else
michael@0: #endif // SOUNDTOUCH_ALLOW_MMX
michael@0: 
michael@0: #ifdef SOUNDTOUCH_ALLOW_SSE
michael@0:     if (uExtensions & SUPPORT_SSE)
michael@0:     {
michael@0:         // SSE support
michael@0:         return ::new FIRFilterSSE;
michael@0:     }
michael@0:     else
michael@0: #endif // SOUNDTOUCH_ALLOW_SSE
michael@0: 
michael@0:     {
michael@0:         // ISA optimizations not supported, use plain C version
michael@0:         return ::new FIRFilter;
michael@0:     }
michael@0: }