content/media/webaudio/blink/HRTFPanner.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/media/webaudio/blink/HRTFPanner.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,318 @@
     1.4 +/*
     1.5 + * Copyright (C) 2010, Google Inc. All rights reserved.
     1.6 + *
     1.7 + * Redistribution and use in source and binary forms, with or without
     1.8 + * modification, are permitted provided that the following conditions
     1.9 + * are met:
    1.10 + * 1.  Redistributions of source code must retain the above copyright
    1.11 + *    notice, this list of conditions and the following disclaimer.
    1.12 + * 2.  Redistributions in binary form must reproduce the above copyright
    1.13 + *    notice, this list of conditions and the following disclaimer in the
    1.14 + *    documentation and/or other materials provided with the distribution.
    1.15 + *
    1.16 + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
    1.17 + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    1.18 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    1.19 + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
    1.20 + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    1.21 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    1.22 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
    1.23 + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.24 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    1.25 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.26 + */
    1.27 +
    1.28 +#include "HRTFPanner.h"
    1.29 +#include "HRTFDatabaseLoader.h"
    1.30 +
    1.31 +#include "FFTConvolver.h"
    1.32 +#include "HRTFDatabase.h"
    1.33 +
    1.34 +using namespace std;
    1.35 +using namespace mozilla;
    1.36 +using dom::ChannelInterpretation;
    1.37 +
    1.38 +namespace WebCore {
    1.39 +
    1.40 +// The value of 2 milliseconds is larger than the largest delay which exists in any HRTFKernel from the default HRTFDatabase (0.0136 seconds).
    1.41 +// We ASSERT the delay values used in process() with this value.
    1.42 +const double MaxDelayTimeSeconds = 0.002;
    1.43 +
    1.44 +const int UninitializedAzimuth = -1;
    1.45 +const unsigned RenderingQuantum = WEBAUDIO_BLOCK_SIZE;
    1.46 +
// Constructs a panner rendering at |sampleRate|, taking shared ownership of
// the (possibly still-loading) HRTF database loader.
// Two full convolver sets (L1/R1 and L2/R2) are created so that position
// changes can be rendered as a cross-fade between two kernel pairs; all four
// share the FFT size chosen for this sample rate.
// NOTE: initializer order matters — m_convolverR1/L2/R2 are initialized from
// m_convolverL1.fftSize(), so m_convolverL1 must be declared (and thus
// initialized) first.
HRTFPanner::HRTFPanner(float sampleRate, mozilla::TemporaryRef<HRTFDatabaseLoader> databaseLoader)
    : m_databaseLoader(databaseLoader)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_azimuthIndex2(UninitializedAzimuth)
    // m_elevation1 and m_elevation2 are initialized in pan()
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate))
    , m_convolverR1(m_convolverL1.fftSize())
    , m_convolverL2(m_convolverL1.fftSize())
    , m_convolverR2(m_convolverL1.fftSize())
    // The delay line implements the inter-aural time difference; its maximum
    // delay bounds every per-ear delay asserted in pan().
    , m_delayLine(MaxDelayTimeSeconds * sampleRate, 1.0)
{
    MOZ_ASSERT(m_databaseLoader);
    MOZ_COUNT_CTOR(HRTFPanner);

    // Scratch buffers, one render quantum each, used by pan() as convolver
    // destinations while a cross-fade is in progress.
    m_tempL1.SetLength(RenderingQuantum);
    m_tempR1.SetLength(RenderingQuantum);
    m_tempL2.SetLength(RenderingQuantum);
    m_tempR2.SetLength(RenderingQuantum);
}
    1.70 +
// All members clean up via their own destructors; only the leak-checking
// counter needs explicit balancing here.
HRTFPanner::~HRTFPanner()
{
    MOZ_COUNT_DTOR(HRTFPanner);
}
    1.75 +
    1.76 +size_t HRTFPanner::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
    1.77 +{
    1.78 +    size_t amount = aMallocSizeOf(this);
    1.79 +
    1.80 +    if (m_databaseLoader) {
    1.81 +        m_databaseLoader->sizeOfIncludingThis(aMallocSizeOf);
    1.82 +    }
    1.83 +
    1.84 +    amount += m_convolverL1.sizeOfExcludingThis(aMallocSizeOf);
    1.85 +    amount += m_convolverR1.sizeOfExcludingThis(aMallocSizeOf);
    1.86 +    amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf);
    1.87 +    amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf);
    1.88 +    amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf);
    1.89 +    amount += m_tempL1.SizeOfExcludingThis(aMallocSizeOf);
    1.90 +    amount += m_tempL2.SizeOfExcludingThis(aMallocSizeOf);
    1.91 +    amount += m_tempR1.SizeOfExcludingThis(aMallocSizeOf);
    1.92 +    amount += m_tempR2.SizeOfExcludingThis(aMallocSizeOf);
    1.93 +
    1.94 +    return amount;
    1.95 +}
    1.96 +
    1.97 +void HRTFPanner::reset()
    1.98 +{
    1.99 +    m_azimuthIndex1 = UninitializedAzimuth;
   1.100 +    m_azimuthIndex2 = UninitializedAzimuth;
   1.101 +    // m_elevation1 and m_elevation2 are initialized in pan()
   1.102 +    m_crossfadeSelection = CrossfadeSelection1;
   1.103 +    m_crossfadeX = 0.0f;
   1.104 +    m_crossfadeIncr = 0.0f;
   1.105 +    m_convolverL1.reset();
   1.106 +    m_convolverR1.reset();
   1.107 +    m_convolverL2.reset();
   1.108 +    m_convolverR2.reset();
   1.109 +    m_delayLine.Reset();
   1.110 +}
   1.111 +
   1.112 +int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
   1.113 +{
   1.114 +    // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
   1.115 +    // The azimuth index may then be calculated from this positive value.
   1.116 +    if (azimuth < 0)
   1.117 +        azimuth += 360.0;
   1.118 +
   1.119 +    HRTFDatabase* database = m_databaseLoader->database();
   1.120 +    MOZ_ASSERT(database);
   1.121 +
   1.122 +    int numberOfAzimuths = database->numberOfAzimuths();
   1.123 +    const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;
   1.124 +
   1.125 +    // Calculate the azimuth index and the blend (0 -> 1) for interpolation.
   1.126 +    double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
   1.127 +    int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
   1.128 +    azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);
   1.129 +
   1.130 +    // We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at.
   1.131 +    // This minimizes the clicks and graininess for moving sources which occur otherwise.
   1.132 +    desiredAzimuthIndex = max(0, desiredAzimuthIndex);
   1.133 +    desiredAzimuthIndex = min(numberOfAzimuths - 1, desiredAzimuthIndex);
   1.134 +    return desiredAzimuthIndex;
   1.135 +}
   1.136 +
// Spatializes one render quantum of |inputBus| (mono or stereo) into the
// stereo |outputBus| using head-related transfer functions.
//
// Signal path: delay line (inter-aural time difference) -> per-ear FFT
// convolution with the HRTF kernel.  Two complete convolver sets (1 and 2)
// are maintained so a change of source position can be rendered as a short
// linear cross-fade between the old and new kernels rather than a click.
//
// |desiredAzimuth| is in degrees within [-180, 180]; on any precondition
// failure the output is silenced rather than left with stale data.
void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioChunk* inputBus, AudioChunk* outputBus)
{
#ifdef DEBUG
    unsigned numInputChannels =
        inputBus->IsNull() ? 0 : inputBus->mChannelData.Length();

    MOZ_ASSERT(numInputChannels <= 2);
    MOZ_ASSERT(inputBus->mDuration == WEBAUDIO_BLOCK_SIZE);
#endif

    // Output must be a stereo chunk of exactly one render quantum.
    bool isOutputGood = outputBus && outputBus->mChannelData.Length() == 2 && outputBus->mDuration == WEBAUDIO_BLOCK_SIZE;
    MOZ_ASSERT(isOutputGood);

    if (!isOutputGood) {
        if (outputBus)
            outputBus->SetNull(outputBus->mDuration);
        return;
    }

    // The database loads asynchronously; emit silence until it is ready.
    HRTFDatabase* database = m_databaseLoader->database();
    if (!database) { // not yet loaded
        outputBus->SetNull(outputBus->mDuration);
        return;
    }

    // IRCAM HRTF azimuths values from the loaded database is reversed from the panner's notion of azimuth.
    double azimuth = -desiredAzimuth;

    bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0;
    MOZ_ASSERT(isAzimuthGood);
    if (!isAzimuthGood) {
        outputBus->SetNull(outputBus->mDuration);
        return;
    }

    // Normally, we'll just be dealing with mono sources.
    // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF.

    // Get destination pointers.  These buffers are also used below as
    // scratch space: the delay line writes into them and the convolvers read
    // from them (possibly in place).
    // NOTE(review): mChannelData stores const pointers; the const_cast
    // presumably relies on this chunk owning writable buffers — confirm
    // against the caller.
    float* destinationL =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0]));
    float* destinationR =
        static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1]));

    double azimuthBlend;
    int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);

    // Initially snap azimuth and elevation values to first values encountered.
    if (m_azimuthIndex1 == UninitializedAzimuth) {
        m_azimuthIndex1 = desiredAzimuthIndex;
        m_elevation1 = elevation;
    }
    if (m_azimuthIndex2 == UninitializedAzimuth) {
        m_azimuthIndex2 = desiredAzimuthIndex;
        m_elevation2 = elevation;
    }

    // Cross-fade / transition over a period of around 45 milliseconds.
    // This is an empirical value tuned to be a reasonable trade-off between
    // smoothness and speed.
    const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;

    // Check for azimuth and elevation changes, initiating a cross-fade if needed.
    // m_crossfadeX is the fade position: 0 = fully on set 1, 1 = fully on
    // set 2.  A new fade only starts once the previous one has completed
    // (i.e. m_crossfadeX has settled at exactly 0 or 1).
    if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
        if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
            // Cross-fade from 1 -> 2
            m_crossfadeIncr = 1 / fadeFrames;
            m_azimuthIndex2 = desiredAzimuthIndex;
            m_elevation2 = elevation;
        }
    }
    if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
        if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
            // Cross-fade from 2 -> 1
            m_crossfadeIncr = -1 / fadeFrames;
            m_azimuthIndex1 = desiredAzimuthIndex;
            m_elevation1 = elevation;
        }
    }

    // Get the HRTFKernels and interpolated delays.
    HRTFKernel* kernelL1;
    HRTFKernel* kernelR1;
    HRTFKernel* kernelL2;
    HRTFKernel* kernelR2;
    double frameDelayL1;
    double frameDelayR1;
    double frameDelayL2;
    double frameDelayR2;
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
    database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);

    bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
    MOZ_ASSERT(areKernelsGood);
    if (!areKernelsGood) {
        outputBus->SetNull(outputBus->mDuration);
        return;
    }

    // Every interpolated delay must fit within the delay line allocated in
    // the constructor (MaxDelayTimeSeconds exceeds the largest delay in the
    // default database; see the comment at the top of this file).
    MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
    MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);

    // Crossfade inter-aural delays based on transitions.
    // Build a per-frame delay curve that ramps between the set-1 and set-2
    // delays in lock-step with the gain cross-fade applied further below.
    double frameDelaysL[WEBAUDIO_BLOCK_SIZE];
    double frameDelaysR[WEBAUDIO_BLOCK_SIZE];
    {
      float x = m_crossfadeX;
      float incr = m_crossfadeIncr;
      for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
        frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2;
        frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2;
        x += incr;
      }
    }

    // First run through delay lines for inter-aural time difference.
    // The delayed signal lands in destinationL/destinationR, which then feed
    // the convolvers as their input.
    m_delayLine.Write(*inputBus);
    // "Speakers" means a mono input is read into both outputs (with possibly
    // different delays).
    m_delayLine.ReadChannel(frameDelaysL, outputBus, 0,
                            ChannelInterpretation::Speakers);
    m_delayLine.ReadChannel(frameDelaysR, outputBus, 1,
                            ChannelInterpretation::Speakers);
    m_delayLine.NextBlock();

    // A nonzero increment means a cross-fade is in progress this block.
    bool needsCrossfading = m_crossfadeIncr;

    // Have the convolvers render directly to the final destination if we're not cross-fading.
    // When cross-fading, each set renders into its scratch buffer and the
    // two are mixed below.
    float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.Elements() : destinationL;
    float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.Elements() : destinationR;
    float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.Elements() : destinationL;
    float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.Elements() : destinationR;

    // Now do the convolutions.
    // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.

    if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
        m_convolverL1.process(kernelL1->fftFrame(), destinationL, convolutionDestinationL1, WEBAUDIO_BLOCK_SIZE);
        m_convolverR1.process(kernelR1->fftFrame(), destinationR, convolutionDestinationR1, WEBAUDIO_BLOCK_SIZE);
    }

    if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
        m_convolverL2.process(kernelL2->fftFrame(), destinationL, convolutionDestinationL2, WEBAUDIO_BLOCK_SIZE);
        m_convolverR2.process(kernelR2->fftFrame(), destinationR, convolutionDestinationR2, WEBAUDIO_BLOCK_SIZE);
    }

    if (needsCrossfading) {
        // Apply linear cross-fade.
        float x = m_crossfadeX;
        float incr = m_crossfadeIncr;
        for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) {
            destinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
            destinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
            x += incr;
        }
        // Update cross-fade value from local.
        m_crossfadeX = x;

        // Float accumulation means x may not land exactly on 0 or 1, so the
        // completion test allows one increment of slop before snapping.
        if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
            // We've fully made the crossfade transition from 1 -> 2.
            m_crossfadeSelection = CrossfadeSelection2;
            m_crossfadeX = 1;
            m_crossfadeIncr = 0;
        } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
            // We've fully made the crossfade transition from 2 -> 1.
            m_crossfadeSelection = CrossfadeSelection1;
            m_crossfadeX = 0;
            m_crossfadeIncr = 0;
        }
    }
}
   1.308 +
   1.309 +int HRTFPanner::maxTailFrames() const
   1.310 +{
   1.311 +    // Although the ideal tail time would be the length of the impulse
   1.312 +    // response, there is additional tail time from the approximations in the
   1.313 +    // implementation.  Because HRTFPanner is implemented with a DelayKernel
   1.314 +    // and a FFTConvolver, the tailTime of the HRTFPanner is the sum of the
   1.315 +    // tailTime of the DelayKernel and the tailTime of the FFTConvolver.
   1.316 +    // The FFTConvolver has a tail time of fftSize(), including latency of
   1.317 +    // fftSize()/2.
   1.318 +    return m_delayLine.MaxDelayTicks() + fftSize();
   1.319 +}
   1.320 +
   1.321 +} // namespace WebCore

mercurial