1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/media/webaudio/blink/HRTFPanner.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,318 @@ 1.4 +/* 1.5 + * Copyright (C) 2010, Google Inc. All rights reserved. 1.6 + * 1.7 + * Redistribution and use in source and binary forms, with or without 1.8 + * modification, are permitted provided that the following conditions 1.9 + * are met: 1.10 + * 1. Redistributions of source code must retain the above copyright 1.11 + * notice, this list of conditions and the following disclaimer. 1.12 + * 2. Redistributions in binary form must reproduce the above copyright 1.13 + * notice, this list of conditions and the following disclaimer in the 1.14 + * documentation and/or other materials provided with the distribution. 1.15 + * 1.16 + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY 1.17 + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 1.18 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 1.19 + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY 1.20 + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 1.21 + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 1.22 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 1.23 + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.24 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 1.25 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.26 + */ 1.27 + 1.28 +#include "HRTFPanner.h" 1.29 +#include "HRTFDatabaseLoader.h" 1.30 + 1.31 +#include "FFTConvolver.h" 1.32 +#include "HRTFDatabase.h" 1.33 + 1.34 +using namespace std; 1.35 +using namespace mozilla; 1.36 +using dom::ChannelInterpretation; 1.37 + 1.38 +namespace WebCore { 1.39 + 1.40 +// The value of 2 milliseconds is larger than the largest delay which exists in any HRTFKernel from the default HRTFDatabase (0.0136 seconds). 1.41 +// We ASSERT the delay values used in process() with this value. 1.42 +const double MaxDelayTimeSeconds = 0.002; 1.43 + 1.44 +const int UninitializedAzimuth = -1; 1.45 +const unsigned RenderingQuantum = WEBAUDIO_BLOCK_SIZE; 1.46 + 1.47 +HRTFPanner::HRTFPanner(float sampleRate, mozilla::TemporaryRef<HRTFDatabaseLoader> databaseLoader) 1.48 + : m_databaseLoader(databaseLoader) 1.49 + , m_sampleRate(sampleRate) 1.50 + , m_crossfadeSelection(CrossfadeSelection1) 1.51 + , m_azimuthIndex1(UninitializedAzimuth) 1.52 + , m_azimuthIndex2(UninitializedAzimuth) 1.53 + // m_elevation1 and m_elevation2 are initialized in pan() 1.54 + , m_crossfadeX(0) 1.55 + , m_crossfadeIncr(0) 1.56 + , m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate)) 1.57 + , m_convolverR1(m_convolverL1.fftSize()) 1.58 + , m_convolverL2(m_convolverL1.fftSize()) 1.59 + , m_convolverR2(m_convolverL1.fftSize()) 1.60 + , m_delayLine(MaxDelayTimeSeconds * sampleRate, 1.0) 1.61 +{ 1.62 + MOZ_ASSERT(m_databaseLoader); 1.63 + MOZ_COUNT_CTOR(HRTFPanner); 1.64 + 1.65 + m_tempL1.SetLength(RenderingQuantum); 1.66 + m_tempR1.SetLength(RenderingQuantum); 1.67 + m_tempL2.SetLength(RenderingQuantum); 1.68 + m_tempR2.SetLength(RenderingQuantum); 1.69 +} 1.70 + 1.71 +HRTFPanner::~HRTFPanner() 1.72 +{ 1.73 + MOZ_COUNT_DTOR(HRTFPanner); 1.74 +} 1.75 + 1.76 +size_t HRTFPanner::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const 1.77 +{ 1.78 + size_t amount = aMallocSizeOf(this); 1.79 + 1.80 + if (m_databaseLoader) { 1.81 + m_databaseLoader->sizeOfIncludingThis(aMallocSizeOf); 1.82 + } 1.83 + 1.84 + amount += m_convolverL1.sizeOfExcludingThis(aMallocSizeOf); 1.85 + amount += m_convolverR1.sizeOfExcludingThis(aMallocSizeOf); 1.86 + amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf); 1.87 + amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf); 1.88 + amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf); 1.89 + amount += m_tempL1.SizeOfExcludingThis(aMallocSizeOf); 1.90 + amount += m_tempL2.SizeOfExcludingThis(aMallocSizeOf); 1.91 + amount += m_tempR1.SizeOfExcludingThis(aMallocSizeOf); 1.92 + amount += m_tempR2.SizeOfExcludingThis(aMallocSizeOf); 1.93 + 1.94 + return amount; 1.95 +} 1.96 + 1.97 +void HRTFPanner::reset() 1.98 +{ 1.99 + m_azimuthIndex1 = UninitializedAzimuth; 1.100 + m_azimuthIndex2 = UninitializedAzimuth; 1.101 + // m_elevation1 and m_elevation2 are initialized in pan() 1.102 + m_crossfadeSelection = CrossfadeSelection1; 1.103 + m_crossfadeX = 0.0f; 1.104 + m_crossfadeIncr = 0.0f; 1.105 + m_convolverL1.reset(); 1.106 + m_convolverR1.reset(); 1.107 + m_convolverL2.reset(); 1.108 + m_convolverR2.reset(); 1.109 + m_delayLine.Reset(); 1.110 +} 1.111 + 1.112 +int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend) 1.113 +{ 1.114 + // Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360. 1.115 + // The azimuth index may then be calculated from this positive value. 1.116 + if (azimuth < 0) 1.117 + azimuth += 360.0; 1.118 + 1.119 + HRTFDatabase* database = m_databaseLoader->database(); 1.120 + MOZ_ASSERT(database); 1.121 + 1.122 + int numberOfAzimuths = database->numberOfAzimuths(); 1.123 + const double angleBetweenAzimuths = 360.0 / numberOfAzimuths; 1.124 + 1.125 + // Calculate the azimuth index and the blend (0 -> 1) for interpolation. 1.126 + double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths; 1.127 + int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat); 1.128 + azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex); 1.129 + 1.130 + // We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at. 1.131 + // This minimizes the clicks and graininess for moving sources which occur otherwise. 1.132 + desiredAzimuthIndex = max(0, desiredAzimuthIndex); 1.133 + desiredAzimuthIndex = min(numberOfAzimuths - 1, desiredAzimuthIndex); 1.134 + return desiredAzimuthIndex; 1.135 +} 1.136 + 1.137 +void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioChunk* inputBus, AudioChunk* outputBus) 1.138 +{ 1.139 +#ifdef DEBUG 1.140 + unsigned numInputChannels = 1.141 + inputBus->IsNull() ? 0 : inputBus->mChannelData.Length(); 1.142 + 1.143 + MOZ_ASSERT(numInputChannels <= 2); 1.144 + MOZ_ASSERT(inputBus->mDuration == WEBAUDIO_BLOCK_SIZE); 1.145 +#endif 1.146 + 1.147 + bool isOutputGood = outputBus && outputBus->mChannelData.Length() == 2 && outputBus->mDuration == WEBAUDIO_BLOCK_SIZE; 1.148 + MOZ_ASSERT(isOutputGood); 1.149 + 1.150 + if (!isOutputGood) { 1.151 + if (outputBus) 1.152 + outputBus->SetNull(outputBus->mDuration); 1.153 + return; 1.154 + } 1.155 + 1.156 + HRTFDatabase* database = m_databaseLoader->database(); 1.157 + if (!database) { // not yet loaded 1.158 + outputBus->SetNull(outputBus->mDuration); 1.159 + return; 1.160 + } 1.161 + 1.162 + // IRCAM HRTF azimuths values from the loaded database is reversed from the panner's notion of azimuth. 1.163 + double azimuth = -desiredAzimuth; 1.164 + 1.165 + bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0; 1.166 + MOZ_ASSERT(isAzimuthGood); 1.167 + if (!isAzimuthGood) { 1.168 + outputBus->SetNull(outputBus->mDuration); 1.169 + return; 1.170 + } 1.171 + 1.172 + // Normally, we'll just be dealing with mono sources. 1.173 + // If we have a stereo input, implement stereo panning with left source processed by left HRTF, and right source by right HRTF. 1.174 + 1.175 + // Get destination pointers. 1.176 + float* destinationL = 1.177 + static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0])); 1.178 + float* destinationR = 1.179 + static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1])); 1.180 + 1.181 + double azimuthBlend; 1.182 + int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend); 1.183 + 1.184 + // Initially snap azimuth and elevation values to first values encountered. 1.185 + if (m_azimuthIndex1 == UninitializedAzimuth) { 1.186 + m_azimuthIndex1 = desiredAzimuthIndex; 1.187 + m_elevation1 = elevation; 1.188 + } 1.189 + if (m_azimuthIndex2 == UninitializedAzimuth) { 1.190 + m_azimuthIndex2 = desiredAzimuthIndex; 1.191 + m_elevation2 = elevation; 1.192 + } 1.193 + 1.194 + // Cross-fade / transition over a period of around 45 milliseconds. 1.195 + // This is an empirical value tuned to be a reasonable trade-off between 1.196 + // smoothness and speed. 1.197 + const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096; 1.198 + 1.199 + // Check for azimuth and elevation changes, initiating a cross-fade if needed. 1.200 + if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) { 1.201 + if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) { 1.202 + // Cross-fade from 1 -> 2 1.203 + m_crossfadeIncr = 1 / fadeFrames; 1.204 + m_azimuthIndex2 = desiredAzimuthIndex; 1.205 + m_elevation2 = elevation; 1.206 + } 1.207 + } 1.208 + if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) { 1.209 + if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) { 1.210 + // Cross-fade from 2 -> 1 1.211 + m_crossfadeIncr = -1 / fadeFrames; 1.212 + m_azimuthIndex1 = desiredAzimuthIndex; 1.213 + m_elevation1 = elevation; 1.214 + } 1.215 + } 1.216 + 1.217 + // Get the HRTFKernels and interpolated delays. 1.218 + HRTFKernel* kernelL1; 1.219 + HRTFKernel* kernelR1; 1.220 + HRTFKernel* kernelL2; 1.221 + HRTFKernel* kernelR2; 1.222 + double frameDelayL1; 1.223 + double frameDelayR1; 1.224 + double frameDelayL2; 1.225 + double frameDelayR2; 1.226 + database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1); 1.227 + database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2); 1.228 + 1.229 + bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2; 1.230 + MOZ_ASSERT(areKernelsGood); 1.231 + if (!areKernelsGood) { 1.232 + outputBus->SetNull(outputBus->mDuration); 1.233 + return; 1.234 + } 1.235 + 1.236 + MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds); 1.237 + MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds); 1.238 + 1.239 + // Crossfade inter-aural delays based on transitions. 1.240 + double frameDelaysL[WEBAUDIO_BLOCK_SIZE]; 1.241 + double frameDelaysR[WEBAUDIO_BLOCK_SIZE]; 1.242 + { 1.243 + float x = m_crossfadeX; 1.244 + float incr = m_crossfadeIncr; 1.245 + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) { 1.246 + frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2; 1.247 + frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2; 1.248 + x += incr; 1.249 + } 1.250 + } 1.251 + 1.252 + // First run through delay lines for inter-aural time difference. 1.253 + m_delayLine.Write(*inputBus); 1.254 + // "Speakers" means a mono input is read into both outputs (with possibly 1.255 + // different delays). 1.256 + m_delayLine.ReadChannel(frameDelaysL, outputBus, 0, 1.257 + ChannelInterpretation::Speakers); 1.258 + m_delayLine.ReadChannel(frameDelaysR, outputBus, 1, 1.259 + ChannelInterpretation::Speakers); 1.260 + m_delayLine.NextBlock(); 1.261 + 1.262 + bool needsCrossfading = m_crossfadeIncr; 1.263 + 1.264 + // Have the convolvers render directly to the final destination if we're not cross-fading. 1.265 + float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.Elements() : destinationL; 1.266 + float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.Elements() : destinationR; 1.267 + float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.Elements() : destinationL; 1.268 + float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.Elements() : destinationR; 1.269 + 1.270 + // Now do the convolutions. 1.271 + // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading. 1.272 + 1.273 + if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) { 1.274 + m_convolverL1.process(kernelL1->fftFrame(), destinationL, convolutionDestinationL1, WEBAUDIO_BLOCK_SIZE); 1.275 + m_convolverR1.process(kernelR1->fftFrame(), destinationR, convolutionDestinationR1, WEBAUDIO_BLOCK_SIZE); 1.276 + } 1.277 + 1.278 + if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) { 1.279 + m_convolverL2.process(kernelL2->fftFrame(), destinationL, convolutionDestinationL2, WEBAUDIO_BLOCK_SIZE); 1.280 + m_convolverR2.process(kernelR2->fftFrame(), destinationR, convolutionDestinationR2, WEBAUDIO_BLOCK_SIZE); 1.281 + } 1.282 + 1.283 + if (needsCrossfading) { 1.284 + // Apply linear cross-fade. 1.285 + float x = m_crossfadeX; 1.286 + float incr = m_crossfadeIncr; 1.287 + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) { 1.288 + destinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i]; 1.289 + destinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i]; 1.290 + x += incr; 1.291 + } 1.292 + // Update cross-fade value from local. 1.293 + m_crossfadeX = x; 1.294 + 1.295 + if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) { 1.296 + // We've fully made the crossfade transition from 1 -> 2. 1.297 + m_crossfadeSelection = CrossfadeSelection2; 1.298 + m_crossfadeX = 1; 1.299 + m_crossfadeIncr = 0; 1.300 + } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) { 1.301 + // We've fully made the crossfade transition from 2 -> 1. 1.302 + m_crossfadeSelection = CrossfadeSelection1; 1.303 + m_crossfadeX = 0; 1.304 + m_crossfadeIncr = 0; 1.305 + } 1.306 + } 1.307 +} 1.308 + 1.309 +int HRTFPanner::maxTailFrames() const 1.310 +{ 1.311 + // Although the ideal tail time would be the length of the impulse 1.312 + // response, there is additional tail time from the approximations in the 1.313 + // implementation. Because HRTFPanner is implemented with a DelayKernel 1.314 + // and a FFTConvolver, the tailTime of the HRTFPanner is the sum of the 1.315 + // tailTime of the DelayKernel and the tailTime of the FFTConvolver. 1.316 + // The FFTConvolver has a tail time of fftSize(), including latency of 1.317 + // fftSize()/2. 1.318 + return m_delayLine.MaxDelayTicks() + fftSize(); 1.319 +} 1.320 + 1.321 +} // namespace WebCore