1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/media/AudioNodeExternalInputStream.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,455 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this file, 1.7 + * You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "AudioNodeEngine.h" 1.10 +#include "AudioNodeExternalInputStream.h" 1.11 +#include "AudioChannelFormat.h" 1.12 +#include "speex/speex_resampler.h" 1.13 + 1.14 +using namespace mozilla::dom; 1.15 + 1.16 +namespace mozilla { 1.17 + 1.18 +AudioNodeExternalInputStream::AudioNodeExternalInputStream(AudioNodeEngine* aEngine, TrackRate aSampleRate) 1.19 + : AudioNodeStream(aEngine, MediaStreamGraph::INTERNAL_STREAM, aSampleRate) 1.20 + , mCurrentOutputPosition(0) 1.21 +{ 1.22 + MOZ_COUNT_CTOR(AudioNodeExternalInputStream); 1.23 +} 1.24 + 1.25 +AudioNodeExternalInputStream::~AudioNodeExternalInputStream() 1.26 +{ 1.27 + MOZ_COUNT_DTOR(AudioNodeExternalInputStream); 1.28 +} 1.29 + 1.30 +AudioNodeExternalInputStream::TrackMapEntry::~TrackMapEntry() 1.31 +{ 1.32 + if (mResampler) { 1.33 + speex_resampler_destroy(mResampler); 1.34 + } 1.35 +} 1.36 + 1.37 +uint32_t 1.38 +AudioNodeExternalInputStream::GetTrackMapEntry(const StreamBuffer::Track& aTrack, 1.39 + GraphTime aFrom) 1.40 +{ 1.41 + AudioSegment* segment = aTrack.Get<AudioSegment>(); 1.42 + 1.43 + // Check the map for an existing entry corresponding to the input track. 1.44 + for (uint32_t i = 0; i < mTrackMap.Length(); ++i) { 1.45 + TrackMapEntry* map = &mTrackMap[i]; 1.46 + if (map->mTrackID == aTrack.GetID()) { 1.47 + return i; 1.48 + } 1.49 + } 1.50 + 1.51 + // Determine channel count by finding the first entry with non-silent data. 1.52 + AudioSegment::ChunkIterator ci(*segment); 1.53 + while (!ci.IsEnded() && ci->IsNull()) { 1.54 + ci.Next(); 1.55 + } 1.56 + if (ci.IsEnded()) { 1.57 + // The track is entirely silence so far, we can ignore it for now. 1.58 + return nsTArray<TrackMapEntry>::NoIndex; 1.59 + } 1.60 + 1.61 + // Create a speex resampler with the same sample rate and number of channels 1.62 + // as the track. 1.63 + SpeexResamplerState* resampler = nullptr; 1.64 + uint32_t channelCount = std::min((*ci).mChannelData.Length(), 1.65 + WebAudioUtils::MaxChannelCount); 1.66 + if (aTrack.GetRate() != mSampleRate) { 1.67 + resampler = speex_resampler_init(channelCount, 1.68 + aTrack.GetRate(), mSampleRate, SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr); 1.69 + speex_resampler_skip_zeros(resampler); 1.70 + } 1.71 + 1.72 + TrackMapEntry* map = mTrackMap.AppendElement(); 1.73 + map->mEndOfConsumedInputTicks = 0; 1.74 + map->mEndOfLastInputIntervalInInputStream = -1; 1.75 + map->mEndOfLastInputIntervalInOutputStream = -1; 1.76 + map->mSamplesPassedToResampler = 1.77 + TimeToTicksRoundUp(aTrack.GetRate(), GraphTimeToStreamTime(aFrom)); 1.78 + map->mResampler = resampler; 1.79 + map->mResamplerChannelCount = channelCount; 1.80 + map->mTrackID = aTrack.GetID(); 1.81 + return mTrackMap.Length() - 1; 1.82 +} 1.83 + 1.84 +static const uint32_t SPEEX_RESAMPLER_PROCESS_MAX_OUTPUT = 1000; 1.85 + 1.86 +template <typename T> static void 1.87 +ResampleChannelBuffer(SpeexResamplerState* aResampler, uint32_t aChannel, 1.88 + const T* aInput, uint32_t aInputDuration, 1.89 + nsTArray<float>* aOutput) 1.90 +{ 1.91 + if (!aResampler) { 1.92 + float* out = aOutput->AppendElements(aInputDuration); 1.93 + for (uint32_t i = 0; i < aInputDuration; ++i) { 1.94 + out[i] = AudioSampleToFloat(aInput[i]); 1.95 + } 1.96 + return; 1.97 + } 1.98 + 1.99 + uint32_t processed = 0; 1.100 + while (processed < aInputDuration) { 1.101 + uint32_t prevLength = aOutput->Length(); 1.102 + float* output = aOutput->AppendElements(SPEEX_RESAMPLER_PROCESS_MAX_OUTPUT); 1.103 + uint32_t in = aInputDuration - processed; 1.104 + uint32_t out = aOutput->Length() - prevLength; 1.105 + WebAudioUtils::SpeexResamplerProcess(aResampler, aChannel, 1.106 + aInput + processed, &in, 1.107 + output, &out); 1.108 + processed += in; 1.109 + aOutput->SetLength(prevLength + out); 1.110 + } 1.111 +} 1.112 + 1.113 +void 1.114 +AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray<const void*>& aBuffers, 1.115 + uint32_t aInputDuration, 1.116 + AudioSampleFormat aFormat, 1.117 + float aVolume) 1.118 +{ 1.119 + NS_ASSERTION(aBuffers.Length() == mResamplerChannelCount, 1.120 + "Channel count must be correct here"); 1.121 + 1.122 + nsAutoTArray<nsTArray<float>,2> resampledBuffers; 1.123 + resampledBuffers.SetLength(aBuffers.Length()); 1.124 + nsTArray<float> samplesAdjustedForVolume; 1.125 + nsAutoTArray<const float*,2> bufferPtrs; 1.126 + bufferPtrs.SetLength(aBuffers.Length()); 1.127 + 1.128 + for (uint32_t i = 0; i < aBuffers.Length(); ++i) { 1.129 + AudioSampleFormat format = aFormat; 1.130 + const void* buffer = aBuffers[i]; 1.131 + 1.132 + if (aVolume != 1.0f) { 1.133 + format = AUDIO_FORMAT_FLOAT32; 1.134 + samplesAdjustedForVolume.SetLength(aInputDuration); 1.135 + switch (aFormat) { 1.136 + case AUDIO_FORMAT_FLOAT32: 1.137 + ConvertAudioSamplesWithScale(static_cast<const float*>(buffer), 1.138 + samplesAdjustedForVolume.Elements(), 1.139 + aInputDuration, aVolume); 1.140 + break; 1.141 + case AUDIO_FORMAT_S16: 1.142 + ConvertAudioSamplesWithScale(static_cast<const int16_t*>(buffer), 1.143 + samplesAdjustedForVolume.Elements(), 1.144 + aInputDuration, aVolume); 1.145 + break; 1.146 + default: 1.147 + MOZ_ASSERT(false); 1.148 + return; 1.149 + } 1.150 + buffer = samplesAdjustedForVolume.Elements(); 1.151 + } 1.152 + 1.153 + switch (format) { 1.154 + case AUDIO_FORMAT_FLOAT32: 1.155 + ResampleChannelBuffer(mResampler, i, 1.156 + static_cast<const float*>(buffer), 1.157 + aInputDuration, &resampledBuffers[i]); 1.158 + break; 1.159 + case AUDIO_FORMAT_S16: 1.160 + ResampleChannelBuffer(mResampler, i, 1.161 + static_cast<const int16_t*>(buffer), 1.162 + aInputDuration, &resampledBuffers[i]); 1.163 + break; 1.164 + default: 1.165 + MOZ_ASSERT(false); 1.166 + return; 1.167 + } 1.168 + bufferPtrs[i] = resampledBuffers[i].Elements(); 1.169 + NS_ASSERTION(i == 0 || 1.170 + resampledBuffers[i].Length() == resampledBuffers[0].Length(), 1.171 + "Resampler made different decisions for different channels!"); 1.172 + } 1.173 + 1.174 + uint32_t length = resampledBuffers[0].Length(); 1.175 + nsRefPtr<ThreadSharedObject> buf = new SharedChannelArrayBuffer<float>(&resampledBuffers); 1.176 + mResampledData.AppendFrames(buf.forget(), bufferPtrs, length); 1.177 +} 1.178 + 1.179 +void 1.180 +AudioNodeExternalInputStream::TrackMapEntry::ResampleInputData(AudioSegment* aSegment) 1.181 +{ 1.182 + AudioSegment::ChunkIterator ci(*aSegment); 1.183 + while (!ci.IsEnded()) { 1.184 + const AudioChunk& chunk = *ci; 1.185 + nsAutoTArray<const void*,2> channels; 1.186 + if (chunk.GetDuration() > UINT32_MAX) { 1.187 + // This will cause us to OOM or overflow below. So let's just bail. 1.188 + NS_ERROR("Chunk duration out of bounds"); 1.189 + return; 1.190 + } 1.191 + uint32_t duration = uint32_t(chunk.GetDuration()); 1.192 + 1.193 + if (chunk.IsNull()) { 1.194 + nsAutoTArray<AudioDataValue,1024> silence; 1.195 + silence.SetLength(duration); 1.196 + PodZero(silence.Elements(), silence.Length()); 1.197 + channels.SetLength(mResamplerChannelCount); 1.198 + for (uint32_t i = 0; i < channels.Length(); ++i) { 1.199 + channels[i] = silence.Elements(); 1.200 + } 1.201 + ResampleChannels(channels, duration, AUDIO_OUTPUT_FORMAT, 0.0f); 1.202 + } else if (chunk.mChannelData.Length() == mResamplerChannelCount) { 1.203 + // Common case, since mResamplerChannelCount is set to the first chunk's 1.204 + // number of channels. 1.205 + channels.AppendElements(chunk.mChannelData); 1.206 + ResampleChannels(channels, duration, chunk.mBufferFormat, chunk.mVolume); 1.207 + } else { 1.208 + // Uncommon case. Since downmixing requires channels to be floats, 1.209 + // convert everything to floats now. 1.210 + uint32_t upChannels = GetAudioChannelsSuperset(chunk.mChannelData.Length(), mResamplerChannelCount); 1.211 + nsTArray<float> buffer; 1.212 + if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) { 1.213 + channels.AppendElements(chunk.mChannelData); 1.214 + } else { 1.215 + NS_ASSERTION(chunk.mBufferFormat == AUDIO_FORMAT_S16, "Unknown format"); 1.216 + if (duration > UINT32_MAX/chunk.mChannelData.Length()) { 1.217 + NS_ERROR("Chunk duration out of bounds"); 1.218 + return; 1.219 + } 1.220 + buffer.SetLength(chunk.mChannelData.Length()*duration); 1.221 + for (uint32_t i = 0; i < chunk.mChannelData.Length(); ++i) { 1.222 + const int16_t* samples = static_cast<const int16_t*>(chunk.mChannelData[i]); 1.223 + float* converted = &buffer[i*duration]; 1.224 + for (uint32_t j = 0; j < duration; ++j) { 1.225 + converted[j] = AudioSampleToFloat(samples[j]); 1.226 + } 1.227 + channels.AppendElement(converted); 1.228 + } 1.229 + } 1.230 + nsTArray<float> zeroes; 1.231 + if (channels.Length() < upChannels) { 1.232 + zeroes.SetLength(duration); 1.233 + PodZero(zeroes.Elements(), zeroes.Length()); 1.234 + AudioChannelsUpMix(&channels, upChannels, zeroes.Elements()); 1.235 + } 1.236 + if (channels.Length() == mResamplerChannelCount) { 1.237 + ResampleChannels(channels, duration, AUDIO_FORMAT_FLOAT32, chunk.mVolume); 1.238 + } else { 1.239 + nsTArray<float> output; 1.240 + if (duration > UINT32_MAX/mResamplerChannelCount) { 1.241 + NS_ERROR("Chunk duration out of bounds"); 1.242 + return; 1.243 + } 1.244 + output.SetLength(duration*mResamplerChannelCount); 1.245 + nsAutoTArray<float*,2> outputPtrs; 1.246 + nsAutoTArray<const void*,2> outputPtrsConst; 1.247 + for (uint32_t i = 0; i < mResamplerChannelCount; ++i) { 1.248 + outputPtrs.AppendElement(output.Elements() + i*duration); 1.249 + outputPtrsConst.AppendElement(outputPtrs[i]); 1.250 + } 1.251 + AudioChannelsDownMix(channels, outputPtrs.Elements(), outputPtrs.Length(), duration); 1.252 + ResampleChannels(outputPtrsConst, duration, AUDIO_FORMAT_FLOAT32, chunk.mVolume); 1.253 + } 1.254 + } 1.255 + ci.Next(); 1.256 + } 1.257 +} 1.258 + 1.259 +/** 1.260 + * Copies the data in aInput to aOffsetInBlock within aBlock. All samples must 1.261 + * be float. Both chunks must have the same number of channels (or else 1.262 + * aInput is null). aBlock must have been allocated with AllocateInputBlock. 1.263 + */ 1.264 +static void 1.265 +CopyChunkToBlock(const AudioChunk& aInput, AudioChunk *aBlock, uint32_t aOffsetInBlock) 1.266 +{ 1.267 + uint32_t d = aInput.GetDuration(); 1.268 + for (uint32_t i = 0; i < aBlock->mChannelData.Length(); ++i) { 1.269 + float* out = static_cast<float*>(const_cast<void*>(aBlock->mChannelData[i])) + 1.270 + aOffsetInBlock; 1.271 + if (aInput.IsNull()) { 1.272 + PodZero(out, d); 1.273 + } else { 1.274 + const float* in = static_cast<const float*>(aInput.mChannelData[i]); 1.275 + ConvertAudioSamplesWithScale(in, out, d, aInput.mVolume); 1.276 + } 1.277 + } 1.278 +} 1.279 + 1.280 +/** 1.281 + * Converts the data in aSegment to a single chunk aChunk. Every chunk in 1.282 + * aSegment must have the same number of channels (or be null). aSegment must have 1.283 + * duration WEBAUDIO_BLOCK_SIZE. Every chunk in aSegment must be in float format. 1.284 + */ 1.285 +static void 1.286 +ConvertSegmentToAudioBlock(AudioSegment* aSegment, AudioChunk* aBlock) 1.287 +{ 1.288 + NS_ASSERTION(aSegment->GetDuration() == WEBAUDIO_BLOCK_SIZE, "Bad segment duration"); 1.289 + 1.290 + { 1.291 + AudioSegment::ChunkIterator ci(*aSegment); 1.292 + NS_ASSERTION(!ci.IsEnded(), "Segment must have at least one chunk"); 1.293 + AudioChunk& firstChunk = *ci; 1.294 + ci.Next(); 1.295 + if (ci.IsEnded()) { 1.296 + *aBlock = firstChunk; 1.297 + return; 1.298 + } 1.299 + 1.300 + while (ci->IsNull() && !ci.IsEnded()) { 1.301 + ci.Next(); 1.302 + } 1.303 + if (ci.IsEnded()) { 1.304 + // All null. 1.305 + aBlock->SetNull(WEBAUDIO_BLOCK_SIZE); 1.306 + return; 1.307 + } 1.308 + 1.309 + AllocateAudioBlock(ci->mChannelData.Length(), aBlock); 1.310 + } 1.311 + 1.312 + AudioSegment::ChunkIterator ci(*aSegment); 1.313 + uint32_t duration = 0; 1.314 + while (!ci.IsEnded()) { 1.315 + CopyChunkToBlock(*ci, aBlock, duration); 1.316 + duration += ci->GetDuration(); 1.317 + ci.Next(); 1.318 + } 1.319 +} 1.320 + 1.321 +void 1.322 +AudioNodeExternalInputStream::ProcessInput(GraphTime aFrom, GraphTime aTo, 1.323 + uint32_t aFlags) 1.324 +{ 1.325 + // According to spec, number of outputs is always 1. 1.326 + mLastChunks.SetLength(1); 1.327 + 1.328 + // GC stuff can result in our input stream being destroyed before this stream. 1.329 + // Handle that. 1.330 + if (mInputs.IsEmpty()) { 1.331 + mLastChunks[0].SetNull(WEBAUDIO_BLOCK_SIZE); 1.332 + AdvanceOutputSegment(); 1.333 + return; 1.334 + } 1.335 + 1.336 + MOZ_ASSERT(mInputs.Length() == 1); 1.337 + 1.338 + MediaStream* source = mInputs[0]->GetSource(); 1.339 + nsAutoTArray<AudioSegment,1> audioSegments; 1.340 + nsAutoTArray<bool,1> trackMapEntriesUsed; 1.341 + uint32_t inputChannels = 0; 1.342 + for (StreamBuffer::TrackIter tracks(source->mBuffer, MediaSegment::AUDIO); 1.343 + !tracks.IsEnded(); tracks.Next()) { 1.344 + const StreamBuffer::Track& inputTrack = *tracks; 1.345 + // Create a TrackMapEntry if necessary. 1.346 + uint32_t trackMapIndex = GetTrackMapEntry(inputTrack, aFrom); 1.347 + // Maybe there's nothing in this track yet. If so, ignore it. (While the 1.348 + // track is only playing silence, we may not be able to determine the 1.349 + // correct number of channels to start resampling.) 1.350 + if (trackMapIndex == nsTArray<TrackMapEntry>::NoIndex) { 1.351 + continue; 1.352 + } 1.353 + 1.354 + while (trackMapEntriesUsed.Length() <= trackMapIndex) { 1.355 + trackMapEntriesUsed.AppendElement(false); 1.356 + } 1.357 + trackMapEntriesUsed[trackMapIndex] = true; 1.358 + 1.359 + TrackMapEntry* trackMap = &mTrackMap[trackMapIndex]; 1.360 + AudioSegment segment; 1.361 + GraphTime next; 1.362 + TrackRate inputTrackRate = inputTrack.GetRate(); 1.363 + for (GraphTime t = aFrom; t < aTo; t = next) { 1.364 + MediaInputPort::InputInterval interval = mInputs[0]->GetNextInputInterval(t); 1.365 + interval.mEnd = std::min(interval.mEnd, aTo); 1.366 + if (interval.mStart >= interval.mEnd) 1.367 + break; 1.368 + next = interval.mEnd; 1.369 + 1.370 + // Ticks >= startTicks and < endTicks are in the interval 1.371 + StreamTime outputEnd = GraphTimeToStreamTime(interval.mEnd); 1.372 + TrackTicks startTicks = trackMap->mSamplesPassedToResampler + segment.GetDuration(); 1.373 + StreamTime outputStart = GraphTimeToStreamTime(interval.mStart); 1.374 + NS_ASSERTION(startTicks == TimeToTicksRoundUp(inputTrackRate, outputStart), 1.375 + "Samples missing"); 1.376 + TrackTicks endTicks = TimeToTicksRoundUp(inputTrackRate, outputEnd); 1.377 + TrackTicks ticks = endTicks - startTicks; 1.378 + 1.379 + if (interval.mInputIsBlocked) { 1.380 + segment.AppendNullData(ticks); 1.381 + } else { 1.382 + // See comments in TrackUnionStream::CopyTrackData 1.383 + StreamTime inputStart = source->GraphTimeToStreamTime(interval.mStart); 1.384 + StreamTime inputEnd = source->GraphTimeToStreamTime(interval.mEnd); 1.385 + TrackTicks inputTrackEndPoint = 1.386 + inputTrack.IsEnded() ? inputTrack.GetEnd() : TRACK_TICKS_MAX; 1.387 + 1.388 + if (trackMap->mEndOfLastInputIntervalInInputStream != inputStart || 1.389 + trackMap->mEndOfLastInputIntervalInOutputStream != outputStart) { 1.390 + // Start of a new series of intervals where neither stream is blocked. 1.391 + trackMap->mEndOfConsumedInputTicks = TimeToTicksRoundDown(inputTrackRate, inputStart) - 1; 1.392 + } 1.393 + TrackTicks inputStartTicks = trackMap->mEndOfConsumedInputTicks; 1.394 + TrackTicks inputEndTicks = inputStartTicks + ticks; 1.395 + trackMap->mEndOfConsumedInputTicks = inputEndTicks; 1.396 + trackMap->mEndOfLastInputIntervalInInputStream = inputEnd; 1.397 + trackMap->mEndOfLastInputIntervalInOutputStream = outputEnd; 1.398 + 1.399 + if (inputStartTicks < 0) { 1.400 + // Data before the start of the track is just null. 1.401 + segment.AppendNullData(-inputStartTicks); 1.402 + inputStartTicks = 0; 1.403 + } 1.404 + if (inputEndTicks > inputStartTicks) { 1.405 + segment.AppendSlice(*inputTrack.GetSegment(), 1.406 + std::min(inputTrackEndPoint, inputStartTicks), 1.407 + std::min(inputTrackEndPoint, inputEndTicks)); 1.408 + } 1.409 + // Pad if we're looking past the end of the track 1.410 + segment.AppendNullData(ticks - segment.GetDuration()); 1.411 + } 1.412 + } 1.413 + 1.414 + trackMap->mSamplesPassedToResampler += segment.GetDuration(); 1.415 + trackMap->ResampleInputData(&segment); 1.416 + 1.417 + if (trackMap->mResampledData.GetDuration() < mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE) { 1.418 + // We don't have enough data. Delay it. 1.419 + trackMap->mResampledData.InsertNullDataAtStart( 1.420 + mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE - trackMap->mResampledData.GetDuration()); 1.421 + } 1.422 + audioSegments.AppendElement()->AppendSlice(trackMap->mResampledData, 1.423 + mCurrentOutputPosition, mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE); 1.424 + trackMap->mResampledData.ForgetUpTo(mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE); 1.425 + inputChannels = GetAudioChannelsSuperset(inputChannels, trackMap->mResamplerChannelCount); 1.426 + } 1.427 + 1.428 + for (int32_t i = mTrackMap.Length() - 1; i >= 0; --i) { 1.429 + if (i >= int32_t(trackMapEntriesUsed.Length()) || !trackMapEntriesUsed[i]) { 1.430 + mTrackMap.RemoveElementAt(i); 1.431 + } 1.432 + } 1.433 + 1.434 + uint32_t accumulateIndex = 0; 1.435 + if (inputChannels) { 1.436 + nsAutoTArray<float,GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer; 1.437 + for (uint32_t i = 0; i < audioSegments.Length(); ++i) { 1.438 + AudioChunk tmpChunk; 1.439 + ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk); 1.440 + if (!tmpChunk.IsNull()) { 1.441 + if (accumulateIndex == 0) { 1.442 + AllocateAudioBlock(inputChannels, &mLastChunks[0]); 1.443 + } 1.444 + AccumulateInputChunk(accumulateIndex, tmpChunk, &mLastChunks[0], &downmixBuffer); 1.445 + accumulateIndex++; 1.446 + } 1.447 + } 1.448 + } 1.449 + if (accumulateIndex == 0) { 1.450 + mLastChunks[0].SetNull(WEBAUDIO_BLOCK_SIZE); 1.451 + } 1.452 + mCurrentOutputPosition += WEBAUDIO_BLOCK_SIZE; 1.453 + 1.454 + // Using AudioNodeStream's AdvanceOutputSegment to push the media stream graph along with null data. 1.455 + AdvanceOutputSegment(); 1.456 +} 1.457 + 1.458 +}