michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this file, michael@0: * You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "AudioNodeEngine.h" michael@0: #include "AudioNodeExternalInputStream.h" michael@0: #include "AudioChannelFormat.h" michael@0: #include "speex/speex_resampler.h" michael@0: michael@0: using namespace mozilla::dom; michael@0: michael@0: namespace mozilla { michael@0: michael@0: AudioNodeExternalInputStream::AudioNodeExternalInputStream(AudioNodeEngine* aEngine, TrackRate aSampleRate) michael@0: : AudioNodeStream(aEngine, MediaStreamGraph::INTERNAL_STREAM, aSampleRate) michael@0: , mCurrentOutputPosition(0) michael@0: { michael@0: MOZ_COUNT_CTOR(AudioNodeExternalInputStream); michael@0: } michael@0: michael@0: AudioNodeExternalInputStream::~AudioNodeExternalInputStream() michael@0: { michael@0: MOZ_COUNT_DTOR(AudioNodeExternalInputStream); michael@0: } michael@0: michael@0: AudioNodeExternalInputStream::TrackMapEntry::~TrackMapEntry() michael@0: { michael@0: if (mResampler) { michael@0: speex_resampler_destroy(mResampler); michael@0: } michael@0: } michael@0: michael@0: uint32_t michael@0: AudioNodeExternalInputStream::GetTrackMapEntry(const StreamBuffer::Track& aTrack, michael@0: GraphTime aFrom) michael@0: { michael@0: AudioSegment* segment = aTrack.Get(); michael@0: michael@0: // Check the map for an existing entry corresponding to the input track. michael@0: for (uint32_t i = 0; i < mTrackMap.Length(); ++i) { michael@0: TrackMapEntry* map = &mTrackMap[i]; michael@0: if (map->mTrackID == aTrack.GetID()) { michael@0: return i; michael@0: } michael@0: } michael@0: michael@0: // Determine channel count by finding the first entry with non-silent data. michael@0: AudioSegment::ChunkIterator ci(*segment); michael@0: while (!ci.IsEnded() && ci->IsNull()) { michael@0: ci.Next(); michael@0: } michael@0: if (ci.IsEnded()) { michael@0: // The track is entirely silence so far, we can ignore it for now. michael@0: return nsTArray::NoIndex; michael@0: } michael@0: michael@0: // Create a speex resampler with the same sample rate and number of channels michael@0: // as the track. michael@0: SpeexResamplerState* resampler = nullptr; michael@0: uint32_t channelCount = std::min((*ci).mChannelData.Length(), michael@0: WebAudioUtils::MaxChannelCount); michael@0: if (aTrack.GetRate() != mSampleRate) { michael@0: resampler = speex_resampler_init(channelCount, michael@0: aTrack.GetRate(), mSampleRate, SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr); michael@0: speex_resampler_skip_zeros(resampler); michael@0: } michael@0: michael@0: TrackMapEntry* map = mTrackMap.AppendElement(); michael@0: map->mEndOfConsumedInputTicks = 0; michael@0: map->mEndOfLastInputIntervalInInputStream = -1; michael@0: map->mEndOfLastInputIntervalInOutputStream = -1; michael@0: map->mSamplesPassedToResampler = michael@0: TimeToTicksRoundUp(aTrack.GetRate(), GraphTimeToStreamTime(aFrom)); michael@0: map->mResampler = resampler; michael@0: map->mResamplerChannelCount = channelCount; michael@0: map->mTrackID = aTrack.GetID(); michael@0: return mTrackMap.Length() - 1; michael@0: } michael@0: michael@0: static const uint32_t SPEEX_RESAMPLER_PROCESS_MAX_OUTPUT = 1000; michael@0: michael@0: template static void michael@0: ResampleChannelBuffer(SpeexResamplerState* aResampler, uint32_t aChannel, michael@0: const T* aInput, uint32_t aInputDuration, michael@0: nsTArray* aOutput) michael@0: { michael@0: if (!aResampler) { michael@0: float* out = aOutput->AppendElements(aInputDuration); michael@0: for (uint32_t i = 0; i < aInputDuration; ++i) { michael@0: out[i] = AudioSampleToFloat(aInput[i]); michael@0: } michael@0: return; michael@0: } michael@0: michael@0: uint32_t processed = 0; michael@0: while (processed < aInputDuration) { michael@0: uint32_t prevLength = aOutput->Length(); michael@0: float* output = aOutput->AppendElements(SPEEX_RESAMPLER_PROCESS_MAX_OUTPUT); michael@0: uint32_t in = aInputDuration - processed; michael@0: uint32_t out = aOutput->Length() - prevLength; michael@0: WebAudioUtils::SpeexResamplerProcess(aResampler, aChannel, michael@0: aInput + processed, &in, michael@0: output, &out); michael@0: processed += in; michael@0: aOutput->SetLength(prevLength + out); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray& aBuffers, michael@0: uint32_t aInputDuration, michael@0: AudioSampleFormat aFormat, michael@0: float aVolume) michael@0: { michael@0: NS_ASSERTION(aBuffers.Length() == mResamplerChannelCount, michael@0: "Channel count must be correct here"); michael@0: michael@0: nsAutoTArray,2> resampledBuffers; michael@0: resampledBuffers.SetLength(aBuffers.Length()); michael@0: nsTArray samplesAdjustedForVolume; michael@0: nsAutoTArray bufferPtrs; michael@0: bufferPtrs.SetLength(aBuffers.Length()); michael@0: michael@0: for (uint32_t i = 0; i < aBuffers.Length(); ++i) { michael@0: AudioSampleFormat format = aFormat; michael@0: const void* buffer = aBuffers[i]; michael@0: michael@0: if (aVolume != 1.0f) { michael@0: format = AUDIO_FORMAT_FLOAT32; michael@0: samplesAdjustedForVolume.SetLength(aInputDuration); michael@0: switch (aFormat) { michael@0: case AUDIO_FORMAT_FLOAT32: michael@0: ConvertAudioSamplesWithScale(static_cast(buffer), michael@0: samplesAdjustedForVolume.Elements(), michael@0: aInputDuration, aVolume); michael@0: break; michael@0: case AUDIO_FORMAT_S16: michael@0: ConvertAudioSamplesWithScale(static_cast(buffer), michael@0: samplesAdjustedForVolume.Elements(), michael@0: aInputDuration, aVolume); michael@0: break; michael@0: default: michael@0: MOZ_ASSERT(false); michael@0: return; michael@0: } michael@0: buffer = samplesAdjustedForVolume.Elements(); michael@0: } michael@0: michael@0: switch (format) { michael@0: case AUDIO_FORMAT_FLOAT32: michael@0: ResampleChannelBuffer(mResampler, i, michael@0: static_cast(buffer), michael@0: aInputDuration, &resampledBuffers[i]); michael@0: break; michael@0: case AUDIO_FORMAT_S16: michael@0: ResampleChannelBuffer(mResampler, i, michael@0: static_cast(buffer), michael@0: aInputDuration, &resampledBuffers[i]); michael@0: break; michael@0: default: michael@0: MOZ_ASSERT(false); michael@0: return; michael@0: } michael@0: bufferPtrs[i] = resampledBuffers[i].Elements(); michael@0: NS_ASSERTION(i == 0 || michael@0: resampledBuffers[i].Length() == resampledBuffers[0].Length(), michael@0: "Resampler made different decisions for different channels!"); michael@0: } michael@0: michael@0: uint32_t length = resampledBuffers[0].Length(); michael@0: nsRefPtr buf = new SharedChannelArrayBuffer(&resampledBuffers); michael@0: mResampledData.AppendFrames(buf.forget(), bufferPtrs, length); michael@0: } michael@0: michael@0: void michael@0: AudioNodeExternalInputStream::TrackMapEntry::ResampleInputData(AudioSegment* aSegment) michael@0: { michael@0: AudioSegment::ChunkIterator ci(*aSegment); michael@0: while (!ci.IsEnded()) { michael@0: const AudioChunk& chunk = *ci; michael@0: nsAutoTArray channels; michael@0: if (chunk.GetDuration() > UINT32_MAX) { michael@0: // This will cause us to OOM or overflow below. So let's just bail. michael@0: NS_ERROR("Chunk duration out of bounds"); michael@0: return; michael@0: } michael@0: uint32_t duration = uint32_t(chunk.GetDuration()); michael@0: michael@0: if (chunk.IsNull()) { michael@0: nsAutoTArray silence; michael@0: silence.SetLength(duration); michael@0: PodZero(silence.Elements(), silence.Length()); michael@0: channels.SetLength(mResamplerChannelCount); michael@0: for (uint32_t i = 0; i < channels.Length(); ++i) { michael@0: channels[i] = silence.Elements(); michael@0: } michael@0: ResampleChannels(channels, duration, AUDIO_OUTPUT_FORMAT, 0.0f); michael@0: } else if (chunk.mChannelData.Length() == mResamplerChannelCount) { michael@0: // Common case, since mResamplerChannelCount is set to the first chunk's michael@0: // number of channels. michael@0: channels.AppendElements(chunk.mChannelData); michael@0: ResampleChannels(channels, duration, chunk.mBufferFormat, chunk.mVolume); michael@0: } else { michael@0: // Uncommon case. Since downmixing requires channels to be floats, michael@0: // convert everything to floats now. michael@0: uint32_t upChannels = GetAudioChannelsSuperset(chunk.mChannelData.Length(), mResamplerChannelCount); michael@0: nsTArray buffer; michael@0: if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) { michael@0: channels.AppendElements(chunk.mChannelData); michael@0: } else { michael@0: NS_ASSERTION(chunk.mBufferFormat == AUDIO_FORMAT_S16, "Unknown format"); michael@0: if (duration > UINT32_MAX/chunk.mChannelData.Length()) { michael@0: NS_ERROR("Chunk duration out of bounds"); michael@0: return; michael@0: } michael@0: buffer.SetLength(chunk.mChannelData.Length()*duration); michael@0: for (uint32_t i = 0; i < chunk.mChannelData.Length(); ++i) { michael@0: const int16_t* samples = static_cast(chunk.mChannelData[i]); michael@0: float* converted = &buffer[i*duration]; michael@0: for (uint32_t j = 0; j < duration; ++j) { michael@0: converted[j] = AudioSampleToFloat(samples[j]); michael@0: } michael@0: channels.AppendElement(converted); michael@0: } michael@0: } michael@0: nsTArray zeroes; michael@0: if (channels.Length() < upChannels) { michael@0: zeroes.SetLength(duration); michael@0: PodZero(zeroes.Elements(), zeroes.Length()); michael@0: AudioChannelsUpMix(&channels, upChannels, zeroes.Elements()); michael@0: } michael@0: if (channels.Length() == mResamplerChannelCount) { michael@0: ResampleChannels(channels, duration, AUDIO_FORMAT_FLOAT32, chunk.mVolume); michael@0: } else { michael@0: nsTArray output; michael@0: if (duration > UINT32_MAX/mResamplerChannelCount) { michael@0: NS_ERROR("Chunk duration out of bounds"); michael@0: return; michael@0: } michael@0: output.SetLength(duration*mResamplerChannelCount); michael@0: nsAutoTArray outputPtrs; michael@0: nsAutoTArray outputPtrsConst; michael@0: for (uint32_t i = 0; i < mResamplerChannelCount; ++i) { michael@0: outputPtrs.AppendElement(output.Elements() + i*duration); michael@0: outputPtrsConst.AppendElement(outputPtrs[i]); michael@0: } michael@0: AudioChannelsDownMix(channels, outputPtrs.Elements(), outputPtrs.Length(), duration); michael@0: ResampleChannels(outputPtrsConst, duration, AUDIO_FORMAT_FLOAT32, chunk.mVolume); michael@0: } michael@0: } michael@0: ci.Next(); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Copies the data in aInput to aOffsetInBlock within aBlock. All samples must michael@0: * be float. Both chunks must have the same number of channels (or else michael@0: * aInput is null). aBlock must have been allocated with AllocateInputBlock. michael@0: */ michael@0: static void michael@0: CopyChunkToBlock(const AudioChunk& aInput, AudioChunk *aBlock, uint32_t aOffsetInBlock) michael@0: { michael@0: uint32_t d = aInput.GetDuration(); michael@0: for (uint32_t i = 0; i < aBlock->mChannelData.Length(); ++i) { michael@0: float* out = static_cast(const_cast(aBlock->mChannelData[i])) + michael@0: aOffsetInBlock; michael@0: if (aInput.IsNull()) { michael@0: PodZero(out, d); michael@0: } else { michael@0: const float* in = static_cast(aInput.mChannelData[i]); michael@0: ConvertAudioSamplesWithScale(in, out, d, aInput.mVolume); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Converts the data in aSegment to a single chunk aChunk. Every chunk in michael@0: * aSegment must have the same number of channels (or be null). aSegment must have michael@0: * duration WEBAUDIO_BLOCK_SIZE. Every chunk in aSegment must be in float format. michael@0: */ michael@0: static void michael@0: ConvertSegmentToAudioBlock(AudioSegment* aSegment, AudioChunk* aBlock) michael@0: { michael@0: NS_ASSERTION(aSegment->GetDuration() == WEBAUDIO_BLOCK_SIZE, "Bad segment duration"); michael@0: michael@0: { michael@0: AudioSegment::ChunkIterator ci(*aSegment); michael@0: NS_ASSERTION(!ci.IsEnded(), "Segment must have at least one chunk"); michael@0: AudioChunk& firstChunk = *ci; michael@0: ci.Next(); michael@0: if (ci.IsEnded()) { michael@0: *aBlock = firstChunk; michael@0: return; michael@0: } michael@0: michael@0: while (ci->IsNull() && !ci.IsEnded()) { michael@0: ci.Next(); michael@0: } michael@0: if (ci.IsEnded()) { michael@0: // All null. michael@0: aBlock->SetNull(WEBAUDIO_BLOCK_SIZE); michael@0: return; michael@0: } michael@0: michael@0: AllocateAudioBlock(ci->mChannelData.Length(), aBlock); michael@0: } michael@0: michael@0: AudioSegment::ChunkIterator ci(*aSegment); michael@0: uint32_t duration = 0; michael@0: while (!ci.IsEnded()) { michael@0: CopyChunkToBlock(*ci, aBlock, duration); michael@0: duration += ci->GetDuration(); michael@0: ci.Next(); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AudioNodeExternalInputStream::ProcessInput(GraphTime aFrom, GraphTime aTo, michael@0: uint32_t aFlags) michael@0: { michael@0: // According to spec, number of outputs is always 1. michael@0: mLastChunks.SetLength(1); michael@0: michael@0: // GC stuff can result in our input stream being destroyed before this stream. michael@0: // Handle that. michael@0: if (mInputs.IsEmpty()) { michael@0: mLastChunks[0].SetNull(WEBAUDIO_BLOCK_SIZE); michael@0: AdvanceOutputSegment(); michael@0: return; michael@0: } michael@0: michael@0: MOZ_ASSERT(mInputs.Length() == 1); michael@0: michael@0: MediaStream* source = mInputs[0]->GetSource(); michael@0: nsAutoTArray audioSegments; michael@0: nsAutoTArray trackMapEntriesUsed; michael@0: uint32_t inputChannels = 0; michael@0: for (StreamBuffer::TrackIter tracks(source->mBuffer, MediaSegment::AUDIO); michael@0: !tracks.IsEnded(); tracks.Next()) { michael@0: const StreamBuffer::Track& inputTrack = *tracks; michael@0: // Create a TrackMapEntry if necessary. michael@0: uint32_t trackMapIndex = GetTrackMapEntry(inputTrack, aFrom); michael@0: // Maybe there's nothing in this track yet. If so, ignore it. (While the michael@0: // track is only playing silence, we may not be able to determine the michael@0: // correct number of channels to start resampling.) michael@0: if (trackMapIndex == nsTArray::NoIndex) { michael@0: continue; michael@0: } michael@0: michael@0: while (trackMapEntriesUsed.Length() <= trackMapIndex) { michael@0: trackMapEntriesUsed.AppendElement(false); michael@0: } michael@0: trackMapEntriesUsed[trackMapIndex] = true; michael@0: michael@0: TrackMapEntry* trackMap = &mTrackMap[trackMapIndex]; michael@0: AudioSegment segment; michael@0: GraphTime next; michael@0: TrackRate inputTrackRate = inputTrack.GetRate(); michael@0: for (GraphTime t = aFrom; t < aTo; t = next) { michael@0: MediaInputPort::InputInterval interval = mInputs[0]->GetNextInputInterval(t); michael@0: interval.mEnd = std::min(interval.mEnd, aTo); michael@0: if (interval.mStart >= interval.mEnd) michael@0: break; michael@0: next = interval.mEnd; michael@0: michael@0: // Ticks >= startTicks and < endTicks are in the interval michael@0: StreamTime outputEnd = GraphTimeToStreamTime(interval.mEnd); michael@0: TrackTicks startTicks = trackMap->mSamplesPassedToResampler + segment.GetDuration(); michael@0: StreamTime outputStart = GraphTimeToStreamTime(interval.mStart); michael@0: NS_ASSERTION(startTicks == TimeToTicksRoundUp(inputTrackRate, outputStart), michael@0: "Samples missing"); michael@0: TrackTicks endTicks = TimeToTicksRoundUp(inputTrackRate, outputEnd); michael@0: TrackTicks ticks = endTicks - startTicks; michael@0: michael@0: if (interval.mInputIsBlocked) { michael@0: segment.AppendNullData(ticks); michael@0: } else { michael@0: // See comments in TrackUnionStream::CopyTrackData michael@0: StreamTime inputStart = source->GraphTimeToStreamTime(interval.mStart); michael@0: StreamTime inputEnd = source->GraphTimeToStreamTime(interval.mEnd); michael@0: TrackTicks inputTrackEndPoint = michael@0: inputTrack.IsEnded() ? inputTrack.GetEnd() : TRACK_TICKS_MAX; michael@0: michael@0: if (trackMap->mEndOfLastInputIntervalInInputStream != inputStart || michael@0: trackMap->mEndOfLastInputIntervalInOutputStream != outputStart) { michael@0: // Start of a new series of intervals where neither stream is blocked. michael@0: trackMap->mEndOfConsumedInputTicks = TimeToTicksRoundDown(inputTrackRate, inputStart) - 1; michael@0: } michael@0: TrackTicks inputStartTicks = trackMap->mEndOfConsumedInputTicks; michael@0: TrackTicks inputEndTicks = inputStartTicks + ticks; michael@0: trackMap->mEndOfConsumedInputTicks = inputEndTicks; michael@0: trackMap->mEndOfLastInputIntervalInInputStream = inputEnd; michael@0: trackMap->mEndOfLastInputIntervalInOutputStream = outputEnd; michael@0: michael@0: if (inputStartTicks < 0) { michael@0: // Data before the start of the track is just null. michael@0: segment.AppendNullData(-inputStartTicks); michael@0: inputStartTicks = 0; michael@0: } michael@0: if (inputEndTicks > inputStartTicks) { michael@0: segment.AppendSlice(*inputTrack.GetSegment(), michael@0: std::min(inputTrackEndPoint, inputStartTicks), michael@0: std::min(inputTrackEndPoint, inputEndTicks)); michael@0: } michael@0: // Pad if we're looking past the end of the track michael@0: segment.AppendNullData(ticks - segment.GetDuration()); michael@0: } michael@0: } michael@0: michael@0: trackMap->mSamplesPassedToResampler += segment.GetDuration(); michael@0: trackMap->ResampleInputData(&segment); michael@0: michael@0: if (trackMap->mResampledData.GetDuration() < mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE) { michael@0: // We don't have enough data. Delay it. michael@0: trackMap->mResampledData.InsertNullDataAtStart( michael@0: mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE - trackMap->mResampledData.GetDuration()); michael@0: } michael@0: audioSegments.AppendElement()->AppendSlice(trackMap->mResampledData, michael@0: mCurrentOutputPosition, mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE); michael@0: trackMap->mResampledData.ForgetUpTo(mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE); michael@0: inputChannels = GetAudioChannelsSuperset(inputChannels, trackMap->mResamplerChannelCount); michael@0: } michael@0: michael@0: for (int32_t i = mTrackMap.Length() - 1; i >= 0; --i) { michael@0: if (i >= int32_t(trackMapEntriesUsed.Length()) || !trackMapEntriesUsed[i]) { michael@0: mTrackMap.RemoveElementAt(i); michael@0: } michael@0: } michael@0: michael@0: uint32_t accumulateIndex = 0; michael@0: if (inputChannels) { michael@0: nsAutoTArray downmixBuffer; michael@0: for (uint32_t i = 0; i < audioSegments.Length(); ++i) { michael@0: AudioChunk tmpChunk; michael@0: ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk); michael@0: if (!tmpChunk.IsNull()) { michael@0: if (accumulateIndex == 0) { michael@0: AllocateAudioBlock(inputChannels, &mLastChunks[0]); michael@0: } michael@0: AccumulateInputChunk(accumulateIndex, tmpChunk, &mLastChunks[0], &downmixBuffer); michael@0: accumulateIndex++; michael@0: } michael@0: } michael@0: } michael@0: if (accumulateIndex == 0) { michael@0: mLastChunks[0].SetNull(WEBAUDIO_BLOCK_SIZE); michael@0: } michael@0: mCurrentOutputPosition += WEBAUDIO_BLOCK_SIZE; michael@0: michael@0: // Using AudioNodeStream's AdvanceOutputSegment to push the media stream graph along with null data. michael@0: AdvanceOutputSegment(); michael@0: } michael@0: michael@0: }