/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AudioNodeEngine.h"
#include "AudioNodeExternalInputStream.h"
#include "AudioChannelFormat.h"
#include "speex/speex_resampler.h"

using namespace mozilla::dom;

namespace mozilla {

AudioNodeExternalInputStream::AudioNodeExternalInputStream(AudioNodeEngine* aEngine, TrackRate aSampleRate)
  : AudioNodeStream(aEngine, MediaStreamGraph::INTERNAL_STREAM, aSampleRate)
  , mCurrentOutputPosition(0)
{
  MOZ_COUNT_CTOR(AudioNodeExternalInputStream);
}

AudioNodeExternalInputStream::~AudioNodeExternalInputStream()
{
  MOZ_COUNT_DTOR(AudioNodeExternalInputStream);
}

AudioNodeExternalInputStream::TrackMapEntry::~TrackMapEntry()
{
  if (mResampler) {
    speex_resampler_destroy(mResampler);
  }
}

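/**
 * Returns the index in mTrackMap of the entry corresponding to aTrack,
 * creating the entry (and a resampler, if the rates differ) on first use.
 * Returns nsTArray<TrackMapEntry>::NoIndex while the track has produced only
 * silence, since the correct channel count is not yet known.
 */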
uint32_t
AudioNodeExternalInputStream::GetTrackMapEntry(const StreamBuffer::Track& aTrack,
                                               GraphTime aFrom)
{
  AudioSegment* segment = aTrack.Get<AudioSegment>();

  // Check the map for an existing entry corresponding to the input track.
  for (uint32_t i = 0; i < mTrackMap.Length(); ++i) {
    TrackMapEntry* map = &mTrackMap[i];
    if (map->mTrackID == aTrack.GetID()) {
      return i;
    }
  }

  // Determine the channel count from the first chunk with non-silent data.
  AudioSegment::ChunkIterator ci(*segment);
  while (!ci.IsEnded() && ci->IsNull()) {
    ci.Next();
  }
  if (ci.IsEnded()) {
    // The track is entirely silence so far; ignore it for now.
    return nsTArray<TrackMapEntry>::NoIndex;
  }

  // Create a speex resampler with the same sample rate and number of channels
  // as the track.
  SpeexResamplerState* resampler = nullptr;
  uint32_t channelCount = std::min((*ci).mChannelData.Length(),
                                   WebAudioUtils::MaxChannelCount);
  if (aTrack.GetRate() != mSampleRate) {
    resampler = speex_resampler_init(channelCount,
      aTrack.GetRate(), mSampleRate, SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
    speex_resampler_skip_zeros(resampler);
  }

  TrackMapEntry* map = mTrackMap.AppendElement();
  map->mEndOfConsumedInputTicks = 0;
  map->mEndOfLastInputIntervalInInputStream = -1;
  map->mEndOfLastInputIntervalInOutputStream = -1;
  map->mSamplesPassedToResampler =
    TimeToTicksRoundUp(aTrack.GetRate(), GraphTimeToStreamTime(aFrom));
  map->mResampler = resampler;
  map->mResamplerChannelCount = channelCount;
  map->mTrackID = aTrack.GetID();
  return mTrackMap.Length() - 1;
}

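// Upper bound on the number of output samples requested from the resampler
// in a single ResampleChannelBuffer iteration.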
static const uint32_t SPEEX_RESAMPLER_PROCESS_MAX_OUTPUT = 1000;

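/**
 * Resamples a single channel's buffer through aResampler, appending the
 * output (as floats) to aOutput. When aResampler is null the rates already
 * match, so the input is simply converted to float and copied through.
 */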
template <typename T> static void
ResampleChannelBuffer(SpeexResamplerState* aResampler, uint32_t aChannel,
                      const T* aInput, uint32_t aInputDuration,
                      nsTArray<float>* aOutput)
{
  if (!aResampler) {
    float* out = aOutput->AppendElements(aInputDuration);
    for (uint32_t i = 0; i < aInputDuration; ++i) {
      out[i] = AudioSampleToFloat(aInput[i]);
    }
    return;
  }

  uint32_t processed = 0;
  while (processed < aInputDuration) {
    uint32_t prevLength = aOutput->Length();
    float* output = aOutput->AppendElements(SPEEX_RESAMPLER_PROCESS_MAX_OUTPUT);
    uint32_t in = aInputDuration - processed;
    uint32_t out = aOutput->Length() - prevLength;
    WebAudioUtils::SpeexResamplerProcess(aResampler, aChannel,
                                         aInput + processed, &in,
                                         output, &out);
    processed += in;
    aOutput->SetLength(prevLength + out);
  }
}

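/**
 * Resamples one chunk's worth of data for every channel and appends the
 * result to mResampledData. A non-unit volume is applied up front (which
 * forces a conversion to float) so the resampler sees the final sample
 * values.
 */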
void
AudioNodeExternalInputStream::TrackMapEntry::ResampleChannels(const nsTArray<const void*>& aBuffers,
                                                              uint32_t aInputDuration,
                                                              AudioSampleFormat aFormat,
                                                              float aVolume)
{
  NS_ASSERTION(aBuffers.Length() == mResamplerChannelCount,
               "Channel count must be correct here");

  nsAutoTArray<nsTArray<float>,2> resampledBuffers;
  resampledBuffers.SetLength(aBuffers.Length());
  nsTArray<float> samplesAdjustedForVolume;
  nsAutoTArray<const float*,2> bufferPtrs;
  bufferPtrs.SetLength(aBuffers.Length());

  for (uint32_t i = 0; i < aBuffers.Length(); ++i) {
    AudioSampleFormat format = aFormat;
    const void* buffer = aBuffers[i];

    if (aVolume != 1.0f) {
      format = AUDIO_FORMAT_FLOAT32;
      samplesAdjustedForVolume.SetLength(aInputDuration);
      switch (aFormat) {
      case AUDIO_FORMAT_FLOAT32:
        ConvertAudioSamplesWithScale(static_cast<const float*>(buffer),
                                     samplesAdjustedForVolume.Elements(),
                                     aInputDuration, aVolume);
        break;
      case AUDIO_FORMAT_S16:
        ConvertAudioSamplesWithScale(static_cast<const int16_t*>(buffer),
                                     samplesAdjustedForVolume.Elements(),
                                     aInputDuration, aVolume);
        break;
      default:
        MOZ_ASSERT(false);
        return;
      }
      buffer = samplesAdjustedForVolume.Elements();
    }

    switch (format) {
    case AUDIO_FORMAT_FLOAT32:
      ResampleChannelBuffer(mResampler, i,
                            static_cast<const float*>(buffer),
                            aInputDuration, &resampledBuffers[i]);
      break;
    case AUDIO_FORMAT_S16:
      ResampleChannelBuffer(mResampler, i,
                            static_cast<const int16_t*>(buffer),
                            aInputDuration, &resampledBuffers[i]);
      break;
    default:
      MOZ_ASSERT(false);
      return;
    }
    bufferPtrs[i] = resampledBuffers[i].Elements();
    NS_ASSERTION(i == 0 ||
                 resampledBuffers[i].Length() == resampledBuffers[0].Length(),
                 "Resampler made different decisions for different channels!");
  }

  uint32_t length = resampledBuffers[0].Length();
  nsRefPtr<ThreadSharedObject> buf = new SharedChannelArrayBuffer<float>(&resampledBuffers);
  mResampledData.AppendFrames(buf.forget(), bufferPtrs, length);
}

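/**
 * Feeds every chunk of aSegment through this track's resampler. Null chunks
 * are resampled as real silence so the resampler's state stays aligned with
 * the input, and chunks whose channel count differs from
 * mResamplerChannelCount are up- or down-mixed first.
 */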
void
AudioNodeExternalInputStream::TrackMapEntry::ResampleInputData(AudioSegment* aSegment)
{
  AudioSegment::ChunkIterator ci(*aSegment);
  while (!ci.IsEnded()) {
    const AudioChunk& chunk = *ci;
    nsAutoTArray<const void*,2> channels;
    if (chunk.GetDuration() > UINT32_MAX) {
      // This will cause us to OOM or overflow below. So let's just bail.
      NS_ERROR("Chunk duration out of bounds");
      return;
    }
    uint32_t duration = uint32_t(chunk.GetDuration());

    if (chunk.IsNull()) {
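      // Feed explicit zeros through the resampler instead of skipping the
      // chunk, so its internal history stays in sync with the input.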
      nsAutoTArray<AudioDataValue,1024> silence;
      silence.SetLength(duration);
      PodZero(silence.Elements(), silence.Length());
      channels.SetLength(mResamplerChannelCount);
      for (uint32_t i = 0; i < channels.Length(); ++i) {
        channels[i] = silence.Elements();
      }
      ResampleChannels(channels, duration, AUDIO_OUTPUT_FORMAT, 0.0f);
    } else if (chunk.mChannelData.Length() == mResamplerChannelCount) {
      // Common case, since mResamplerChannelCount is set to the first chunk's
      // number of channels.
      channels.AppendElements(chunk.mChannelData);
      ResampleChannels(channels, duration, chunk.mBufferFormat, chunk.mVolume);
    } else {
      // Uncommon case. Since downmixing requires channels to be floats,
      // convert everything to floats now.
      uint32_t upChannels = GetAudioChannelsSuperset(chunk.mChannelData.Length(), mResamplerChannelCount);
      nsTArray<float> buffer;
      if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
        channels.AppendElements(chunk.mChannelData);
      } else {
        NS_ASSERTION(chunk.mBufferFormat == AUDIO_FORMAT_S16, "Unknown format");
        if (duration > UINT32_MAX/chunk.mChannelData.Length()) {
          NS_ERROR("Chunk duration out of bounds");
          return;
        }
        buffer.SetLength(chunk.mChannelData.Length()*duration);
        for (uint32_t i = 0; i < chunk.mChannelData.Length(); ++i) {
          const int16_t* samples = static_cast<const int16_t*>(chunk.mChannelData[i]);
          float* converted = &buffer[i*duration];
          for (uint32_t j = 0; j < duration; ++j) {
            converted[j] = AudioSampleToFloat(samples[j]);
          }
          channels.AppendElement(converted);
        }
      }
      nsTArray<float> zeroes;
      if (channels.Length() < upChannels) {
        zeroes.SetLength(duration);
        PodZero(zeroes.Elements(), zeroes.Length());
        AudioChannelsUpMix(&channels, upChannels, zeroes.Elements());
      }
      if (channels.Length() == mResamplerChannelCount) {
        ResampleChannels(channels, duration, AUDIO_FORMAT_FLOAT32, chunk.mVolume);
      } else {
        nsTArray<float> output;
        if (duration > UINT32_MAX/mResamplerChannelCount) {
          NS_ERROR("Chunk duration out of bounds");
          return;
        }
        output.SetLength(duration*mResamplerChannelCount);
        nsAutoTArray<float*,2> outputPtrs;
        nsAutoTArray<const void*,2> outputPtrsConst;
        for (uint32_t i = 0; i < mResamplerChannelCount; ++i) {
          outputPtrs.AppendElement(output.Elements() + i*duration);
          outputPtrsConst.AppendElement(outputPtrs[i]);
        }
        AudioChannelsDownMix(channels, outputPtrs.Elements(), outputPtrs.Length(), duration);
        ResampleChannels(outputPtrsConst, duration, AUDIO_FORMAT_FLOAT32, chunk.mVolume);
      }
    }
    ci.Next();
  }
}

/**
 * Copies the data in aInput to aOffsetInBlock within aBlock. All samples must
 * be float. Both chunks must have the same number of channels (or else
 * aInput is null). aBlock must have been allocated with AllocateInputBlock.
 */
static void
CopyChunkToBlock(const AudioChunk& aInput, AudioChunk* aBlock, uint32_t aOffsetInBlock)
{
  uint32_t d = aInput.GetDuration();
  for (uint32_t i = 0; i < aBlock->mChannelData.Length(); ++i) {
    float* out = static_cast<float*>(const_cast<void*>(aBlock->mChannelData[i])) +
                 aOffsetInBlock;
    if (aInput.IsNull()) {
      PodZero(out, d);
    } else {
      const float* in = static_cast<const float*>(aInput.mChannelData[i]);
      ConvertAudioSamplesWithScale(in, out, d, aInput.mVolume);
    }
  }
}

/**
 * Converts the data in aSegment to a single chunk aChunk. Every chunk in
 * aSegment must have the same number of channels (or be null). aSegment must
 * have duration WEBAUDIO_BLOCK_SIZE. Every chunk in aSegment must be in
 * float format.
 */
static void
ConvertSegmentToAudioBlock(AudioSegment* aSegment, AudioChunk* aBlock)
{
  NS_ASSERTION(aSegment->GetDuration() == WEBAUDIO_BLOCK_SIZE, "Bad segment duration");

  {
    AudioSegment::ChunkIterator ci(*aSegment);
    NS_ASSERTION(!ci.IsEnded(), "Segment must have at least one chunk");
    AudioChunk& firstChunk = *ci;
    ci.Next();
    if (ci.IsEnded()) {
      *aBlock = firstChunk;
      return;
    }

    while (!ci.IsEnded() && ci->IsNull()) {
      ci.Next();
    }
    if (ci.IsEnded()) {
      // All null.
      aBlock->SetNull(WEBAUDIO_BLOCK_SIZE);
      return;
    }

    AllocateAudioBlock(ci->mChannelData.Length(), aBlock);
  }

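  // Copy every chunk (null chunks become zeros) into the allocated block at
  // its running offset.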
  AudioSegment::ChunkIterator ci(*aSegment);
  uint32_t duration = 0;
  while (!ci.IsEnded()) {
    CopyChunkToBlock(*ci, aBlock, duration);
    duration += ci->GetDuration();
    ci.Next();
  }
}

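/**
 * Produces one WEBAUDIO_BLOCK_SIZE block of output: slices the input stream's
 * audio tracks over [aFrom, aTo), resamples each track to the graph rate, and
 * mixes the per-track blocks into mLastChunks[0].
 */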
void
AudioNodeExternalInputStream::ProcessInput(GraphTime aFrom, GraphTime aTo,
                                           uint32_t aFlags)
{
  // According to the spec, the number of outputs is always 1.
  mLastChunks.SetLength(1);

  // Garbage collection can destroy our input stream before this stream is
  // destroyed. Handle that by outputting silence.
  if (mInputs.IsEmpty()) {
    mLastChunks[0].SetNull(WEBAUDIO_BLOCK_SIZE);
    AdvanceOutputSegment();
    return;
  }

  MOZ_ASSERT(mInputs.Length() == 1);

  MediaStream* source = mInputs[0]->GetSource();
  nsAutoTArray<AudioSegment,1> audioSegments;
  nsAutoTArray<bool,1> trackMapEntriesUsed;
  uint32_t inputChannels = 0;
  for (StreamBuffer::TrackIter tracks(source->mBuffer, MediaSegment::AUDIO);
       !tracks.IsEnded(); tracks.Next()) {
    const StreamBuffer::Track& inputTrack = *tracks;
    // Create a TrackMapEntry if necessary.
    uint32_t trackMapIndex = GetTrackMapEntry(inputTrack, aFrom);
    // Maybe there's nothing in this track yet. If so, ignore it. (While the
    // track is only playing silence, we may not be able to determine the
    // correct number of channels to start resampling.)
    if (trackMapIndex == nsTArray<TrackMapEntry>::NoIndex) {
      continue;
    }

    while (trackMapEntriesUsed.Length() <= trackMapIndex) {
      trackMapEntriesUsed.AppendElement(false);
    }
    trackMapEntriesUsed[trackMapIndex] = true;

    TrackMapEntry* trackMap = &mTrackMap[trackMapIndex];
    AudioSegment segment;
    GraphTime next;
    TrackRate inputTrackRate = inputTrack.GetRate();
    for (GraphTime t = aFrom; t < aTo; t = next) {
      MediaInputPort::InputInterval interval = mInputs[0]->GetNextInputInterval(t);
      interval.mEnd = std::min(interval.mEnd, aTo);
      if (interval.mStart >= interval.mEnd)
        break;
      next = interval.mEnd;

      // Ticks >= startTicks and < endTicks are in the interval
      StreamTime outputEnd = GraphTimeToStreamTime(interval.mEnd);
      TrackTicks startTicks = trackMap->mSamplesPassedToResampler + segment.GetDuration();
      StreamTime outputStart = GraphTimeToStreamTime(interval.mStart);
      NS_ASSERTION(startTicks == TimeToTicksRoundUp(inputTrackRate, outputStart),
                   "Samples missing");
      TrackTicks endTicks = TimeToTicksRoundUp(inputTrackRate, outputEnd);
      TrackTicks ticks = endTicks - startTicks;

      if (interval.mInputIsBlocked) {
        segment.AppendNullData(ticks);
      } else {
        // See comments in TrackUnionStream::CopyTrackData
        StreamTime inputStart = source->GraphTimeToStreamTime(interval.mStart);
        StreamTime inputEnd = source->GraphTimeToStreamTime(interval.mEnd);
        TrackTicks inputTrackEndPoint =
          inputTrack.IsEnded() ? inputTrack.GetEnd() : TRACK_TICKS_MAX;

        if (trackMap->mEndOfLastInputIntervalInInputStream != inputStart ||
            trackMap->mEndOfLastInputIntervalInOutputStream != outputStart) {
          // Start of a new series of intervals where neither stream is blocked.
          trackMap->mEndOfConsumedInputTicks = TimeToTicksRoundDown(inputTrackRate, inputStart) - 1;
        }
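        // Continue consuming input exactly where the previous interval left
        // off, so no input ticks are dropped or duplicated.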
        TrackTicks inputStartTicks = trackMap->mEndOfConsumedInputTicks;
        TrackTicks inputEndTicks = inputStartTicks + ticks;
        trackMap->mEndOfConsumedInputTicks = inputEndTicks;
        trackMap->mEndOfLastInputIntervalInInputStream = inputEnd;
        trackMap->mEndOfLastInputIntervalInOutputStream = outputEnd;

        if (inputStartTicks < 0) {
          // Data before the start of the track is just null.
          segment.AppendNullData(-inputStartTicks);
          inputStartTicks = 0;
        }
        if (inputEndTicks > inputStartTicks) {
          segment.AppendSlice(*inputTrack.GetSegment(),
                              std::min(inputTrackEndPoint, inputStartTicks),
                              std::min(inputTrackEndPoint, inputEndTicks));
        }
        // Pad if we're looking past the end of the track
        segment.AppendNullData(ticks - segment.GetDuration());
      }
    }

    trackMap->mSamplesPassedToResampler += segment.GetDuration();
    trackMap->ResampleInputData(&segment);

    if (trackMap->mResampledData.GetDuration() < mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE) {
      // We don't have enough data. Delay it.
      trackMap->mResampledData.InsertNullDataAtStart(
        mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE - trackMap->mResampledData.GetDuration());
    }
    audioSegments.AppendElement()->AppendSlice(trackMap->mResampledData,
      mCurrentOutputPosition, mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE);
    trackMap->mResampledData.ForgetUpTo(mCurrentOutputPosition + WEBAUDIO_BLOCK_SIZE);
    inputChannels = GetAudioChannelsSuperset(inputChannels, trackMap->mResamplerChannelCount);
  }

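  // Discard map entries for tracks that no longer appear in the input stream;
  // iterate in reverse so RemoveElementAt doesn't disturb remaining indices.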
  for (int32_t i = mTrackMap.Length() - 1; i >= 0; --i) {
    if (i >= int32_t(trackMapEntriesUsed.Length()) || !trackMapEntriesUsed[i]) {
      mTrackMap.RemoveElementAt(i);
    }
  }

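  // Mix the per-track blocks into a single output chunk; all-null blocks
  // contribute nothing.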
  uint32_t accumulateIndex = 0;
  if (inputChannels) {
    nsAutoTArray<float,GUESS_AUDIO_CHANNELS*WEBAUDIO_BLOCK_SIZE> downmixBuffer;
    for (uint32_t i = 0; i < audioSegments.Length(); ++i) {
      AudioChunk tmpChunk;
      ConvertSegmentToAudioBlock(&audioSegments[i], &tmpChunk);
      if (!tmpChunk.IsNull()) {
        if (accumulateIndex == 0) {
          AllocateAudioBlock(inputChannels, &mLastChunks[0]);
        }
        AccumulateInputChunk(accumulateIndex, tmpChunk, &mLastChunks[0], &downmixBuffer);
        accumulateIndex++;
      }
    }
  }
  if (accumulateIndex == 0) {
    mLastChunks[0].SetNull(WEBAUDIO_BLOCK_SIZE);
  }
  mCurrentOutputPosition += WEBAUDIO_BLOCK_SIZE;

  // Use AudioNodeStream's AdvanceOutputSegment to push the media stream graph
  // along with null data.
  AdvanceOutputSegment();
}

} // namespace mozilla