/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AudioSegment.h"

#include "AudioStream.h"
#include "AudioMixer.h"
#include "AudioChannelFormat.h"
#include "Latency.h"
#include "speex/speex_resampler.h"

namespace mozilla {

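// Interleave aChannels planar source buffers of aLength frames into aOutput,
// converting each sample to DestT and applying aVolume along the way.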
template <class SrcT, class DestT>
static void
InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           DestT* aOutput)
{
  DestT* output = aOutput;
  for (int32_t i = 0; i < aLength; ++i) {
    for (int32_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i]) * aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}

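// Format-dispatching wrapper over the template above. A silent chunk carries
// no source data to convert, so AUDIO_FORMAT_SILENCE is deliberately a no-op.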
void
InterleaveAndConvertBuffer(const void** aSourceChannels,
                           AudioSampleFormat aSourceFormat,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           AudioDataValue* aOutput)
{
  switch (aSourceFormat) {
  case AUDIO_FORMAT_FLOAT32:
    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                               aLength,
                               aVolume,
                               aChannels,
                               aOutput);
    break;
  case AUDIO_FORMAT_S16:
    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                               aLength,
                               aVolume,
                               aChannels,
                               aOutput);
    break;
  case AUDIO_FORMAT_SILENCE:
    // Nothing to do here.
    break;
  }
}

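// Scale the volume of every chunk in this segment by aVolume.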
void
AudioSegment::ApplyVolume(float aVolume)
{
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    ci->mVolume *= aVolume;
  }
}

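// gZeroChannel is a block of silence sized for one processing run at the
// largest supported sample size; it serves as the extra source channel when
// up-mixing in WriteTo below.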
static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48 kHz audio */
static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES] = {0};

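// Convert the planar source channels to float if needed, down-mix them to
// aOutputChannels channels, and interleave the result into aOutput with
// aVolume applied. In this file it is only invoked when there are more
// source channels than output channels (see WriteTo below).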
void
DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
                     AudioSampleFormat aSourceFormat, int32_t aDuration,
                     float aVolume, uint32_t aOutputChannels,
                     AudioDataValue* aOutput)
{
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixConversionBuffer;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixOutputBuffer;

  channelData.SetLength(aChannelData.Length());
  if (aSourceFormat != AUDIO_FORMAT_FLOAT32) {
    NS_ASSERTION(aSourceFormat == AUDIO_FORMAT_S16, "unknown format");
    downmixConversionBuffer.SetLength(aDuration*aChannelData.Length());
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      float* conversionBuf = downmixConversionBuffer.Elements() + (i*aDuration);
      const int16_t* sourceBuf = static_cast<const int16_t*>(aChannelData[i]);
      for (uint32_t j = 0; j < (uint32_t)aDuration; ++j) {
        conversionBuf[j] = AudioSampleToFloat(sourceBuf[j]);
      }
      channelData[i] = conversionBuf;
    }
  } else {
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      channelData[i] = aChannelData[i];
    }
  }

  downmixOutputBuffer.SetLength(aDuration*aOutputChannels);
  nsAutoTArray<float*,GUESS_AUDIO_CHANNELS> outputChannelBuffers;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> outputChannelData;
  outputChannelBuffers.SetLength(aOutputChannels);
  outputChannelData.SetLength(aOutputChannels);
  for (uint32_t i = 0; i < (uint32_t)aOutputChannels; ++i) {
    outputChannelData[i] = outputChannelBuffers[i] =
      downmixOutputBuffer.Elements() + aDuration*i;
  }
  if (channelData.Length() > aOutputChannels) {
    AudioChannelsDownMix(channelData, outputChannelBuffers.Elements(),
                         aOutputChannels, aDuration);
  }
  InterleaveAndConvertBuffer(outputChannelData.Elements(), AUDIO_FORMAT_FLOAT32,
                             aDuration, aVolume, aOutputChannels, aOutput);
}

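// Resample every chunk in this segment with aResampler. Any non-silent chunk
// determines which Resample<T> instantiation is used; if all chunks are
// silent, either instantiation works, since only the durations change.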
void AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
{
  uint32_t inRate, outRate;

  if (mChunks.IsEmpty()) {
    return;
  }

  speex_resampler_get_rate(aResampler, &inRate, &outRate);

  AudioSampleFormat format = AUDIO_FORMAT_SILENCE;
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    if (ci->mBufferFormat != AUDIO_FORMAT_SILENCE) {
      format = ci->mBufferFormat;
    }
  }

  switch (format) {
  // If the format is still silence at this point, all the chunks are silent.
  // The actual instantiation we use does not matter; it's just a matter of
  // changing the chunks' duration.
  case AUDIO_FORMAT_SILENCE:
  case AUDIO_FORMAT_FLOAT32:
    Resample<float>(aResampler, inRate, outRate);
    break;
  case AUDIO_FORMAT_S16:
    Resample<int16_t>(aResampler, inRate, outRate);
    break;
  default:
    MOZ_ASSERT(false);
    break;
  }
}

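// Interleave and convert this whole segment into a single buffer, up- or
// down-mixing each chunk to the stream's channel count, write it to aOutput,
// and optionally hand the interleaved data to aMixer.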
void
AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer)
{
  uint32_t outputChannels = aOutput->GetChannels();
  nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  // Offset into the buffer that will be sent to the AudioStream, in samples.
  uint32_t offset = 0;

  if (!GetDuration()) {
    return;
  }

  uint32_t outBufferLength = GetDuration() * outputChannels;
  buf.SetLength(outBufferLength);

  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    AudioChunk& c = *ci;
    uint32_t frames = c.mDuration;

    // If we have written data in the past, or we have real (non-silent) data
    // to write, we can proceed. Otherwise, it means we just started the
    // AudioStream and we don't have real data to write to it (just silence).
    // To avoid overbuffering in the AudioStream, we simply drop the silence
    // here. The stream will underrun and output silence anyway.
    if (c.mBuffer || aOutput->GetWritten()) {
      if (c.mBuffer && c.mBufferFormat != AUDIO_FORMAT_SILENCE) {
        channelData.SetLength(c.mChannelData.Length());
        for (uint32_t i = 0; i < channelData.Length(); ++i) {
          channelData[i] = c.mChannelData[i];
        }

        if (channelData.Length() < outputChannels) {
          // Up-mix. Note that this might actually make channelData have more
          // than outputChannels temporarily.
          AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
        }

        if (channelData.Length() > outputChannels) {
          // Down-mix.
          DownmixAndInterleave(channelData, c.mBufferFormat, frames,
                               c.mVolume, outputChannels, buf.Elements() + offset);
        } else {
          InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
                                     frames, c.mVolume,
                                     outputChannels,
                                     buf.Elements() + offset);
        }
      } else {
        // Silent chunk: assumes that an all-zero bit pattern == 0.0f.
        memset(buf.Elements() + offset, 0, outputChannels * frames * sizeof(AudioDataValue));
      }
      offset += frames * outputChannels;
    }

    if (!c.mTimeStamp.IsNull()) {
      TimeStamp now = TimeStamp::Now();
      // It would be more efficient to convert c.mTimeStamp to milliseconds at
      // creation time and pass that here.
      LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
              (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
    }
  }

  aOutput->Write(buf.Elements(), offset / outputChannels, &(mChunks[mChunks.Length() - 1].mTimeStamp));

  if (aMixer) {
    aMixer->Mix(buf.Elements(), outputChannels, GetDuration(), aOutput->GetRate());
  }
  aOutput->Start();
}

} // namespace mozilla