--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/content/media/AudioSegment.cpp	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,223 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioSegment.h"
+
+#include "AudioStream.h"
+#include "AudioMixer.h"
+#include "AudioChannelFormat.h"
+#include "Latency.h"
+#include "speex/speex_resampler.h"
+
+namespace mozilla {
+
+template <class SrcT, class DestT>
+static void
+InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
+                           int32_t aLength, float aVolume,
+                           int32_t aChannels,
+                           DestT* aOutput)
+{
+  DestT* output = aOutput;
+  for (int32_t i = 0; i < aLength; ++i) {
+    for (int32_t channel = 0; channel < aChannels; ++channel) {
+      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
+      *output = FloatToAudioSample<DestT>(v);
+      ++output;
+    }
+  }
+}
+
+void
+InterleaveAndConvertBuffer(const void** aSourceChannels,
+                           AudioSampleFormat aSourceFormat,
+                           int32_t aLength, float aVolume,
+                           int32_t aChannels,
+                           AudioDataValue* aOutput)
+{
+  switch (aSourceFormat) {
+  case AUDIO_FORMAT_FLOAT32:
+    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
+                               aLength,
+                               aVolume,
+                               aChannels,
+                               aOutput);
+    break;
+  case AUDIO_FORMAT_S16:
+    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
+                               aLength,
+                               aVolume,
+                               aChannels,
+                               aOutput);
+    break;
+  case AUDIO_FORMAT_SILENCE:
+    // Nothing to do here.
+    break;
+  }
+}
+
+void
+AudioSegment::ApplyVolume(float aVolume)
+{
+  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
+    ci->mVolume *= aVolume;
+  }
+}
+
+static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48 kHz audio */
+static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES] = {0};
+
+void
+DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
+                     AudioSampleFormat aSourceFormat, int32_t aDuration,
+                     float aVolume, uint32_t aOutputChannels,
+                     AudioDataValue* aOutput)
+{
+  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
+  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixConversionBuffer;
+  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixOutputBuffer;
+
+  channelData.SetLength(aChannelData.Length());
+  if (aSourceFormat != AUDIO_FORMAT_FLOAT32) {
+    NS_ASSERTION(aSourceFormat == AUDIO_FORMAT_S16, "unknown format");
+    downmixConversionBuffer.SetLength(aDuration*aChannelData.Length());
+    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
+      float* conversionBuf = downmixConversionBuffer.Elements() + (i*aDuration);
+      const int16_t* sourceBuf = static_cast<const int16_t*>(aChannelData[i]);
+      for (uint32_t j = 0; j < (uint32_t)aDuration; ++j) {
+        conversionBuf[j] = AudioSampleToFloat(sourceBuf[j]);
+      }
+      channelData[i] = conversionBuf;
+    }
+  } else {
+    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
+      channelData[i] = aChannelData[i];
+    }
+  }
+
+  downmixOutputBuffer.SetLength(aDuration*aOutputChannels);
+  nsAutoTArray<float*,GUESS_AUDIO_CHANNELS> outputChannelBuffers;
+  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> outputChannelData;
+  outputChannelBuffers.SetLength(aOutputChannels);
+  outputChannelData.SetLength(aOutputChannels);
+  for (uint32_t i = 0; i < (uint32_t)aOutputChannels; ++i) {
+    outputChannelData[i] = outputChannelBuffers[i] =
+      downmixOutputBuffer.Elements() + aDuration*i;
+  }
+  if (channelData.Length() > aOutputChannels) {
+    AudioChannelsDownMix(channelData, outputChannelBuffers.Elements(),
+                         aOutputChannels, aDuration);
+  }
+  InterleaveAndConvertBuffer(outputChannelData.Elements(), AUDIO_FORMAT_FLOAT32,
+                             aDuration, aVolume, aOutputChannels, aOutput);
+}
+
+void AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
+{
+  uint32_t inRate, outRate;
+
+  if (mChunks.IsEmpty()) {
+    return;
+  }
+
+  speex_resampler_get_rate(aResampler, &inRate, &outRate);
+
+  AudioSampleFormat format = AUDIO_FORMAT_SILENCE;
+  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
+    if (ci->mBufferFormat != AUDIO_FORMAT_SILENCE) {
+      format = ci->mBufferFormat;
+    }
+  }
+
+  switch (format) {
+  // If the format is still silence at this point, all the chunks are silent.
+  // The actual function we use does not matter; it's just a matter of
+  // changing the chunks' duration.
+  case AUDIO_FORMAT_SILENCE:
+  case AUDIO_FORMAT_FLOAT32:
+    Resample<float>(aResampler, inRate, outRate);
+    break;
+  case AUDIO_FORMAT_S16:
+    Resample<int16_t>(aResampler, inRate, outRate);
+    break;
+  default:
+    MOZ_ASSERT(false);
+    break;
+  }
+}
+
+void
+AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer)
+{
+  uint32_t outputChannels = aOutput->GetChannels();
+  nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
+  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
+  // Offset into the buffer that will be sent to the AudioStream, in samples.
+  uint32_t offset = 0;
+
+  if (!GetDuration()) {
+    return;
+  }
+
+  uint32_t outBufferLength = GetDuration() * outputChannels;
+  buf.SetLength(outBufferLength);
+
+
+  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
+    AudioChunk& c = *ci;
+    uint32_t frames = c.mDuration;
+
+    // If we have written data in the past, or we have real (non-silent) data
+    // to write, we can proceed. Otherwise, it means we just started the
+    // AudioStream and have no real data to write to it (just silence). To
+    // avoid overbuffering in the AudioStream, we simply drop the silence
+    // here; the stream will underrun and output silence anyway.
+    if (c.mBuffer || aOutput->GetWritten()) {
+      if (c.mBuffer && c.mBufferFormat != AUDIO_FORMAT_SILENCE) {
+        channelData.SetLength(c.mChannelData.Length());
+        for (uint32_t i = 0; i < channelData.Length(); ++i) {
+          channelData[i] = c.mChannelData[i];
+        }
+
+        if (channelData.Length() < outputChannels) {
+          // Up-mix. Note that this might actually make channelData have more
+          // than outputChannels temporarily.
+          AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
+        }
+
+        if (channelData.Length() > outputChannels) {
+          // Down-mix.
+          DownmixAndInterleave(channelData, c.mBufferFormat, frames,
+                               c.mVolume, outputChannels, buf.Elements() + offset);
+        } else {
+          InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
+                                     frames, c.mVolume,
+                                     outputChannels,
+                                     buf.Elements() + offset);
+        }
+      } else {
+        // Assumes that a bit pattern of zeroes == 0.0f.
+        memset(buf.Elements() + offset, 0, outputChannels * frames * sizeof(AudioDataValue));
+      }
+      offset += frames * outputChannels;
+    }
+
+    if (!c.mTimeStamp.IsNull()) {
+      TimeStamp now = TimeStamp::Now();
+      // More efficient: convert c.mTimeStamp to ms at creation time, then pass it here.
+      LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
+              (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
+    }
+  }
+
+  aOutput->Write(buf.Elements(), offset / outputChannels, &(mChunks[mChunks.Length() - 1].mTimeStamp));
+
+  if (aMixer) {
+    aMixer->Mix(buf.Elements(), outputChannels, GetDuration(), aOutput->GetRate());
+  }
+  aOutput->Start();
+}
+
+}
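
For quick reference, here is a minimal standalone sketch (not part of the patch) of the planar-to-interleaved conversion that the InterleaveAndConvertBuffer template above performs. The AudioSampleToFloat/FloatToAudioSample definitions below are simplified stand-ins for the real helpers the patch relies on, covering only the int16_t-input/float-output case; the sample data and volume are made up for illustration.

// Hypothetical sketch: int16_t planar input, float interleaved output,
// mirroring the frame-major/channel-minor walk of the template above.
#include <cstdint>
#include <cstdio>

static float AudioSampleToFloat(int16_t aValue)
{
  // Simplified stand-in: map [-32768, 32767] onto roughly [-1.0, 1.0).
  return aValue / 32768.0f;
}

template <typename DestT>
static DestT FloatToAudioSample(float aValue)
{
  return aValue; // identity when DestT is float
}

// Same shape as the template in the patch: for each frame, emit one sample
// per channel, so planar inputs come out interleaved.
template <class SrcT, class DestT>
static void
InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
                           int32_t aLength, float aVolume,
                           int32_t aChannels, DestT* aOutput)
{
  DestT* output = aOutput;
  for (int32_t i = 0; i < aLength; ++i) {
    for (int32_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i]) * aVolume;
      *output++ = FloatToAudioSample<DestT>(v);
    }
  }
}

int main()
{
  // Two planar channels, four frames each.
  const int16_t left[]  = {  8192, 16384, -8192, -16384 };
  const int16_t right[] = {  4096, -4096,  4096,  -4096 };
  const int16_t* channels[] = { left, right };

  float out[8];
  InterleaveAndConvertBuffer(channels, 4, 0.5f, 2, out);

  // Prints L0 R0 L1 R1 ...: 0.1250 0.0625 0.2500 -0.0625 -0.1250 ...
  for (float s : out) {
    printf("%.4f ", s);
  }
  printf("\n");
  return 0;
}

The frame-major walk keeps the writes to aOutput sequential and produces the interleaved layout that the rest of the patch consumes: WriteTo fills buf this way before handing it to AudioStream::Write and AudioMixer::Mix.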