/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AudioSegment.h"

#include "AudioStream.h"
#include "AudioMixer.h"
#include "AudioChannelFormat.h"
#include "Latency.h"
#include "speex/speex_resampler.h"

namespace mozilla {

// Interleaves the planar source channels into aOutput, converting each
// sample from SrcT to DestT and scaling it by aVolume along the way.
template <class SrcT, class DestT>
static void
InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           DestT* aOutput)
{
  DestT* output = aOutput;
  for (int32_t i = 0; i < aLength; ++i) {
    for (int32_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}

void
InterleaveAndConvertBuffer(const void** aSourceChannels,
                           AudioSampleFormat aSourceFormat,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           AudioDataValue* aOutput)
{
  switch (aSourceFormat) {
  case AUDIO_FORMAT_FLOAT32:
    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                               aLength,
                               aVolume,
                               aChannels,
                               aOutput);
    break;
  case AUDIO_FORMAT_S16:
    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                               aLength,
                               aVolume,
                               aChannels,
                               aOutput);
    break;
  case AUDIO_FORMAT_SILENCE:
    // nothing to do here.
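    // Silent chunks carry no channel data; callers zero-fill the output
    // themselves instead (see the memset in AudioSegment::WriteTo below).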
    break;
  }
}

void
AudioSegment::ApplyVolume(float aVolume)
{
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    ci->mVolume *= aVolume;
  }
}

static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48 kHz audio */
static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES] = {0};

// Downmixes the planar aChannelData (in aSourceFormat) to aOutputChannels
// channels and interleaves the result into aOutput, converting int16 input
// to float32 first so the downmix happens in float. Assumes more input
// channels than output channels; WriteTo only calls it in that case, and
// otherwise the downmix output buffer would be left unfilled.
void
DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
                     AudioSampleFormat aSourceFormat, int32_t aDuration,
                     float aVolume, uint32_t aOutputChannels,
                     AudioDataValue* aOutput)
{
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixConversionBuffer;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixOutputBuffer;

  channelData.SetLength(aChannelData.Length());
  if (aSourceFormat != AUDIO_FORMAT_FLOAT32) {
    NS_ASSERTION(aSourceFormat == AUDIO_FORMAT_S16, "unknown format");
    downmixConversionBuffer.SetLength(aDuration*aChannelData.Length());
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      float* conversionBuf = downmixConversionBuffer.Elements() + (i*aDuration);
      const int16_t* sourceBuf = static_cast<const int16_t*>(aChannelData[i]);
      for (uint32_t j = 0; j < (uint32_t)aDuration; ++j) {
        conversionBuf[j] = AudioSampleToFloat(sourceBuf[j]);
      }
      channelData[i] = conversionBuf;
    }
  } else {
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      channelData[i] = aChannelData[i];
    }
  }

  downmixOutputBuffer.SetLength(aDuration*aOutputChannels);
  nsAutoTArray<float*,GUESS_AUDIO_CHANNELS> outputChannelBuffers;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> outputChannelData;
  outputChannelBuffers.SetLength(aOutputChannels);
  outputChannelData.SetLength(aOutputChannels);
  for (uint32_t i = 0; i < (uint32_t)aOutputChannels; ++i) {
    outputChannelData[i] = outputChannelBuffers[i] =
      downmixOutputBuffer.Elements() + aDuration*i;
  }
  if (channelData.Length() > aOutputChannels) {
    AudioChannelsDownMix(channelData, outputChannelBuffers.Elements(),
                         aOutputChannels, aDuration);
  }
  InterleaveAndConvertBuffer(outputChannelData.Elements(), AUDIO_FORMAT_FLOAT32,
                             aDuration, aVolume, aOutputChannels, aOutput);
}

void
AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
{
  uint32_t inRate, outRate;

  if (mChunks.IsEmpty()) {
    return;
  }

  speex_resampler_get_rate(aResampler, &inRate, &outRate);

  // Determine the sample format of the segment: any non-silent chunk sets it.
  AudioSampleFormat format = AUDIO_FORMAT_SILENCE;
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    if (ci->mBufferFormat != AUDIO_FORMAT_SILENCE) {
      format = ci->mBufferFormat;
    }
  }

  switch (format) {
    // If the format is silence at this point, all the chunks are silent. The
    // actual function we use does not matter, it's just a matter of changing
    // the chunks' durations.
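    // Hence the deliberate fall-through below: silence takes the float path.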
    case AUDIO_FORMAT_SILENCE:
    case AUDIO_FORMAT_FLOAT32:
      Resample<float>(aResampler, inRate, outRate);
      break;
    case AUDIO_FORMAT_S16:
      Resample<int16_t>(aResampler, inRate, outRate);
      break;
    default:
      MOZ_ASSERT(false);
      break;
  }
}

void
AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer)
{
  uint32_t outputChannels = aOutput->GetChannels();
  nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  // Offset in the buffer that will end up sent to the AudioStream, in samples.
  uint32_t offset = 0;

  if (!GetDuration()) {
    return;
  }

  uint32_t outBufferLength = GetDuration() * outputChannels;
  buf.SetLength(outBufferLength);

  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    AudioChunk& c = *ci;
    uint32_t frames = c.mDuration;

    // If we have written data in the past, or we have real (non-silent) data
    // to write, we can proceed. Otherwise, it means we just started the
    // AudioStream, and we don't have real data to write to it (just silence).
    // To avoid overbuffering in the AudioStream, we simply drop the silence
    // here. The stream will underrun and output silence anyway.
    if (c.mBuffer || aOutput->GetWritten()) {
      if (c.mBuffer && c.mBufferFormat != AUDIO_FORMAT_SILENCE) {
        channelData.SetLength(c.mChannelData.Length());
        for (uint32_t i = 0; i < channelData.Length(); ++i) {
          channelData[i] = c.mChannelData[i];
        }

        if (channelData.Length() < outputChannels) {
          // Up-mix. Note that this might actually make channelData have more
          // than outputChannels temporarily.
          AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
        }

        if (channelData.Length() > outputChannels) {
          // Down-mix.
          DownmixAndInterleave(channelData, c.mBufferFormat, frames,
                               c.mVolume, outputChannels, buf.Elements() + offset);
        } else {
          InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
                                     frames, c.mVolume,
                                     outputChannels,
                                     buf.Elements() + offset);
        }
      } else {
        // Assumes that a bit pattern of all zeroes == 0.0f.
        memset(buf.Elements() + offset, 0, outputChannels * frames * sizeof(AudioDataValue));
      }
      offset += frames * outputChannels;
    }

    if (!c.mTimeStamp.IsNull()) {
      TimeStamp now = TimeStamp::Now();
      // It would be more efficient to convert c.mTimeStamp to ms at creation
      // time and pass that here.
      LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
              (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
    }
  }

  aOutput->Write(buf.Elements(), offset / outputChannels,
                 &(mChunks[mChunks.Length() - 1].mTimeStamp));

  if (aMixer) {
    aMixer->Mix(buf.Elements(), outputChannels, GetDuration(), aOutput->GetRate());
  }
  aOutput->Start();
}

} // namespace mozilla