michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this file, michael@0: * You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include "OpusTrackEncoder.h" michael@0: #include "nsString.h" michael@0: michael@0: #include michael@0: michael@0: #undef LOG michael@0: #ifdef MOZ_WIDGET_GONK michael@0: #include michael@0: #define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args); michael@0: #else michael@0: #define LOG(args, ...) michael@0: #endif michael@0: michael@0: namespace mozilla { michael@0: michael@0: // The Opus format supports up to 8 channels, and supports multitrack audio up michael@0: // to 255 channels, but the current implementation supports only mono and michael@0: // stereo, and downmixes any more than that. michael@0: static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8; michael@0: michael@0: // http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html michael@0: // In section "opus_encoder_init", channels must be 1 or 2 of input signal. michael@0: static const int MAX_CHANNELS = 2; michael@0: michael@0: // A maximum data bytes for Opus to encode. michael@0: static const int MAX_DATA_BYTES = 4096; michael@0: michael@0: // http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4 michael@0: // Second paragraph, " The granule position of an audio data page is in units michael@0: // of PCM audio samples at a fixed rate of 48 kHz." michael@0: static const int kOpusSamplingRate = 48000; michael@0: michael@0: // The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms. michael@0: static const int kFrameDurationMs = 20; michael@0: michael@0: // The supported sampling rate of input signal (Hz), michael@0: // must be one of the following. Will resampled to 48kHz otherwise. michael@0: static const int kOpusSupportedInputSamplingRates[] = michael@0: {8000, 12000, 16000, 24000, 48000}; michael@0: michael@0: namespace { michael@0: michael@0: // An endian-neutral serialization of integers. Serializing T in little endian michael@0: // format to aOutput, where T is a 16 bits or 32 bits integer. michael@0: template michael@0: static void michael@0: SerializeToBuffer(T aValue, nsTArray* aOutput) michael@0: { michael@0: for (uint32_t i = 0; i < sizeof(T); i++) { michael@0: aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8)))); michael@0: } michael@0: } michael@0: michael@0: static inline void michael@0: SerializeToBuffer(const nsCString& aComment, nsTArray* aOutput) michael@0: { michael@0: // Format of serializing a string to buffer is, the length of string (32 bits, michael@0: // little endian), and the string. michael@0: SerializeToBuffer((uint32_t)(aComment.Length()), aOutput); michael@0: aOutput->AppendElements(aComment.get(), aComment.Length()); michael@0: } michael@0: michael@0: michael@0: static void michael@0: SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip, michael@0: uint32_t aInputSampleRate, nsTArray* aOutput) michael@0: { michael@0: // The magic signature, null terminator has to be stripped off from strings. michael@0: static const uint8_t magic[] = "OpusHead"; michael@0: aOutput->AppendElements(magic, sizeof(magic) - 1); michael@0: michael@0: // The version must always be 1 (8 bits, unsigned). michael@0: aOutput->AppendElement(1); michael@0: michael@0: // Number of output channels (8 bits, unsigned). michael@0: aOutput->AppendElement(aChannelCount); michael@0: michael@0: // Number of samples (at 48 kHz) to discard from the decoder output when michael@0: // starting playback (16 bits, unsigned, little endian). michael@0: SerializeToBuffer(aPreskip, aOutput); michael@0: michael@0: // The sampling rate of input source (32 bits, unsigned, little endian). michael@0: SerializeToBuffer(aInputSampleRate, aOutput); michael@0: michael@0: // Output gain, an encoder should set this field to zero (16 bits, signed, michael@0: // little endian). michael@0: SerializeToBuffer((int16_t)0, aOutput); michael@0: michael@0: // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits, michael@0: // unsigned). michael@0: aOutput->AppendElement(0); michael@0: } michael@0: michael@0: static void michael@0: SerializeOpusCommentHeader(const nsCString& aVendor, michael@0: const nsTArray& aComments, michael@0: nsTArray* aOutput) michael@0: { michael@0: // The magic signature, null terminator has to be stripped off. michael@0: static const uint8_t magic[] = "OpusTags"; michael@0: aOutput->AppendElements(magic, sizeof(magic) - 1); michael@0: michael@0: // The vendor; Should append in the following order: michael@0: // vendor string length (32 bits, unsigned, little endian) michael@0: // vendor string. michael@0: SerializeToBuffer(aVendor, aOutput); michael@0: michael@0: // Add comments; Should append in the following order: michael@0: // comment list length (32 bits, unsigned, little endian) michael@0: // comment #0 string length (32 bits, unsigned, little endian) michael@0: // comment #0 string michael@0: // comment #1 string length (32 bits, unsigned, little endian) michael@0: // comment #1 string ... michael@0: SerializeToBuffer((uint32_t)aComments.Length(), aOutput); michael@0: for (uint32_t i = 0; i < aComments.Length(); ++i) { michael@0: SerializeToBuffer(aComments[i], aOutput); michael@0: } michael@0: } michael@0: michael@0: } // Anonymous namespace. michael@0: michael@0: OpusTrackEncoder::OpusTrackEncoder() michael@0: : AudioTrackEncoder() michael@0: , mEncoder(nullptr) michael@0: , mLookahead(0) michael@0: , mResampler(nullptr) michael@0: { michael@0: } michael@0: michael@0: OpusTrackEncoder::~OpusTrackEncoder() michael@0: { michael@0: if (mEncoder) { michael@0: opus_encoder_destroy(mEncoder); michael@0: } michael@0: if (mResampler) { michael@0: speex_resampler_destroy(mResampler); michael@0: mResampler = nullptr; michael@0: } michael@0: } michael@0: michael@0: nsresult michael@0: OpusTrackEncoder::Init(int aChannels, int aSamplingRate) michael@0: { michael@0: // This monitor is used to wake up other methods that are waiting for encoder michael@0: // to be completely initialized. michael@0: ReentrantMonitorAutoEnter mon(mReentrantMonitor); michael@0: michael@0: NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0), michael@0: NS_ERROR_FAILURE); michael@0: michael@0: // This version of encoder API only support 1 or 2 channels, michael@0: // So set the mChannels less or equal 2 and michael@0: // let InterleaveTrackData downmix pcm data. michael@0: mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels; michael@0: michael@0: // According to www.opus-codec.org, creating an opus encoder requires the michael@0: // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or michael@0: // 48000. If this constraint is not satisfied, we resample the input to 48kHz. michael@0: nsTArray supportedSamplingRates; michael@0: supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates, michael@0: ArrayLength(kOpusSupportedInputSamplingRates)); michael@0: if (!supportedSamplingRates.Contains(aSamplingRate)) { michael@0: int error; michael@0: mResampler = speex_resampler_init(mChannels, michael@0: aSamplingRate, michael@0: kOpusSamplingRate, michael@0: SPEEX_RESAMPLER_QUALITY_DEFAULT, michael@0: &error); michael@0: michael@0: if (error != RESAMPLER_ERR_SUCCESS) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: } michael@0: mSamplingRate = aSamplingRate; michael@0: NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE); michael@0: michael@0: int error = 0; michael@0: mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels, michael@0: OPUS_APPLICATION_AUDIO, &error); michael@0: michael@0: mInitialized = (error == OPUS_OK); michael@0: michael@0: mReentrantMonitor.NotifyAll(); michael@0: michael@0: return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: int michael@0: OpusTrackEncoder::GetOutputSampleRate() michael@0: { michael@0: return mResampler ? kOpusSamplingRate : mSamplingRate; michael@0: } michael@0: michael@0: int michael@0: OpusTrackEncoder::GetPacketDuration() michael@0: { michael@0: return GetOutputSampleRate() * kFrameDurationMs / 1000; michael@0: } michael@0: michael@0: already_AddRefed michael@0: OpusTrackEncoder::GetMetadata() michael@0: { michael@0: { michael@0: // Wait if mEncoder is not initialized. michael@0: ReentrantMonitorAutoEnter mon(mReentrantMonitor); michael@0: while (!mCanceled && !mInitialized) { michael@0: mReentrantMonitor.Wait(); michael@0: } michael@0: } michael@0: michael@0: if (mCanceled || mEncodingComplete) { michael@0: return nullptr; michael@0: } michael@0: michael@0: nsRefPtr meta = new OpusMetadata(); michael@0: michael@0: mLookahead = 0; michael@0: int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead)); michael@0: if (error != OPUS_OK) { michael@0: mLookahead = 0; michael@0: } michael@0: michael@0: // The ogg time stamping and pre-skip is always timed at 48000. michael@0: SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate / michael@0: GetOutputSampleRate()), mSamplingRate, michael@0: &meta->mIdHeader); michael@0: michael@0: nsCString vendor; michael@0: vendor.AppendASCII(opus_get_version_string()); michael@0: michael@0: nsTArray comments; michael@0: comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION)); michael@0: michael@0: SerializeOpusCommentHeader(vendor, comments, michael@0: &meta->mCommentHeader); michael@0: michael@0: return meta.forget(); michael@0: } michael@0: michael@0: nsresult michael@0: OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData) michael@0: { michael@0: { michael@0: ReentrantMonitorAutoEnter mon(mReentrantMonitor); michael@0: // Wait until initialized or cancelled. michael@0: while (!mCanceled && !mInitialized) { michael@0: mReentrantMonitor.Wait(); michael@0: } michael@0: if (mCanceled || mEncodingComplete) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: } michael@0: michael@0: // calculation below depends on the truth that mInitialized is true. michael@0: MOZ_ASSERT(mInitialized); michael@0: michael@0: // re-sampled frames left last time which didn't fit into an Opus packet duration. michael@0: const int framesLeft = mResampledLeftover.Length() / mChannels; michael@0: // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple michael@0: // of kOpusSamplingRate. There is not precision loss in the integer division michael@0: // in computing framesToFetch. If frameLeft > 0, we need to add 1 to michael@0: // framesToFetch to ensure there will be at least n frames after re-sampling. michael@0: const int frameRoundUp = framesLeft ? 1 : 0; michael@0: michael@0: MOZ_ASSERT(GetPacketDuration() >= framesLeft); michael@0: // Try to fetch m frames such that there will be n frames michael@0: // where (n + frameLeft) >= GetPacketDuration() after re-sampling. michael@0: const int framesToFetch = !mResampler ? GetPacketDuration() michael@0: : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate michael@0: + frameRoundUp; michael@0: { michael@0: // Move all the samples from mRawSegment to mSourceSegment. We only hold michael@0: // the monitor in this block. michael@0: ReentrantMonitorAutoEnter mon(mReentrantMonitor); michael@0: michael@0: // Wait until enough raw data, end of stream or cancelled. michael@0: while (!mCanceled && mRawSegment.GetDuration() + michael@0: mSourceSegment.GetDuration() < framesToFetch && michael@0: !mEndOfStream) { michael@0: mReentrantMonitor.Wait(); michael@0: } michael@0: michael@0: if (mCanceled || mEncodingComplete) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: mSourceSegment.AppendFrom(&mRawSegment); michael@0: michael@0: // Pad |mLookahead| samples to the end of source stream to prevent lost of michael@0: // original data, the pcm duration will be calculated at rate 48K later. michael@0: if (mEndOfStream && !mEosSetInEncoder) { michael@0: mEosSetInEncoder = true; michael@0: mSourceSegment.AppendNullData(mLookahead); michael@0: } michael@0: } michael@0: michael@0: // Start encoding data. michael@0: nsAutoTArray pcm; michael@0: pcm.SetLength(GetPacketDuration() * mChannels); michael@0: AudioSegment::ChunkIterator iter(mSourceSegment); michael@0: int frameCopied = 0; michael@0: michael@0: while (!iter.IsEnded() && frameCopied < framesToFetch) { michael@0: AudioChunk chunk = *iter; michael@0: michael@0: // Chunk to the required frame size. michael@0: int frameToCopy = chunk.GetDuration(); michael@0: if (frameCopied + frameToCopy > framesToFetch) { michael@0: frameToCopy = framesToFetch - frameCopied; michael@0: } michael@0: michael@0: if (!chunk.IsNull()) { michael@0: // Append the interleaved data to the end of pcm buffer. michael@0: AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels, michael@0: pcm.Elements() + frameCopied * mChannels); michael@0: } else { michael@0: memset(pcm.Elements() + frameCopied * mChannels, 0, michael@0: frameToCopy * mChannels * sizeof(AudioDataValue)); michael@0: } michael@0: michael@0: frameCopied += frameToCopy; michael@0: iter.Next(); michael@0: } michael@0: michael@0: nsRefPtr audiodata = new EncodedFrame(); michael@0: audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME); michael@0: int framesInPCM = frameCopied; michael@0: if (mResampler) { michael@0: nsAutoTArray resamplingDest; michael@0: // We want to consume all the input data, so we slightly oversize the michael@0: // resampled data buffer so we can fit the output data in. We cannot really michael@0: // predict the output frame count at each call. michael@0: uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1; michael@0: uint32_t inframes = frameCopied; michael@0: michael@0: resamplingDest.SetLength(outframes * mChannels); michael@0: michael@0: #if MOZ_SAMPLE_TYPE_S16 michael@0: short* in = reinterpret_cast(pcm.Elements()); michael@0: short* out = reinterpret_cast(resamplingDest.Elements()); michael@0: speex_resampler_process_interleaved_int(mResampler, in, &inframes, michael@0: out, &outframes); michael@0: #else michael@0: float* in = reinterpret_cast(pcm.Elements()); michael@0: float* out = reinterpret_cast(resamplingDest.Elements()); michael@0: speex_resampler_process_interleaved_float(mResampler, in, &inframes, michael@0: out, &outframes); michael@0: #endif michael@0: michael@0: MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length()); michael@0: PodCopy(pcm.Elements(), mResampledLeftover.Elements(), michael@0: mResampledLeftover.Length()); michael@0: michael@0: uint32_t outframesToCopy = std::min(outframes, michael@0: static_cast(GetPacketDuration() - framesLeft)); michael@0: michael@0: MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >= michael@0: outframesToCopy * mChannels); michael@0: PodCopy(pcm.Elements() + mResampledLeftover.Length(), michael@0: resamplingDest.Elements(), outframesToCopy * mChannels); michael@0: int frameLeftover = outframes - outframesToCopy; michael@0: mResampledLeftover.SetLength(frameLeftover * mChannels); michael@0: PodCopy(mResampledLeftover.Elements(), michael@0: resamplingDest.Elements() + outframesToCopy * mChannels, michael@0: mResampledLeftover.Length()); michael@0: // This is always at 48000Hz. michael@0: framesInPCM = framesLeft + outframesToCopy; michael@0: audiodata->SetDuration(framesInPCM); michael@0: } else { michael@0: // The ogg time stamping and pre-skip is always timed at 48000. michael@0: audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate)); michael@0: } michael@0: michael@0: // Remove the raw data which has been pulled to pcm buffer. michael@0: // The value of frameCopied should equal to (or smaller than, if eos) michael@0: // GetPacketDuration(). michael@0: mSourceSegment.RemoveLeading(frameCopied); michael@0: michael@0: // Has reached the end of input stream and all queued data has pulled for michael@0: // encoding. michael@0: if (mSourceSegment.GetDuration() == 0 && mEndOfStream) { michael@0: mEncodingComplete = true; michael@0: LOG("[Opus] Done encoding."); michael@0: } michael@0: michael@0: MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration()); michael@0: michael@0: // Append null data to pcm buffer if the leftover data is not enough for michael@0: // opus encoder. michael@0: if (framesInPCM < GetPacketDuration() && mEndOfStream) { michael@0: PodZero(pcm.Elements() + framesInPCM * mChannels, michael@0: (GetPacketDuration() - framesInPCM) * mChannels); michael@0: } michael@0: nsTArray frameData; michael@0: // Encode the data with Opus Encoder. michael@0: frameData.SetLength(MAX_DATA_BYTES); michael@0: // result is returned as opus error code if it is negative. michael@0: int result = 0; michael@0: #ifdef MOZ_SAMPLE_TYPE_S16 michael@0: const opus_int16* pcmBuf = static_cast(pcm.Elements()); michael@0: result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(), michael@0: frameData.Elements(), MAX_DATA_BYTES); michael@0: #else michael@0: const float* pcmBuf = static_cast(pcm.Elements()); michael@0: result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(), michael@0: frameData.Elements(), MAX_DATA_BYTES); michael@0: #endif michael@0: frameData.SetLength(result >= 0 ? result : 0); michael@0: michael@0: if (result < 0) { michael@0: LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result)); michael@0: } michael@0: if (mEncodingComplete) { michael@0: if (mResampler) { michael@0: speex_resampler_destroy(mResampler); michael@0: mResampler = nullptr; michael@0: } michael@0: mResampledLeftover.SetLength(0); michael@0: } michael@0: michael@0: audiodata->SwapInFrameData(frameData); michael@0: aData.AppendEncodedFrame(audiodata); michael@0: return result >= 0 ? NS_OK : NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: }