The Tor Browser: content/media/encoder/OpusTrackEncoder.cpp@6474c204b198 (annotated)

content/media/encoder/OpusTrackEncoder.cpp@6474c204b198 (annotated)

content/media/encoder/OpusTrackEncoder.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Wed, 31 Dec 2014 06:09:35 +0100
changeset 0: 6474c204b198
permissions: -rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include "OpusTrackEncoder.h"
 #include "nsString.h"
 #include <opus/opus.h>
 #undef LOG
 #ifdef MOZ_WIDGET_GONK
 #include <android/log.h>
 #define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
 #else
 #define LOG(args, ...)
 #endif
 namespace mozilla {
 // The Opus format supports up to 8 channels, and supports multitrack audio up
 // to 255 channels, but the current implementation supports only mono and
 // stereo, and downmixes any more than that.
 static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
 // http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
 // In section "opus_encoder_init", channels must be 1 or 2 of input signal.
 static const int MAX_CHANNELS = 2;
 // The maximum number of bytes the Opus encoder may write for one packet.
 static const int MAX_DATA_BYTES = 4096;
 // http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
 // Second paragraph, " The granule position of an audio data page is in units
 // of PCM audio samples at a fixed rate of 48 kHz."
 static const int kOpusSamplingRate = 48000;
 // The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
 static const int kFrameDurationMs  = 20;
 // The input sampling rates (Hz) that the Opus encoder accepts directly.
 // Input at any other rate will be resampled to 48 kHz.
 static const int kOpusSupportedInputSamplingRates[] =
                    {8000, 12000, 16000, 24000, 48000};
 namespace {
 // Appends aValue to aOutput one byte at a time, least significant byte first,
 // so the serialized form is little endian regardless of the host byte order.
 // T is expected to be a 16-bit or 32-bit integer type.
 template<typename T>
 static void
 SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
 {
   const uint32_t totalBits = sizeof(T) * 8;
   for (uint32_t shift = 0; shift < totalBits; shift += 8) {
     aOutput->AppendElement(static_cast<uint8_t>((aValue >> shift) & 0xff));
   }
 }
 // Writes a length-prefixed string to aOutput: the byte count of aComment as a
 // 32-bit little-endian integer, followed by the bytes of aComment themselves
 // (exactly Length() bytes, so no terminator is emitted).
 static inline void
 SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
 {
   const uint32_t byteCount = static_cast<uint32_t>(aComment.Length());
   SerializeToBuffer(byteCount, aOutput);
   aOutput->AppendElements(aComment.get(), byteCount);
 }
 // Appends the Opus identification header ("OpusHead") to aOutput.
 //   aChannelCount    - number of output channels written into the header.
 //   aPreskip         - number of samples (at 48 kHz) the decoder should
 //                      discard when starting playback.
 //   aInputSampleRate - sampling rate of the input source, in Hz.
 // All multi-byte fields are written in little-endian order.
 static void
 SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
                       uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
 {
   // Magic signature; "sizeof - 1" strips the literal's null terminator.
   static const uint8_t kSignature[] = "OpusHead";
   aOutput->AppendElements(kSignature, sizeof(kSignature) - 1);

   // Version (8 bits, unsigned); must always be 1.
   const uint8_t version = 1;
   aOutput->AppendElement(version);

   // Output channel count (8 bits, unsigned).
   aOutput->AppendElement(aChannelCount);

   // Pre-skip (16 bits, unsigned).
   SerializeToBuffer(aPreskip, aOutput);

   // Sampling rate of the input source (32 bits, unsigned).
   SerializeToBuffer(aInputSampleRate, aOutput);

   // Output gain (16 bits, signed); an encoder should set this field to zero.
   const int16_t outputGain = 0;
   SerializeToBuffer(outputGain, aOutput);

   // Channel mapping family (8 bits, unsigned); family 0 allows only 1 or 2
   // channels.
   const uint8_t mappingFamily = 0;
   aOutput->AppendElement(mappingFamily);
 }
 // Appends the Opus comment header ("OpusTags") to aOutput. The layout is:
 //   magic signature (without null terminator)
 //   vendor string length (32 bits, unsigned, little endian), vendor string
 //   comment count (32 bits, unsigned, little endian)
 //   for each comment: length (32 bits, unsigned, little endian), string
 static void
 SerializeOpusCommentHeader(const nsCString& aVendor,
                            const nsTArray<nsCString>& aComments,
                            nsTArray<uint8_t>* aOutput)
 {
   // Magic signature; "sizeof - 1" strips the literal's null terminator.
   static const uint8_t kSignature[] = "OpusTags";
   aOutput->AppendElements(kSignature, sizeof(kSignature) - 1);

   // Length-prefixed vendor string.
   SerializeToBuffer(aVendor, aOutput);

   // Comment count, followed by each length-prefixed comment in order.
   const uint32_t commentCount = static_cast<uint32_t>(aComments.Length());
   SerializeToBuffer(commentCount, aOutput);
   for (uint32_t index = 0; index < aComments.Length(); ++index) {
     SerializeToBuffer(aComments[index], aOutput);
   }
 }
 }  // Anonymous namespace.
 // Constructs an encoder with no Opus encoder and no resampler attached, and a
 // lookahead of zero. The Opus encoder (and, if needed, the resampler) are
 // created later by Init().
 OpusTrackEncoder::OpusTrackEncoder()
   : AudioTrackEncoder()
   , mEncoder(nullptr)
   , mLookahead(0)
   , mResampler(nullptr)
 {
 }
 // Releases the Opus encoder and the Speex resampler, if either was created.
 OpusTrackEncoder::~OpusTrackEncoder()
 {
   // The encoder only exists once Init() has created it.
   if (mEncoder != nullptr) {
     opus_encoder_destroy(mEncoder);
   }

   // The resampler exists only when the input rate required resampling and
   // encoding has not already torn it down.
   if (mResampler != nullptr) {
     speex_resampler_destroy(mResampler);
     mResampler = nullptr;
   }
 }
 // Prepares the encoder for input with aChannels channels sampled at
 // aSamplingRate Hz, then wakes every thread waiting on the monitor.
 //
 // aChannels must be in [1, MAX_SUPPORTED_AUDIO_CHANNELS]; any count above
 // MAX_CHANNELS is encoded as MAX_CHANNELS channels. aSamplingRate must be
 // positive; a rate that is not listed in kOpusSupportedInputSamplingRates is
 // accepted and resampled to kOpusSamplingRate.
 //
 // Returns NS_OK on success, or NS_ERROR_FAILURE if an argument is invalid or
 // if the resampler or the Opus encoder could not be created.
 nsresult
 OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
 {
   // This monitor is used to wake up other methods that are waiting for the
   // encoder to be completely initialized.
   ReentrantMonitorAutoEnter mon(mReentrantMonitor);

   NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
                  NS_ERROR_FAILURE);

   // This version of the encoder API only supports 1 or 2 channels, so keep
   // mChannels at most 2 and let InterleaveTrackData downmix the pcm data.
   mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;

   // Reject a non-positive rate before it is handed to the resampler or
   // stored, so that an invalid value never reaches speex_resampler_init().
   NS_ENSURE_TRUE(aSamplingRate > 0, NS_ERROR_FAILURE);

   // According to www.opus-codec.org, creating an opus encoder requires the
   // sampling rate of the source signal to be one of 8000, 12000, 16000, 24000
   // or 48000. If this constraint is not satisfied, the input is resampled to
   // 48kHz.
   bool rateSupported = false;
   for (uint32_t i = 0; i < ArrayLength(kOpusSupportedInputSamplingRates); ++i) {
     if (kOpusSupportedInputSamplingRates[i] == aSamplingRate) {
       rateSupported = true;
       break;
     }
   }

   if (!rateSupported) {
     int resamplerError = RESAMPLER_ERR_SUCCESS;
     mResampler = speex_resampler_init(mChannels,
                                       aSamplingRate,
                                       kOpusSamplingRate,
                                       SPEEX_RESAMPLER_QUALITY_DEFAULT,
                                       &resamplerError);
     if (resamplerError != RESAMPLER_ERR_SUCCESS) {
       return NS_ERROR_FAILURE;
     }
   }
   mSamplingRate = aSamplingRate;

   // GetOutputSampleRate() depends on mResampler and mSamplingRate, both of
   // which are settled by this point.
   int opusError = 0;
   mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
                                  OPUS_APPLICATION_AUDIO, &opusError);

   mInitialized = (opusError == OPUS_OK);

   // Wake the waiters whether or not creation succeeded; they re-check
   // mInitialized themselves.
   mReentrantMonitor.NotifyAll();

   return opusError == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
 }
 // Returns the sampling rate (Hz) of the audio handed to the Opus encoder:
 // kOpusSamplingRate when a resampler is in use, the input rate otherwise.
 int
 OpusTrackEncoder::GetOutputSampleRate()
 {
   if (mResampler) {
     return kOpusSamplingRate;
   }
   return mSamplingRate;
 }
 // Returns the number of frames in one Opus packet, i.e. kFrameDurationMs
 // milliseconds of audio at the encoder's sampling rate.
 int
 OpusTrackEncoder::GetPacketDuration()
 {
   const int framesPerSecond = GetOutputSampleRate();
   return framesPerSecond * kFrameDurationMs / 1000;
 }
 // Builds the Opus stream metadata (identification and comment headers).
 //
 // Blocks until the encoder has been initialized or the encoding has been
 // cancelled. Returns nullptr when the encoding was cancelled or has already
 // completed; otherwise returns a new OpusMetadata whose mIdHeader and
 // mCommentHeader have been filled in.
 already_AddRefed<TrackMetadataBase>
 OpusTrackEncoder::GetMetadata()
 {
   {
     // Hold the monitor only while waiting for mInitialized to be set.
     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
     while (!(mCanceled || mInitialized)) {
       mReentrantMonitor.Wait();
     }
   }

   if (mCanceled || mEncodingComplete) {
     return nullptr;
   }

   // Ask the encoder for its lookahead; a failed query counts as no lookahead.
   mLookahead = 0;
   const int ctlResult = opus_encoder_ctl(mEncoder,
                                          OPUS_GET_LOOKAHEAD(&mLookahead));
   if (ctlResult != OPUS_OK) {
     mLookahead = 0;
   }

   // The ogg time stamping and pre-skip are always timed at 48000, so scale the
   // lookahead from the encoder's rate.
   const int rateRatio = kOpusSamplingRate / GetOutputSampleRate();
   nsRefPtr<OpusMetadata> metadata = new OpusMetadata();
   SerializeOpusIdHeader(mChannels, mLookahead * rateRatio, mSamplingRate,
                         &metadata->mIdHeader);

   // Vendor string is the Opus library version; one ENCODER comment follows.
   nsCString vendorString;
   vendorString.AppendASCII(opus_get_version_string());
   nsTArray<nsCString> userComments;
   userComments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
   SerializeOpusCommentHeader(vendorString, userComments,
                              &metadata->mCommentHeader);

   return metadata.forget();
 }
 // Encodes one Opus packet from the queued source audio and appends the
 // resulting frame to aData.
 //
 // The call blocks on mReentrantMonitor until the encoder is initialized, and
 // then until enough source frames are queued, the end of the stream has been
 // signalled, or the encoding has been cancelled. The fetched frames are
 // interleaved into a pcm buffer, resampled when mResampler is set, padded
 // with zeros to a full packet at end of stream, and passed to the Opus
 // encoder.
 //
 // aData receives the encoded frame. A frame is appended even when the Opus
 // encoder reports an error; in that case its payload is empty.
 //
 // Returns NS_OK on success. Returns NS_ERROR_FAILURE when the encoding was
 // cancelled or had already completed, or when the Opus encoder failed.
 nsresult
 OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
 {
   {
     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
     // Wait until initialized or cancelled.
     while (!mCanceled && !mInitialized) {
       mReentrantMonitor.Wait();
     }
     if (mCanceled || mEncodingComplete) {
       return NS_ERROR_FAILURE;
     }
   }
   // The calculations below rely on mInitialized being true.
   MOZ_ASSERT(mInitialized);
   // Resampled frames left over from the previous call, i.e. frames that did
   // not fit into the previous Opus packet.
   const int framesLeft = mResampledLeftover.Length() / mChannels;
   // When framesLeft is 0 the original comment states that the integer
   // division used to compute framesToFetch loses no precision.
   // NOTE(review): with a 20 ms packet at 48 kHz this holds only when
   // mSamplingRate is a multiple of 50 -- confirm.
   // When framesLeft > 0, one extra frame is fetched so that there will be at
   // least the required number of frames after re-sampling.
   const int frameRoundUp = framesLeft ? 1 : 0;
   MOZ_ASSERT(GetPacketDuration() >= framesLeft);
   // Try to fetch m frames such that there will be n frames
   // where (n + framesLeft) >= GetPacketDuration() after re-sampling.
   // Without a resampler exactly one packet duration is fetched.
   const int framesToFetch = !mResampler ? GetPacketDuration()
     : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
       + frameRoundUp;
   {
     // Move all the samples from mRawSegment to mSourceSegment. We only hold
     // the monitor in this block.
     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
     // Wait until enough raw data, end of stream or cancelled.
     while (!mCanceled && mRawSegment.GetDuration() +
         mSourceSegment.GetDuration() < framesToFetch &&
         !mEndOfStream) {
       mReentrantMonitor.Wait();
     }
     if (mCanceled || mEncodingComplete) {
       return NS_ERROR_FAILURE;
     }
     mSourceSegment.AppendFrom(&mRawSegment);
     // Pad |mLookahead| samples to the end of the source stream to prevent loss
     // of original data; the pcm duration will be calculated at rate 48K later.
     // mEosSetInEncoder ensures the padding is appended only once.
     if (mEndOfStream && !mEosSetInEncoder) {
       mEosSetInEncoder = true;
       mSourceSegment.AppendNullData(mLookahead);
     }
   }
   // Start encoding data.
   // NOTE(review): pcm is sized for one packet, but the copy loop below can
   // write up to framesToFetch frames, which exceeds GetPacketDuration() when
   // mSamplingRate is above kOpusSamplingRate. This presumably relies on the
   // 9600-element capacity of the auto array -- confirm.
   nsAutoTArray<AudioDataValue, 9600> pcm;
   pcm.SetLength(GetPacketDuration() * mChannels);
   AudioSegment::ChunkIterator iter(mSourceSegment);
   // Number of source frames copied into pcm so far.
   int frameCopied = 0;
   while (!iter.IsEnded() && frameCopied < framesToFetch) {
     AudioChunk chunk = *iter;
     // Clamp the chunk to the number of frames still required.
     int frameToCopy = chunk.GetDuration();
     if (frameCopied + frameToCopy > framesToFetch) {
       frameToCopy = framesToFetch - frameCopied;
     }
     if (!chunk.IsNull()) {
       // Append the interleaved data to the end of pcm buffer.
       AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
         pcm.Elements() + frameCopied * mChannels);
     } else {
       // A null chunk carries no samples; write silence in its place.
       memset(pcm.Elements() + frameCopied * mChannels, 0,
              frameToCopy * mChannels * sizeof(AudioDataValue));
     }
     frameCopied += frameToCopy;
     iter.Next();
   }
   nsRefPtr<EncodedFrame> audiodata = new EncodedFrame();
   audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
   // Number of frames in pcm that will be handed to the encoder.
   int framesInPCM = frameCopied;
   if (mResampler) {
     nsAutoTArray<AudioDataValue, 9600> resamplingDest;
     // We want to consume all the input data, so we slightly oversize the
     // resampled data buffer so we can fit the output data in. We cannot really
     // predict the output frame count at each call.
     uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
     uint32_t inframes = frameCopied;
     resamplingDest.SetLength(outframes * mChannels);
     // NOTE(review): this test uses #if while the encode call further down uses
     // #ifdef on the same macro; the resampler's return value is also ignored.
 #if MOZ_SAMPLE_TYPE_S16
     short* in = reinterpret_cast<short*>(pcm.Elements());
     short* out = reinterpret_cast<short*>(resamplingDest.Elements());
     speex_resampler_process_interleaved_int(mResampler, in, &inframes,
                                                         out, &outframes);
 #else
     float* in = reinterpret_cast<float*>(pcm.Elements());
     float* out = reinterpret_cast<float*>(resamplingDest.Elements());
     speex_resampler_process_interleaved_float(mResampler, in, &inframes,
                                                           out, &outframes);
 #endif
     // Rebuild pcm as: leftover frames from the previous call, followed by as
     // many newly resampled frames as fit into one packet.
     MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
     PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
         mResampledLeftover.Length());
     uint32_t outframesToCopy = std::min(outframes,
         static_cast<uint32_t>(GetPacketDuration() - framesLeft));
     MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
         outframesToCopy * mChannels);
     PodCopy(pcm.Elements() + mResampledLeftover.Length(),
         resamplingDest.Elements(), outframesToCopy * mChannels);
     // Keep the resampled frames that did not fit for the next call.
     int frameLeftover = outframes - outframesToCopy;
     mResampledLeftover.SetLength(frameLeftover * mChannels);
     PodCopy(mResampledLeftover.Elements(),
         resamplingDest.Elements() + outframesToCopy * mChannels,
         mResampledLeftover.Length());
     // This is always at 48000Hz.
     framesInPCM = framesLeft + outframesToCopy;
     audiodata->SetDuration(framesInPCM);
   } else {
     // The ogg time stamping and pre-skip is always timed at 48000, so scale
     // the frame count from the input rate.
     audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
   }
   // Remove the raw data which has been pulled to pcm buffer.
   // Without resampling, frameCopied equals GetPacketDuration(), or is smaller
   // than it at end of stream.
   mSourceSegment.RemoveLeading(frameCopied);
   // Has reached the end of input stream and all queued data has been pulled
   // for encoding.
   if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
     mEncodingComplete = true;
     LOG("[Opus] Done encoding.");
   }
   MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());
   // Append null data to pcm buffer if the leftover data is not enough for
   // opus encoder.
   if (framesInPCM < GetPacketDuration() && mEndOfStream) {
     PodZero(pcm.Elements() + framesInPCM * mChannels,
         (GetPacketDuration() - framesInPCM) * mChannels);
   }
   nsTArray<uint8_t> frameData;
   // Encode the data with Opus Encoder.
   frameData.SetLength(MAX_DATA_BYTES);
   // result is returned as opus error code if it is negative.
   int result = 0;
 #ifdef MOZ_SAMPLE_TYPE_S16
   const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
   result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
                        frameData.Elements(), MAX_DATA_BYTES);
 #else
   const float* pcmBuf = static_cast<float*>(pcm.Elements());
   result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
                              frameData.Elements(), MAX_DATA_BYTES);
 #endif
   // Shrink the buffer to the encoded size, or to empty on error.
   frameData.SetLength(result >= 0 ? result : 0);
   if (result < 0) {
     LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
   }
   // Once encoding is complete the resampler is no longer needed.
   // NOTE(review): any frames still held in mResampledLeftover are discarded
   // here without being encoded -- confirm this is intended.
   if (mEncodingComplete) {
     if (mResampler) {
       speex_resampler_destroy(mResampler);
       mResampler = nullptr;
     }
     mResampledLeftover.SetLength(0);
   }
   // Hand the payload to the frame and publish the frame, even on error.
   audiodata->SwapInFrameData(frameData);
   aData.AppendEncodedFrame(audiodata);
   return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
 }
 }

The Tor Browser / annotate

content/media/encoder/OpusTrackEncoder.cpp@6474c204b198 (annotated)

content/media/encoder/OpusTrackEncoder.cpp