The Tor Browser: content/media/encoder/OpusTrackEncoder.cpp@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/

     2 /* This Source Code Form is subject to the terms of the Mozilla Public

     3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,

     4  * You can obtain one at http://mozilla.org/MPL/2.0/. */

     5 #include "OpusTrackEncoder.h"

     6 #include "nsString.h"

     8 #include <opus/opus.h>

    // Logging helper for this file. Any earlier definition of LOG is discarded.
    // When MOZ_WIDGET_GONK is defined, messages go to the Android log at INFO
    // priority under the "MediaEncoder" tag; otherwise LOG() expands to nothing.
    #undef LOG
    #ifdef MOZ_WIDGET_GONK
    #include <android/log.h>
    // No trailing semicolon in the expansion: every call site supplies its own.
    #define LOG(...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", __VA_ARGS__)
    #else
    // Purely variadic so that a call passing only a format string is well-formed.
    #define LOG(...)
    #endif

    18 namespace mozilla {

    // Largest channel count accepted from the input. Opus itself can carry up to
    // 8 channels (and up to 255 with multitrack audio), but this implementation
    // only encodes mono or stereo and downmixes anything wider.
    static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;

    // Largest channel count handed to the Opus encoder. Per the
    // "opus_encoder_init" section of
    // http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
    // the input signal must have 1 or 2 channels.
    static const int MAX_CHANNELS = 2;

    // Size, in bytes, of the output buffer offered to Opus for one packet.
    static const int MAX_DATA_BYTES = 4096;

    // Granule positions of audio data pages are counted in PCM samples at a
    // fixed rate of 48 kHz; see the second paragraph of
    // http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
    static const int kOpusSamplingRate = 48000;

    // Duration of one Opus frame in milliseconds. It must be 2.5, 5, 10, 20, 40
    // or 60 ms.
    static const int kFrameDurationMs = 20;

    // Input sampling rates (Hz) that are passed to Opus unchanged. Input at any
    // other rate is resampled to 48 kHz.
    static const int kOpusSupportedInputSamplingRates[] = {
      8000, 12000, 16000, 24000, 48000
    };

    45 namespace {

    47 // An endian-neutral serialization of integers. Serializing T in little endian

    48 // format to aOutput, where T is a 16 bits or 32 bits integer.

    49 template<typename T>

    50 static void

    51 SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)

    52 {

    53   for (uint32_t i = 0; i < sizeof(T); i++) {

    54     aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));

    55   }

    56 }

    58 static inline void

    59 SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)

    60 {

    61   // Format of serializing a string to buffer is, the length of string (32 bits,

    62   // little endian), and the string.

    63   SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);

    64   aOutput->AppendElements(aComment.get(), aComment.Length());

    65 }

    68 static void

    69 SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,

    70                       uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)

    71 {

    72   // The magic signature, null terminator has to be stripped off from strings.

    73   static const uint8_t magic[] = "OpusHead";

    74   aOutput->AppendElements(magic, sizeof(magic) - 1);

    76   // The version must always be 1 (8 bits, unsigned).

    77   aOutput->AppendElement(1);

    79   // Number of output channels (8 bits, unsigned).

    80   aOutput->AppendElement(aChannelCount);

    82   // Number of samples (at 48 kHz) to discard from the decoder output when

    83   // starting playback (16 bits, unsigned, little endian).

    84   SerializeToBuffer(aPreskip, aOutput);

    86   // The sampling rate of input source (32 bits, unsigned, little endian).

    87   SerializeToBuffer(aInputSampleRate, aOutput);

    89   // Output gain, an encoder should set this field to zero (16 bits, signed,

    90   // little endian).

    91   SerializeToBuffer((int16_t)0, aOutput);

    93   // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,

    94   // unsigned).

    95   aOutput->AppendElement(0);

    96 }

    98 static void

    99 SerializeOpusCommentHeader(const nsCString& aVendor,

   100                            const nsTArray<nsCString>& aComments,

   101                            nsTArray<uint8_t>* aOutput)

   102 {

   103   // The magic signature, null terminator has to be stripped off.

   104   static const uint8_t magic[] = "OpusTags";

   105   aOutput->AppendElements(magic, sizeof(magic) - 1);

   107   // The vendor; Should append in the following order:

   108   // vendor string length (32 bits, unsigned, little endian)

   109   // vendor string.

   110   SerializeToBuffer(aVendor, aOutput);

   112   // Add comments; Should append in the following order:

   113   // comment list length (32 bits, unsigned, little endian)

   114   // comment #0 string length (32 bits, unsigned, little endian)

   115   // comment #0 string

   116   // comment #1 string length (32 bits, unsigned, little endian)

   117   // comment #1 string ...

   118   SerializeToBuffer((uint32_t)aComments.Length(), aOutput);

   119   for (uint32_t i = 0; i < aComments.Length(); ++i) {

   120     SerializeToBuffer(aComments[i], aOutput);

   121   }

   122 }

   124 }  // Anonymous namespace.

   126 OpusTrackEncoder::OpusTrackEncoder()

   127   : AudioTrackEncoder()

   128   , mEncoder(nullptr)

   129   , mLookahead(0)

   130   , mResampler(nullptr)

   131 {

   132 }

   134 OpusTrackEncoder::~OpusTrackEncoder()

   135 {

   136   if (mEncoder) {

   137     opus_encoder_destroy(mEncoder);

   138   }

   139   if (mResampler) {

   140     speex_resampler_destroy(mResampler);

   141     mResampler = nullptr;

   142   }

   143 }

   145 nsresult

   146 OpusTrackEncoder::Init(int aChannels, int aSamplingRate)

   147 {

   148   // This monitor is used to wake up other methods that are waiting for encoder

   149   // to be completely initialized.

   150   ReentrantMonitorAutoEnter mon(mReentrantMonitor);

   152   NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),

   153                  NS_ERROR_FAILURE);

   155   // This version of encoder API only support 1 or 2 channels,

   156   // So set the mChannels less or equal 2 and

   157   // let InterleaveTrackData downmix pcm data.

   158   mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;

   160   // According to www.opus-codec.org, creating an opus encoder requires the

   161   // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or

   162   // 48000. If this constraint is not satisfied, we resample the input to 48kHz.

   163   nsTArray<int> supportedSamplingRates;

   164   supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,

   165                          ArrayLength(kOpusSupportedInputSamplingRates));

   166   if (!supportedSamplingRates.Contains(aSamplingRate)) {

   167     int error;

   168     mResampler = speex_resampler_init(mChannels,

   169                                       aSamplingRate,

   170                                       kOpusSamplingRate,

   171                                       SPEEX_RESAMPLER_QUALITY_DEFAULT,

   172                                       &error);

   174     if (error != RESAMPLER_ERR_SUCCESS) {

   175       return NS_ERROR_FAILURE;

   176     }

   177   }

   178   mSamplingRate = aSamplingRate;

   179   NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);

   181   int error = 0;

   182   mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,

   183                                  OPUS_APPLICATION_AUDIO, &error);

   185   mInitialized = (error == OPUS_OK);

   187   mReentrantMonitor.NotifyAll();

   189   return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;

   190 }

   192 int

   193 OpusTrackEncoder::GetOutputSampleRate()

   194 {

   195   return mResampler ? kOpusSamplingRate : mSamplingRate;

   196 }

   198 int

   199 OpusTrackEncoder::GetPacketDuration()

   200 {

   201   return GetOutputSampleRate() * kFrameDurationMs / 1000;

   202 }

   204 already_AddRefed<TrackMetadataBase>

   205 OpusTrackEncoder::GetMetadata()

   206 {

   207   {

   208     // Wait if mEncoder is not initialized.

   209     ReentrantMonitorAutoEnter mon(mReentrantMonitor);

   210     while (!mCanceled && !mInitialized) {

   211       mReentrantMonitor.Wait();

   212     }

   213   }

   215   if (mCanceled || mEncodingComplete) {

   216     return nullptr;

   217   }

   219   nsRefPtr<OpusMetadata> meta = new OpusMetadata();

   221   mLookahead = 0;

   222   int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));

   223   if (error != OPUS_OK) {

   224     mLookahead = 0;

   225   }

   227   // The ogg time stamping and pre-skip is always timed at 48000.

   228   SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /

   229                         GetOutputSampleRate()), mSamplingRate,

   230                         &meta->mIdHeader);

   232   nsCString vendor;

   233   vendor.AppendASCII(opus_get_version_string());

   235   nsTArray<nsCString> comments;

   236   comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));

   238   SerializeOpusCommentHeader(vendor, comments,

   239                              &meta->mCommentHeader);

   241   return meta.forget();

   242 }

   244 nsresult

   245 OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)

   246 {

   247   {

   248     ReentrantMonitorAutoEnter mon(mReentrantMonitor);

   249     // Wait until initialized or cancelled.

   250     while (!mCanceled && !mInitialized) {

   251       mReentrantMonitor.Wait();

   252     }

   253     if (mCanceled || mEncodingComplete) {

   254       return NS_ERROR_FAILURE;

   255     }

   256   }

   258   // calculation below depends on the truth that mInitialized is true.

   259   MOZ_ASSERT(mInitialized);

   261   // re-sampled frames left last time which didn't fit into an Opus packet duration.

   262   const int framesLeft = mResampledLeftover.Length() / mChannels;

   263   // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple

   264   // of kOpusSamplingRate. There is not precision loss in the integer division

   265   // in computing framesToFetch. If frameLeft > 0, we need to add 1 to

   266   // framesToFetch to ensure there will be at least n frames after re-sampling.

   267   const int frameRoundUp = framesLeft ? 1 : 0;

   269   MOZ_ASSERT(GetPacketDuration() >= framesLeft);

   270   // Try to fetch m frames such that there will be n frames

   271   // where (n + frameLeft) >= GetPacketDuration() after re-sampling.

   272   const int framesToFetch = !mResampler ? GetPacketDuration()

   273     : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate

   274       + frameRoundUp;

   275   {

   276     // Move all the samples from mRawSegment to mSourceSegment. We only hold

   277     // the monitor in this block.

   278     ReentrantMonitorAutoEnter mon(mReentrantMonitor);

   280     // Wait until enough raw data, end of stream or cancelled.

   281     while (!mCanceled && mRawSegment.GetDuration() +

   282         mSourceSegment.GetDuration() < framesToFetch &&

   283         !mEndOfStream) {

   284       mReentrantMonitor.Wait();

   285     }

   287     if (mCanceled || mEncodingComplete) {

   288       return NS_ERROR_FAILURE;

   289     }

   291     mSourceSegment.AppendFrom(&mRawSegment);

   293     // Pad |mLookahead| samples to the end of source stream to prevent lost of

   294     // original data, the pcm duration will be calculated at rate 48K later.

   295     if (mEndOfStream && !mEosSetInEncoder) {

   296       mEosSetInEncoder = true;

   297       mSourceSegment.AppendNullData(mLookahead);

   298     }

   299   }

   301   // Start encoding data.

   302   nsAutoTArray<AudioDataValue, 9600> pcm;

   303   pcm.SetLength(GetPacketDuration() * mChannels);

   304   AudioSegment::ChunkIterator iter(mSourceSegment);

   305   int frameCopied = 0;

   307   while (!iter.IsEnded() && frameCopied < framesToFetch) {

   308     AudioChunk chunk = *iter;

   310     // Chunk to the required frame size.

   311     int frameToCopy = chunk.GetDuration();

   312     if (frameCopied + frameToCopy > framesToFetch) {

   313       frameToCopy = framesToFetch - frameCopied;

   314     }

   316     if (!chunk.IsNull()) {

   317       // Append the interleaved data to the end of pcm buffer.

   318       AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,

   319         pcm.Elements() + frameCopied * mChannels);

   320     } else {

   321       memset(pcm.Elements() + frameCopied * mChannels, 0,

   322              frameToCopy * mChannels * sizeof(AudioDataValue));

   323     }

   325     frameCopied += frameToCopy;

   326     iter.Next();

   327   }

   329   nsRefPtr<EncodedFrame> audiodata = new EncodedFrame();

   330   audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);

   331   int framesInPCM = frameCopied;

   332   if (mResampler) {

   333     nsAutoTArray<AudioDataValue, 9600> resamplingDest;

   334     // We want to consume all the input data, so we slightly oversize the

   335     // resampled data buffer so we can fit the output data in. We cannot really

   336     // predict the output frame count at each call.

   337     uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;

   338     uint32_t inframes = frameCopied;

   340     resamplingDest.SetLength(outframes * mChannels);

   342 #if MOZ_SAMPLE_TYPE_S16

   343     short* in = reinterpret_cast<short*>(pcm.Elements());

   344     short* out = reinterpret_cast<short*>(resamplingDest.Elements());

   345     speex_resampler_process_interleaved_int(mResampler, in, &inframes,

   346                                                         out, &outframes);

   347 #else

   348     float* in = reinterpret_cast<float*>(pcm.Elements());

   349     float* out = reinterpret_cast<float*>(resamplingDest.Elements());

   350     speex_resampler_process_interleaved_float(mResampler, in, &inframes,

   351                                                           out, &outframes);

   352 #endif

   354     MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());

   355     PodCopy(pcm.Elements(), mResampledLeftover.Elements(),

   356         mResampledLeftover.Length());

   358     uint32_t outframesToCopy = std::min(outframes,

   359         static_cast<uint32_t>(GetPacketDuration() - framesLeft));

   361     MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=

   362         outframesToCopy * mChannels);

   363     PodCopy(pcm.Elements() + mResampledLeftover.Length(),

   364         resamplingDest.Elements(), outframesToCopy * mChannels);

   365     int frameLeftover = outframes - outframesToCopy;

   366     mResampledLeftover.SetLength(frameLeftover * mChannels);

   367     PodCopy(mResampledLeftover.Elements(),

   368         resamplingDest.Elements() + outframesToCopy * mChannels,

   369         mResampledLeftover.Length());

   370     // This is always at 48000Hz.

   371     framesInPCM = framesLeft + outframesToCopy;

   372     audiodata->SetDuration(framesInPCM);

   373   } else {

   374     // The ogg time stamping and pre-skip is always timed at 48000.

   375     audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));

   376   }

   378   // Remove the raw data which has been pulled to pcm buffer.

   379   // The value of frameCopied should equal to (or smaller than, if eos)

   380   // GetPacketDuration().

   381   mSourceSegment.RemoveLeading(frameCopied);

   383   // Has reached the end of input stream and all queued data has pulled for

   384   // encoding.

   385   if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {

   386     mEncodingComplete = true;

   387     LOG("[Opus] Done encoding.");

   388   }

   390   MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());

   392   // Append null data to pcm buffer if the leftover data is not enough for

   393   // opus encoder.

   394   if (framesInPCM < GetPacketDuration() && mEndOfStream) {

   395     PodZero(pcm.Elements() + framesInPCM * mChannels,

   396         (GetPacketDuration() - framesInPCM) * mChannels);

   397   }

   398   nsTArray<uint8_t> frameData;

   399   // Encode the data with Opus Encoder.

   400   frameData.SetLength(MAX_DATA_BYTES);

   401   // result is returned as opus error code if it is negative.

   402   int result = 0;

   403 #ifdef MOZ_SAMPLE_TYPE_S16

   404   const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());

   405   result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),

   406                        frameData.Elements(), MAX_DATA_BYTES);

   407 #else

   408   const float* pcmBuf = static_cast<float*>(pcm.Elements());

   409   result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),

   410                              frameData.Elements(), MAX_DATA_BYTES);

   411 #endif

   412   frameData.SetLength(result >= 0 ? result : 0);

   414   if (result < 0) {

   415     LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));

   416   }

   417   if (mEncodingComplete) {

   418     if (mResampler) {

   419       speex_resampler_destroy(mResampler);

   420       mResampler = nullptr;

   421     }

   422     mResampledLeftover.SetLength(0);

   423   }

   425   audiodata->SwapInFrameData(frameData);

   426   aData.AppendEncodedFrame(audiodata);

   427   return result >= 0 ? NS_OK : NS_ERROR_FAILURE;

   428 }

   430 }

The Tor Browser / file revision

content/media/encoder/OpusTrackEncoder.cpp@6474c204b198

content/media/encoder/OpusTrackEncoder.cpp