content/media/encoder/OpusTrackEncoder.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
     4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "OpusTrackEncoder.h"
     6 #include "nsString.h"
     8 #include <opus/opus.h>
    10 #undef LOG
    11 #ifdef MOZ_WIDGET_GONK
    12 #include <android/log.h>
    13 #define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
    14 #else
    15 #define LOG(args, ...)
    16 #endif
    18 namespace mozilla {
    20 // The Opus format supports up to 8 channels, and supports multitrack audio up
    21 // to 255 channels, but the current implementation supports only mono and
    22 // stereo, and downmixes any more than that.
    23 static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
    25 // http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
    26 // In section "opus_encoder_init", channels must be 1 or 2 of input signal.
    27 static const int MAX_CHANNELS = 2;
    29 // A maximum data bytes for Opus to encode.
    30 static const int MAX_DATA_BYTES = 4096;
    32 // http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
    33 // Second paragraph, " The granule position of an audio data page is in units
    34 // of PCM audio samples at a fixed rate of 48 kHz."
    35 static const int kOpusSamplingRate = 48000;
    37 // The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
    38 static const int kFrameDurationMs  = 20;
    40 // The supported sampling rate of input signal (Hz),
    41 // must be one of the following. Will resampled to 48kHz otherwise.
    42 static const int kOpusSupportedInputSamplingRates[] =
    43                    {8000, 12000, 16000, 24000, 48000};
    45 namespace {
    47 // An endian-neutral serialization of integers. Serializing T in little endian
    48 // format to aOutput, where T is a 16 bits or 32 bits integer.
    49 template<typename T>
    50 static void
    51 SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
    52 {
    53   for (uint32_t i = 0; i < sizeof(T); i++) {
    54     aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
    55   }
    56 }
    58 static inline void
    59 SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
    60 {
    61   // Format of serializing a string to buffer is, the length of string (32 bits,
    62   // little endian), and the string.
    63   SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
    64   aOutput->AppendElements(aComment.get(), aComment.Length());
    65 }
    68 static void
    69 SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
    70                       uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
    71 {
    72   // The magic signature, null terminator has to be stripped off from strings.
    73   static const uint8_t magic[] = "OpusHead";
    74   aOutput->AppendElements(magic, sizeof(magic) - 1);
    76   // The version must always be 1 (8 bits, unsigned).
    77   aOutput->AppendElement(1);
    79   // Number of output channels (8 bits, unsigned).
    80   aOutput->AppendElement(aChannelCount);
    82   // Number of samples (at 48 kHz) to discard from the decoder output when
    83   // starting playback (16 bits, unsigned, little endian).
    84   SerializeToBuffer(aPreskip, aOutput);
    86   // The sampling rate of input source (32 bits, unsigned, little endian).
    87   SerializeToBuffer(aInputSampleRate, aOutput);
    89   // Output gain, an encoder should set this field to zero (16 bits, signed,
    90   // little endian).
    91   SerializeToBuffer((int16_t)0, aOutput);
    93   // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
    94   // unsigned).
    95   aOutput->AppendElement(0);
    96 }
    98 static void
    99 SerializeOpusCommentHeader(const nsCString& aVendor,
   100                            const nsTArray<nsCString>& aComments,
   101                            nsTArray<uint8_t>* aOutput)
   102 {
   103   // The magic signature, null terminator has to be stripped off.
   104   static const uint8_t magic[] = "OpusTags";
   105   aOutput->AppendElements(magic, sizeof(magic) - 1);
   107   // The vendor; Should append in the following order:
   108   // vendor string length (32 bits, unsigned, little endian)
   109   // vendor string.
   110   SerializeToBuffer(aVendor, aOutput);
   112   // Add comments; Should append in the following order:
   113   // comment list length (32 bits, unsigned, little endian)
   114   // comment #0 string length (32 bits, unsigned, little endian)
   115   // comment #0 string
   116   // comment #1 string length (32 bits, unsigned, little endian)
   117   // comment #1 string ...
   118   SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
   119   for (uint32_t i = 0; i < aComments.Length(); ++i) {
   120     SerializeToBuffer(aComments[i], aOutput);
   121   }
   122 }
   124 }  // Anonymous namespace.
   126 OpusTrackEncoder::OpusTrackEncoder()
   127   : AudioTrackEncoder()
   128   , mEncoder(nullptr)
   129   , mLookahead(0)
   130   , mResampler(nullptr)
   131 {
   132 }
   134 OpusTrackEncoder::~OpusTrackEncoder()
   135 {
   136   if (mEncoder) {
   137     opus_encoder_destroy(mEncoder);
   138   }
   139   if (mResampler) {
   140     speex_resampler_destroy(mResampler);
   141     mResampler = nullptr;
   142   }
   143 }
   145 nsresult
   146 OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
   147 {
   148   // This monitor is used to wake up other methods that are waiting for encoder
   149   // to be completely initialized.
   150   ReentrantMonitorAutoEnter mon(mReentrantMonitor);
   152   NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
   153                  NS_ERROR_FAILURE);
   155   // This version of encoder API only support 1 or 2 channels,
   156   // So set the mChannels less or equal 2 and
   157   // let InterleaveTrackData downmix pcm data.
   158   mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
   160   // According to www.opus-codec.org, creating an opus encoder requires the
   161   // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
   162   // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
   163   nsTArray<int> supportedSamplingRates;
   164   supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
   165                          ArrayLength(kOpusSupportedInputSamplingRates));
   166   if (!supportedSamplingRates.Contains(aSamplingRate)) {
   167     int error;
   168     mResampler = speex_resampler_init(mChannels,
   169                                       aSamplingRate,
   170                                       kOpusSamplingRate,
   171                                       SPEEX_RESAMPLER_QUALITY_DEFAULT,
   172                                       &error);
   174     if (error != RESAMPLER_ERR_SUCCESS) {
   175       return NS_ERROR_FAILURE;
   176     }
   177   }
   178   mSamplingRate = aSamplingRate;
   179   NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
   181   int error = 0;
   182   mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
   183                                  OPUS_APPLICATION_AUDIO, &error);
   185   mInitialized = (error == OPUS_OK);
   187   mReentrantMonitor.NotifyAll();
   189   return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
   190 }
   192 int
   193 OpusTrackEncoder::GetOutputSampleRate()
   194 {
   195   return mResampler ? kOpusSamplingRate : mSamplingRate;
   196 }
   198 int
   199 OpusTrackEncoder::GetPacketDuration()
   200 {
   201   return GetOutputSampleRate() * kFrameDurationMs / 1000;
   202 }
   204 already_AddRefed<TrackMetadataBase>
   205 OpusTrackEncoder::GetMetadata()
   206 {
   207   {
   208     // Wait if mEncoder is not initialized.
   209     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
   210     while (!mCanceled && !mInitialized) {
   211       mReentrantMonitor.Wait();
   212     }
   213   }
   215   if (mCanceled || mEncodingComplete) {
   216     return nullptr;
   217   }
   219   nsRefPtr<OpusMetadata> meta = new OpusMetadata();
   221   mLookahead = 0;
   222   int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
   223   if (error != OPUS_OK) {
   224     mLookahead = 0;
   225   }
   227   // The ogg time stamping and pre-skip is always timed at 48000.
   228   SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
   229                         GetOutputSampleRate()), mSamplingRate,
   230                         &meta->mIdHeader);
   232   nsCString vendor;
   233   vendor.AppendASCII(opus_get_version_string());
   235   nsTArray<nsCString> comments;
   236   comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
   238   SerializeOpusCommentHeader(vendor, comments,
   239                              &meta->mCommentHeader);
   241   return meta.forget();
   242 }
   244 nsresult
   245 OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
   246 {
   247   {
   248     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
   249     // Wait until initialized or cancelled.
   250     while (!mCanceled && !mInitialized) {
   251       mReentrantMonitor.Wait();
   252     }
   253     if (mCanceled || mEncodingComplete) {
   254       return NS_ERROR_FAILURE;
   255     }
   256   }
   258   // calculation below depends on the truth that mInitialized is true.
   259   MOZ_ASSERT(mInitialized);
   261   // re-sampled frames left last time which didn't fit into an Opus packet duration.
   262   const int framesLeft = mResampledLeftover.Length() / mChannels;
   263   // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
   264   // of kOpusSamplingRate. There is not precision loss in the integer division
   265   // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
   266   // framesToFetch to ensure there will be at least n frames after re-sampling.
   267   const int frameRoundUp = framesLeft ? 1 : 0;
   269   MOZ_ASSERT(GetPacketDuration() >= framesLeft);
   270   // Try to fetch m frames such that there will be n frames
   271   // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
   272   const int framesToFetch = !mResampler ? GetPacketDuration()
   273     : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
   274       + frameRoundUp;
   275   {
   276     // Move all the samples from mRawSegment to mSourceSegment. We only hold
   277     // the monitor in this block.
   278     ReentrantMonitorAutoEnter mon(mReentrantMonitor);
   280     // Wait until enough raw data, end of stream or cancelled.
   281     while (!mCanceled && mRawSegment.GetDuration() +
   282         mSourceSegment.GetDuration() < framesToFetch &&
   283         !mEndOfStream) {
   284       mReentrantMonitor.Wait();
   285     }
   287     if (mCanceled || mEncodingComplete) {
   288       return NS_ERROR_FAILURE;
   289     }
   291     mSourceSegment.AppendFrom(&mRawSegment);
   293     // Pad |mLookahead| samples to the end of source stream to prevent lost of
   294     // original data, the pcm duration will be calculated at rate 48K later.
   295     if (mEndOfStream && !mEosSetInEncoder) {
   296       mEosSetInEncoder = true;
   297       mSourceSegment.AppendNullData(mLookahead);
   298     }
   299   }
   301   // Start encoding data.
   302   nsAutoTArray<AudioDataValue, 9600> pcm;
   303   pcm.SetLength(GetPacketDuration() * mChannels);
   304   AudioSegment::ChunkIterator iter(mSourceSegment);
   305   int frameCopied = 0;
   307   while (!iter.IsEnded() && frameCopied < framesToFetch) {
   308     AudioChunk chunk = *iter;
   310     // Chunk to the required frame size.
   311     int frameToCopy = chunk.GetDuration();
   312     if (frameCopied + frameToCopy > framesToFetch) {
   313       frameToCopy = framesToFetch - frameCopied;
   314     }
   316     if (!chunk.IsNull()) {
   317       // Append the interleaved data to the end of pcm buffer.
   318       AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
   319         pcm.Elements() + frameCopied * mChannels);
   320     } else {
   321       memset(pcm.Elements() + frameCopied * mChannels, 0,
   322              frameToCopy * mChannels * sizeof(AudioDataValue));
   323     }
   325     frameCopied += frameToCopy;
   326     iter.Next();
   327   }
   329   nsRefPtr<EncodedFrame> audiodata = new EncodedFrame();
   330   audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
   331   int framesInPCM = frameCopied;
   332   if (mResampler) {
   333     nsAutoTArray<AudioDataValue, 9600> resamplingDest;
   334     // We want to consume all the input data, so we slightly oversize the
   335     // resampled data buffer so we can fit the output data in. We cannot really
   336     // predict the output frame count at each call.
   337     uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
   338     uint32_t inframes = frameCopied;
   340     resamplingDest.SetLength(outframes * mChannels);
   342 #if MOZ_SAMPLE_TYPE_S16
   343     short* in = reinterpret_cast<short*>(pcm.Elements());
   344     short* out = reinterpret_cast<short*>(resamplingDest.Elements());
   345     speex_resampler_process_interleaved_int(mResampler, in, &inframes,
   346                                                         out, &outframes);
   347 #else
   348     float* in = reinterpret_cast<float*>(pcm.Elements());
   349     float* out = reinterpret_cast<float*>(resamplingDest.Elements());
   350     speex_resampler_process_interleaved_float(mResampler, in, &inframes,
   351                                                           out, &outframes);
   352 #endif
   354     MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
   355     PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
   356         mResampledLeftover.Length());
   358     uint32_t outframesToCopy = std::min(outframes,
   359         static_cast<uint32_t>(GetPacketDuration() - framesLeft));
   361     MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
   362         outframesToCopy * mChannels);
   363     PodCopy(pcm.Elements() + mResampledLeftover.Length(),
   364         resamplingDest.Elements(), outframesToCopy * mChannels);
   365     int frameLeftover = outframes - outframesToCopy;
   366     mResampledLeftover.SetLength(frameLeftover * mChannels);
   367     PodCopy(mResampledLeftover.Elements(),
   368         resamplingDest.Elements() + outframesToCopy * mChannels,
   369         mResampledLeftover.Length());
   370     // This is always at 48000Hz.
   371     framesInPCM = framesLeft + outframesToCopy;
   372     audiodata->SetDuration(framesInPCM);
   373   } else {
   374     // The ogg time stamping and pre-skip is always timed at 48000.
   375     audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
   376   }
   378   // Remove the raw data which has been pulled to pcm buffer.
   379   // The value of frameCopied should equal to (or smaller than, if eos)
   380   // GetPacketDuration().
   381   mSourceSegment.RemoveLeading(frameCopied);
   383   // Has reached the end of input stream and all queued data has pulled for
   384   // encoding.
   385   if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
   386     mEncodingComplete = true;
   387     LOG("[Opus] Done encoding.");
   388   }
   390   MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());
   392   // Append null data to pcm buffer if the leftover data is not enough for
   393   // opus encoder.
   394   if (framesInPCM < GetPacketDuration() && mEndOfStream) {
   395     PodZero(pcm.Elements() + framesInPCM * mChannels,
   396         (GetPacketDuration() - framesInPCM) * mChannels);
   397   }
   398   nsTArray<uint8_t> frameData;
   399   // Encode the data with Opus Encoder.
   400   frameData.SetLength(MAX_DATA_BYTES);
   401   // result is returned as opus error code if it is negative.
   402   int result = 0;
   403 #ifdef MOZ_SAMPLE_TYPE_S16
   404   const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
   405   result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
   406                        frameData.Elements(), MAX_DATA_BYTES);
   407 #else
   408   const float* pcmBuf = static_cast<float*>(pcm.Elements());
   409   result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
   410                              frameData.Elements(), MAX_DATA_BYTES);
   411 #endif
   412   frameData.SetLength(result >= 0 ? result : 0);
   414   if (result < 0) {
   415     LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
   416   }
   417   if (mEncodingComplete) {
   418     if (mResampler) {
   419       speex_resampler_destroy(mResampler);
   420       mResampler = nullptr;
   421     }
   422     mResampledLeftover.SetLength(0);
   423   }
   425   audiodata->SwapInFrameData(frameData);
   426   aData.AppendEncodedFrame(audiodata);
   427   return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
   428 }
   430 }

mercurial