content/media/encoder/OpusTrackEncoder.cpp

Fri, 16 Jan 2015 04:50:19 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 04:50:19 +0100
branch
TOR_BUG_9701
changeset 13
44a2da4a2ab2
permissions
-rw-r--r--

Replace accessor implementation with direct member state manipulation, by
request https://trac.torproject.org/projects/tor/ticket/9701#comment:32

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
michael@0 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "OpusTrackEncoder.h"
michael@0 6 #include "nsString.h"
michael@0 7
michael@0 8 #include <opus/opus.h>
michael@0 9
michael@0 10 #undef LOG
michael@0 11 #ifdef MOZ_WIDGET_GONK
michael@0 12 #include <android/log.h>
michael@0 13 #define LOG(args...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", ## args);
michael@0 14 #else
michael@0 15 #define LOG(args, ...)
michael@0 16 #endif
michael@0 17
michael@0 18 namespace mozilla {
michael@0 19
michael@0 20 // The Opus format supports up to 8 channels, and supports multitrack audio up
michael@0 21 // to 255 channels, but the current implementation supports only mono and
michael@0 22 // stereo, and downmixes any more than that.
michael@0 23 static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
michael@0 24
michael@0 25 // http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
michael@0 26 // In section "opus_encoder_init", channels must be 1 or 2 of input signal.
michael@0 27 static const int MAX_CHANNELS = 2;
michael@0 28
michael@0 29 // The maximum size, in bytes, of the buffer that receives one encoded Opus packet.
michael@0 30 static const int MAX_DATA_BYTES = 4096;
michael@0 31
michael@0 32 // http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
michael@0 33 // Second paragraph, " The granule position of an audio data page is in units
michael@0 34 // of PCM audio samples at a fixed rate of 48 kHz."
michael@0 35 static const int kOpusSamplingRate = 48000;
michael@0 36
michael@0 37 // The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
michael@0 38 static const int kFrameDurationMs = 20;
michael@0 39
michael@0 40 // Input sampling rates (Hz) that can be handed to the Opus encoder directly.
michael@0 41 // Input at any other rate is resampled to 48 kHz before encoding.
michael@0 42 static const int kOpusSupportedInputSamplingRates[] =
michael@0 43 {8000, 12000, 16000, 24000, 48000};
michael@0 44
michael@0 45 namespace {
michael@0 46
michael@0 47 // An endian-neutral serialization of integers. Serializing T in little endian
michael@0 48 // format to aOutput, where T is a 16 bits or 32 bits integer.
michael@0 49 template<typename T>
michael@0 50 static void
michael@0 51 SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
michael@0 52 {
michael@0 53 for (uint32_t i = 0; i < sizeof(T); i++) {
michael@0 54 aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
michael@0 55 }
michael@0 56 }
michael@0 57
michael@0 58 static inline void
michael@0 59 SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
michael@0 60 {
michael@0 61 // Format of serializing a string to buffer is, the length of string (32 bits,
michael@0 62 // little endian), and the string.
michael@0 63 SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
michael@0 64 aOutput->AppendElements(aComment.get(), aComment.Length());
michael@0 65 }
michael@0 66
michael@0 67
michael@0 68 static void
michael@0 69 SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
michael@0 70 uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
michael@0 71 {
michael@0 72 // The magic signature, null terminator has to be stripped off from strings.
michael@0 73 static const uint8_t magic[] = "OpusHead";
michael@0 74 aOutput->AppendElements(magic, sizeof(magic) - 1);
michael@0 75
michael@0 76 // The version must always be 1 (8 bits, unsigned).
michael@0 77 aOutput->AppendElement(1);
michael@0 78
michael@0 79 // Number of output channels (8 bits, unsigned).
michael@0 80 aOutput->AppendElement(aChannelCount);
michael@0 81
michael@0 82 // Number of samples (at 48 kHz) to discard from the decoder output when
michael@0 83 // starting playback (16 bits, unsigned, little endian).
michael@0 84 SerializeToBuffer(aPreskip, aOutput);
michael@0 85
michael@0 86 // The sampling rate of input source (32 bits, unsigned, little endian).
michael@0 87 SerializeToBuffer(aInputSampleRate, aOutput);
michael@0 88
michael@0 89 // Output gain, an encoder should set this field to zero (16 bits, signed,
michael@0 90 // little endian).
michael@0 91 SerializeToBuffer((int16_t)0, aOutput);
michael@0 92
michael@0 93 // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
michael@0 94 // unsigned).
michael@0 95 aOutput->AppendElement(0);
michael@0 96 }
michael@0 97
michael@0 98 static void
michael@0 99 SerializeOpusCommentHeader(const nsCString& aVendor,
michael@0 100 const nsTArray<nsCString>& aComments,
michael@0 101 nsTArray<uint8_t>* aOutput)
michael@0 102 {
michael@0 103 // The magic signature, null terminator has to be stripped off.
michael@0 104 static const uint8_t magic[] = "OpusTags";
michael@0 105 aOutput->AppendElements(magic, sizeof(magic) - 1);
michael@0 106
michael@0 107 // The vendor; Should append in the following order:
michael@0 108 // vendor string length (32 bits, unsigned, little endian)
michael@0 109 // vendor string.
michael@0 110 SerializeToBuffer(aVendor, aOutput);
michael@0 111
michael@0 112 // Add comments; Should append in the following order:
michael@0 113 // comment list length (32 bits, unsigned, little endian)
michael@0 114 // comment #0 string length (32 bits, unsigned, little endian)
michael@0 115 // comment #0 string
michael@0 116 // comment #1 string length (32 bits, unsigned, little endian)
michael@0 117 // comment #1 string ...
michael@0 118 SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
michael@0 119 for (uint32_t i = 0; i < aComments.Length(); ++i) {
michael@0 120 SerializeToBuffer(aComments[i], aOutput);
michael@0 121 }
michael@0 122 }
michael@0 123
michael@0 124 } // Anonymous namespace.
michael@0 125
michael@0 126 OpusTrackEncoder::OpusTrackEncoder()
michael@0 127 : AudioTrackEncoder()
michael@0 128 , mEncoder(nullptr)
michael@0 129 , mLookahead(0)
michael@0 130 , mResampler(nullptr)
michael@0 131 {
michael@0 132 }
michael@0 133
michael@0 134 OpusTrackEncoder::~OpusTrackEncoder()
michael@0 135 {
michael@0 136 if (mEncoder) {
michael@0 137 opus_encoder_destroy(mEncoder);
michael@0 138 }
michael@0 139 if (mResampler) {
michael@0 140 speex_resampler_destroy(mResampler);
michael@0 141 mResampler = nullptr;
michael@0 142 }
michael@0 143 }
michael@0 144
michael@0 145 nsresult
michael@0 146 OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
michael@0 147 {
michael@0 148 // This monitor is used to wake up other methods that are waiting for encoder
michael@0 149 // to be completely initialized.
michael@0 150 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
michael@0 151
michael@0 152 NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
michael@0 153 NS_ERROR_FAILURE);
michael@0 154
michael@0 155 // This version of encoder API only support 1 or 2 channels,
michael@0 156 // So set the mChannels less or equal 2 and
michael@0 157 // let InterleaveTrackData downmix pcm data.
michael@0 158 mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
michael@0 159
michael@0 160 // According to www.opus-codec.org, creating an opus encoder requires the
michael@0 161 // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
michael@0 162 // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
michael@0 163 nsTArray<int> supportedSamplingRates;
michael@0 164 supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
michael@0 165 ArrayLength(kOpusSupportedInputSamplingRates));
michael@0 166 if (!supportedSamplingRates.Contains(aSamplingRate)) {
michael@0 167 int error;
michael@0 168 mResampler = speex_resampler_init(mChannels,
michael@0 169 aSamplingRate,
michael@0 170 kOpusSamplingRate,
michael@0 171 SPEEX_RESAMPLER_QUALITY_DEFAULT,
michael@0 172 &error);
michael@0 173
michael@0 174 if (error != RESAMPLER_ERR_SUCCESS) {
michael@0 175 return NS_ERROR_FAILURE;
michael@0 176 }
michael@0 177 }
michael@0 178 mSamplingRate = aSamplingRate;
michael@0 179 NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
michael@0 180
michael@0 181 int error = 0;
michael@0 182 mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
michael@0 183 OPUS_APPLICATION_AUDIO, &error);
michael@0 184
michael@0 185 mInitialized = (error == OPUS_OK);
michael@0 186
michael@0 187 mReentrantMonitor.NotifyAll();
michael@0 188
michael@0 189 return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
michael@0 190 }
michael@0 191
michael@0 192 int
michael@0 193 OpusTrackEncoder::GetOutputSampleRate()
michael@0 194 {
michael@0 195 return mResampler ? kOpusSamplingRate : mSamplingRate;
michael@0 196 }
michael@0 197
michael@0 198 int
michael@0 199 OpusTrackEncoder::GetPacketDuration()
michael@0 200 {
michael@0 201 return GetOutputSampleRate() * kFrameDurationMs / 1000;
michael@0 202 }
michael@0 203
michael@0 204 already_AddRefed<TrackMetadataBase>
michael@0 205 OpusTrackEncoder::GetMetadata()
michael@0 206 {
michael@0 207 {
michael@0 208 // Wait if mEncoder is not initialized.
michael@0 209 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
michael@0 210 while (!mCanceled && !mInitialized) {
michael@0 211 mReentrantMonitor.Wait();
michael@0 212 }
michael@0 213 }
michael@0 214
michael@0 215 if (mCanceled || mEncodingComplete) {
michael@0 216 return nullptr;
michael@0 217 }
michael@0 218
michael@0 219 nsRefPtr<OpusMetadata> meta = new OpusMetadata();
michael@0 220
michael@0 221 mLookahead = 0;
michael@0 222 int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
michael@0 223 if (error != OPUS_OK) {
michael@0 224 mLookahead = 0;
michael@0 225 }
michael@0 226
michael@0 227 // The ogg time stamping and pre-skip is always timed at 48000.
michael@0 228 SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
michael@0 229 GetOutputSampleRate()), mSamplingRate,
michael@0 230 &meta->mIdHeader);
michael@0 231
michael@0 232 nsCString vendor;
michael@0 233 vendor.AppendASCII(opus_get_version_string());
michael@0 234
michael@0 235 nsTArray<nsCString> comments;
michael@0 236 comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
michael@0 237
michael@0 238 SerializeOpusCommentHeader(vendor, comments,
michael@0 239 &meta->mCommentHeader);
michael@0 240
michael@0 241 return meta.forget();
michael@0 242 }
michael@0 243
michael@0 244 nsresult
michael@0 245 OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
michael@0 246 {
michael@0 247 {
michael@0 248 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
michael@0 249 // Wait until initialized or cancelled.
michael@0 250 while (!mCanceled && !mInitialized) {
michael@0 251 mReentrantMonitor.Wait();
michael@0 252 }
michael@0 253 if (mCanceled || mEncodingComplete) {
michael@0 254 return NS_ERROR_FAILURE;
michael@0 255 }
michael@0 256 }
michael@0 257
michael@0 258 // calculation below depends on the truth that mInitialized is true.
michael@0 259 MOZ_ASSERT(mInitialized);
michael@0 260
michael@0 261 // re-sampled frames left last time which didn't fit into an Opus packet duration.
michael@0 262 const int framesLeft = mResampledLeftover.Length() / mChannels;
michael@0 263 // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
michael@0 264 // of kOpusSamplingRate. There is not precision loss in the integer division
michael@0 265 // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
michael@0 266 // framesToFetch to ensure there will be at least n frames after re-sampling.
michael@0 267 const int frameRoundUp = framesLeft ? 1 : 0;
michael@0 268
michael@0 269 MOZ_ASSERT(GetPacketDuration() >= framesLeft);
michael@0 270 // Try to fetch m frames such that there will be n frames
michael@0 271 // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
michael@0 272 const int framesToFetch = !mResampler ? GetPacketDuration()
michael@0 273 : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
michael@0 274 + frameRoundUp;
michael@0 275 {
michael@0 276 // Move all the samples from mRawSegment to mSourceSegment. We only hold
michael@0 277 // the monitor in this block.
michael@0 278 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
michael@0 279
michael@0 280 // Wait until enough raw data, end of stream or cancelled.
michael@0 281 while (!mCanceled && mRawSegment.GetDuration() +
michael@0 282 mSourceSegment.GetDuration() < framesToFetch &&
michael@0 283 !mEndOfStream) {
michael@0 284 mReentrantMonitor.Wait();
michael@0 285 }
michael@0 286
michael@0 287 if (mCanceled || mEncodingComplete) {
michael@0 288 return NS_ERROR_FAILURE;
michael@0 289 }
michael@0 290
michael@0 291 mSourceSegment.AppendFrom(&mRawSegment);
michael@0 292
michael@0 293 // Pad |mLookahead| samples to the end of source stream to prevent lost of
michael@0 294 // original data, the pcm duration will be calculated at rate 48K later.
michael@0 295 if (mEndOfStream && !mEosSetInEncoder) {
michael@0 296 mEosSetInEncoder = true;
michael@0 297 mSourceSegment.AppendNullData(mLookahead);
michael@0 298 }
michael@0 299 }
michael@0 300
michael@0 301 // Start encoding data.
michael@0 302 nsAutoTArray<AudioDataValue, 9600> pcm;
michael@0 303 pcm.SetLength(GetPacketDuration() * mChannels);
michael@0 304 AudioSegment::ChunkIterator iter(mSourceSegment);
michael@0 305 int frameCopied = 0;
michael@0 306
michael@0 307 while (!iter.IsEnded() && frameCopied < framesToFetch) {
michael@0 308 AudioChunk chunk = *iter;
michael@0 309
michael@0 310 // Chunk to the required frame size.
michael@0 311 int frameToCopy = chunk.GetDuration();
michael@0 312 if (frameCopied + frameToCopy > framesToFetch) {
michael@0 313 frameToCopy = framesToFetch - frameCopied;
michael@0 314 }
michael@0 315
michael@0 316 if (!chunk.IsNull()) {
michael@0 317 // Append the interleaved data to the end of pcm buffer.
michael@0 318 AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
michael@0 319 pcm.Elements() + frameCopied * mChannels);
michael@0 320 } else {
michael@0 321 memset(pcm.Elements() + frameCopied * mChannels, 0,
michael@0 322 frameToCopy * mChannels * sizeof(AudioDataValue));
michael@0 323 }
michael@0 324
michael@0 325 frameCopied += frameToCopy;
michael@0 326 iter.Next();
michael@0 327 }
michael@0 328
michael@0 329 nsRefPtr<EncodedFrame> audiodata = new EncodedFrame();
michael@0 330 audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
michael@0 331 int framesInPCM = frameCopied;
michael@0 332 if (mResampler) {
michael@0 333 nsAutoTArray<AudioDataValue, 9600> resamplingDest;
michael@0 334 // We want to consume all the input data, so we slightly oversize the
michael@0 335 // resampled data buffer so we can fit the output data in. We cannot really
michael@0 336 // predict the output frame count at each call.
michael@0 337 uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
michael@0 338 uint32_t inframes = frameCopied;
michael@0 339
michael@0 340 resamplingDest.SetLength(outframes * mChannels);
michael@0 341
michael@0 342 #if MOZ_SAMPLE_TYPE_S16
michael@0 343 short* in = reinterpret_cast<short*>(pcm.Elements());
michael@0 344 short* out = reinterpret_cast<short*>(resamplingDest.Elements());
michael@0 345 speex_resampler_process_interleaved_int(mResampler, in, &inframes,
michael@0 346 out, &outframes);
michael@0 347 #else
michael@0 348 float* in = reinterpret_cast<float*>(pcm.Elements());
michael@0 349 float* out = reinterpret_cast<float*>(resamplingDest.Elements());
michael@0 350 speex_resampler_process_interleaved_float(mResampler, in, &inframes,
michael@0 351 out, &outframes);
michael@0 352 #endif
michael@0 353
michael@0 354 MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
michael@0 355 PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
michael@0 356 mResampledLeftover.Length());
michael@0 357
michael@0 358 uint32_t outframesToCopy = std::min(outframes,
michael@0 359 static_cast<uint32_t>(GetPacketDuration() - framesLeft));
michael@0 360
michael@0 361 MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
michael@0 362 outframesToCopy * mChannels);
michael@0 363 PodCopy(pcm.Elements() + mResampledLeftover.Length(),
michael@0 364 resamplingDest.Elements(), outframesToCopy * mChannels);
michael@0 365 int frameLeftover = outframes - outframesToCopy;
michael@0 366 mResampledLeftover.SetLength(frameLeftover * mChannels);
michael@0 367 PodCopy(mResampledLeftover.Elements(),
michael@0 368 resamplingDest.Elements() + outframesToCopy * mChannels,
michael@0 369 mResampledLeftover.Length());
michael@0 370 // This is always at 48000Hz.
michael@0 371 framesInPCM = framesLeft + outframesToCopy;
michael@0 372 audiodata->SetDuration(framesInPCM);
michael@0 373 } else {
michael@0 374 // The ogg time stamping and pre-skip is always timed at 48000.
michael@0 375 audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
michael@0 376 }
michael@0 377
michael@0 378 // Remove the raw data which has been pulled to pcm buffer.
michael@0 379 // The value of frameCopied should equal to (or smaller than, if eos)
michael@0 380 // GetPacketDuration().
michael@0 381 mSourceSegment.RemoveLeading(frameCopied);
michael@0 382
michael@0 383 // Has reached the end of input stream and all queued data has pulled for
michael@0 384 // encoding.
michael@0 385 if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
michael@0 386 mEncodingComplete = true;
michael@0 387 LOG("[Opus] Done encoding.");
michael@0 388 }
michael@0 389
michael@0 390 MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());
michael@0 391
michael@0 392 // Append null data to pcm buffer if the leftover data is not enough for
michael@0 393 // opus encoder.
michael@0 394 if (framesInPCM < GetPacketDuration() && mEndOfStream) {
michael@0 395 PodZero(pcm.Elements() + framesInPCM * mChannels,
michael@0 396 (GetPacketDuration() - framesInPCM) * mChannels);
michael@0 397 }
michael@0 398 nsTArray<uint8_t> frameData;
michael@0 399 // Encode the data with Opus Encoder.
michael@0 400 frameData.SetLength(MAX_DATA_BYTES);
michael@0 401 // result is returned as opus error code if it is negative.
michael@0 402 int result = 0;
michael@0 403 #ifdef MOZ_SAMPLE_TYPE_S16
michael@0 404 const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
michael@0 405 result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
michael@0 406 frameData.Elements(), MAX_DATA_BYTES);
michael@0 407 #else
michael@0 408 const float* pcmBuf = static_cast<float*>(pcm.Elements());
michael@0 409 result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
michael@0 410 frameData.Elements(), MAX_DATA_BYTES);
michael@0 411 #endif
michael@0 412 frameData.SetLength(result >= 0 ? result : 0);
michael@0 413
michael@0 414 if (result < 0) {
michael@0 415 LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
michael@0 416 }
michael@0 417 if (mEncodingComplete) {
michael@0 418 if (mResampler) {
michael@0 419 speex_resampler_destroy(mResampler);
michael@0 420 mResampler = nullptr;
michael@0 421 }
michael@0 422 mResampledLeftover.SetLength(0);
michael@0 423 }
michael@0 424
michael@0 425 audiodata->SwapInFrameData(frameData);
michael@0 426 aData.AppendEncodedFrame(audiodata);
michael@0 427 return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
michael@0 428 }
michael@0 429
michael@0 430 }

mercurial