Fri, 16 Jan 2015 04:50:19 +0100
Replace accessor implementation with direct member state manipulation, by
request https://trac.torproject.org/projects/tor/ticket/9701#comment:32
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "OpusTrackEncoder.h"
6 #include "nsString.h"
8 #include <opus/opus.h>
// File-local logging macro. On Gonk (MOZ_WIDGET_GONK) it forwards its
// printf-style arguments to the Android log under the "MediaEncoder" tag; on
// every other platform it expands to nothing.
//
// The expansion deliberately carries no trailing semicolon: the call site
// supplies it, so `if (x) LOG("..."); else ...` parses as a single statement.
#undef LOG
#ifdef MOZ_WIDGET_GONK
#include <android/log.h>
#define LOG(...) __android_log_print(ANDROID_LOG_INFO, "MediaEncoder", __VA_ARGS__)
#else
#define LOG(...)
#endif
18 namespace mozilla {
// Largest source channel count Init() accepts. The Opus format itself supports
// up to 8 channels (and multitrack audio up to 255 channels), but this
// implementation only encodes mono or stereo; anything wider is downmixed.
static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;

// Channel count actually handed to the Opus encoder. Per the
// "opus_encoder_init" section of
// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
// the input signal must have 1 or 2 channels.
static const int MAX_CHANNELS = 2;

// Size, in bytes, of the output buffer offered to the Opus encoder for one
// encoded packet.
static const int MAX_DATA_BYTES = 4096;

// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
// Second paragraph: "The granule position of an audio data page is in units
// of PCM audio samples at a fixed rate of 48 kHz."
static const int kOpusSamplingRate = 48000;

// Duration of one Opus frame in milliseconds. It must be 2.5, 5, 10, 20, 40
// or 60 ms.
static const int kFrameDurationMs = 20;

// Input sampling rates (Hz) that are passed to the encoder unchanged. Input at
// any other rate is resampled to 48 kHz first.
static const int kOpusSupportedInputSamplingRates[] = {
  8000,
  12000,
  16000,
  24000,
  48000
};
45 namespace {
47 // An endian-neutral serialization of integers. Serializing T in little endian
48 // format to aOutput, where T is a 16 bits or 32 bits integer.
49 template<typename T>
50 static void
51 SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
52 {
53 for (uint32_t i = 0; i < sizeof(T); i++) {
54 aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
55 }
56 }
58 static inline void
59 SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
60 {
61 // Format of serializing a string to buffer is, the length of string (32 bits,
62 // little endian), and the string.
63 SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
64 aOutput->AppendElements(aComment.get(), aComment.Length());
65 }
68 static void
69 SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
70 uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
71 {
72 // The magic signature, null terminator has to be stripped off from strings.
73 static const uint8_t magic[] = "OpusHead";
74 aOutput->AppendElements(magic, sizeof(magic) - 1);
76 // The version must always be 1 (8 bits, unsigned).
77 aOutput->AppendElement(1);
79 // Number of output channels (8 bits, unsigned).
80 aOutput->AppendElement(aChannelCount);
82 // Number of samples (at 48 kHz) to discard from the decoder output when
83 // starting playback (16 bits, unsigned, little endian).
84 SerializeToBuffer(aPreskip, aOutput);
86 // The sampling rate of input source (32 bits, unsigned, little endian).
87 SerializeToBuffer(aInputSampleRate, aOutput);
89 // Output gain, an encoder should set this field to zero (16 bits, signed,
90 // little endian).
91 SerializeToBuffer((int16_t)0, aOutput);
93 // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
94 // unsigned).
95 aOutput->AppendElement(0);
96 }
98 static void
99 SerializeOpusCommentHeader(const nsCString& aVendor,
100 const nsTArray<nsCString>& aComments,
101 nsTArray<uint8_t>* aOutput)
102 {
103 // The magic signature, null terminator has to be stripped off.
104 static const uint8_t magic[] = "OpusTags";
105 aOutput->AppendElements(magic, sizeof(magic) - 1);
107 // The vendor; Should append in the following order:
108 // vendor string length (32 bits, unsigned, little endian)
109 // vendor string.
110 SerializeToBuffer(aVendor, aOutput);
112 // Add comments; Should append in the following order:
113 // comment list length (32 bits, unsigned, little endian)
114 // comment #0 string length (32 bits, unsigned, little endian)
115 // comment #0 string
116 // comment #1 string length (32 bits, unsigned, little endian)
117 // comment #1 string ...
118 SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
119 for (uint32_t i = 0; i < aComments.Length(); ++i) {
120 SerializeToBuffer(aComments[i], aOutput);
121 }
122 }
124 } // Anonymous namespace.
126 OpusTrackEncoder::OpusTrackEncoder()
127 : AudioTrackEncoder()
128 , mEncoder(nullptr)
129 , mLookahead(0)
130 , mResampler(nullptr)
131 {
132 }
134 OpusTrackEncoder::~OpusTrackEncoder()
135 {
136 if (mEncoder) {
137 opus_encoder_destroy(mEncoder);
138 }
139 if (mResampler) {
140 speex_resampler_destroy(mResampler);
141 mResampler = nullptr;
142 }
143 }
145 nsresult
146 OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
147 {
148 // This monitor is used to wake up other methods that are waiting for encoder
149 // to be completely initialized.
150 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
152 NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
153 NS_ERROR_FAILURE);
155 // This version of encoder API only support 1 or 2 channels,
156 // So set the mChannels less or equal 2 and
157 // let InterleaveTrackData downmix pcm data.
158 mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
160 // According to www.opus-codec.org, creating an opus encoder requires the
161 // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
162 // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
163 nsTArray<int> supportedSamplingRates;
164 supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
165 ArrayLength(kOpusSupportedInputSamplingRates));
166 if (!supportedSamplingRates.Contains(aSamplingRate)) {
167 int error;
168 mResampler = speex_resampler_init(mChannels,
169 aSamplingRate,
170 kOpusSamplingRate,
171 SPEEX_RESAMPLER_QUALITY_DEFAULT,
172 &error);
174 if (error != RESAMPLER_ERR_SUCCESS) {
175 return NS_ERROR_FAILURE;
176 }
177 }
178 mSamplingRate = aSamplingRate;
179 NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
181 int error = 0;
182 mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
183 OPUS_APPLICATION_AUDIO, &error);
185 mInitialized = (error == OPUS_OK);
187 mReentrantMonitor.NotifyAll();
189 return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
190 }
192 int
193 OpusTrackEncoder::GetOutputSampleRate()
194 {
195 return mResampler ? kOpusSamplingRate : mSamplingRate;
196 }
198 int
199 OpusTrackEncoder::GetPacketDuration()
200 {
201 return GetOutputSampleRate() * kFrameDurationMs / 1000;
202 }
204 already_AddRefed<TrackMetadataBase>
205 OpusTrackEncoder::GetMetadata()
206 {
207 {
208 // Wait if mEncoder is not initialized.
209 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
210 while (!mCanceled && !mInitialized) {
211 mReentrantMonitor.Wait();
212 }
213 }
215 if (mCanceled || mEncodingComplete) {
216 return nullptr;
217 }
219 nsRefPtr<OpusMetadata> meta = new OpusMetadata();
221 mLookahead = 0;
222 int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
223 if (error != OPUS_OK) {
224 mLookahead = 0;
225 }
227 // The ogg time stamping and pre-skip is always timed at 48000.
228 SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
229 GetOutputSampleRate()), mSamplingRate,
230 &meta->mIdHeader);
232 nsCString vendor;
233 vendor.AppendASCII(opus_get_version_string());
235 nsTArray<nsCString> comments;
236 comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
238 SerializeOpusCommentHeader(vendor, comments,
239 &meta->mCommentHeader);
241 return meta.forget();
242 }
244 nsresult
245 OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
246 {
247 {
248 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
249 // Wait until initialized or cancelled.
250 while (!mCanceled && !mInitialized) {
251 mReentrantMonitor.Wait();
252 }
253 if (mCanceled || mEncodingComplete) {
254 return NS_ERROR_FAILURE;
255 }
256 }
258 // calculation below depends on the truth that mInitialized is true.
259 MOZ_ASSERT(mInitialized);
261 // re-sampled frames left last time which didn't fit into an Opus packet duration.
262 const int framesLeft = mResampledLeftover.Length() / mChannels;
263 // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
264 // of kOpusSamplingRate. There is not precision loss in the integer division
265 // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
266 // framesToFetch to ensure there will be at least n frames after re-sampling.
267 const int frameRoundUp = framesLeft ? 1 : 0;
269 MOZ_ASSERT(GetPacketDuration() >= framesLeft);
270 // Try to fetch m frames such that there will be n frames
271 // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
272 const int framesToFetch = !mResampler ? GetPacketDuration()
273 : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
274 + frameRoundUp;
275 {
276 // Move all the samples from mRawSegment to mSourceSegment. We only hold
277 // the monitor in this block.
278 ReentrantMonitorAutoEnter mon(mReentrantMonitor);
280 // Wait until enough raw data, end of stream or cancelled.
281 while (!mCanceled && mRawSegment.GetDuration() +
282 mSourceSegment.GetDuration() < framesToFetch &&
283 !mEndOfStream) {
284 mReentrantMonitor.Wait();
285 }
287 if (mCanceled || mEncodingComplete) {
288 return NS_ERROR_FAILURE;
289 }
291 mSourceSegment.AppendFrom(&mRawSegment);
293 // Pad |mLookahead| samples to the end of source stream to prevent lost of
294 // original data, the pcm duration will be calculated at rate 48K later.
295 if (mEndOfStream && !mEosSetInEncoder) {
296 mEosSetInEncoder = true;
297 mSourceSegment.AppendNullData(mLookahead);
298 }
299 }
301 // Start encoding data.
302 nsAutoTArray<AudioDataValue, 9600> pcm;
303 pcm.SetLength(GetPacketDuration() * mChannels);
304 AudioSegment::ChunkIterator iter(mSourceSegment);
305 int frameCopied = 0;
307 while (!iter.IsEnded() && frameCopied < framesToFetch) {
308 AudioChunk chunk = *iter;
310 // Chunk to the required frame size.
311 int frameToCopy = chunk.GetDuration();
312 if (frameCopied + frameToCopy > framesToFetch) {
313 frameToCopy = framesToFetch - frameCopied;
314 }
316 if (!chunk.IsNull()) {
317 // Append the interleaved data to the end of pcm buffer.
318 AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
319 pcm.Elements() + frameCopied * mChannels);
320 } else {
321 memset(pcm.Elements() + frameCopied * mChannels, 0,
322 frameToCopy * mChannels * sizeof(AudioDataValue));
323 }
325 frameCopied += frameToCopy;
326 iter.Next();
327 }
329 nsRefPtr<EncodedFrame> audiodata = new EncodedFrame();
330 audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
331 int framesInPCM = frameCopied;
332 if (mResampler) {
333 nsAutoTArray<AudioDataValue, 9600> resamplingDest;
334 // We want to consume all the input data, so we slightly oversize the
335 // resampled data buffer so we can fit the output data in. We cannot really
336 // predict the output frame count at each call.
337 uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
338 uint32_t inframes = frameCopied;
340 resamplingDest.SetLength(outframes * mChannels);
342 #if MOZ_SAMPLE_TYPE_S16
343 short* in = reinterpret_cast<short*>(pcm.Elements());
344 short* out = reinterpret_cast<short*>(resamplingDest.Elements());
345 speex_resampler_process_interleaved_int(mResampler, in, &inframes,
346 out, &outframes);
347 #else
348 float* in = reinterpret_cast<float*>(pcm.Elements());
349 float* out = reinterpret_cast<float*>(resamplingDest.Elements());
350 speex_resampler_process_interleaved_float(mResampler, in, &inframes,
351 out, &outframes);
352 #endif
354 MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
355 PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
356 mResampledLeftover.Length());
358 uint32_t outframesToCopy = std::min(outframes,
359 static_cast<uint32_t>(GetPacketDuration() - framesLeft));
361 MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
362 outframesToCopy * mChannels);
363 PodCopy(pcm.Elements() + mResampledLeftover.Length(),
364 resamplingDest.Elements(), outframesToCopy * mChannels);
365 int frameLeftover = outframes - outframesToCopy;
366 mResampledLeftover.SetLength(frameLeftover * mChannels);
367 PodCopy(mResampledLeftover.Elements(),
368 resamplingDest.Elements() + outframesToCopy * mChannels,
369 mResampledLeftover.Length());
370 // This is always at 48000Hz.
371 framesInPCM = framesLeft + outframesToCopy;
372 audiodata->SetDuration(framesInPCM);
373 } else {
374 // The ogg time stamping and pre-skip is always timed at 48000.
375 audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
376 }
378 // Remove the raw data which has been pulled to pcm buffer.
379 // The value of frameCopied should equal to (or smaller than, if eos)
380 // GetPacketDuration().
381 mSourceSegment.RemoveLeading(frameCopied);
383 // Has reached the end of input stream and all queued data has pulled for
384 // encoding.
385 if (mSourceSegment.GetDuration() == 0 && mEndOfStream) {
386 mEncodingComplete = true;
387 LOG("[Opus] Done encoding.");
388 }
390 MOZ_ASSERT(mEndOfStream || framesInPCM == GetPacketDuration());
392 // Append null data to pcm buffer if the leftover data is not enough for
393 // opus encoder.
394 if (framesInPCM < GetPacketDuration() && mEndOfStream) {
395 PodZero(pcm.Elements() + framesInPCM * mChannels,
396 (GetPacketDuration() - framesInPCM) * mChannels);
397 }
398 nsTArray<uint8_t> frameData;
399 // Encode the data with Opus Encoder.
400 frameData.SetLength(MAX_DATA_BYTES);
401 // result is returned as opus error code if it is negative.
402 int result = 0;
403 #ifdef MOZ_SAMPLE_TYPE_S16
404 const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
405 result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
406 frameData.Elements(), MAX_DATA_BYTES);
407 #else
408 const float* pcmBuf = static_cast<float*>(pcm.Elements());
409 result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
410 frameData.Elements(), MAX_DATA_BYTES);
411 #endif
412 frameData.SetLength(result >= 0 ? result : 0);
414 if (result < 0) {
415 LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
416 }
417 if (mEncodingComplete) {
418 if (mResampler) {
419 speex_resampler_destroy(mResampler);
420 mResampler = nullptr;
421 }
422 mResampledLeftover.SetLength(0);
423 }
425 audiodata->SwapInFrameData(frameData);
426 aData.AppendEncodedFrame(audiodata);
427 return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
428 }
430 }