|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include <string.h> |
|
8 |
|
9 #include "mozilla/DebugOnly.h" |
|
10 #include "mozilla/Endian.h" |
|
11 #include <stdint.h> |
|
12 |
|
13 #include "nsDebug.h" |
|
14 #include "MediaDecoderReader.h" |
|
15 #include "OggCodecState.h" |
|
16 #include "OggDecoder.h" |
|
17 #include "nsISupportsImpl.h" |
|
18 #include "VideoUtils.h" |
|
19 #include <algorithm> |
|
20 |
|
// On Android JellyBean (Gonk builds) the hardware.h header defines
// version_major and version_minor as macros, which breaks our build. See:
// https://bugzilla.mozilla.org/show_bug.cgi?id=912702#c6
// #undef on a name that is not currently a macro is a no-op, so the
// individual #ifdef guards are not required.
#if defined(MOZ_WIDGET_GONK)
#undef version_major
#undef version_minor
#endif
|
32 |
|
33 namespace mozilla { |
|
34 |
|
// LOG(type, msg) forwards to PR_LOG on the media decoder log module when
// PR_LOGGING is defined, and expands to nothing otherwise.
#if !defined(PR_LOGGING)
#define LOG(type, msg)
#else
extern PRLogModuleInfo* gMediaDecoderLog;
#define LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg)
#endif
|
41 |
|
42 /** Decoder base class for Ogg-encapsulated streams. */ |
|
43 OggCodecState* |
|
44 OggCodecState::Create(ogg_page* aPage) |
|
45 { |
|
46 NS_ASSERTION(ogg_page_bos(aPage), "Only call on BOS page!"); |
|
47 nsAutoPtr<OggCodecState> codecState; |
|
48 if (aPage->body_len > 6 && memcmp(aPage->body+1, "theora", 6) == 0) { |
|
49 codecState = new TheoraState(aPage); |
|
50 } else if (aPage->body_len > 6 && memcmp(aPage->body+1, "vorbis", 6) == 0) { |
|
51 codecState = new VorbisState(aPage); |
|
52 #ifdef MOZ_OPUS |
|
53 } else if (aPage->body_len > 8 && memcmp(aPage->body, "OpusHead", 8) == 0) { |
|
54 codecState = new OpusState(aPage); |
|
55 #endif |
|
56 } else if (aPage->body_len > 8 && memcmp(aPage->body, "fishead\0", 8) == 0) { |
|
57 codecState = new SkeletonState(aPage); |
|
58 } else { |
|
59 codecState = new OggCodecState(aPage, false); |
|
60 } |
|
61 return codecState->OggCodecState::Init() ? codecState.forget() : nullptr; |
|
62 } |
|
63 |
|
64 OggCodecState::OggCodecState(ogg_page* aBosPage, bool aActive) : |
|
65 mPacketCount(0), |
|
66 mSerial(ogg_page_serialno(aBosPage)), |
|
67 mActive(aActive), |
|
68 mDoneReadingHeaders(!aActive) |
|
69 { |
|
70 MOZ_COUNT_CTOR(OggCodecState); |
|
71 memset(&mState, 0, sizeof(ogg_stream_state)); |
|
72 } |
|
73 |
|
74 OggCodecState::~OggCodecState() { |
|
75 MOZ_COUNT_DTOR(OggCodecState); |
|
76 Reset(); |
|
77 #ifdef DEBUG |
|
78 int ret = |
|
79 #endif |
|
80 ogg_stream_clear(&mState); |
|
81 NS_ASSERTION(ret == 0, "ogg_stream_clear failed"); |
|
82 } |
|
83 |
|
84 nsresult OggCodecState::Reset() { |
|
85 if (ogg_stream_reset(&mState) != 0) { |
|
86 return NS_ERROR_FAILURE; |
|
87 } |
|
88 mPackets.Erase(); |
|
89 ClearUnstamped(); |
|
90 return NS_OK; |
|
91 } |
|
92 |
|
93 void OggCodecState::ClearUnstamped() |
|
94 { |
|
95 for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { |
|
96 OggCodecState::ReleasePacket(mUnstamped[i]); |
|
97 } |
|
98 mUnstamped.Clear(); |
|
99 } |
|
100 |
|
101 bool OggCodecState::Init() { |
|
102 int ret = ogg_stream_init(&mState, mSerial); |
|
103 return ret == 0; |
|
104 } |
|
105 |
|
106 bool OggCodecState::IsValidVorbisTagName(nsCString& aName) |
|
107 { |
|
108 // Tag names must consist of ASCII 0x20 through 0x7D, |
|
109 // excluding 0x3D '=' which is the separator. |
|
110 uint32_t length = aName.Length(); |
|
111 const char* data = aName.Data(); |
|
112 for (uint32_t i = 0; i < length; i++) { |
|
113 if (data[i] < 0x20 || data[i] > 0x7D || data[i] == '=') { |
|
114 return false; |
|
115 } |
|
116 } |
|
117 return true; |
|
118 } |
|
119 |
|
120 bool OggCodecState::AddVorbisComment(MetadataTags* aTags, |
|
121 const char* aComment, |
|
122 uint32_t aLength) |
|
123 { |
|
124 const char* div = (const char*)memchr(aComment, '=', aLength); |
|
125 if (!div) { |
|
126 LOG(PR_LOG_DEBUG, ("Skipping comment: no separator")); |
|
127 return false; |
|
128 } |
|
129 nsCString key = nsCString(aComment, div-aComment); |
|
130 if (!IsValidVorbisTagName(key)) { |
|
131 LOG(PR_LOG_DEBUG, ("Skipping comment: invalid tag name")); |
|
132 return false; |
|
133 } |
|
134 uint32_t valueLength = aLength - (div-aComment); |
|
135 nsCString value = nsCString(div + 1, valueLength); |
|
136 if (!IsUTF8(value)) { |
|
137 LOG(PR_LOG_DEBUG, ("Skipping comment: invalid UTF-8 in value")); |
|
138 return false; |
|
139 } |
|
140 aTags->Put(key, value); |
|
141 return true; |
|
142 } |
|
143 |
|
144 void VorbisState::RecordVorbisPacketSamples(ogg_packet* aPacket, |
|
145 long aSamples) |
|
146 { |
|
147 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
148 mVorbisPacketSamples[aPacket] = aSamples; |
|
149 #endif |
|
150 } |
|
151 |
|
152 void VorbisState::ValidateVorbisPacketSamples(ogg_packet* aPacket, |
|
153 long aSamples) |
|
154 { |
|
155 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
156 NS_ASSERTION(mVorbisPacketSamples[aPacket] == aSamples, |
|
157 "Decoded samples for Vorbis packet don't match expected!"); |
|
158 mVorbisPacketSamples.erase(aPacket); |
|
159 #endif |
|
160 } |
|
161 |
|
162 void VorbisState::AssertHasRecordedPacketSamples(ogg_packet* aPacket) |
|
163 { |
|
164 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
165 NS_ASSERTION(mVorbisPacketSamples.count(aPacket) == 1, |
|
166 "Must have recorded packet samples"); |
|
167 #endif |
|
168 } |
|
169 |
|
170 static ogg_packet* Clone(ogg_packet* aPacket) { |
|
171 ogg_packet* p = new ogg_packet(); |
|
172 memcpy(p, aPacket, sizeof(ogg_packet)); |
|
173 p->packet = new unsigned char[p->bytes]; |
|
174 memcpy(p->packet, aPacket->packet, p->bytes); |
|
175 return p; |
|
176 } |
|
177 |
|
178 void OggCodecState::ReleasePacket(ogg_packet* aPacket) { |
|
179 if (aPacket) |
|
180 delete [] aPacket->packet; |
|
181 delete aPacket; |
|
182 } |
|
183 |
|
184 void OggPacketQueue::Append(ogg_packet* aPacket) { |
|
185 nsDeque::Push(aPacket); |
|
186 } |
|
187 |
|
188 ogg_packet* OggCodecState::PacketOut() { |
|
189 if (mPackets.IsEmpty()) { |
|
190 return nullptr; |
|
191 } |
|
192 return mPackets.PopFront(); |
|
193 } |
|
194 |
|
195 nsresult OggCodecState::PageIn(ogg_page* aPage) { |
|
196 if (!mActive) |
|
197 return NS_OK; |
|
198 NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, |
|
199 "Page must be for this stream!"); |
|
200 if (ogg_stream_pagein(&mState, aPage) == -1) |
|
201 return NS_ERROR_FAILURE; |
|
202 int r; |
|
203 do { |
|
204 ogg_packet packet; |
|
205 r = ogg_stream_packetout(&mState, &packet); |
|
206 if (r == 1) { |
|
207 mPackets.Append(Clone(&packet)); |
|
208 } |
|
209 } while (r != 0); |
|
210 if (ogg_stream_check(&mState)) { |
|
211 NS_WARNING("Unrecoverable error in ogg_stream_packetout"); |
|
212 return NS_ERROR_FAILURE; |
|
213 } |
|
214 return NS_OK; |
|
215 } |
|
216 |
|
217 nsresult OggCodecState::PacketOutUntilGranulepos(bool& aFoundGranulepos) { |
|
218 int r; |
|
219 aFoundGranulepos = false; |
|
220 // Extract packets from the sync state until either no more packets |
|
221 // come out, or we get a data packet with non -1 granulepos. |
|
222 do { |
|
223 ogg_packet packet; |
|
224 r = ogg_stream_packetout(&mState, &packet); |
|
225 if (r == 1) { |
|
226 ogg_packet* clone = Clone(&packet); |
|
227 if (IsHeader(&packet)) { |
|
228 // Header packets go straight into the packet queue. |
|
229 mPackets.Append(clone); |
|
230 } else { |
|
231 // We buffer data packets until we encounter a granulepos. We'll |
|
232 // then use the granulepos to figure out the granulepos of the |
|
233 // preceeding packets. |
|
234 mUnstamped.AppendElement(clone); |
|
235 aFoundGranulepos = packet.granulepos > 0; |
|
236 } |
|
237 } |
|
238 } while (r != 0 && !aFoundGranulepos); |
|
239 if (ogg_stream_check(&mState)) { |
|
240 NS_WARNING("Unrecoverable error in ogg_stream_packetout"); |
|
241 return NS_ERROR_FAILURE; |
|
242 } |
|
243 return NS_OK; |
|
244 } |
|
245 |
|
246 TheoraState::TheoraState(ogg_page* aBosPage) : |
|
247 OggCodecState(aBosPage, true), |
|
248 mSetup(0), |
|
249 mCtx(0), |
|
250 mPixelAspectRatio(0) |
|
251 { |
|
252 MOZ_COUNT_CTOR(TheoraState); |
|
253 th_info_init(&mInfo); |
|
254 th_comment_init(&mComment); |
|
255 } |
|
256 |
|
257 TheoraState::~TheoraState() { |
|
258 MOZ_COUNT_DTOR(TheoraState); |
|
259 th_setup_free(mSetup); |
|
260 th_decode_free(mCtx); |
|
261 th_comment_clear(&mComment); |
|
262 th_info_clear(&mInfo); |
|
263 } |
|
264 |
|
265 bool TheoraState::Init() { |
|
266 if (!mActive) |
|
267 return false; |
|
268 |
|
269 int64_t n = mInfo.aspect_numerator; |
|
270 int64_t d = mInfo.aspect_denominator; |
|
271 |
|
272 mPixelAspectRatio = (n == 0 || d == 0) ? |
|
273 1.0f : static_cast<float>(n) / static_cast<float>(d); |
|
274 |
|
275 // Ensure the frame and picture regions aren't larger than our prescribed |
|
276 // maximum, or zero sized. |
|
277 nsIntSize frame(mInfo.frame_width, mInfo.frame_height); |
|
278 nsIntRect picture(mInfo.pic_x, mInfo.pic_y, mInfo.pic_width, mInfo.pic_height); |
|
279 if (!IsValidVideoRegion(frame, picture, frame)) { |
|
280 return mActive = false; |
|
281 } |
|
282 |
|
283 mCtx = th_decode_alloc(&mInfo, mSetup); |
|
284 if (mCtx == nullptr) { |
|
285 return mActive = false; |
|
286 } |
|
287 |
|
288 return true; |
|
289 } |
|
290 |
|
291 bool |
|
292 TheoraState::DecodeHeader(ogg_packet* aPacket) |
|
293 { |
|
294 nsAutoRef<ogg_packet> autoRelease(aPacket); |
|
295 mPacketCount++; |
|
296 int ret = th_decode_headerin(&mInfo, |
|
297 &mComment, |
|
298 &mSetup, |
|
299 aPacket); |
|
300 |
|
301 // We must determine when we've read the last header packet. |
|
302 // th_decode_headerin() does not tell us when it's read the last header, so |
|
303 // we must keep track of the headers externally. |
|
304 // |
|
305 // There are 3 header packets, the Identification, Comment, and Setup |
|
306 // headers, which must be in that order. If they're out of order, the file |
|
307 // is invalid. If we've successfully read a header, and it's the setup |
|
308 // header, then we're done reading headers. The first byte of each packet |
|
309 // determines it's type as follows: |
|
310 // 0x80 -> Identification header |
|
311 // 0x81 -> Comment header |
|
312 // 0x82 -> Setup header |
|
313 // See http://www.theora.org/doc/Theora.pdf Chapter 6, "Bitstream Headers", |
|
314 // for more details of the Ogg/Theora containment scheme. |
|
315 bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x82; |
|
316 if (ret < 0 || mPacketCount > 3) { |
|
317 // We've received an error, or the first three packets weren't valid |
|
318 // header packets. Assume bad input. |
|
319 // Our caller will deactivate the bitstream. |
|
320 return false; |
|
321 } else if (ret > 0 && isSetupHeader && mPacketCount == 3) { |
|
322 // Successfully read the three header packets. |
|
323 mDoneReadingHeaders = true; |
|
324 } |
|
325 return true; |
|
326 } |
|
327 |
|
328 int64_t |
|
329 TheoraState::Time(int64_t granulepos) { |
|
330 if (!mActive) { |
|
331 return -1; |
|
332 } |
|
333 return TheoraState::Time(&mInfo, granulepos); |
|
334 } |
|
335 |
|
336 bool |
|
337 TheoraState::IsHeader(ogg_packet* aPacket) { |
|
338 return th_packet_isheader(aPacket); |
|
339 } |
|
340 |
|
// Evaluates to true when the version stored in *_info (version_major,
// version_minor, version_subminor) is (_maj, _min, _sub) or later, compared
// lexicographically: a higher major wins outright, then a higher minor,
// and only on equal major and minor is the subminor compared.
// The earlier form required each component to be >= independently, which
// wrongly rejected versions such as 3.3.0 or 4.0.0 when testing for 3.2.1.
#define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
  ((_info)->version_major > (_maj) || \
   ((_info)->version_major == (_maj) && \
    ((_info)->version_minor > (_min) || \
     ((_info)->version_minor == (_min) && \
      (_info)->version_subminor >= (_sub)))))
|
345 |
|
346 int64_t TheoraState::Time(th_info* aInfo, int64_t aGranulepos) |
|
347 { |
|
348 if (aGranulepos < 0 || aInfo->fps_numerator == 0) { |
|
349 return -1; |
|
350 } |
|
351 // Implementation of th_granule_frame inlined here to operate |
|
352 // on the th_info structure instead of the theora_state. |
|
353 int shift = aInfo->keyframe_granule_shift; |
|
354 ogg_int64_t iframe = aGranulepos >> shift; |
|
355 ogg_int64_t pframe = aGranulepos - (iframe << shift); |
|
356 int64_t frameno = iframe + pframe - TH_VERSION_CHECK(aInfo, 3, 2, 1); |
|
357 CheckedInt64 t = ((CheckedInt64(frameno) + 1) * USECS_PER_S) * aInfo->fps_denominator; |
|
358 if (!t.isValid()) |
|
359 return -1; |
|
360 t /= aInfo->fps_numerator; |
|
361 return t.isValid() ? t.value() : -1; |
|
362 } |
|
363 |
|
364 int64_t TheoraState::StartTime(int64_t granulepos) { |
|
365 if (granulepos < 0 || !mActive || mInfo.fps_numerator == 0) { |
|
366 return -1; |
|
367 } |
|
368 CheckedInt64 t = (CheckedInt64(th_granule_frame(mCtx, granulepos)) * USECS_PER_S) * mInfo.fps_denominator; |
|
369 if (!t.isValid()) |
|
370 return -1; |
|
371 return t.value() / mInfo.fps_numerator; |
|
372 } |
|
373 |
|
374 int64_t |
|
375 TheoraState::MaxKeyframeOffset() |
|
376 { |
|
377 // Determine the maximum time in microseconds by which a key frame could |
|
378 // offset for the theora bitstream. Theora granulepos encode time as: |
|
379 // ((key_frame_number << granule_shift) + frame_offset). |
|
380 // Therefore the maximum possible time by which any frame could be offset |
|
381 // from a keyframe is the duration of (1 << granule_shift) - 1) frames. |
|
382 int64_t frameDuration; |
|
383 |
|
384 // Max number of frames keyframe could possibly be offset. |
|
385 int64_t keyframeDiff = (1 << mInfo.keyframe_granule_shift) - 1; |
|
386 |
|
387 // Length of frame in usecs. |
|
388 frameDuration = (mInfo.fps_denominator * USECS_PER_S) / mInfo.fps_numerator; |
|
389 |
|
390 // Total time in usecs keyframe can be offset from any given frame. |
|
391 return frameDuration * keyframeDiff; |
|
392 } |
|
393 |
|
394 nsresult |
|
395 TheoraState::PageIn(ogg_page* aPage) |
|
396 { |
|
397 if (!mActive) |
|
398 return NS_OK; |
|
399 NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, |
|
400 "Page must be for this stream!"); |
|
401 if (ogg_stream_pagein(&mState, aPage) == -1) |
|
402 return NS_ERROR_FAILURE; |
|
403 bool foundGp; |
|
404 nsresult res = PacketOutUntilGranulepos(foundGp); |
|
405 if (NS_FAILED(res)) |
|
406 return res; |
|
407 if (foundGp && mDoneReadingHeaders) { |
|
408 // We've found a packet with a granulepos, and we've loaded our metadata |
|
409 // and initialized our decoder. Determine granulepos of buffered packets. |
|
410 ReconstructTheoraGranulepos(); |
|
411 for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { |
|
412 ogg_packet* packet = mUnstamped[i]; |
|
413 #ifdef DEBUG |
|
414 NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); |
|
415 NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); |
|
416 #endif |
|
417 mPackets.Append(packet); |
|
418 } |
|
419 mUnstamped.Clear(); |
|
420 } |
|
421 return NS_OK; |
|
422 } |
|
423 |
|
424 // Returns 1 if the Theora info struct is decoding a media of Theora |
|
425 // version (maj,min,sub) or later, otherwise returns 0. |
|
426 int |
|
427 TheoraVersion(th_info* info, |
|
428 unsigned char maj, |
|
429 unsigned char min, |
|
430 unsigned char sub) |
|
431 { |
|
432 ogg_uint32_t ver = (maj << 16) + (min << 8) + sub; |
|
433 ogg_uint32_t th_ver = (info->version_major << 16) + |
|
434 (info->version_minor << 8) + |
|
435 info->version_subminor; |
|
436 return (th_ver >= ver) ? 1 : 0; |
|
437 } |
|
438 |
|
439 void TheoraState::ReconstructTheoraGranulepos() |
|
440 { |
|
441 if (mUnstamped.Length() == 0) { |
|
442 return; |
|
443 } |
|
444 ogg_int64_t lastGranulepos = mUnstamped[mUnstamped.Length() - 1]->granulepos; |
|
445 NS_ASSERTION(lastGranulepos != -1, "Must know last granulepos"); |
|
446 |
|
447 // Reconstruct the granulepos (and thus timestamps) of the decoded |
|
448 // frames. Granulepos are stored as ((keyframe<<shift)+offset). We |
|
449 // know the granulepos of the last frame in the list, so we can infer |
|
450 // the granulepos of the intermediate frames using their frame numbers. |
|
451 ogg_int64_t shift = mInfo.keyframe_granule_shift; |
|
452 ogg_int64_t version_3_2_1 = TheoraVersion(&mInfo,3,2,1); |
|
453 ogg_int64_t lastFrame = th_granule_frame(mCtx, |
|
454 lastGranulepos) + version_3_2_1; |
|
455 ogg_int64_t firstFrame = lastFrame - mUnstamped.Length() + 1; |
|
456 |
|
457 // Until we encounter a keyframe, we'll assume that the "keyframe" |
|
458 // segment of the granulepos is the first frame, or if that causes |
|
459 // the "offset" segment to overflow, we assume the required |
|
460 // keyframe is maximumally offset. Until we encounter a keyframe |
|
461 // the granulepos will probably be wrong, but we can't decode the |
|
462 // frame anyway (since we don't have its keyframe) so it doesn't really |
|
463 // matter. |
|
464 ogg_int64_t keyframe = lastGranulepos >> shift; |
|
465 |
|
466 // The lastFrame, firstFrame, keyframe variables, as well as the frame |
|
467 // variable in the loop below, store the frame number for Theora |
|
468 // version >= 3.2.1 streams, and store the frame index for Theora |
|
469 // version < 3.2.1 streams. |
|
470 for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { |
|
471 ogg_int64_t frame = firstFrame + i; |
|
472 ogg_int64_t granulepos; |
|
473 ogg_packet* packet = mUnstamped[i]; |
|
474 bool isKeyframe = th_packet_iskeyframe(packet) == 1; |
|
475 |
|
476 if (isKeyframe) { |
|
477 granulepos = frame << shift; |
|
478 keyframe = frame; |
|
479 } else if (frame >= keyframe && |
|
480 frame - keyframe < ((ogg_int64_t)1 << shift)) |
|
481 { |
|
482 // (frame - keyframe) won't overflow the "offset" segment of the |
|
483 // granulepos, so it's safe to calculate the granulepos. |
|
484 granulepos = (keyframe << shift) + (frame - keyframe); |
|
485 } else { |
|
486 // (frame - keyframeno) will overflow the "offset" segment of the |
|
487 // granulepos, so we take "keyframe" to be the max possible offset |
|
488 // frame instead. |
|
489 ogg_int64_t k = std::max(frame - (((ogg_int64_t)1 << shift) - 1), version_3_2_1); |
|
490 granulepos = (k << shift) + (frame - k); |
|
491 } |
|
492 // Theora 3.2.1+ granulepos store frame number [1..N], so granulepos |
|
493 // should be > 0. |
|
494 // Theora 3.2.0 granulepos store the frame index [0..(N-1)], so |
|
495 // granulepos should be >= 0. |
|
496 NS_ASSERTION(granulepos >= version_3_2_1, |
|
497 "Invalid granulepos for Theora version"); |
|
498 |
|
499 // Check that the frame's granule number is one more than the |
|
500 // previous frame's. |
|
501 NS_ASSERTION(i == 0 || |
|
502 th_granule_frame(mCtx, granulepos) == |
|
503 th_granule_frame(mCtx, mUnstamped[i-1]->granulepos) + 1, |
|
504 "Granulepos calculation is incorrect!"); |
|
505 |
|
506 packet->granulepos = granulepos; |
|
507 } |
|
508 |
|
509 // Check that the second to last frame's granule number is one less than |
|
510 // the last frame's (the known granule number). If not our granulepos |
|
511 // recovery missed a beat. |
|
512 NS_ASSERTION(mUnstamped.Length() < 2 || |
|
513 th_granule_frame(mCtx, mUnstamped[mUnstamped.Length()-2]->granulepos) + 1 == |
|
514 th_granule_frame(mCtx, lastGranulepos), |
|
515 "Granulepos recovery should catch up with packet->granulepos!"); |
|
516 } |
|
517 |
|
518 nsresult VorbisState::Reset() |
|
519 { |
|
520 nsresult res = NS_OK; |
|
521 if (mActive && vorbis_synthesis_restart(&mDsp) != 0) { |
|
522 res = NS_ERROR_FAILURE; |
|
523 } |
|
524 if (NS_FAILED(OggCodecState::Reset())) { |
|
525 return NS_ERROR_FAILURE; |
|
526 } |
|
527 |
|
528 mGranulepos = 0; |
|
529 mPrevVorbisBlockSize = 0; |
|
530 |
|
531 return res; |
|
532 } |
|
533 |
|
534 VorbisState::VorbisState(ogg_page* aBosPage) : |
|
535 OggCodecState(aBosPage, true), |
|
536 mPrevVorbisBlockSize(0), |
|
537 mGranulepos(0) |
|
538 { |
|
539 MOZ_COUNT_CTOR(VorbisState); |
|
540 vorbis_info_init(&mInfo); |
|
541 vorbis_comment_init(&mComment); |
|
542 memset(&mDsp, 0, sizeof(vorbis_dsp_state)); |
|
543 memset(&mBlock, 0, sizeof(vorbis_block)); |
|
544 } |
|
545 |
|
546 VorbisState::~VorbisState() { |
|
547 MOZ_COUNT_DTOR(VorbisState); |
|
548 Reset(); |
|
549 vorbis_block_clear(&mBlock); |
|
550 vorbis_dsp_clear(&mDsp); |
|
551 vorbis_info_clear(&mInfo); |
|
552 vorbis_comment_clear(&mComment); |
|
553 } |
|
554 |
|
555 bool VorbisState::DecodeHeader(ogg_packet* aPacket) { |
|
556 nsAutoRef<ogg_packet> autoRelease(aPacket); |
|
557 mPacketCount++; |
|
558 int ret = vorbis_synthesis_headerin(&mInfo, |
|
559 &mComment, |
|
560 aPacket); |
|
561 // We must determine when we've read the last header packet. |
|
562 // vorbis_synthesis_headerin() does not tell us when it's read the last |
|
563 // header, so we must keep track of the headers externally. |
|
564 // |
|
565 // There are 3 header packets, the Identification, Comment, and Setup |
|
566 // headers, which must be in that order. If they're out of order, the file |
|
567 // is invalid. If we've successfully read a header, and it's the setup |
|
568 // header, then we're done reading headers. The first byte of each packet |
|
569 // determines it's type as follows: |
|
570 // 0x1 -> Identification header |
|
571 // 0x3 -> Comment header |
|
572 // 0x5 -> Setup header |
|
573 // For more details of the Vorbis/Ogg containment scheme, see the Vorbis I |
|
574 // Specification, Chapter 4, Codec Setup and Packet Decode: |
|
575 // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-580004 |
|
576 |
|
577 bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x5; |
|
578 |
|
579 if (ret < 0 || mPacketCount > 3) { |
|
580 // We've received an error, or the first three packets weren't valid |
|
581 // header packets. Assume bad input. Our caller will deactivate the |
|
582 // bitstream. |
|
583 return false; |
|
584 } else if (ret == 0 && isSetupHeader && mPacketCount == 3) { |
|
585 // Successfully read the three header packets. |
|
586 // The bitstream remains active. |
|
587 mDoneReadingHeaders = true; |
|
588 } |
|
589 return true; |
|
590 } |
|
591 |
|
592 bool VorbisState::Init() |
|
593 { |
|
594 if (!mActive) |
|
595 return false; |
|
596 |
|
597 int ret = vorbis_synthesis_init(&mDsp, &mInfo); |
|
598 if (ret != 0) { |
|
599 NS_WARNING("vorbis_synthesis_init() failed initializing vorbis bitstream"); |
|
600 return mActive = false; |
|
601 } |
|
602 ret = vorbis_block_init(&mDsp, &mBlock); |
|
603 if (ret != 0) { |
|
604 NS_WARNING("vorbis_block_init() failed initializing vorbis bitstream"); |
|
605 if (mActive) { |
|
606 vorbis_dsp_clear(&mDsp); |
|
607 } |
|
608 return mActive = false; |
|
609 } |
|
610 return true; |
|
611 } |
|
612 |
|
613 int64_t VorbisState::Time(int64_t granulepos) |
|
614 { |
|
615 if (!mActive) { |
|
616 return -1; |
|
617 } |
|
618 |
|
619 return VorbisState::Time(&mInfo, granulepos); |
|
620 } |
|
621 |
|
622 int64_t VorbisState::Time(vorbis_info* aInfo, int64_t aGranulepos) |
|
623 { |
|
624 if (aGranulepos == -1 || aInfo->rate == 0) { |
|
625 return -1; |
|
626 } |
|
627 CheckedInt64 t = CheckedInt64(aGranulepos) * USECS_PER_S; |
|
628 if (!t.isValid()) |
|
629 t = 0; |
|
630 return t.value() / aInfo->rate; |
|
631 } |
|
632 |
|
633 bool |
|
634 VorbisState::IsHeader(ogg_packet* aPacket) |
|
635 { |
|
636 // The first byte in each Vorbis header packet is either 0x01, 0x03, or 0x05, |
|
637 // i.e. the first bit is odd. Audio data packets have their first bit as 0x0. |
|
638 // Any packet with its first bit set cannot be a data packet, it's a |
|
639 // (possibly invalid) header packet. |
|
640 // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-610004.2.1 |
|
641 return aPacket->bytes > 0 ? (aPacket->packet[0] & 0x1) : false; |
|
642 } |
|
643 |
|
644 MetadataTags* |
|
645 VorbisState::GetTags() |
|
646 { |
|
647 MetadataTags* tags; |
|
648 NS_ASSERTION(mComment.user_comments, "no vorbis comment strings!"); |
|
649 NS_ASSERTION(mComment.comment_lengths, "no vorbis comment lengths!"); |
|
650 tags = new MetadataTags; |
|
651 for (int i = 0; i < mComment.comments; i++) { |
|
652 AddVorbisComment(tags, mComment.user_comments[i], |
|
653 mComment.comment_lengths[i]); |
|
654 } |
|
655 return tags; |
|
656 } |
|
657 |
|
658 nsresult |
|
659 VorbisState::PageIn(ogg_page* aPage) |
|
660 { |
|
661 if (!mActive) |
|
662 return NS_OK; |
|
663 NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, |
|
664 "Page must be for this stream!"); |
|
665 if (ogg_stream_pagein(&mState, aPage) == -1) |
|
666 return NS_ERROR_FAILURE; |
|
667 bool foundGp; |
|
668 nsresult res = PacketOutUntilGranulepos(foundGp); |
|
669 if (NS_FAILED(res)) |
|
670 return res; |
|
671 if (foundGp && mDoneReadingHeaders) { |
|
672 // We've found a packet with a granulepos, and we've loaded our metadata |
|
673 // and initialized our decoder. Determine granulepos of buffered packets. |
|
674 ReconstructVorbisGranulepos(); |
|
675 for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { |
|
676 ogg_packet* packet = mUnstamped[i]; |
|
677 AssertHasRecordedPacketSamples(packet); |
|
678 NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); |
|
679 NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); |
|
680 mPackets.Append(packet); |
|
681 } |
|
682 mUnstamped.Clear(); |
|
683 } |
|
684 return NS_OK; |
|
685 } |
|
686 |
|
687 nsresult VorbisState::ReconstructVorbisGranulepos() |
|
688 { |
|
689 // The number of samples in a Vorbis packet is: |
|
690 // window_blocksize(previous_packet)/4+window_blocksize(current_packet)/4 |
|
691 // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-230001.3.2 |
|
692 // So we maintain mPrevVorbisBlockSize, the block size of the last packet |
|
693 // encountered. We also maintain mGranulepos, which is the granulepos of |
|
694 // the last encountered packet. This enables us to give granulepos to |
|
695 // packets when the last packet in mUnstamped doesn't have a granulepos |
|
696 // (for example if the stream was truncated). |
|
697 // |
|
698 // We validate our prediction of the number of samples decoded when |
|
699 // VALIDATE_VORBIS_SAMPLE_CALCULATION is defined by recording the predicted |
|
700 // number of samples, and verifing we extract that many when decoding |
|
701 // each packet. |
|
702 |
|
703 NS_ASSERTION(mUnstamped.Length() > 0, "Length must be > 0"); |
|
704 ogg_packet* last = mUnstamped[mUnstamped.Length()-1]; |
|
705 NS_ASSERTION(last->e_o_s || last->granulepos >= 0, |
|
706 "Must know last granulepos!"); |
|
707 if (mUnstamped.Length() == 1) { |
|
708 ogg_packet* packet = mUnstamped[0]; |
|
709 long blockSize = vorbis_packet_blocksize(&mInfo, packet); |
|
710 if (blockSize < 0) { |
|
711 // On failure vorbis_packet_blocksize returns < 0. If we've got |
|
712 // a bad packet, we just assume that decode will have to skip this |
|
713 // packet, i.e. assume 0 samples are decodable from this packet. |
|
714 blockSize = 0; |
|
715 mPrevVorbisBlockSize = 0; |
|
716 } |
|
717 long samples = mPrevVorbisBlockSize / 4 + blockSize / 4; |
|
718 mPrevVorbisBlockSize = blockSize; |
|
719 if (packet->granulepos == -1) { |
|
720 packet->granulepos = mGranulepos + samples; |
|
721 } |
|
722 |
|
723 // Account for a partial last frame |
|
724 if (packet->e_o_s && packet->granulepos >= mGranulepos) { |
|
725 samples = packet->granulepos - mGranulepos; |
|
726 } |
|
727 |
|
728 mGranulepos = packet->granulepos; |
|
729 RecordVorbisPacketSamples(packet, samples); |
|
730 return NS_OK; |
|
731 } |
|
732 |
|
733 bool unknownGranulepos = last->granulepos == -1; |
|
734 int totalSamples = 0; |
|
735 for (int32_t i = mUnstamped.Length() - 1; i > 0; i--) { |
|
736 ogg_packet* packet = mUnstamped[i]; |
|
737 ogg_packet* prev = mUnstamped[i-1]; |
|
738 ogg_int64_t granulepos = packet->granulepos; |
|
739 NS_ASSERTION(granulepos != -1, "Must know granulepos!"); |
|
740 long prevBlockSize = vorbis_packet_blocksize(&mInfo, prev); |
|
741 long blockSize = vorbis_packet_blocksize(&mInfo, packet); |
|
742 |
|
743 if (blockSize < 0 || prevBlockSize < 0) { |
|
744 // On failure vorbis_packet_blocksize returns < 0. If we've got |
|
745 // a bad packet, we just assume that decode will have to skip this |
|
746 // packet, i.e. assume 0 samples are decodable from this packet. |
|
747 blockSize = 0; |
|
748 prevBlockSize = 0; |
|
749 } |
|
750 |
|
751 long samples = prevBlockSize / 4 + blockSize / 4; |
|
752 totalSamples += samples; |
|
753 prev->granulepos = granulepos - samples; |
|
754 RecordVorbisPacketSamples(packet, samples); |
|
755 } |
|
756 |
|
757 if (unknownGranulepos) { |
|
758 for (uint32_t i = 0; i < mUnstamped.Length(); i++) { |
|
759 ogg_packet* packet = mUnstamped[i]; |
|
760 packet->granulepos += mGranulepos + totalSamples + 1; |
|
761 } |
|
762 } |
|
763 |
|
764 ogg_packet* first = mUnstamped[0]; |
|
765 long blockSize = vorbis_packet_blocksize(&mInfo, first); |
|
766 if (blockSize < 0) { |
|
767 mPrevVorbisBlockSize = 0; |
|
768 blockSize = 0; |
|
769 } |
|
770 |
|
771 long samples = (mPrevVorbisBlockSize == 0) ? 0 : |
|
772 mPrevVorbisBlockSize / 4 + blockSize / 4; |
|
773 int64_t start = first->granulepos - samples; |
|
774 RecordVorbisPacketSamples(first, samples); |
|
775 |
|
776 if (last->e_o_s && start < mGranulepos) { |
|
777 // We've calculated that there are more samples in this page than its |
|
778 // granulepos claims, and it's the last page in the stream. This is legal, |
|
779 // and we will need to prune the trailing samples when we come to decode it. |
|
780 // We must correct the timestamps so that they follow the last Vorbis page's |
|
781 // samples. |
|
782 int64_t pruned = mGranulepos - start; |
|
783 for (uint32_t i = 0; i < mUnstamped.Length() - 1; i++) { |
|
784 mUnstamped[i]->granulepos += pruned; |
|
785 } |
|
786 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
787 mVorbisPacketSamples[last] -= pruned; |
|
788 #endif |
|
789 } |
|
790 |
|
791 mPrevVorbisBlockSize = vorbis_packet_blocksize(&mInfo, last); |
|
792 mPrevVorbisBlockSize = std::max(static_cast<long>(0), mPrevVorbisBlockSize); |
|
793 mGranulepos = last->granulepos; |
|
794 |
|
795 return NS_OK; |
|
796 } |
|
797 |
|
798 #ifdef MOZ_OPUS |
|
799 OpusState::OpusState(ogg_page* aBosPage) : |
|
800 OggCodecState(aBosPage, true), |
|
801 mParser(nullptr), |
|
802 mDecoder(nullptr), |
|
803 mSkip(0), |
|
804 mPrevPacketGranulepos(0), |
|
805 mPrevPageGranulepos(0) |
|
806 { |
|
807 MOZ_COUNT_CTOR(OpusState); |
|
808 } |
|
809 |
|
810 OpusState::~OpusState() { |
|
811 MOZ_COUNT_DTOR(OpusState); |
|
812 Reset(); |
|
813 |
|
814 if (mDecoder) { |
|
815 opus_multistream_decoder_destroy(mDecoder); |
|
816 mDecoder = nullptr; |
|
817 } |
|
818 } |
|
819 |
|
820 nsresult OpusState::Reset() |
|
821 { |
|
822 return Reset(false); |
|
823 } |
|
824 |
|
825 nsresult OpusState::Reset(bool aStart) |
|
826 { |
|
827 nsresult res = NS_OK; |
|
828 |
|
829 if (mActive && mDecoder) { |
|
830 // Reset the decoder. |
|
831 opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE); |
|
832 // Let the seek logic handle pre-roll if we're not seeking to the start. |
|
833 mSkip = aStart ? mParser->mPreSkip : 0; |
|
834 // This lets us distinguish the first page being the last page vs. just |
|
835 // not having processed the previous page when we encounter the last page. |
|
836 mPrevPageGranulepos = aStart ? 0 : -1; |
|
837 mPrevPacketGranulepos = aStart ? 0 : -1; |
|
838 } |
|
839 |
|
840 // Clear queued data. |
|
841 if (NS_FAILED(OggCodecState::Reset())) { |
|
842 return NS_ERROR_FAILURE; |
|
843 } |
|
844 |
|
845 LOG(PR_LOG_DEBUG, ("Opus decoder reset, to skip %d", mSkip)); |
|
846 |
|
847 return res; |
|
848 } |
|
849 |
|
850 bool OpusState::Init(void) |
|
851 { |
|
852 if (!mActive) |
|
853 return false; |
|
854 |
|
855 int error; |
|
856 |
|
857 NS_ASSERTION(mDecoder == nullptr, "leaking OpusDecoder"); |
|
858 |
|
859 mDecoder = opus_multistream_decoder_create(mParser->mRate, |
|
860 mParser->mChannels, |
|
861 mParser->mStreams, |
|
862 mParser->mCoupledStreams, |
|
863 mParser->mMappingTable, |
|
864 &error); |
|
865 |
|
866 mSkip = mParser->mPreSkip; |
|
867 |
|
868 LOG(PR_LOG_DEBUG, ("Opus decoder init, to skip %d", mSkip)); |
|
869 |
|
870 return error == OPUS_OK; |
|
871 } |
|
872 |
|
873 bool OpusState::DecodeHeader(ogg_packet* aPacket) |
|
874 { |
|
875 nsAutoRef<ogg_packet> autoRelease(aPacket); |
|
876 switch(mPacketCount++) { |
|
877 // Parse the id header. |
|
878 case 0: { |
|
879 mParser = new OpusParser; |
|
880 if(!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) { |
|
881 return false; |
|
882 } |
|
883 mRate = mParser->mRate; |
|
884 mChannels = mParser->mChannels; |
|
885 mPreSkip = mParser->mPreSkip; |
|
886 #ifdef MOZ_SAMPLE_TYPE_FLOAT32 |
|
887 mGain = mParser->mGain; |
|
888 #else |
|
889 mGain_Q16 = mParser->mGain_Q16; |
|
890 #endif |
|
891 } |
|
892 break; |
|
893 |
|
894 // Parse the metadata header. |
|
895 case 1: { |
|
896 if(!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) { |
|
897 return false; |
|
898 } |
|
899 } |
|
900 break; |
|
901 |
|
902 // We made it to the first data packet (which includes reconstructing |
|
903 // timestamps for it in PageIn). Success! |
|
904 default: { |
|
905 mDoneReadingHeaders = true; |
|
906 // Put it back on the queue so we can decode it. |
|
907 mPackets.PushFront(autoRelease.disown()); |
|
908 } |
|
909 break; |
|
910 } |
|
911 return true; |
|
912 } |
|
913 |
|
914 /* Construct and return a tags hashmap from our internal array */ |
|
915 MetadataTags* OpusState::GetTags() |
|
916 { |
|
917 MetadataTags* tags; |
|
918 |
|
919 tags = new MetadataTags; |
|
920 for (uint32_t i = 0; i < mParser->mTags.Length(); i++) { |
|
921 AddVorbisComment(tags, mParser->mTags[i].Data(), mParser->mTags[i].Length()); |
|
922 } |
|
923 |
|
924 return tags; |
|
925 } |
|
926 |
|
927 /* Return the timestamp (in microseconds) equivalent to a granulepos. */ |
|
928 int64_t OpusState::Time(int64_t aGranulepos) |
|
929 { |
|
930 if (!mActive) |
|
931 return -1; |
|
932 |
|
933 return Time(mParser->mPreSkip, aGranulepos); |
|
934 } |
|
935 |
|
936 int64_t OpusState::Time(int aPreSkip, int64_t aGranulepos) |
|
937 { |
|
938 if (aGranulepos < 0) |
|
939 return -1; |
|
940 |
|
941 // Ogg Opus always runs at a granule rate of 48 kHz. |
|
942 CheckedInt64 t = CheckedInt64(aGranulepos - aPreSkip) * USECS_PER_S; |
|
943 return t.isValid() ? t.value() / 48000 : -1; |
|
944 } |
|
945 |
|
946 bool OpusState::IsHeader(ogg_packet* aPacket) |
|
947 { |
|
948 return aPacket->bytes >= 16 && |
|
949 (!memcmp(aPacket->packet, "OpusHead", 8) || |
|
950 !memcmp(aPacket->packet, "OpusTags", 8)); |
|
951 } |
|
952 |
|
953 nsresult OpusState::PageIn(ogg_page* aPage) |
|
954 { |
|
955 if (!mActive) |
|
956 return NS_OK; |
|
957 NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, |
|
958 "Page must be for this stream!"); |
|
959 if (ogg_stream_pagein(&mState, aPage) == -1) |
|
960 return NS_ERROR_FAILURE; |
|
961 |
|
962 bool haveGranulepos; |
|
963 nsresult rv = PacketOutUntilGranulepos(haveGranulepos); |
|
964 if (NS_FAILED(rv) || !haveGranulepos || mPacketCount < 2) |
|
965 return rv; |
|
966 if(!ReconstructOpusGranulepos()) |
|
967 return NS_ERROR_FAILURE; |
|
968 for (uint32_t i = 0; i < mUnstamped.Length(); i++) { |
|
969 ogg_packet* packet = mUnstamped[i]; |
|
970 NS_ASSERTION(!IsHeader(packet), "Don't try to play a header packet"); |
|
971 NS_ASSERTION(packet->granulepos != -1, "Packet should have a granulepos"); |
|
972 mPackets.Append(packet); |
|
973 } |
|
974 mUnstamped.Clear(); |
|
975 return NS_OK; |
|
976 } |
|
977 |
|
978 // Helper method to return the change in granule position due to an Opus packet |
|
979 // (as distinct from the number of samples in the packet, which depends on the |
|
980 // decoder rate). It should work with a multistream Opus file, and continue to |
|
981 // work should we ever allow the decoder to decode at a rate other than 48 kHz. |
|
982 // It even works before we've created the actual Opus decoder. |
|
983 static int GetOpusDeltaGP(ogg_packet* packet) |
|
984 { |
|
985 int nframes; |
|
986 nframes = opus_packet_get_nb_frames(packet->packet, packet->bytes); |
|
987 if (nframes > 0) { |
|
988 return nframes*opus_packet_get_samples_per_frame(packet->packet, 48000); |
|
989 } |
|
990 NS_WARNING("Invalid Opus packet."); |
|
991 return nframes; |
|
992 } |
|
993 |
|
994 bool OpusState::ReconstructOpusGranulepos(void) |
|
995 { |
|
996 NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); |
|
997 ogg_packet* last = mUnstamped[mUnstamped.Length()-1]; |
|
998 NS_ASSERTION(last->e_o_s || last->granulepos > 0, |
|
999 "Must know last granulepos!"); |
|
1000 int64_t gp; |
|
1001 // If this is the last page, and we've seen at least one previous page (or |
|
1002 // this is the first page)... |
|
1003 if (last->e_o_s) { |
|
1004 if (mPrevPageGranulepos != -1) { |
|
1005 // If this file only has one page and the final granule position is |
|
1006 // smaller than the pre-skip amount, we MUST reject the stream. |
|
1007 if (!mDoneReadingHeaders && last->granulepos < mPreSkip) |
|
1008 return false; |
|
1009 int64_t last_gp = last->granulepos; |
|
1010 gp = mPrevPageGranulepos; |
|
1011 // Loop through the packets forwards, adding the current packet's |
|
1012 // duration to the previous granulepos to get the value for the |
|
1013 // current packet. |
|
1014 for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { |
|
1015 ogg_packet* packet = mUnstamped[i]; |
|
1016 int offset = GetOpusDeltaGP(packet); |
|
1017 // Check for error (negative offset) and overflow. |
|
1018 if (offset >= 0 && gp <= INT64_MAX - offset) { |
|
1019 gp += offset; |
|
1020 if (gp >= last_gp) { |
|
1021 NS_WARNING("Opus end trimming removed more than a full packet."); |
|
1022 // We were asked to remove a full packet's worth of data or more. |
|
1023 // Encoders SHOULD NOT produce streams like this, but we'll handle |
|
1024 // it for them anyway. |
|
1025 gp = last_gp; |
|
1026 for (uint32_t j = i+1; j < mUnstamped.Length(); ++j) { |
|
1027 OggCodecState::ReleasePacket(mUnstamped[j]); |
|
1028 } |
|
1029 mUnstamped.RemoveElementsAt(i+1, mUnstamped.Length() - (i+1)); |
|
1030 last = packet; |
|
1031 last->e_o_s = 1; |
|
1032 } |
|
1033 } |
|
1034 packet->granulepos = gp; |
|
1035 } |
|
1036 mPrevPageGranulepos = last_gp; |
|
1037 return true; |
|
1038 } else { |
|
1039 NS_WARNING("No previous granule position to use for Opus end trimming."); |
|
1040 // If we don't have a previous granule position, fall through. |
|
1041 // We simply won't trim any samples from the end. |
|
1042 // TODO: Are we guaranteed to have seen a previous page if there is one? |
|
1043 } |
|
1044 } |
|
1045 |
|
1046 gp = last->granulepos; |
|
1047 // Loop through the packets backwards, subtracting the next |
|
1048 // packet's duration from its granulepos to get the value |
|
1049 // for the current packet. |
|
1050 for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { |
|
1051 int offset = GetOpusDeltaGP(mUnstamped[i]); |
|
1052 // Check for error (negative offset) and overflow. |
|
1053 if (offset >= 0) { |
|
1054 if (offset <= gp) { |
|
1055 gp -= offset; |
|
1056 } else { |
|
1057 // If the granule position of the first data page is smaller than the |
|
1058 // number of decodable audio samples on that page, then we MUST reject |
|
1059 // the stream. |
|
1060 if (!mDoneReadingHeaders) |
|
1061 return false; |
|
1062 // It's too late to reject the stream. |
|
1063 // If we get here, this almost certainly means the file has screwed-up |
|
1064 // timestamps somewhere after the first page. |
|
1065 NS_WARNING("Clamping negative Opus granulepos to zero."); |
|
1066 gp = 0; |
|
1067 } |
|
1068 } |
|
1069 mUnstamped[i - 1]->granulepos = gp; |
|
1070 } |
|
1071 |
|
1072 // Check to make sure the first granule position is at least as large as the |
|
1073 // total number of samples decodable from the first page with completed |
|
1074 // packets. This requires looking at the duration of the first packet, too. |
|
1075 // We MUST reject such streams. |
|
1076 if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0]) > gp) |
|
1077 return false; |
|
1078 mPrevPageGranulepos = last->granulepos; |
|
1079 return true; |
|
1080 } |
|
1081 #endif /* MOZ_OPUS */ |
|
1082 |
|
1083 SkeletonState::SkeletonState(ogg_page* aBosPage) : |
|
1084 OggCodecState(aBosPage, true), |
|
1085 mVersion(0), |
|
1086 mPresentationTime(0), |
|
1087 mLength(0) |
|
1088 { |
|
1089 MOZ_COUNT_CTOR(SkeletonState); |
|
1090 } |
|
1091 |
|
1092 SkeletonState::~SkeletonState() |
|
1093 { |
|
1094 MOZ_COUNT_DTOR(SkeletonState); |
|
1095 } |
|
1096 |
|
// Support for Ogg Skeleton 4.0, as per specification at:
// http://wiki.xiph.org/Ogg_Skeleton_4

// ---- Packet size limits (bytes) ----

// Smallest acceptable Skeleton header packet, for any version.
static const long SKELETON_MIN_HEADER_LEN = 28;
// Smallest acceptable Skeleton 4.0 header packet.
static const long SKELETON_4_0_MIN_HEADER_LEN = 80;

// Smallest acceptable Skeleton 4.0 index packet.
static const long SKELETON_4_0_MIN_INDEX_LEN = 42;

// Smallest possible size of one compressed index keypoint.
static const size_t MIN_KEY_POINT_SIZE = 2;

// ---- Header packet field offsets (bytes from packet start) ----

// Major and minor version numbers.
static const size_t SKELETON_VERSION_MAJOR_OFFSET = 8;
static const size_t SKELETON_VERSION_MINOR_OFFSET = 10;

// Presentation time numerator and denominator.
static const size_t SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET = 12;
static const size_t SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET = 20;

// Length-of-file field (Skeleton 4.0 header packets only).
static const size_t SKELETON_FILE_LENGTH_OFFSET = 64;

// ---- Index packet field offsets (bytes from packet start) ----

static const size_t INDEX_SERIALNO_OFFSET = 6;
static const size_t INDEX_NUM_KEYPOINTS_OFFSET = 10;
static const size_t INDEX_TIME_DENOM_OFFSET = 18;
static const size_t INDEX_FIRST_NUMER_OFFSET = 26;
static const size_t INDEX_LAST_NUMER_OFFSET = 34;
static const size_t INDEX_KEYPOINT_OFFSET = 42;
|
1129 |
|
1130 static bool IsSkeletonBOS(ogg_packet* aPacket) |
|
1131 { |
|
1132 return aPacket->bytes >= SKELETON_MIN_HEADER_LEN && |
|
1133 memcmp(reinterpret_cast<char*>(aPacket->packet), "fishead", 8) == 0; |
|
1134 } |
|
1135 |
|
1136 static bool IsSkeletonIndex(ogg_packet* aPacket) |
|
1137 { |
|
1138 return aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN && |
|
1139 memcmp(reinterpret_cast<char*>(aPacket->packet), "index", 5) == 0; |
|
1140 } |
|
1141 |
|
// Reads a variable length encoded integer starting at p and stores it in n.
// Each byte contributes its low seven bits, least significant group first;
// a byte with bit 0x80 set is the final byte of the integer. Reading also
// stops at aLimit (never reads past it) and after nine bytes. n is set to 0
// when nothing can be read. Returns a pointer to the byte after the last one
// consumed.
static const unsigned char* ReadVariableLengthInt(const unsigned char* p,
                                                  const unsigned char* aLimit,
                                                  int64_t& n)
{
  n = 0;
  for (int shift = 0; shift < 57 && p < aLimit; shift += 7) {
    const int64_t current = static_cast<int64_t>(*p++);
    n |= ((current & 0x7f) << shift);
    if ((current & 0x80) == 0x80) {
      // Terminator byte: the integer is complete.
      break;
    }
  }
  return p;
}
|
1162 |
|
1163 bool SkeletonState::DecodeIndex(ogg_packet* aPacket) |
|
1164 { |
|
1165 NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN, |
|
1166 "Index must be at least minimum size"); |
|
1167 if (!mActive) { |
|
1168 return false; |
|
1169 } |
|
1170 |
|
1171 uint32_t serialno = LittleEndian::readUint32(aPacket->packet + INDEX_SERIALNO_OFFSET); |
|
1172 int64_t numKeyPoints = LittleEndian::readInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET); |
|
1173 |
|
1174 int64_t endTime = 0, startTime = 0; |
|
1175 const unsigned char* p = aPacket->packet; |
|
1176 |
|
1177 int64_t timeDenom = LittleEndian::readInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET); |
|
1178 if (timeDenom == 0) { |
|
1179 LOG(PR_LOG_DEBUG, ("Ogg Skeleton Index packet for stream %u has 0 " |
|
1180 "timestamp denominator.", serialno)); |
|
1181 return (mActive = false); |
|
1182 } |
|
1183 |
|
1184 // Extract the start time. |
|
1185 CheckedInt64 t = CheckedInt64(LittleEndian::readInt64(p + INDEX_FIRST_NUMER_OFFSET)) * USECS_PER_S; |
|
1186 if (!t.isValid()) { |
|
1187 return (mActive = false); |
|
1188 } else { |
|
1189 startTime = t.value() / timeDenom; |
|
1190 } |
|
1191 |
|
1192 // Extract the end time. |
|
1193 t = LittleEndian::readInt64(p + INDEX_LAST_NUMER_OFFSET) * USECS_PER_S; |
|
1194 if (!t.isValid()) { |
|
1195 return (mActive = false); |
|
1196 } else { |
|
1197 endTime = t.value() / timeDenom; |
|
1198 } |
|
1199 |
|
1200 // Check the numKeyPoints value read, ensure we're not going to run out of |
|
1201 // memory while trying to decode the index packet. |
|
1202 CheckedInt64 minPacketSize = (CheckedInt64(numKeyPoints) * MIN_KEY_POINT_SIZE) + INDEX_KEYPOINT_OFFSET; |
|
1203 if (!minPacketSize.isValid()) |
|
1204 { |
|
1205 return (mActive = false); |
|
1206 } |
|
1207 |
|
1208 int64_t sizeofIndex = aPacket->bytes - INDEX_KEYPOINT_OFFSET; |
|
1209 int64_t maxNumKeyPoints = sizeofIndex / MIN_KEY_POINT_SIZE; |
|
1210 if (aPacket->bytes < minPacketSize.value() || |
|
1211 numKeyPoints > maxNumKeyPoints || |
|
1212 numKeyPoints < 0) |
|
1213 { |
|
1214 // Packet size is less than the theoretical minimum size, or the packet is |
|
1215 // claiming to store more keypoints than it's capable of storing. This means |
|
1216 // that the numKeyPoints field is too large or small for the packet to |
|
1217 // possibly contain as many packets as it claims to, so the numKeyPoints |
|
1218 // field is possibly malicious. Don't try decoding this index, we may run |
|
1219 // out of memory. |
|
1220 LOG(PR_LOG_DEBUG, ("Possibly malicious number of key points reported " |
|
1221 "(%lld) in index packet for stream %u.", |
|
1222 numKeyPoints, |
|
1223 serialno)); |
|
1224 return (mActive = false); |
|
1225 } |
|
1226 |
|
1227 nsAutoPtr<nsKeyFrameIndex> keyPoints(new nsKeyFrameIndex(startTime, endTime)); |
|
1228 |
|
1229 p = aPacket->packet + INDEX_KEYPOINT_OFFSET; |
|
1230 const unsigned char* limit = aPacket->packet + aPacket->bytes; |
|
1231 int64_t numKeyPointsRead = 0; |
|
1232 CheckedInt64 offset = 0; |
|
1233 CheckedInt64 time = 0; |
|
1234 while (p < limit && |
|
1235 numKeyPointsRead < numKeyPoints) |
|
1236 { |
|
1237 int64_t delta = 0; |
|
1238 p = ReadVariableLengthInt(p, limit, delta); |
|
1239 offset += delta; |
|
1240 if (p == limit || |
|
1241 !offset.isValid() || |
|
1242 offset.value() > mLength || |
|
1243 offset.value() < 0) |
|
1244 { |
|
1245 return (mActive = false); |
|
1246 } |
|
1247 p = ReadVariableLengthInt(p, limit, delta); |
|
1248 time += delta; |
|
1249 if (!time.isValid() || |
|
1250 time.value() > endTime || |
|
1251 time.value() < startTime) |
|
1252 { |
|
1253 return (mActive = false); |
|
1254 } |
|
1255 CheckedInt64 timeUsecs = time * USECS_PER_S; |
|
1256 if (!timeUsecs.isValid()) |
|
1257 return mActive = false; |
|
1258 timeUsecs /= timeDenom; |
|
1259 keyPoints->Add(offset.value(), timeUsecs.value()); |
|
1260 numKeyPointsRead++; |
|
1261 } |
|
1262 |
|
1263 int32_t keyPointsRead = keyPoints->Length(); |
|
1264 if (keyPointsRead > 0) { |
|
1265 mIndex.Put(serialno, keyPoints.forget()); |
|
1266 } |
|
1267 |
|
1268 LOG(PR_LOG_DEBUG, ("Loaded %d keypoints for Skeleton on stream %u", |
|
1269 keyPointsRead, serialno)); |
|
1270 return true; |
|
1271 } |
|
1272 |
|
1273 nsresult SkeletonState::IndexedSeekTargetForTrack(uint32_t aSerialno, |
|
1274 int64_t aTarget, |
|
1275 nsKeyPoint& aResult) |
|
1276 { |
|
1277 nsKeyFrameIndex* index = nullptr; |
|
1278 mIndex.Get(aSerialno, &index); |
|
1279 |
|
1280 if (!index || |
|
1281 index->Length() == 0 || |
|
1282 aTarget < index->mStartTime || |
|
1283 aTarget > index->mEndTime) |
|
1284 { |
|
1285 return NS_ERROR_FAILURE; |
|
1286 } |
|
1287 |
|
1288 // Binary search to find the last key point with time less than target. |
|
1289 int start = 0; |
|
1290 int end = index->Length() - 1; |
|
1291 while (end > start) { |
|
1292 int mid = start + ((end - start + 1) >> 1); |
|
1293 if (index->Get(mid).mTime == aTarget) { |
|
1294 start = mid; |
|
1295 break; |
|
1296 } else if (index->Get(mid).mTime < aTarget) { |
|
1297 start = mid; |
|
1298 } else { |
|
1299 end = mid - 1; |
|
1300 } |
|
1301 } |
|
1302 |
|
1303 aResult = index->Get(start); |
|
1304 NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target"); |
|
1305 return NS_OK; |
|
1306 } |
|
1307 |
|
1308 nsresult SkeletonState::IndexedSeekTarget(int64_t aTarget, |
|
1309 nsTArray<uint32_t>& aTracks, |
|
1310 nsSeekTarget& aResult) |
|
1311 { |
|
1312 if (!mActive || mVersion < SKELETON_VERSION(4,0)) { |
|
1313 return NS_ERROR_FAILURE; |
|
1314 } |
|
1315 // Loop over all requested tracks' indexes, and get the keypoint for that |
|
1316 // seek target. Record the keypoint with the lowest offset, this will be |
|
1317 // our seek result. User must seek to the one with lowest offset to ensure we |
|
1318 // pass "keyframes" on all tracks when we decode forwards to the seek target. |
|
1319 nsSeekTarget r; |
|
1320 for (uint32_t i=0; i<aTracks.Length(); i++) { |
|
1321 nsKeyPoint k; |
|
1322 if (NS_SUCCEEDED(IndexedSeekTargetForTrack(aTracks[i], aTarget, k)) && |
|
1323 k.mOffset < r.mKeyPoint.mOffset) |
|
1324 { |
|
1325 r.mKeyPoint = k; |
|
1326 r.mSerial = aTracks[i]; |
|
1327 } |
|
1328 } |
|
1329 if (r.IsNull()) { |
|
1330 return NS_ERROR_FAILURE; |
|
1331 } |
|
1332 LOG(PR_LOG_DEBUG, ("Indexed seek target for time %lld is offset %lld", |
|
1333 aTarget, r.mKeyPoint.mOffset)); |
|
1334 aResult = r; |
|
1335 return NS_OK; |
|
1336 } |
|
1337 |
|
1338 nsresult SkeletonState::GetDuration(const nsTArray<uint32_t>& aTracks, |
|
1339 int64_t& aDuration) |
|
1340 { |
|
1341 if (!mActive || |
|
1342 mVersion < SKELETON_VERSION(4,0) || |
|
1343 !HasIndex() || |
|
1344 aTracks.Length() == 0) |
|
1345 { |
|
1346 return NS_ERROR_FAILURE; |
|
1347 } |
|
1348 int64_t endTime = INT64_MIN; |
|
1349 int64_t startTime = INT64_MAX; |
|
1350 for (uint32_t i=0; i<aTracks.Length(); i++) { |
|
1351 nsKeyFrameIndex* index = nullptr; |
|
1352 mIndex.Get(aTracks[i], &index); |
|
1353 if (!index) { |
|
1354 // Can't get the timestamps for one of the required tracks, fail. |
|
1355 return NS_ERROR_FAILURE; |
|
1356 } |
|
1357 if (index->mEndTime > endTime) { |
|
1358 endTime = index->mEndTime; |
|
1359 } |
|
1360 if (index->mStartTime < startTime) { |
|
1361 startTime = index->mStartTime; |
|
1362 } |
|
1363 } |
|
1364 NS_ASSERTION(endTime > startTime, "Duration must be positive"); |
|
1365 CheckedInt64 duration = CheckedInt64(endTime) - startTime; |
|
1366 aDuration = duration.isValid() ? duration.value() : 0; |
|
1367 return duration.isValid() ? NS_OK : NS_ERROR_FAILURE; |
|
1368 } |
|
1369 |
|
1370 bool SkeletonState::DecodeHeader(ogg_packet* aPacket) |
|
1371 { |
|
1372 nsAutoRef<ogg_packet> autoRelease(aPacket); |
|
1373 if (IsSkeletonBOS(aPacket)) { |
|
1374 uint16_t verMajor = LittleEndian::readUint16(aPacket->packet + SKELETON_VERSION_MAJOR_OFFSET); |
|
1375 uint16_t verMinor = LittleEndian::readUint16(aPacket->packet + SKELETON_VERSION_MINOR_OFFSET); |
|
1376 |
|
1377 // Read the presentation time. We read this before the version check as the |
|
1378 // presentation time exists in all versions. |
|
1379 int64_t n = LittleEndian::readInt64(aPacket->packet + SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET); |
|
1380 int64_t d = LittleEndian::readInt64(aPacket->packet + SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET); |
|
1381 mPresentationTime = d == 0 ? 0 : (static_cast<float>(n) / static_cast<float>(d)) * USECS_PER_S; |
|
1382 |
|
1383 mVersion = SKELETON_VERSION(verMajor, verMinor); |
|
1384 // We can only care to parse Skeleton version 4.0+. |
|
1385 if (mVersion < SKELETON_VERSION(4,0) || |
|
1386 mVersion >= SKELETON_VERSION(5,0) || |
|
1387 aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) |
|
1388 return false; |
|
1389 |
|
1390 // Extract the segment length. |
|
1391 mLength = LittleEndian::readInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET); |
|
1392 |
|
1393 LOG(PR_LOG_DEBUG, ("Skeleton segment length: %lld", mLength)); |
|
1394 |
|
1395 // Initialize the serialno-to-index map. |
|
1396 return true; |
|
1397 } else if (IsSkeletonIndex(aPacket) && mVersion >= SKELETON_VERSION(4,0)) { |
|
1398 return DecodeIndex(aPacket); |
|
1399 } else if (aPacket->e_o_s) { |
|
1400 mDoneReadingHeaders = true; |
|
1401 return true; |
|
1402 } |
|
1403 return true; |
|
1404 } |
|
1405 |
|
1406 |
|
1407 } // namespace mozilla |
|
1408 |