|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 #if !defined(OggCodecState_h_) |
|
7 #define OggCodecState_h_ |
|
8 |
|
9 #include <ogg/ogg.h> |
|
10 #include <theora/theoradec.h> |
|
11 #ifdef MOZ_TREMOR |
|
12 #include <tremor/ivorbiscodec.h> |
|
13 #else |
|
14 #include <vorbis/codec.h> |
|
15 #endif |
|
16 #ifdef MOZ_OPUS |
|
17 #include <opus/opus.h> |
|
18 #include "opus/opus_multistream.h" |
|
19 // For MOZ_SAMPLE_TYPE_* |
|
20 #include "mozilla/dom/HTMLMediaElement.h" |
|
21 #include "MediaDecoderStateMachine.h" |
|
22 #include "MediaDecoderReader.h" |
|
23 #endif |
|
24 #include <nsAutoRef.h> |
|
25 #include <nsDeque.h> |
|
26 #include <nsTArray.h> |
|
27 #include <nsClassHashtable.h> |
|
28 #include "VideoUtils.h" |
|
29 |
|
30 #include <stdint.h> |
|
31 |
|
// The following define enables validation that we're predicting the number
// of Vorbis samples in each packet correctly. Comment it out to disable.
|
34 #define VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
35 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
36 #include <map> |
|
37 #endif |
|
38 |
|
39 #include "OpusParser.h" |
|
40 |
|
41 namespace mozilla { |
|
42 |
|
43 // Deallocates a packet, used in OggPacketQueue below. |
|
44 class OggPacketDeallocator : public nsDequeFunctor { |
|
45 virtual void* operator() (void* aPacket) { |
|
46 ogg_packet* p = static_cast<ogg_packet*>(aPacket); |
|
47 delete [] p->packet; |
|
48 delete p; |
|
49 return nullptr; |
|
50 } |
|
51 }; |
|
52 |
|
53 // A queue of ogg_packets. When we read a page, we extract the page's packets |
|
54 // and buffer them in the owning stream's OggCodecState. This is because |
|
55 // if we're skipping up to the next keyframe in very large frame sized videos, |
|
56 // there may be several megabytes of data between keyframes, and the |
|
57 // ogg_stream_state would end up resizing its buffer every time we added a |
|
58 // new 4KB page to the bitstream, which kills performance on Windows. This |
|
59 // also gives us the option to timestamp packets rather than decoded |
|
60 // frames/samples, reducing the amount of frames/samples we must decode to |
|
61 // determine start-time at a particular offset, and gives us finer control |
|
62 // over memory usage. |
|
63 class OggPacketQueue : private nsDeque { |
|
64 public: |
|
65 OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {} |
|
66 ~OggPacketQueue() { Erase(); } |
|
67 bool IsEmpty() { return nsDeque::GetSize() == 0; } |
|
68 void Append(ogg_packet* aPacket); |
|
69 ogg_packet* PopFront() { return static_cast<ogg_packet*>(nsDeque::PopFront()); } |
|
70 ogg_packet* PeekFront() { return static_cast<ogg_packet*>(nsDeque::PeekFront()); } |
|
71 void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } |
|
72 void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } |
|
73 void Erase() { nsDeque::Erase(); } |
|
74 }; |
|
75 |
|
76 // Encapsulates the data required for decoding an ogg bitstream and for |
|
77 // converting granulepos to timestamps. |
|
78 class OggCodecState { |
|
79 public: |
|
80 typedef mozilla::MetadataTags MetadataTags; |
|
81 // Ogg types we know about |
|
82 enum CodecType { |
|
83 TYPE_VORBIS=0, |
|
84 TYPE_THEORA=1, |
|
85 TYPE_OPUS=2, |
|
86 TYPE_SKELETON=3, |
|
87 TYPE_UNKNOWN=4 |
|
88 }; |
|
89 |
|
90 virtual ~OggCodecState(); |
|
91 |
|
92 // Factory for creating nsCodecStates. Use instead of constructor. |
|
93 // aPage should be a beginning-of-stream page. |
|
94 static OggCodecState* Create(ogg_page* aPage); |
|
95 |
|
96 virtual CodecType GetType() { return TYPE_UNKNOWN; } |
|
97 |
|
98 // Reads a header packet. Returns false if an error was encountered |
|
99 // while reading header packets. Callers should check DoneReadingHeaders() |
|
100 // to determine if the last header has been read. |
|
101 // This function takes ownership of the packet and is responsible for |
|
102 // releasing it or queuing it for later processing. |
|
103 virtual bool DecodeHeader(ogg_packet* aPacket) { |
|
104 return (mDoneReadingHeaders = true); |
|
105 } |
|
106 |
|
107 // Build a hash table with tag metadata parsed from the stream. |
|
108 virtual MetadataTags* GetTags() { |
|
109 return nullptr; |
|
110 } |
|
111 |
|
112 // Returns the end time that a granulepos represents. |
|
113 virtual int64_t Time(int64_t granulepos) { return -1; } |
|
114 |
|
115 // Returns the start time that a granulepos represents. |
|
116 virtual int64_t StartTime(int64_t granulepos) { return -1; } |
|
117 |
|
118 // Initializes the codec state. |
|
119 virtual bool Init(); |
|
120 |
|
121 // Returns true when this bitstream has finished reading all its |
|
122 // header packets. |
|
123 bool DoneReadingHeaders() { return mDoneReadingHeaders; } |
|
124 |
|
125 // Deactivates the bitstream. Only the primary video and audio bitstreams |
|
126 // should be active. |
|
127 void Deactivate() { |
|
128 mActive = false; |
|
129 mDoneReadingHeaders = true; |
|
130 Reset(); |
|
131 } |
|
132 |
|
133 // Resets decoding state. |
|
134 virtual nsresult Reset(); |
|
135 |
|
136 // Returns true if the OggCodecState thinks this packet is a header |
|
137 // packet. Note this does not verify the validity of the header packet, |
|
138 // it just guarantees that the packet is marked as a header packet (i.e. |
|
139 // it is definintely not a data packet). Do not use this to identify |
|
140 // streams, use it to filter header packets from data packets while |
|
141 // decoding. |
|
142 virtual bool IsHeader(ogg_packet* aPacket) { return false; } |
|
143 |
|
144 // Returns the next packet in the stream, or nullptr if there are no more |
|
145 // packets buffered in the packet queue. More packets can be buffered by |
|
146 // inserting one or more pages into the stream by calling PageIn(). The |
|
147 // caller is responsible for deleting returned packet's using |
|
148 // OggCodecState::ReleasePacket(). The packet will have a valid granulepos. |
|
149 ogg_packet* PacketOut(); |
|
150 |
|
151 // Releases the memory used by a cloned packet. Every packet returned by |
|
152 // PacketOut() must be free'd using this function. |
|
153 static void ReleasePacket(ogg_packet* aPacket); |
|
154 |
|
155 // Extracts all packets from the page, and inserts them into the packet |
|
156 // queue. They can be extracted by calling PacketOut(). Packets from an |
|
157 // inactive stream are not buffered, i.e. this call has no effect for |
|
158 // inactive streams. Multiple pages may need to be inserted before |
|
159 // PacketOut() starts to return packets, as granulepos may need to be |
|
160 // captured. |
|
161 virtual nsresult PageIn(ogg_page* aPage); |
|
162 |
|
163 // Number of packets read. |
|
164 uint64_t mPacketCount; |
|
165 |
|
166 // Serial number of the bitstream. |
|
167 uint32_t mSerial; |
|
168 |
|
169 // Ogg specific state. |
|
170 ogg_stream_state mState; |
|
171 |
|
172 // Queue of as yet undecoded packets. Packets are guaranteed to have |
|
173 // a valid granulepos. |
|
174 OggPacketQueue mPackets; |
|
175 |
|
176 // Is the bitstream active; whether we're decoding and playing this bitstream. |
|
177 bool mActive; |
|
178 |
|
179 // True when all headers packets have been read. |
|
180 bool mDoneReadingHeaders; |
|
181 |
|
182 protected: |
|
183 // Constructs a new OggCodecState. aActive denotes whether the stream is |
|
184 // active. For streams of unsupported or unknown types, aActive should be |
|
185 // false. |
|
186 OggCodecState(ogg_page* aBosPage, bool aActive); |
|
187 |
|
188 // Deallocates all packets stored in mUnstamped, and clears the array. |
|
189 void ClearUnstamped(); |
|
190 |
|
191 // Extracts packets out of mState until a data packet with a non -1 |
|
192 // granulepos is encountered, or no more packets are readable. Header |
|
193 // packets are pushed into the packet queue immediately, and data packets |
|
194 // are buffered in mUnstamped. Once a non -1 granulepos packet is read |
|
195 // the granulepos of the packets in mUnstamped can be inferred, and they |
|
196 // can be pushed over to mPackets. Used by PageIn() implementations in |
|
197 // subclasses. |
|
198 nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); |
|
199 |
|
200 // Temporary buffer in which to store packets while we're reading packets |
|
201 // in order to capture granulepos. |
|
202 nsTArray<ogg_packet*> mUnstamped; |
|
203 |
|
204 // Validation utility for vorbis-style tag names. |
|
205 static bool IsValidVorbisTagName(nsCString& aName); |
|
206 |
|
207 // Utility method to parse and add a vorbis-style comment |
|
208 // to a metadata hash table. Most Ogg-encapsulated codecs |
|
209 // use the vorbis comment format for metadata. |
|
210 static bool AddVorbisComment(MetadataTags* aTags, |
|
211 const char* aComment, |
|
212 uint32_t aLength); |
|
213 }; |
|
214 |
|
215 class VorbisState : public OggCodecState { |
|
216 public: |
|
217 VorbisState(ogg_page* aBosPage); |
|
218 virtual ~VorbisState(); |
|
219 |
|
220 CodecType GetType() { return TYPE_VORBIS; } |
|
221 bool DecodeHeader(ogg_packet* aPacket); |
|
222 int64_t Time(int64_t granulepos); |
|
223 bool Init(); |
|
224 nsresult Reset(); |
|
225 bool IsHeader(ogg_packet* aPacket); |
|
226 nsresult PageIn(ogg_page* aPage); |
|
227 |
|
228 // Return a hash table with tag metadata. |
|
229 MetadataTags* GetTags(); |
|
230 |
|
231 // Returns the end time that a granulepos represents. |
|
232 static int64_t Time(vorbis_info* aInfo, int64_t aGranulePos); |
|
233 |
|
234 vorbis_info mInfo; |
|
235 vorbis_comment mComment; |
|
236 vorbis_dsp_state mDsp; |
|
237 vorbis_block mBlock; |
|
238 |
|
239 private: |
|
240 |
|
241 // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped |
|
242 // array. |
|
243 nsresult ReconstructVorbisGranulepos(); |
|
244 |
|
245 // The "block size" of the previously decoded Vorbis packet, or 0 if we've |
|
246 // not yet decoded anything. This is used to calculate the number of samples |
|
247 // in a Vorbis packet, since each Vorbis packet depends on the previous |
|
248 // packet while being decoded. |
|
249 long mPrevVorbisBlockSize; |
|
250 |
|
251 // Granulepos (end sample) of the last decoded Vorbis packet. This is used |
|
252 // to calculate the Vorbis granulepos when we don't find a granulepos to |
|
253 // back-propagate from. |
|
254 int64_t mGranulepos; |
|
255 |
|
256 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
257 // When validating that we've correctly predicted Vorbis packets' number |
|
258 // of samples, we store each packet's predicted number of samples in this |
|
259 // map, and verify we decode the predicted number of samples. |
|
260 std::map<ogg_packet*, long> mVorbisPacketSamples; |
|
261 #endif |
|
262 |
|
263 // Records that aPacket is predicted to have aSamples samples. |
|
264 // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
265 // is not defined. |
|
266 void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); |
|
267 |
|
268 // Verifies that aPacket has had its number of samples predicted. |
|
269 // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
270 // is not defined. |
|
271 void AssertHasRecordedPacketSamples(ogg_packet* aPacket); |
|
272 |
|
273 public: |
|
274 // Asserts that the number of samples predicted for aPacket is aSamples. |
|
275 // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION |
|
276 // is not defined. |
|
277 void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); |
|
278 |
|
279 }; |
|
280 |
|
281 // Returns 1 if the Theora info struct is decoding a media of Theora |
|
282 // version (maj,min,sub) or later, otherwise returns 0. |
|
283 int TheoraVersion(th_info* info, |
|
284 unsigned char maj, |
|
285 unsigned char min, |
|
286 unsigned char sub); |
|
287 |
|
288 class TheoraState : public OggCodecState { |
|
289 public: |
|
290 TheoraState(ogg_page* aBosPage); |
|
291 virtual ~TheoraState(); |
|
292 |
|
293 CodecType GetType() { return TYPE_THEORA; } |
|
294 bool DecodeHeader(ogg_packet* aPacket); |
|
295 int64_t Time(int64_t granulepos); |
|
296 int64_t StartTime(int64_t granulepos); |
|
297 bool Init(); |
|
298 bool IsHeader(ogg_packet* aPacket); |
|
299 nsresult PageIn(ogg_page* aPage); |
|
300 |
|
301 // Returns the maximum number of microseconds which a keyframe can be offset |
|
302 // from any given interframe. |
|
303 int64_t MaxKeyframeOffset(); |
|
304 |
|
305 // Returns the end time that a granulepos represents. |
|
306 static int64_t Time(th_info* aInfo, int64_t aGranulePos); |
|
307 |
|
308 th_info mInfo; |
|
309 th_comment mComment; |
|
310 th_setup_info *mSetup; |
|
311 th_dec_ctx* mCtx; |
|
312 |
|
313 float mPixelAspectRatio; |
|
314 |
|
315 private: |
|
316 |
|
317 // Reconstructs the granulepos of Theora packets stored in the |
|
318 // mUnstamped array. mUnstamped must be filled with consecutive packets from |
|
319 // the stream, with the last packet having a known granulepos. Using this |
|
320 // known granulepos, and the known frame numbers, we recover the granulepos |
|
321 // of all frames in the array. This enables us to determine their timestamps. |
|
322 void ReconstructTheoraGranulepos(); |
|
323 |
|
324 }; |
|
325 |
|
326 class OpusState : public OggCodecState { |
|
327 #ifdef MOZ_OPUS |
|
328 public: |
|
329 OpusState(ogg_page* aBosPage); |
|
330 virtual ~OpusState(); |
|
331 |
|
332 CodecType GetType() { return TYPE_OPUS; } |
|
333 bool DecodeHeader(ogg_packet* aPacket); |
|
334 int64_t Time(int64_t aGranulepos); |
|
335 bool Init(); |
|
336 nsresult Reset(); |
|
337 nsresult Reset(bool aStart); |
|
338 bool IsHeader(ogg_packet* aPacket); |
|
339 nsresult PageIn(ogg_page* aPage); |
|
340 |
|
341 // Returns the end time that a granulepos represents. |
|
342 static int64_t Time(int aPreSkip, int64_t aGranulepos); |
|
343 |
|
344 // Various fields from the Ogg Opus header. |
|
345 int mRate; // Sample rate the decoder uses (always 48 kHz). |
|
346 int mChannels; // Number of channels the stream encodes. |
|
347 uint16_t mPreSkip; // Number of samples to strip after decoder reset. |
|
348 #ifdef MOZ_SAMPLE_TYPE_FLOAT32 |
|
349 float mGain; // Gain to apply to decoder output. |
|
350 #else |
|
351 int32_t mGain_Q16; // Gain to apply to the decoder output. |
|
352 #endif |
|
353 |
|
354 nsAutoPtr<OpusParser> mParser; |
|
355 OpusMSDecoder *mDecoder; |
|
356 |
|
357 int mSkip; // Number of samples left to trim before playback. |
|
358 // Granule position (end sample) of the last decoded Opus packet. This is |
|
359 // used to calculate the amount we should trim from the last packet. |
|
360 int64_t mPrevPacketGranulepos; |
|
361 |
|
362 // Construct and return a table of tags from the metadata header. |
|
363 MetadataTags* GetTags(); |
|
364 |
|
365 private: |
|
366 |
|
367 // Reconstructs the granulepos of Opus packets stored in the |
|
368 // mUnstamped array. mUnstamped must be filled with consecutive packets from |
|
369 // the stream, with the last packet having a known granulepos. Using this |
|
370 // known granulepos, and the known frame numbers, we recover the granulepos |
|
371 // of all frames in the array. This enables us to determine their timestamps. |
|
372 bool ReconstructOpusGranulepos(); |
|
373 |
|
374 // Granule position (end sample) of the last decoded Opus page. This is |
|
375 // used to calculate the Opus per-packet granule positions on the last page, |
|
376 // where we may need to trim some samples from the end. |
|
377 int64_t mPrevPageGranulepos; |
|
378 |
|
379 #endif /* MOZ_OPUS */ |
|
380 }; |
|
381 |
|
// Constructs a 32 bit version number out of two 16 bit major,minor version
// numbers: major occupies the upper 16 bits, minor the lower 16 bits.
// Both operands are widened to uint32_t before the shift, so a major
// version of 0x8000 or above cannot overflow a (promoted) signed int.
#define SKELETON_VERSION(major, minor) \
  ((static_cast<uint32_t>(major) << 16) | static_cast<uint32_t>(minor))
|
385 |
|
386 class SkeletonState : public OggCodecState { |
|
387 public: |
|
388 SkeletonState(ogg_page* aBosPage); |
|
389 ~SkeletonState(); |
|
390 CodecType GetType() { return TYPE_SKELETON; } |
|
391 bool DecodeHeader(ogg_packet* aPacket); |
|
392 int64_t Time(int64_t granulepos) { return -1; } |
|
393 bool Init() { return true; } |
|
394 bool IsHeader(ogg_packet* aPacket) { return true; } |
|
395 |
|
396 // Return true if the given time (in milliseconds) is within |
|
397 // the presentation time defined in the skeleton track. |
|
398 bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; } |
|
399 |
|
400 // Stores the offset of the page on which a keyframe starts, |
|
401 // and its presentation time. |
|
402 class nsKeyPoint { |
|
403 public: |
|
404 nsKeyPoint() |
|
405 : mOffset(INT64_MAX), |
|
406 mTime(INT64_MAX) {} |
|
407 |
|
408 nsKeyPoint(int64_t aOffset, int64_t aTime) |
|
409 : mOffset(aOffset), |
|
410 mTime(aTime) {} |
|
411 |
|
412 // Offset from start of segment/link-in-the-chain in bytes. |
|
413 int64_t mOffset; |
|
414 |
|
415 // Presentation time in usecs. |
|
416 int64_t mTime; |
|
417 |
|
418 bool IsNull() { |
|
419 return mOffset == INT64_MAX && |
|
420 mTime == INT64_MAX; |
|
421 } |
|
422 }; |
|
423 |
|
424 // Stores a keyframe's byte-offset, presentation time and the serialno |
|
425 // of the stream it belongs to. |
|
426 class nsSeekTarget { |
|
427 public: |
|
428 nsSeekTarget() : mSerial(0) {} |
|
429 nsKeyPoint mKeyPoint; |
|
430 uint32_t mSerial; |
|
431 bool IsNull() { |
|
432 return mKeyPoint.IsNull() && |
|
433 mSerial == 0; |
|
434 } |
|
435 }; |
|
436 |
|
437 // Determines from the seek index the keyframe which you must seek back to |
|
438 // in order to get all keyframes required to render all streams with |
|
439 // serialnos in aTracks, at time aTarget. |
|
440 nsresult IndexedSeekTarget(int64_t aTarget, |
|
441 nsTArray<uint32_t>& aTracks, |
|
442 nsSeekTarget& aResult); |
|
443 |
|
444 bool HasIndex() const { |
|
445 return mIndex.Count() > 0; |
|
446 } |
|
447 |
|
448 // Returns the duration of the active tracks in the media, if we have |
|
449 // an index. aTracks must be filled with the serialnos of the active tracks. |
|
450 // The duration is calculated as the greatest end time of all active tracks, |
|
451 // minus the smalled start time of all the active tracks. |
|
452 nsresult GetDuration(const nsTArray<uint32_t>& aTracks, int64_t& aDuration); |
|
453 |
|
454 private: |
|
455 |
|
456 // Decodes an index packet. Returns false on failure. |
|
457 bool DecodeIndex(ogg_packet* aPacket); |
|
458 |
|
459 // Gets the keypoint you must seek to in order to get the keyframe required |
|
460 // to render the stream at time aTarget on stream with serial aSerialno. |
|
461 nsresult IndexedSeekTargetForTrack(uint32_t aSerialno, |
|
462 int64_t aTarget, |
|
463 nsKeyPoint& aResult); |
|
464 |
|
465 // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. |
|
466 uint32_t mVersion; |
|
467 |
|
468 // Presentation time of the resource in milliseconds |
|
469 int64_t mPresentationTime; |
|
470 |
|
471 // Length of the resource in bytes. |
|
472 int64_t mLength; |
|
473 |
|
474 // Stores the keyframe index and duration information for a particular |
|
475 // stream. |
|
476 class nsKeyFrameIndex { |
|
477 public: |
|
478 |
|
479 nsKeyFrameIndex(int64_t aStartTime, int64_t aEndTime) |
|
480 : mStartTime(aStartTime), |
|
481 mEndTime(aEndTime) |
|
482 { |
|
483 MOZ_COUNT_CTOR(nsKeyFrameIndex); |
|
484 } |
|
485 |
|
486 ~nsKeyFrameIndex() { |
|
487 MOZ_COUNT_DTOR(nsKeyFrameIndex); |
|
488 } |
|
489 |
|
490 void Add(int64_t aOffset, int64_t aTimeMs) { |
|
491 mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs)); |
|
492 } |
|
493 |
|
494 const nsKeyPoint& Get(uint32_t aIndex) const { |
|
495 return mKeyPoints[aIndex]; |
|
496 } |
|
497 |
|
498 uint32_t Length() const { |
|
499 return mKeyPoints.Length(); |
|
500 } |
|
501 |
|
502 // Presentation time of the first sample in this stream in usecs. |
|
503 const int64_t mStartTime; |
|
504 |
|
505 // End time of the last sample in this stream in usecs. |
|
506 const int64_t mEndTime; |
|
507 |
|
508 private: |
|
509 nsTArray<nsKeyPoint> mKeyPoints; |
|
510 }; |
|
511 |
|
512 // Maps Ogg serialnos to the index-keypoint list. |
|
513 nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex; |
|
514 }; |
|
515 |
|
516 } // namespace mozilla |
|
517 |
|
518 // This allows the use of nsAutoRefs for an ogg_packet that properly free the |
|
519 // contents of the packet. |
|
520 template <> |
|
521 class nsAutoRefTraits<ogg_packet> : public nsPointerRefTraits<ogg_packet> |
|
522 { |
|
523 public: |
|
524 static void Release(ogg_packet* aPacket) { |
|
525 mozilla::OggCodecState::ReleasePacket(aPacket); |
|
526 } |
|
527 }; |
|
528 |
|
529 |
|
530 #endif |