1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/media/ogg/OggCodecState.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,530 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim:set ts=2 sw=2 sts=2 et cindent: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 +#if !defined(OggCodecState_h_) 1.10 +#define OggCodecState_h_ 1.11 + 1.12 +#include <ogg/ogg.h> 1.13 +#include <theora/theoradec.h> 1.14 +#ifdef MOZ_TREMOR 1.15 +#include <tremor/ivorbiscodec.h> 1.16 +#else 1.17 +#include <vorbis/codec.h> 1.18 +#endif 1.19 +#ifdef MOZ_OPUS 1.20 +#include <opus/opus.h> 1.21 +#include "opus/opus_multistream.h" 1.22 +// For MOZ_SAMPLE_TYPE_* 1.23 +#include "mozilla/dom/HTMLMediaElement.h" 1.24 +#include "MediaDecoderStateMachine.h" 1.25 +#include "MediaDecoderReader.h" 1.26 +#endif 1.27 +#include <nsAutoRef.h> 1.28 +#include <nsDeque.h> 1.29 +#include <nsTArray.h> 1.30 +#include <nsClassHashtable.h> 1.31 +#include "VideoUtils.h" 1.32 + 1.33 +#include <stdint.h> 1.34 + 1.35 +// Uncomment the following to validate that we're predicting the number 1.36 +// of Vorbis samples in each packet correctly. 1.37 +#define VALIDATE_VORBIS_SAMPLE_CALCULATION 1.38 +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 1.39 +#include <map> 1.40 +#endif 1.41 + 1.42 +#include "OpusParser.h" 1.43 + 1.44 +namespace mozilla { 1.45 + 1.46 +// Deallocates a packet, used in OggPacketQueue below. 1.47 +class OggPacketDeallocator : public nsDequeFunctor { 1.48 + virtual void* operator() (void* aPacket) { 1.49 + ogg_packet* p = static_cast<ogg_packet*>(aPacket); 1.50 + delete [] p->packet; 1.51 + delete p; 1.52 + return nullptr; 1.53 + } 1.54 +}; 1.55 + 1.56 +// A queue of ogg_packets. When we read a page, we extract the page's packets 1.57 +// and buffer them in the owning stream's OggCodecState. This is because 1.58 +// if we're skipping up to the next keyframe in very large frame sized videos, 1.59 +// there may be several megabytes of data between keyframes, and the 1.60 +// ogg_stream_state would end up resizing its buffer every time we added a 1.61 +// new 4KB page to the bitstream, which kills performance on Windows. This 1.62 +// also gives us the option to timestamp packets rather than decoded 1.63 +// frames/samples, reducing the amount of frames/samples we must decode to 1.64 +// determine start-time at a particular offset, and gives us finer control 1.65 +// over memory usage. 1.66 +class OggPacketQueue : private nsDeque { 1.67 +public: 1.68 + OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {} 1.69 + ~OggPacketQueue() { Erase(); } 1.70 + bool IsEmpty() { return nsDeque::GetSize() == 0; } 1.71 + void Append(ogg_packet* aPacket); 1.72 + ogg_packet* PopFront() { return static_cast<ogg_packet*>(nsDeque::PopFront()); } 1.73 + ogg_packet* PeekFront() { return static_cast<ogg_packet*>(nsDeque::PeekFront()); } 1.74 + void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } 1.75 + void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } 1.76 + void Erase() { nsDeque::Erase(); } 1.77 +}; 1.78 + 1.79 +// Encapsulates the data required for decoding an ogg bitstream and for 1.80 +// converting granulepos to timestamps. 1.81 +class OggCodecState { 1.82 +public: 1.83 + typedef mozilla::MetadataTags MetadataTags; 1.84 + // Ogg types we know about 1.85 + enum CodecType { 1.86 + TYPE_VORBIS=0, 1.87 + TYPE_THEORA=1, 1.88 + TYPE_OPUS=2, 1.89 + TYPE_SKELETON=3, 1.90 + TYPE_UNKNOWN=4 1.91 + }; 1.92 + 1.93 + virtual ~OggCodecState(); 1.94 + 1.95 + // Factory for creating nsCodecStates. Use instead of constructor. 1.96 + // aPage should be a beginning-of-stream page. 1.97 + static OggCodecState* Create(ogg_page* aPage); 1.98 + 1.99 + virtual CodecType GetType() { return TYPE_UNKNOWN; } 1.100 + 1.101 + // Reads a header packet. Returns false if an error was encountered 1.102 + // while reading header packets. Callers should check DoneReadingHeaders() 1.103 + // to determine if the last header has been read. 1.104 + // This function takes ownership of the packet and is responsible for 1.105 + // releasing it or queuing it for later processing. 1.106 + virtual bool DecodeHeader(ogg_packet* aPacket) { 1.107 + return (mDoneReadingHeaders = true); 1.108 + } 1.109 + 1.110 + // Build a hash table with tag metadata parsed from the stream. 1.111 + virtual MetadataTags* GetTags() { 1.112 + return nullptr; 1.113 + } 1.114 + 1.115 + // Returns the end time that a granulepos represents. 1.116 + virtual int64_t Time(int64_t granulepos) { return -1; } 1.117 + 1.118 + // Returns the start time that a granulepos represents. 1.119 + virtual int64_t StartTime(int64_t granulepos) { return -1; } 1.120 + 1.121 + // Initializes the codec state. 1.122 + virtual bool Init(); 1.123 + 1.124 + // Returns true when this bitstream has finished reading all its 1.125 + // header packets. 1.126 + bool DoneReadingHeaders() { return mDoneReadingHeaders; } 1.127 + 1.128 + // Deactivates the bitstream. Only the primary video and audio bitstreams 1.129 + // should be active. 1.130 + void Deactivate() { 1.131 + mActive = false; 1.132 + mDoneReadingHeaders = true; 1.133 + Reset(); 1.134 + } 1.135 + 1.136 + // Resets decoding state. 1.137 + virtual nsresult Reset(); 1.138 + 1.139 + // Returns true if the OggCodecState thinks this packet is a header 1.140 + // packet. Note this does not verify the validity of the header packet, 1.141 + // it just guarantees that the packet is marked as a header packet (i.e. 1.142 + // it is definintely not a data packet). Do not use this to identify 1.143 + // streams, use it to filter header packets from data packets while 1.144 + // decoding. 1.145 + virtual bool IsHeader(ogg_packet* aPacket) { return false; } 1.146 + 1.147 + // Returns the next packet in the stream, or nullptr if there are no more 1.148 + // packets buffered in the packet queue. More packets can be buffered by 1.149 + // inserting one or more pages into the stream by calling PageIn(). The 1.150 + // caller is responsible for deleting returned packet's using 1.151 + // OggCodecState::ReleasePacket(). The packet will have a valid granulepos. 1.152 + ogg_packet* PacketOut(); 1.153 + 1.154 + // Releases the memory used by a cloned packet. Every packet returned by 1.155 + // PacketOut() must be free'd using this function. 1.156 + static void ReleasePacket(ogg_packet* aPacket); 1.157 + 1.158 + // Extracts all packets from the page, and inserts them into the packet 1.159 + // queue. They can be extracted by calling PacketOut(). Packets from an 1.160 + // inactive stream are not buffered, i.e. this call has no effect for 1.161 + // inactive streams. Multiple pages may need to be inserted before 1.162 + // PacketOut() starts to return packets, as granulepos may need to be 1.163 + // captured. 1.164 + virtual nsresult PageIn(ogg_page* aPage); 1.165 + 1.166 + // Number of packets read. 1.167 + uint64_t mPacketCount; 1.168 + 1.169 + // Serial number of the bitstream. 1.170 + uint32_t mSerial; 1.171 + 1.172 + // Ogg specific state. 1.173 + ogg_stream_state mState; 1.174 + 1.175 + // Queue of as yet undecoded packets. Packets are guaranteed to have 1.176 + // a valid granulepos. 1.177 + OggPacketQueue mPackets; 1.178 + 1.179 + // Is the bitstream active; whether we're decoding and playing this bitstream. 1.180 + bool mActive; 1.181 + 1.182 + // True when all headers packets have been read. 1.183 + bool mDoneReadingHeaders; 1.184 + 1.185 +protected: 1.186 + // Constructs a new OggCodecState. aActive denotes whether the stream is 1.187 + // active. For streams of unsupported or unknown types, aActive should be 1.188 + // false. 1.189 + OggCodecState(ogg_page* aBosPage, bool aActive); 1.190 + 1.191 + // Deallocates all packets stored in mUnstamped, and clears the array. 1.192 + void ClearUnstamped(); 1.193 + 1.194 + // Extracts packets out of mState until a data packet with a non -1 1.195 + // granulepos is encountered, or no more packets are readable. Header 1.196 + // packets are pushed into the packet queue immediately, and data packets 1.197 + // are buffered in mUnstamped. Once a non -1 granulepos packet is read 1.198 + // the granulepos of the packets in mUnstamped can be inferred, and they 1.199 + // can be pushed over to mPackets. Used by PageIn() implementations in 1.200 + // subclasses. 1.201 + nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); 1.202 + 1.203 + // Temporary buffer in which to store packets while we're reading packets 1.204 + // in order to capture granulepos. 1.205 + nsTArray<ogg_packet*> mUnstamped; 1.206 + 1.207 + // Validation utility for vorbis-style tag names. 1.208 + static bool IsValidVorbisTagName(nsCString& aName); 1.209 + 1.210 + // Utility method to parse and add a vorbis-style comment 1.211 + // to a metadata hash table. Most Ogg-encapsulated codecs 1.212 + // use the vorbis comment format for metadata. 1.213 + static bool AddVorbisComment(MetadataTags* aTags, 1.214 + const char* aComment, 1.215 + uint32_t aLength); 1.216 +}; 1.217 + 1.218 +class VorbisState : public OggCodecState { 1.219 +public: 1.220 + VorbisState(ogg_page* aBosPage); 1.221 + virtual ~VorbisState(); 1.222 + 1.223 + CodecType GetType() { return TYPE_VORBIS; } 1.224 + bool DecodeHeader(ogg_packet* aPacket); 1.225 + int64_t Time(int64_t granulepos); 1.226 + bool Init(); 1.227 + nsresult Reset(); 1.228 + bool IsHeader(ogg_packet* aPacket); 1.229 + nsresult PageIn(ogg_page* aPage); 1.230 + 1.231 + // Return a hash table with tag metadata. 1.232 + MetadataTags* GetTags(); 1.233 + 1.234 + // Returns the end time that a granulepos represents. 1.235 + static int64_t Time(vorbis_info* aInfo, int64_t aGranulePos); 1.236 + 1.237 + vorbis_info mInfo; 1.238 + vorbis_comment mComment; 1.239 + vorbis_dsp_state mDsp; 1.240 + vorbis_block mBlock; 1.241 + 1.242 +private: 1.243 + 1.244 + // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped 1.245 + // array. 1.246 + nsresult ReconstructVorbisGranulepos(); 1.247 + 1.248 + // The "block size" of the previously decoded Vorbis packet, or 0 if we've 1.249 + // not yet decoded anything. This is used to calculate the number of samples 1.250 + // in a Vorbis packet, since each Vorbis packet depends on the previous 1.251 + // packet while being decoded. 1.252 + long mPrevVorbisBlockSize; 1.253 + 1.254 + // Granulepos (end sample) of the last decoded Vorbis packet. This is used 1.255 + // to calculate the Vorbis granulepos when we don't find a granulepos to 1.256 + // back-propagate from. 1.257 + int64_t mGranulepos; 1.258 + 1.259 +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 1.260 + // When validating that we've correctly predicted Vorbis packets' number 1.261 + // of samples, we store each packet's predicted number of samples in this 1.262 + // map, and verify we decode the predicted number of samples. 1.263 + std::map<ogg_packet*, long> mVorbisPacketSamples; 1.264 +#endif 1.265 + 1.266 + // Records that aPacket is predicted to have aSamples samples. 1.267 + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION 1.268 + // is not defined. 1.269 + void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); 1.270 + 1.271 + // Verifies that aPacket has had its number of samples predicted. 1.272 + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION 1.273 + // is not defined. 1.274 + void AssertHasRecordedPacketSamples(ogg_packet* aPacket); 1.275 + 1.276 +public: 1.277 + // Asserts that the number of samples predicted for aPacket is aSamples. 1.278 + // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION 1.279 + // is not defined. 1.280 + void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); 1.281 + 1.282 +}; 1.283 + 1.284 +// Returns 1 if the Theora info struct is decoding a media of Theora 1.285 +// version (maj,min,sub) or later, otherwise returns 0. 1.286 +int TheoraVersion(th_info* info, 1.287 + unsigned char maj, 1.288 + unsigned char min, 1.289 + unsigned char sub); 1.290 + 1.291 +class TheoraState : public OggCodecState { 1.292 +public: 1.293 + TheoraState(ogg_page* aBosPage); 1.294 + virtual ~TheoraState(); 1.295 + 1.296 + CodecType GetType() { return TYPE_THEORA; } 1.297 + bool DecodeHeader(ogg_packet* aPacket); 1.298 + int64_t Time(int64_t granulepos); 1.299 + int64_t StartTime(int64_t granulepos); 1.300 + bool Init(); 1.301 + bool IsHeader(ogg_packet* aPacket); 1.302 + nsresult PageIn(ogg_page* aPage); 1.303 + 1.304 + // Returns the maximum number of microseconds which a keyframe can be offset 1.305 + // from any given interframe. 1.306 + int64_t MaxKeyframeOffset(); 1.307 + 1.308 + // Returns the end time that a granulepos represents. 1.309 + static int64_t Time(th_info* aInfo, int64_t aGranulePos); 1.310 + 1.311 + th_info mInfo; 1.312 + th_comment mComment; 1.313 + th_setup_info *mSetup; 1.314 + th_dec_ctx* mCtx; 1.315 + 1.316 + float mPixelAspectRatio; 1.317 + 1.318 +private: 1.319 + 1.320 + // Reconstructs the granulepos of Theora packets stored in the 1.321 + // mUnstamped array. mUnstamped must be filled with consecutive packets from 1.322 + // the stream, with the last packet having a known granulepos. Using this 1.323 + // known granulepos, and the known frame numbers, we recover the granulepos 1.324 + // of all frames in the array. This enables us to determine their timestamps. 1.325 + void ReconstructTheoraGranulepos(); 1.326 + 1.327 +}; 1.328 + 1.329 +class OpusState : public OggCodecState { 1.330 +#ifdef MOZ_OPUS 1.331 +public: 1.332 + OpusState(ogg_page* aBosPage); 1.333 + virtual ~OpusState(); 1.334 + 1.335 + CodecType GetType() { return TYPE_OPUS; } 1.336 + bool DecodeHeader(ogg_packet* aPacket); 1.337 + int64_t Time(int64_t aGranulepos); 1.338 + bool Init(); 1.339 + nsresult Reset(); 1.340 + nsresult Reset(bool aStart); 1.341 + bool IsHeader(ogg_packet* aPacket); 1.342 + nsresult PageIn(ogg_page* aPage); 1.343 + 1.344 + // Returns the end time that a granulepos represents. 1.345 + static int64_t Time(int aPreSkip, int64_t aGranulepos); 1.346 + 1.347 + // Various fields from the Ogg Opus header. 1.348 + int mRate; // Sample rate the decoder uses (always 48 kHz). 1.349 + int mChannels; // Number of channels the stream encodes. 1.350 + uint16_t mPreSkip; // Number of samples to strip after decoder reset. 1.351 +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 1.352 + float mGain; // Gain to apply to decoder output. 1.353 +#else 1.354 + int32_t mGain_Q16; // Gain to apply to the decoder output. 1.355 +#endif 1.356 + 1.357 + nsAutoPtr<OpusParser> mParser; 1.358 + OpusMSDecoder *mDecoder; 1.359 + 1.360 + int mSkip; // Number of samples left to trim before playback. 1.361 + // Granule position (end sample) of the last decoded Opus packet. This is 1.362 + // used to calculate the amount we should trim from the last packet. 1.363 + int64_t mPrevPacketGranulepos; 1.364 + 1.365 + // Construct and return a table of tags from the metadata header. 1.366 + MetadataTags* GetTags(); 1.367 + 1.368 +private: 1.369 + 1.370 + // Reconstructs the granulepos of Opus packets stored in the 1.371 + // mUnstamped array. mUnstamped must be filled with consecutive packets from 1.372 + // the stream, with the last packet having a known granulepos. Using this 1.373 + // known granulepos, and the known frame numbers, we recover the granulepos 1.374 + // of all frames in the array. This enables us to determine their timestamps. 1.375 + bool ReconstructOpusGranulepos(); 1.376 + 1.377 + // Granule position (end sample) of the last decoded Opus page. This is 1.378 + // used to calculate the Opus per-packet granule positions on the last page, 1.379 + // where we may need to trim some samples from the end. 1.380 + int64_t mPrevPageGranulepos; 1.381 + 1.382 +#endif /* MOZ_OPUS */ 1.383 +}; 1.384 + 1.385 +// Constructs a 32bit version number out of two 16 bit major,minor 1.386 +// version numbers. 1.387 +#define SKELETON_VERSION(major, minor) (((major)<<16)|(minor)) 1.388 + 1.389 +class SkeletonState : public OggCodecState { 1.390 +public: 1.391 + SkeletonState(ogg_page* aBosPage); 1.392 + ~SkeletonState(); 1.393 + CodecType GetType() { return TYPE_SKELETON; } 1.394 + bool DecodeHeader(ogg_packet* aPacket); 1.395 + int64_t Time(int64_t granulepos) { return -1; } 1.396 + bool Init() { return true; } 1.397 + bool IsHeader(ogg_packet* aPacket) { return true; } 1.398 + 1.399 + // Return true if the given time (in milliseconds) is within 1.400 + // the presentation time defined in the skeleton track. 1.401 + bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; } 1.402 + 1.403 + // Stores the offset of the page on which a keyframe starts, 1.404 + // and its presentation time. 1.405 + class nsKeyPoint { 1.406 + public: 1.407 + nsKeyPoint() 1.408 + : mOffset(INT64_MAX), 1.409 + mTime(INT64_MAX) {} 1.410 + 1.411 + nsKeyPoint(int64_t aOffset, int64_t aTime) 1.412 + : mOffset(aOffset), 1.413 + mTime(aTime) {} 1.414 + 1.415 + // Offset from start of segment/link-in-the-chain in bytes. 1.416 + int64_t mOffset; 1.417 + 1.418 + // Presentation time in usecs. 1.419 + int64_t mTime; 1.420 + 1.421 + bool IsNull() { 1.422 + return mOffset == INT64_MAX && 1.423 + mTime == INT64_MAX; 1.424 + } 1.425 + }; 1.426 + 1.427 + // Stores a keyframe's byte-offset, presentation time and the serialno 1.428 + // of the stream it belongs to. 1.429 + class nsSeekTarget { 1.430 + public: 1.431 + nsSeekTarget() : mSerial(0) {} 1.432 + nsKeyPoint mKeyPoint; 1.433 + uint32_t mSerial; 1.434 + bool IsNull() { 1.435 + return mKeyPoint.IsNull() && 1.436 + mSerial == 0; 1.437 + } 1.438 + }; 1.439 + 1.440 + // Determines from the seek index the keyframe which you must seek back to 1.441 + // in order to get all keyframes required to render all streams with 1.442 + // serialnos in aTracks, at time aTarget. 1.443 + nsresult IndexedSeekTarget(int64_t aTarget, 1.444 + nsTArray<uint32_t>& aTracks, 1.445 + nsSeekTarget& aResult); 1.446 + 1.447 + bool HasIndex() const { 1.448 + return mIndex.Count() > 0; 1.449 + } 1.450 + 1.451 + // Returns the duration of the active tracks in the media, if we have 1.452 + // an index. aTracks must be filled with the serialnos of the active tracks. 1.453 + // The duration is calculated as the greatest end time of all active tracks, 1.454 + // minus the smalled start time of all the active tracks. 1.455 + nsresult GetDuration(const nsTArray<uint32_t>& aTracks, int64_t& aDuration); 1.456 + 1.457 +private: 1.458 + 1.459 + // Decodes an index packet. Returns false on failure. 1.460 + bool DecodeIndex(ogg_packet* aPacket); 1.461 + 1.462 + // Gets the keypoint you must seek to in order to get the keyframe required 1.463 + // to render the stream at time aTarget on stream with serial aSerialno. 1.464 + nsresult IndexedSeekTargetForTrack(uint32_t aSerialno, 1.465 + int64_t aTarget, 1.466 + nsKeyPoint& aResult); 1.467 + 1.468 + // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. 1.469 + uint32_t mVersion; 1.470 + 1.471 + // Presentation time of the resource in milliseconds 1.472 + int64_t mPresentationTime; 1.473 + 1.474 + // Length of the resource in bytes. 1.475 + int64_t mLength; 1.476 + 1.477 + // Stores the keyframe index and duration information for a particular 1.478 + // stream. 1.479 + class nsKeyFrameIndex { 1.480 + public: 1.481 + 1.482 + nsKeyFrameIndex(int64_t aStartTime, int64_t aEndTime) 1.483 + : mStartTime(aStartTime), 1.484 + mEndTime(aEndTime) 1.485 + { 1.486 + MOZ_COUNT_CTOR(nsKeyFrameIndex); 1.487 + } 1.488 + 1.489 + ~nsKeyFrameIndex() { 1.490 + MOZ_COUNT_DTOR(nsKeyFrameIndex); 1.491 + } 1.492 + 1.493 + void Add(int64_t aOffset, int64_t aTimeMs) { 1.494 + mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs)); 1.495 + } 1.496 + 1.497 + const nsKeyPoint& Get(uint32_t aIndex) const { 1.498 + return mKeyPoints[aIndex]; 1.499 + } 1.500 + 1.501 + uint32_t Length() const { 1.502 + return mKeyPoints.Length(); 1.503 + } 1.504 + 1.505 + // Presentation time of the first sample in this stream in usecs. 1.506 + const int64_t mStartTime; 1.507 + 1.508 + // End time of the last sample in this stream in usecs. 1.509 + const int64_t mEndTime; 1.510 + 1.511 + private: 1.512 + nsTArray<nsKeyPoint> mKeyPoints; 1.513 + }; 1.514 + 1.515 + // Maps Ogg serialnos to the index-keypoint list. 1.516 + nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex; 1.517 +}; 1.518 + 1.519 +} // namespace mozilla 1.520 + 1.521 +// This allows the use of nsAutoRefs for an ogg_packet that properly free the 1.522 +// contents of the packet. 1.523 +template <> 1.524 +class nsAutoRefTraits<ogg_packet> : public nsPointerRefTraits<ogg_packet> 1.525 +{ 1.526 +public: 1.527 + static void Release(ogg_packet* aPacket) { 1.528 + mozilla::OggCodecState::ReleasePacket(aPacket); 1.529 + } 1.530 +}; 1.531 + 1.532 + 1.533 +#endif