michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim:set ts=2 sw=2 sts=2 et cindent: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #if !defined(OggCodecState_h_) michael@0: #define OggCodecState_h_ michael@0: michael@0: #include michael@0: #include michael@0: #ifdef MOZ_TREMOR michael@0: #include michael@0: #else michael@0: #include michael@0: #endif michael@0: #ifdef MOZ_OPUS michael@0: #include michael@0: #include "opus/opus_multistream.h" michael@0: // For MOZ_SAMPLE_TYPE_* michael@0: #include "mozilla/dom/HTMLMediaElement.h" michael@0: #include "MediaDecoderStateMachine.h" michael@0: #include "MediaDecoderReader.h" michael@0: #endif michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include "VideoUtils.h" michael@0: michael@0: #include michael@0: michael@0: // Uncomment the following to validate that we're predicting the number michael@0: // of Vorbis samples in each packet correctly. michael@0: #define VALIDATE_VORBIS_SAMPLE_CALCULATION michael@0: #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION michael@0: #include michael@0: #endif michael@0: michael@0: #include "OpusParser.h" michael@0: michael@0: namespace mozilla { michael@0: michael@0: // Deallocates a packet, used in OggPacketQueue below. michael@0: class OggPacketDeallocator : public nsDequeFunctor { michael@0: virtual void* operator() (void* aPacket) { michael@0: ogg_packet* p = static_cast(aPacket); michael@0: delete [] p->packet; michael@0: delete p; michael@0: return nullptr; michael@0: } michael@0: }; michael@0: michael@0: // A queue of ogg_packets. When we read a page, we extract the page's packets michael@0: // and buffer them in the owning stream's OggCodecState. This is because michael@0: // if we're skipping up to the next keyframe in very large frame sized videos, michael@0: // there may be several megabytes of data between keyframes, and the michael@0: // ogg_stream_state would end up resizing its buffer every time we added a michael@0: // new 4KB page to the bitstream, which kills performance on Windows. This michael@0: // also gives us the option to timestamp packets rather than decoded michael@0: // frames/samples, reducing the amount of frames/samples we must decode to michael@0: // determine start-time at a particular offset, and gives us finer control michael@0: // over memory usage. michael@0: class OggPacketQueue : private nsDeque { michael@0: public: michael@0: OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {} michael@0: ~OggPacketQueue() { Erase(); } michael@0: bool IsEmpty() { return nsDeque::GetSize() == 0; } michael@0: void Append(ogg_packet* aPacket); michael@0: ogg_packet* PopFront() { return static_cast(nsDeque::PopFront()); } michael@0: ogg_packet* PeekFront() { return static_cast(nsDeque::PeekFront()); } michael@0: void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } michael@0: void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } michael@0: void Erase() { nsDeque::Erase(); } michael@0: }; michael@0: michael@0: // Encapsulates the data required for decoding an ogg bitstream and for michael@0: // converting granulepos to timestamps. michael@0: class OggCodecState { michael@0: public: michael@0: typedef mozilla::MetadataTags MetadataTags; michael@0: // Ogg types we know about michael@0: enum CodecType { michael@0: TYPE_VORBIS=0, michael@0: TYPE_THEORA=1, michael@0: TYPE_OPUS=2, michael@0: TYPE_SKELETON=3, michael@0: TYPE_UNKNOWN=4 michael@0: }; michael@0: michael@0: virtual ~OggCodecState(); michael@0: michael@0: // Factory for creating nsCodecStates. Use instead of constructor. michael@0: // aPage should be a beginning-of-stream page. michael@0: static OggCodecState* Create(ogg_page* aPage); michael@0: michael@0: virtual CodecType GetType() { return TYPE_UNKNOWN; } michael@0: michael@0: // Reads a header packet. Returns false if an error was encountered michael@0: // while reading header packets. Callers should check DoneReadingHeaders() michael@0: // to determine if the last header has been read. michael@0: // This function takes ownership of the packet and is responsible for michael@0: // releasing it or queuing it for later processing. michael@0: virtual bool DecodeHeader(ogg_packet* aPacket) { michael@0: return (mDoneReadingHeaders = true); michael@0: } michael@0: michael@0: // Build a hash table with tag metadata parsed from the stream. michael@0: virtual MetadataTags* GetTags() { michael@0: return nullptr; michael@0: } michael@0: michael@0: // Returns the end time that a granulepos represents. michael@0: virtual int64_t Time(int64_t granulepos) { return -1; } michael@0: michael@0: // Returns the start time that a granulepos represents. michael@0: virtual int64_t StartTime(int64_t granulepos) { return -1; } michael@0: michael@0: // Initializes the codec state. michael@0: virtual bool Init(); michael@0: michael@0: // Returns true when this bitstream has finished reading all its michael@0: // header packets. michael@0: bool DoneReadingHeaders() { return mDoneReadingHeaders; } michael@0: michael@0: // Deactivates the bitstream. Only the primary video and audio bitstreams michael@0: // should be active. michael@0: void Deactivate() { michael@0: mActive = false; michael@0: mDoneReadingHeaders = true; michael@0: Reset(); michael@0: } michael@0: michael@0: // Resets decoding state. michael@0: virtual nsresult Reset(); michael@0: michael@0: // Returns true if the OggCodecState thinks this packet is a header michael@0: // packet. Note this does not verify the validity of the header packet, michael@0: // it just guarantees that the packet is marked as a header packet (i.e. michael@0: // it is definintely not a data packet). Do not use this to identify michael@0: // streams, use it to filter header packets from data packets while michael@0: // decoding. michael@0: virtual bool IsHeader(ogg_packet* aPacket) { return false; } michael@0: michael@0: // Returns the next packet in the stream, or nullptr if there are no more michael@0: // packets buffered in the packet queue. More packets can be buffered by michael@0: // inserting one or more pages into the stream by calling PageIn(). The michael@0: // caller is responsible for deleting returned packet's using michael@0: // OggCodecState::ReleasePacket(). The packet will have a valid granulepos. michael@0: ogg_packet* PacketOut(); michael@0: michael@0: // Releases the memory used by a cloned packet. Every packet returned by michael@0: // PacketOut() must be free'd using this function. michael@0: static void ReleasePacket(ogg_packet* aPacket); michael@0: michael@0: // Extracts all packets from the page, and inserts them into the packet michael@0: // queue. They can be extracted by calling PacketOut(). Packets from an michael@0: // inactive stream are not buffered, i.e. this call has no effect for michael@0: // inactive streams. Multiple pages may need to be inserted before michael@0: // PacketOut() starts to return packets, as granulepos may need to be michael@0: // captured. michael@0: virtual nsresult PageIn(ogg_page* aPage); michael@0: michael@0: // Number of packets read. michael@0: uint64_t mPacketCount; michael@0: michael@0: // Serial number of the bitstream. michael@0: uint32_t mSerial; michael@0: michael@0: // Ogg specific state. michael@0: ogg_stream_state mState; michael@0: michael@0: // Queue of as yet undecoded packets. Packets are guaranteed to have michael@0: // a valid granulepos. michael@0: OggPacketQueue mPackets; michael@0: michael@0: // Is the bitstream active; whether we're decoding and playing this bitstream. michael@0: bool mActive; michael@0: michael@0: // True when all headers packets have been read. michael@0: bool mDoneReadingHeaders; michael@0: michael@0: protected: michael@0: // Constructs a new OggCodecState. aActive denotes whether the stream is michael@0: // active. For streams of unsupported or unknown types, aActive should be michael@0: // false. michael@0: OggCodecState(ogg_page* aBosPage, bool aActive); michael@0: michael@0: // Deallocates all packets stored in mUnstamped, and clears the array. michael@0: void ClearUnstamped(); michael@0: michael@0: // Extracts packets out of mState until a data packet with a non -1 michael@0: // granulepos is encountered, or no more packets are readable. Header michael@0: // packets are pushed into the packet queue immediately, and data packets michael@0: // are buffered in mUnstamped. Once a non -1 granulepos packet is read michael@0: // the granulepos of the packets in mUnstamped can be inferred, and they michael@0: // can be pushed over to mPackets. Used by PageIn() implementations in michael@0: // subclasses. michael@0: nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); michael@0: michael@0: // Temporary buffer in which to store packets while we're reading packets michael@0: // in order to capture granulepos. michael@0: nsTArray mUnstamped; michael@0: michael@0: // Validation utility for vorbis-style tag names. michael@0: static bool IsValidVorbisTagName(nsCString& aName); michael@0: michael@0: // Utility method to parse and add a vorbis-style comment michael@0: // to a metadata hash table. Most Ogg-encapsulated codecs michael@0: // use the vorbis comment format for metadata. michael@0: static bool AddVorbisComment(MetadataTags* aTags, michael@0: const char* aComment, michael@0: uint32_t aLength); michael@0: }; michael@0: michael@0: class VorbisState : public OggCodecState { michael@0: public: michael@0: VorbisState(ogg_page* aBosPage); michael@0: virtual ~VorbisState(); michael@0: michael@0: CodecType GetType() { return TYPE_VORBIS; } michael@0: bool DecodeHeader(ogg_packet* aPacket); michael@0: int64_t Time(int64_t granulepos); michael@0: bool Init(); michael@0: nsresult Reset(); michael@0: bool IsHeader(ogg_packet* aPacket); michael@0: nsresult PageIn(ogg_page* aPage); michael@0: michael@0: // Return a hash table with tag metadata. michael@0: MetadataTags* GetTags(); michael@0: michael@0: // Returns the end time that a granulepos represents. michael@0: static int64_t Time(vorbis_info* aInfo, int64_t aGranulePos); michael@0: michael@0: vorbis_info mInfo; michael@0: vorbis_comment mComment; michael@0: vorbis_dsp_state mDsp; michael@0: vorbis_block mBlock; michael@0: michael@0: private: michael@0: michael@0: // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped michael@0: // array. michael@0: nsresult ReconstructVorbisGranulepos(); michael@0: michael@0: // The "block size" of the previously decoded Vorbis packet, or 0 if we've michael@0: // not yet decoded anything. This is used to calculate the number of samples michael@0: // in a Vorbis packet, since each Vorbis packet depends on the previous michael@0: // packet while being decoded. michael@0: long mPrevVorbisBlockSize; michael@0: michael@0: // Granulepos (end sample) of the last decoded Vorbis packet. This is used michael@0: // to calculate the Vorbis granulepos when we don't find a granulepos to michael@0: // back-propagate from. michael@0: int64_t mGranulepos; michael@0: michael@0: #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION michael@0: // When validating that we've correctly predicted Vorbis packets' number michael@0: // of samples, we store each packet's predicted number of samples in this michael@0: // map, and verify we decode the predicted number of samples. michael@0: std::map mVorbisPacketSamples; michael@0: #endif michael@0: michael@0: // Records that aPacket is predicted to have aSamples samples. michael@0: // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION michael@0: // is not defined. michael@0: void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); michael@0: michael@0: // Verifies that aPacket has had its number of samples predicted. michael@0: // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION michael@0: // is not defined. michael@0: void AssertHasRecordedPacketSamples(ogg_packet* aPacket); michael@0: michael@0: public: michael@0: // Asserts that the number of samples predicted for aPacket is aSamples. michael@0: // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION michael@0: // is not defined. michael@0: void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); michael@0: michael@0: }; michael@0: michael@0: // Returns 1 if the Theora info struct is decoding a media of Theora michael@0: // version (maj,min,sub) or later, otherwise returns 0. michael@0: int TheoraVersion(th_info* info, michael@0: unsigned char maj, michael@0: unsigned char min, michael@0: unsigned char sub); michael@0: michael@0: class TheoraState : public OggCodecState { michael@0: public: michael@0: TheoraState(ogg_page* aBosPage); michael@0: virtual ~TheoraState(); michael@0: michael@0: CodecType GetType() { return TYPE_THEORA; } michael@0: bool DecodeHeader(ogg_packet* aPacket); michael@0: int64_t Time(int64_t granulepos); michael@0: int64_t StartTime(int64_t granulepos); michael@0: bool Init(); michael@0: bool IsHeader(ogg_packet* aPacket); michael@0: nsresult PageIn(ogg_page* aPage); michael@0: michael@0: // Returns the maximum number of microseconds which a keyframe can be offset michael@0: // from any given interframe. michael@0: int64_t MaxKeyframeOffset(); michael@0: michael@0: // Returns the end time that a granulepos represents. michael@0: static int64_t Time(th_info* aInfo, int64_t aGranulePos); michael@0: michael@0: th_info mInfo; michael@0: th_comment mComment; michael@0: th_setup_info *mSetup; michael@0: th_dec_ctx* mCtx; michael@0: michael@0: float mPixelAspectRatio; michael@0: michael@0: private: michael@0: michael@0: // Reconstructs the granulepos of Theora packets stored in the michael@0: // mUnstamped array. mUnstamped must be filled with consecutive packets from michael@0: // the stream, with the last packet having a known granulepos. Using this michael@0: // known granulepos, and the known frame numbers, we recover the granulepos michael@0: // of all frames in the array. This enables us to determine their timestamps. michael@0: void ReconstructTheoraGranulepos(); michael@0: michael@0: }; michael@0: michael@0: class OpusState : public OggCodecState { michael@0: #ifdef MOZ_OPUS michael@0: public: michael@0: OpusState(ogg_page* aBosPage); michael@0: virtual ~OpusState(); michael@0: michael@0: CodecType GetType() { return TYPE_OPUS; } michael@0: bool DecodeHeader(ogg_packet* aPacket); michael@0: int64_t Time(int64_t aGranulepos); michael@0: bool Init(); michael@0: nsresult Reset(); michael@0: nsresult Reset(bool aStart); michael@0: bool IsHeader(ogg_packet* aPacket); michael@0: nsresult PageIn(ogg_page* aPage); michael@0: michael@0: // Returns the end time that a granulepos represents. michael@0: static int64_t Time(int aPreSkip, int64_t aGranulepos); michael@0: michael@0: // Various fields from the Ogg Opus header. michael@0: int mRate; // Sample rate the decoder uses (always 48 kHz). michael@0: int mChannels; // Number of channels the stream encodes. michael@0: uint16_t mPreSkip; // Number of samples to strip after decoder reset. michael@0: #ifdef MOZ_SAMPLE_TYPE_FLOAT32 michael@0: float mGain; // Gain to apply to decoder output. michael@0: #else michael@0: int32_t mGain_Q16; // Gain to apply to the decoder output. michael@0: #endif michael@0: michael@0: nsAutoPtr mParser; michael@0: OpusMSDecoder *mDecoder; michael@0: michael@0: int mSkip; // Number of samples left to trim before playback. michael@0: // Granule position (end sample) of the last decoded Opus packet. This is michael@0: // used to calculate the amount we should trim from the last packet. michael@0: int64_t mPrevPacketGranulepos; michael@0: michael@0: // Construct and return a table of tags from the metadata header. michael@0: MetadataTags* GetTags(); michael@0: michael@0: private: michael@0: michael@0: // Reconstructs the granulepos of Opus packets stored in the michael@0: // mUnstamped array. mUnstamped must be filled with consecutive packets from michael@0: // the stream, with the last packet having a known granulepos. Using this michael@0: // known granulepos, and the known frame numbers, we recover the granulepos michael@0: // of all frames in the array. This enables us to determine their timestamps. michael@0: bool ReconstructOpusGranulepos(); michael@0: michael@0: // Granule position (end sample) of the last decoded Opus page. This is michael@0: // used to calculate the Opus per-packet granule positions on the last page, michael@0: // where we may need to trim some samples from the end. michael@0: int64_t mPrevPageGranulepos; michael@0: michael@0: #endif /* MOZ_OPUS */ michael@0: }; michael@0: michael@0: // Constructs a 32bit version number out of two 16 bit major,minor michael@0: // version numbers. michael@0: #define SKELETON_VERSION(major, minor) (((major)<<16)|(minor)) michael@0: michael@0: class SkeletonState : public OggCodecState { michael@0: public: michael@0: SkeletonState(ogg_page* aBosPage); michael@0: ~SkeletonState(); michael@0: CodecType GetType() { return TYPE_SKELETON; } michael@0: bool DecodeHeader(ogg_packet* aPacket); michael@0: int64_t Time(int64_t granulepos) { return -1; } michael@0: bool Init() { return true; } michael@0: bool IsHeader(ogg_packet* aPacket) { return true; } michael@0: michael@0: // Return true if the given time (in milliseconds) is within michael@0: // the presentation time defined in the skeleton track. michael@0: bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; } michael@0: michael@0: // Stores the offset of the page on which a keyframe starts, michael@0: // and its presentation time. michael@0: class nsKeyPoint { michael@0: public: michael@0: nsKeyPoint() michael@0: : mOffset(INT64_MAX), michael@0: mTime(INT64_MAX) {} michael@0: michael@0: nsKeyPoint(int64_t aOffset, int64_t aTime) michael@0: : mOffset(aOffset), michael@0: mTime(aTime) {} michael@0: michael@0: // Offset from start of segment/link-in-the-chain in bytes. michael@0: int64_t mOffset; michael@0: michael@0: // Presentation time in usecs. michael@0: int64_t mTime; michael@0: michael@0: bool IsNull() { michael@0: return mOffset == INT64_MAX && michael@0: mTime == INT64_MAX; michael@0: } michael@0: }; michael@0: michael@0: // Stores a keyframe's byte-offset, presentation time and the serialno michael@0: // of the stream it belongs to. michael@0: class nsSeekTarget { michael@0: public: michael@0: nsSeekTarget() : mSerial(0) {} michael@0: nsKeyPoint mKeyPoint; michael@0: uint32_t mSerial; michael@0: bool IsNull() { michael@0: return mKeyPoint.IsNull() && michael@0: mSerial == 0; michael@0: } michael@0: }; michael@0: michael@0: // Determines from the seek index the keyframe which you must seek back to michael@0: // in order to get all keyframes required to render all streams with michael@0: // serialnos in aTracks, at time aTarget. michael@0: nsresult IndexedSeekTarget(int64_t aTarget, michael@0: nsTArray& aTracks, michael@0: nsSeekTarget& aResult); michael@0: michael@0: bool HasIndex() const { michael@0: return mIndex.Count() > 0; michael@0: } michael@0: michael@0: // Returns the duration of the active tracks in the media, if we have michael@0: // an index. aTracks must be filled with the serialnos of the active tracks. michael@0: // The duration is calculated as the greatest end time of all active tracks, michael@0: // minus the smalled start time of all the active tracks. michael@0: nsresult GetDuration(const nsTArray& aTracks, int64_t& aDuration); michael@0: michael@0: private: michael@0: michael@0: // Decodes an index packet. Returns false on failure. michael@0: bool DecodeIndex(ogg_packet* aPacket); michael@0: michael@0: // Gets the keypoint you must seek to in order to get the keyframe required michael@0: // to render the stream at time aTarget on stream with serial aSerialno. michael@0: nsresult IndexedSeekTargetForTrack(uint32_t aSerialno, michael@0: int64_t aTarget, michael@0: nsKeyPoint& aResult); michael@0: michael@0: // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. michael@0: uint32_t mVersion; michael@0: michael@0: // Presentation time of the resource in milliseconds michael@0: int64_t mPresentationTime; michael@0: michael@0: // Length of the resource in bytes. michael@0: int64_t mLength; michael@0: michael@0: // Stores the keyframe index and duration information for a particular michael@0: // stream. michael@0: class nsKeyFrameIndex { michael@0: public: michael@0: michael@0: nsKeyFrameIndex(int64_t aStartTime, int64_t aEndTime) michael@0: : mStartTime(aStartTime), michael@0: mEndTime(aEndTime) michael@0: { michael@0: MOZ_COUNT_CTOR(nsKeyFrameIndex); michael@0: } michael@0: michael@0: ~nsKeyFrameIndex() { michael@0: MOZ_COUNT_DTOR(nsKeyFrameIndex); michael@0: } michael@0: michael@0: void Add(int64_t aOffset, int64_t aTimeMs) { michael@0: mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs)); michael@0: } michael@0: michael@0: const nsKeyPoint& Get(uint32_t aIndex) const { michael@0: return mKeyPoints[aIndex]; michael@0: } michael@0: michael@0: uint32_t Length() const { michael@0: return mKeyPoints.Length(); michael@0: } michael@0: michael@0: // Presentation time of the first sample in this stream in usecs. michael@0: const int64_t mStartTime; michael@0: michael@0: // End time of the last sample in this stream in usecs. michael@0: const int64_t mEndTime; michael@0: michael@0: private: michael@0: nsTArray mKeyPoints; michael@0: }; michael@0: michael@0: // Maps Ogg serialnos to the index-keypoint list. michael@0: nsClassHashtable mIndex; michael@0: }; michael@0: michael@0: } // namespace mozilla michael@0: michael@0: // This allows the use of nsAutoRefs for an ogg_packet that properly free the michael@0: // contents of the packet. michael@0: template <> michael@0: class nsAutoRefTraits : public nsPointerRefTraits michael@0: { michael@0: public: michael@0: static void Release(ogg_packet* aPacket) { michael@0: mozilla::OggCodecState::ReleasePacket(aPacket); michael@0: } michael@0: }; michael@0: michael@0: michael@0: #endif