content/media/ogg/OggCodecState.h

Wed, 31 Dec 2014 13:27:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 13:27:57 +0100
branch
TOR_BUG_3246
changeset 6
8bccb770b82d
permissions
-rw-r--r--

Ignore runtime configuration files generated during quality assurance.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #if !defined(OggCodecState_h_)
     7 #define OggCodecState_h_
     9 #include <ogg/ogg.h>
    10 #include <theora/theoradec.h>
    11 #ifdef MOZ_TREMOR
    12 #include <tremor/ivorbiscodec.h>
    13 #else
    14 #include <vorbis/codec.h>
    15 #endif
    16 #ifdef MOZ_OPUS
    17 #include <opus/opus.h>
    18 #include "opus/opus_multistream.h"
    19 // For MOZ_SAMPLE_TYPE_*
    20 #include "mozilla/dom/HTMLMediaElement.h"
    21 #include "MediaDecoderStateMachine.h"
    22 #include "MediaDecoderReader.h"
    23 #endif
    24 #include <nsAutoRef.h>
    25 #include <nsDeque.h>
    26 #include <nsTArray.h>
    27 #include <nsClassHashtable.h>
    28 #include "VideoUtils.h"
    30 #include <stdint.h>
// Validation that we're predicting the number of Vorbis samples in each
// packet correctly is enabled by the define below. Comment it out to
// disable the validation.
    34 #define VALIDATE_VORBIS_SAMPLE_CALCULATION
    35 #ifdef  VALIDATE_VORBIS_SAMPLE_CALCULATION
    36 #include <map>
    37 #endif
    39 #include "OpusParser.h"
    41 namespace mozilla {
    43 // Deallocates a packet, used in OggPacketQueue below.
    44 class OggPacketDeallocator : public nsDequeFunctor {
    45   virtual void* operator() (void* aPacket) {
    46     ogg_packet* p = static_cast<ogg_packet*>(aPacket);
    47     delete [] p->packet;
    48     delete p;
    49     return nullptr;
    50   }
    51 };
    53 // A queue of ogg_packets. When we read a page, we extract the page's packets
    54 // and buffer them in the owning stream's OggCodecState. This is because
    55 // if we're skipping up to the next keyframe in very large frame sized videos,
    56 // there may be several megabytes of data between keyframes, and the
    57 // ogg_stream_state would end up resizing its buffer every time we added a
    58 // new 4KB page to the bitstream, which kills performance on Windows. This
    59 // also gives us the option to timestamp packets rather than decoded
    60 // frames/samples, reducing the amount of frames/samples we must decode to
    61 // determine start-time at a particular offset, and gives us finer control
    62 // over memory usage.
    63 class OggPacketQueue : private nsDeque {
    64 public:
    65   OggPacketQueue() : nsDeque(new OggPacketDeallocator()) {}
    66   ~OggPacketQueue() { Erase(); }
    67   bool IsEmpty() { return nsDeque::GetSize() == 0; }
    68   void Append(ogg_packet* aPacket);
    69   ogg_packet* PopFront() { return static_cast<ogg_packet*>(nsDeque::PopFront()); }
    70   ogg_packet* PeekFront() { return static_cast<ogg_packet*>(nsDeque::PeekFront()); }
    71   void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); }
    72   void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); }
    73   void Erase() { nsDeque::Erase(); }
    74 };
    76 // Encapsulates the data required for decoding an ogg bitstream and for
    77 // converting granulepos to timestamps.
    78 class OggCodecState {
    79 public:
    80   typedef mozilla::MetadataTags MetadataTags;
    81   // Ogg types we know about
    82   enum CodecType {
    83     TYPE_VORBIS=0,
    84     TYPE_THEORA=1,
    85     TYPE_OPUS=2,
    86     TYPE_SKELETON=3,
    87     TYPE_UNKNOWN=4
    88   };
    90   virtual ~OggCodecState();
    92   // Factory for creating nsCodecStates. Use instead of constructor.
    93   // aPage should be a beginning-of-stream page.
    94   static OggCodecState* Create(ogg_page* aPage);
    96   virtual CodecType GetType() { return TYPE_UNKNOWN; }
    98   // Reads a header packet. Returns false if an error was encountered
    99   // while reading header packets. Callers should check DoneReadingHeaders()
   100   // to determine if the last header has been read.
   101   // This function takes ownership of the packet and is responsible for
   102   // releasing it or queuing it for later processing.
   103   virtual bool DecodeHeader(ogg_packet* aPacket) {
   104     return (mDoneReadingHeaders = true);
   105   }
   107   // Build a hash table with tag metadata parsed from the stream.
   108   virtual MetadataTags* GetTags() {
   109     return nullptr;
   110   }
   112   // Returns the end time that a granulepos represents.
   113   virtual int64_t Time(int64_t granulepos) { return -1; }
   115   // Returns the start time that a granulepos represents.
   116   virtual int64_t StartTime(int64_t granulepos) { return -1; }
   118   // Initializes the codec state.
   119   virtual bool Init();
   121   // Returns true when this bitstream has finished reading all its
   122   // header packets.
   123   bool DoneReadingHeaders() { return mDoneReadingHeaders; }
   125   // Deactivates the bitstream. Only the primary video and audio bitstreams
   126   // should be active.
   127   void Deactivate() {
   128     mActive = false;
   129     mDoneReadingHeaders = true;
   130     Reset();
   131   }
   133   // Resets decoding state.
   134   virtual nsresult Reset();
   136   // Returns true if the OggCodecState thinks this packet is a header
   137   // packet. Note this does not verify the validity of the header packet,
   138   // it just guarantees that the packet is marked as a header packet (i.e.
   139   // it is definintely not a data packet). Do not use this to identify
   140   // streams, use it to filter header packets from data packets while
   141   // decoding.
   142   virtual bool IsHeader(ogg_packet* aPacket) { return false; }
   144   // Returns the next packet in the stream, or nullptr if there are no more
   145   // packets buffered in the packet queue. More packets can be buffered by
   146   // inserting one or more pages into the stream by calling PageIn(). The
   147   // caller is responsible for deleting returned packet's using
   148   // OggCodecState::ReleasePacket(). The packet will have a valid granulepos.
   149   ogg_packet* PacketOut();
   151   // Releases the memory used by a cloned packet. Every packet returned by
   152   // PacketOut() must be free'd using this function.
   153   static void ReleasePacket(ogg_packet* aPacket);
   155   // Extracts all packets from the page, and inserts them into the packet
   156   // queue. They can be extracted by calling PacketOut(). Packets from an
   157   // inactive stream are not buffered, i.e. this call has no effect for
   158   // inactive streams. Multiple pages may need to be inserted before
   159   // PacketOut() starts to return packets, as granulepos may need to be
   160   // captured.
   161   virtual nsresult PageIn(ogg_page* aPage);
   163   // Number of packets read.  
   164   uint64_t mPacketCount;
   166   // Serial number of the bitstream.
   167   uint32_t mSerial;
   169   // Ogg specific state.
   170   ogg_stream_state mState;
   172   // Queue of as yet undecoded packets. Packets are guaranteed to have
   173   // a valid granulepos.
   174   OggPacketQueue mPackets;
   176   // Is the bitstream active; whether we're decoding and playing this bitstream.
   177   bool mActive;
   179   // True when all headers packets have been read.
   180   bool mDoneReadingHeaders;
   182 protected:
   183   // Constructs a new OggCodecState. aActive denotes whether the stream is
   184   // active. For streams of unsupported or unknown types, aActive should be
   185   // false.
   186   OggCodecState(ogg_page* aBosPage, bool aActive);
   188   // Deallocates all packets stored in mUnstamped, and clears the array.
   189   void ClearUnstamped();
   191   // Extracts packets out of mState until a data packet with a non -1
   192   // granulepos is encountered, or no more packets are readable. Header
   193   // packets are pushed into the packet queue immediately, and data packets
   194   // are buffered in mUnstamped. Once a non -1 granulepos packet is read
   195   // the granulepos of the packets in mUnstamped can be inferred, and they
   196   // can be pushed over to mPackets. Used by PageIn() implementations in
   197   // subclasses.
   198   nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos);
   200   // Temporary buffer in which to store packets while we're reading packets
   201   // in order to capture granulepos.
   202   nsTArray<ogg_packet*> mUnstamped;
   204   // Validation utility for vorbis-style tag names.
   205   static bool IsValidVorbisTagName(nsCString& aName);
   207   // Utility method to parse and add a vorbis-style comment
   208   // to a metadata hash table. Most Ogg-encapsulated codecs
   209   // use the vorbis comment format for metadata.
   210   static bool AddVorbisComment(MetadataTags* aTags,
   211                         const char* aComment,
   212                         uint32_t aLength);
   213 };
   215 class VorbisState : public OggCodecState {
   216 public:
   217   VorbisState(ogg_page* aBosPage);
   218   virtual ~VorbisState();
   220   CodecType GetType() { return TYPE_VORBIS; }
   221   bool DecodeHeader(ogg_packet* aPacket);
   222   int64_t Time(int64_t granulepos);
   223   bool Init();
   224   nsresult Reset();
   225   bool IsHeader(ogg_packet* aPacket);
   226   nsresult PageIn(ogg_page* aPage); 
   228   // Return a hash table with tag metadata.
   229   MetadataTags* GetTags();
   231   // Returns the end time that a granulepos represents.
   232   static int64_t Time(vorbis_info* aInfo, int64_t aGranulePos); 
   234   vorbis_info mInfo;
   235   vorbis_comment mComment;
   236   vorbis_dsp_state mDsp;
   237   vorbis_block mBlock;
   239 private:
   241   // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped
   242   // array.
   243   nsresult ReconstructVorbisGranulepos();
   245   // The "block size" of the previously decoded Vorbis packet, or 0 if we've
   246   // not yet decoded anything. This is used to calculate the number of samples
   247   // in a Vorbis packet, since each Vorbis packet depends on the previous
   248   // packet while being decoded.
   249   long mPrevVorbisBlockSize;
   251   // Granulepos (end sample) of the last decoded Vorbis packet. This is used
   252   // to calculate the Vorbis granulepos when we don't find a granulepos to
   253   // back-propagate from.
   254   int64_t mGranulepos;
   256 #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
   257   // When validating that we've correctly predicted Vorbis packets' number
   258   // of samples, we store each packet's predicted number of samples in this
   259   // map, and verify we decode the predicted number of samples.
   260   std::map<ogg_packet*, long> mVorbisPacketSamples;
   261 #endif
   263   // Records that aPacket is predicted to have aSamples samples.
   264   // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
   265   // is not defined.
   266   void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
   268   // Verifies that aPacket has had its number of samples predicted.
   269   // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
   270   // is not defined.
   271   void AssertHasRecordedPacketSamples(ogg_packet* aPacket);
   273 public:
   274   // Asserts that the number of samples predicted for aPacket is aSamples.
   275   // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
   276   // is not defined.
   277   void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
   279 };
   281 // Returns 1 if the Theora info struct is decoding a media of Theora
   282 // version (maj,min,sub) or later, otherwise returns 0.
   283 int TheoraVersion(th_info* info,
   284                   unsigned char maj,
   285                   unsigned char min,
   286                   unsigned char sub);
   288 class TheoraState : public OggCodecState {
   289 public:
   290   TheoraState(ogg_page* aBosPage);
   291   virtual ~TheoraState();
   293   CodecType GetType() { return TYPE_THEORA; }
   294   bool DecodeHeader(ogg_packet* aPacket);
   295   int64_t Time(int64_t granulepos);
   296   int64_t StartTime(int64_t granulepos);
   297   bool Init();
   298   bool IsHeader(ogg_packet* aPacket);
   299   nsresult PageIn(ogg_page* aPage); 
   301   // Returns the maximum number of microseconds which a keyframe can be offset
   302   // from any given interframe.
   303   int64_t MaxKeyframeOffset();
   305   // Returns the end time that a granulepos represents.
   306   static int64_t Time(th_info* aInfo, int64_t aGranulePos); 
   308   th_info mInfo;
   309   th_comment mComment;
   310   th_setup_info *mSetup;
   311   th_dec_ctx* mCtx;
   313   float mPixelAspectRatio;
   315 private:
   317   // Reconstructs the granulepos of Theora packets stored in the
   318   // mUnstamped array. mUnstamped must be filled with consecutive packets from
   319   // the stream, with the last packet having a known granulepos. Using this
   320   // known granulepos, and the known frame numbers, we recover the granulepos
   321   // of all frames in the array. This enables us to determine their timestamps.
   322   void ReconstructTheoraGranulepos();
   324 };
   326 class OpusState : public OggCodecState {
   327 #ifdef MOZ_OPUS
   328 public:
   329   OpusState(ogg_page* aBosPage);
   330   virtual ~OpusState();
   332   CodecType GetType() { return TYPE_OPUS; }
   333   bool DecodeHeader(ogg_packet* aPacket);
   334   int64_t Time(int64_t aGranulepos);
   335   bool Init();
   336   nsresult Reset();
   337   nsresult Reset(bool aStart);
   338   bool IsHeader(ogg_packet* aPacket);
   339   nsresult PageIn(ogg_page* aPage);
   341   // Returns the end time that a granulepos represents.
   342   static int64_t Time(int aPreSkip, int64_t aGranulepos);
   344   // Various fields from the Ogg Opus header.
   345   int mRate;        // Sample rate the decoder uses (always 48 kHz).
   346   int mChannels;    // Number of channels the stream encodes.
   347   uint16_t mPreSkip; // Number of samples to strip after decoder reset.
   348 #ifdef MOZ_SAMPLE_TYPE_FLOAT32
   349   float mGain;      // Gain to apply to decoder output.
   350 #else
   351   int32_t mGain_Q16; // Gain to apply to the decoder output.
   352 #endif
   354   nsAutoPtr<OpusParser> mParser;
   355   OpusMSDecoder *mDecoder;
   357   int mSkip;        // Number of samples left to trim before playback.
   358   // Granule position (end sample) of the last decoded Opus packet. This is
   359   // used to calculate the amount we should trim from the last packet.
   360   int64_t mPrevPacketGranulepos;
   362   // Construct and return a table of tags from the metadata header.
   363   MetadataTags* GetTags();
   365 private:
   367   // Reconstructs the granulepos of Opus packets stored in the
   368   // mUnstamped array. mUnstamped must be filled with consecutive packets from
   369   // the stream, with the last packet having a known granulepos. Using this
   370   // known granulepos, and the known frame numbers, we recover the granulepos
   371   // of all frames in the array. This enables us to determine their timestamps.
   372   bool ReconstructOpusGranulepos();
   374   // Granule position (end sample) of the last decoded Opus page. This is
   375   // used to calculate the Opus per-packet granule positions on the last page,
   376   // where we may need to trim some samples from the end.
   377   int64_t mPrevPageGranulepos;
   379 #endif /* MOZ_OPUS */
   380 };
   382 // Constructs a 32bit version number out of two 16 bit major,minor
   383 // version numbers.
   384 #define SKELETON_VERSION(major, minor) (((major)<<16)|(minor))
   386 class SkeletonState : public OggCodecState {
   387 public:
   388   SkeletonState(ogg_page* aBosPage);
   389   ~SkeletonState();
   390   CodecType GetType() { return TYPE_SKELETON; }
   391   bool DecodeHeader(ogg_packet* aPacket);
   392   int64_t Time(int64_t granulepos) { return -1; }
   393   bool Init() { return true; }
   394   bool IsHeader(ogg_packet* aPacket) { return true; }
   396   // Return true if the given time (in milliseconds) is within
   397   // the presentation time defined in the skeleton track.
   398   bool IsPresentable(int64_t aTime) { return aTime >= mPresentationTime; }
   400   // Stores the offset of the page on which a keyframe starts,
   401   // and its presentation time.
   402   class nsKeyPoint {
   403   public:
   404     nsKeyPoint()
   405       : mOffset(INT64_MAX),
   406         mTime(INT64_MAX) {}
   408     nsKeyPoint(int64_t aOffset, int64_t aTime)
   409       : mOffset(aOffset),
   410         mTime(aTime) {}
   412     // Offset from start of segment/link-in-the-chain in bytes.
   413     int64_t mOffset;
   415     // Presentation time in usecs.
   416     int64_t mTime;
   418     bool IsNull() {
   419       return mOffset == INT64_MAX &&
   420              mTime == INT64_MAX;
   421     }
   422   };
   424   // Stores a keyframe's byte-offset, presentation time and the serialno
   425   // of the stream it belongs to.
   426   class nsSeekTarget {
   427   public:
   428     nsSeekTarget() : mSerial(0) {}
   429     nsKeyPoint mKeyPoint;
   430     uint32_t mSerial;
   431     bool IsNull() {
   432       return mKeyPoint.IsNull() &&
   433              mSerial == 0;
   434     }
   435   };
   437   // Determines from the seek index the keyframe which you must seek back to
   438   // in order to get all keyframes required to render all streams with
   439   // serialnos in aTracks, at time aTarget.
   440   nsresult IndexedSeekTarget(int64_t aTarget,
   441                              nsTArray<uint32_t>& aTracks,
   442                              nsSeekTarget& aResult);
   444   bool HasIndex() const {
   445     return mIndex.Count() > 0;
   446   }
   448   // Returns the duration of the active tracks in the media, if we have
   449   // an index. aTracks must be filled with the serialnos of the active tracks.
   450   // The duration is calculated as the greatest end time of all active tracks,
   451   // minus the smalled start time of all the active tracks.
   452   nsresult GetDuration(const nsTArray<uint32_t>& aTracks, int64_t& aDuration);
   454 private:
   456   // Decodes an index packet. Returns false on failure.
   457   bool DecodeIndex(ogg_packet* aPacket);
   459   // Gets the keypoint you must seek to in order to get the keyframe required
   460   // to render the stream at time aTarget on stream with serial aSerialno.
   461   nsresult IndexedSeekTargetForTrack(uint32_t aSerialno,
   462                                      int64_t aTarget,
   463                                      nsKeyPoint& aResult);
   465   // Version of the decoded skeleton track, as per the SKELETON_VERSION macro.
   466   uint32_t mVersion;
   468   // Presentation time of the resource in milliseconds
   469   int64_t mPresentationTime;
   471   // Length of the resource in bytes.
   472   int64_t mLength;
   474   // Stores the keyframe index and duration information for a particular
   475   // stream.
   476   class nsKeyFrameIndex {
   477   public:
   479     nsKeyFrameIndex(int64_t aStartTime, int64_t aEndTime) 
   480       : mStartTime(aStartTime),
   481         mEndTime(aEndTime)
   482     {
   483       MOZ_COUNT_CTOR(nsKeyFrameIndex);
   484     }
   486     ~nsKeyFrameIndex() {
   487       MOZ_COUNT_DTOR(nsKeyFrameIndex);
   488     }
   490     void Add(int64_t aOffset, int64_t aTimeMs) {
   491       mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs));
   492     }
   494     const nsKeyPoint& Get(uint32_t aIndex) const {
   495       return mKeyPoints[aIndex];
   496     }
   498     uint32_t Length() const {
   499       return mKeyPoints.Length();
   500     }
   502     // Presentation time of the first sample in this stream in usecs.
   503     const int64_t mStartTime;
   505     // End time of the last sample in this stream in usecs.
   506     const int64_t mEndTime;
   508   private:
   509     nsTArray<nsKeyPoint> mKeyPoints;
   510   };
   512   // Maps Ogg serialnos to the index-keypoint list.
   513   nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex;
   514 };
   516 } // namespace mozilla
   518 // This allows the use of nsAutoRefs for an ogg_packet that properly free the
   519 // contents of the packet.
   520 template <>
   521 class nsAutoRefTraits<ogg_packet> : public nsPointerRefTraits<ogg_packet>
   522 {
   523 public:
   524   static void Release(ogg_packet* aPacket) {
   525     mozilla::OggCodecState::ReleasePacket(aPacket);
   526   }
   527 };
   530 #endif

mercurial