content/media/MP3FrameParser.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include <algorithm>
     9 #include "nsMemory.h"
    10 #include "MP3FrameParser.h"
    11 #include "VideoUtils.h"
    14 #define FROM_BIG_ENDIAN(X) ((uint32_t)((uint8_t)(X)[0] << 24 | (uint8_t)(X)[1] << 16 | \
    15                                        (uint8_t)(X)[2] << 8 | (uint8_t)(X)[3]))
    18 namespace mozilla {
    20 /*
    21  * Following code taken from http://www.hydrogenaudio.org/forums/index.php?showtopic=85125
    22  * with permission from the author, Nick Wallette <sirnickity@gmail.com>.
    23  */
    25 /* BEGIN shameless copy and paste */
// Bitrates - use [version][layer][bitrate]
//
// All three indices are the raw field values stored in the frame header
// (MP3Frame::CalculateLength() indexes with mVersion, mLayer and mBitrate
// directly). Entries are scaled by 1000 in CalculateLength(). Rows and columns
// that correspond to reserved/invalid field values are all zero, as are
// bitrate indices 0 and 15.
const uint16_t mpeg_bitrates[4][4][16] = {
  { // Version 2.5
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Reserved
    { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 3
    { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 2
    { 0,  32,  48,  56,  64,  80,  96, 112, 128, 144, 160, 176, 192, 224, 256, 0 }  // Layer 1
  },
  { // Reserved
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Invalid
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Invalid
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Invalid
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }  // Invalid
  },
  { // Version 2
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Reserved
    { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 3
    { 0,   8,  16,  24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160, 0 }, // Layer 2
    { 0,  32,  48,  56,  64,  80,  96, 112, 128, 144, 160, 176, 192, 224, 256, 0 }  // Layer 1
  },
  { // Version 1
    { 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 0 }, // Reserved
    { 0,  32,  40,  48,  56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320, 0 }, // Layer 3
    { 0,  32,  48,  56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, // Layer 2
    { 0,  32,  64,  96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 }, // Layer 1
  }
};
// Sample rates - use [version][srate]
//
// Both indices are raw header field values. The whole "Reserved" version row
// and the last column of every row are zero; callers that divide by an entry
// must be prepared for a zero.
const uint16_t mpeg_srates[4][4] = {
    { 11025, 12000,  8000, 0 }, // MPEG 2.5
    {     0,     0,     0, 0 }, // Reserved
    { 22050, 24000, 16000, 0 }, // MPEG 2
    { 44100, 48000, 32000, 0 }  // MPEG 1
};
// Samples per frame - use [version][layer]
//
// Indices are raw header field values; the column order therefore runs
// Reserved, Layer 3, Layer 2, Layer 1 (see the column legend below). Note that
// the count differs between versions for Layer 3 (576 vs. 1152).
const uint16_t mpeg_frame_samples[4][4] = {
//    Rsvd     3     2     1  < Layer  v Version
    {    0,  576, 1152,  384 }, //       2.5
    {    0,    0,    0,    0 }, //       Reserved
    {    0,  576, 1152,  384 }, //       2
    {    0, 1152, 1152,  384 }  //       1
};
// Slot size (MPEG unit of measurement) - use [layer]
//
// Indexed by the raw layer field. MP3Frame::CalculateLength() adds one slot to
// the frame length when the header's padding flag is set.
const uint8_t mpeg_slot_size[4] = { 0, 1, 1, 4 }; // Rsvd, 3, 2, 1
    75 uint16_t
    76 MP3Frame::CalculateLength()
    77 {
    78   // Lookup real values of these fields
    79   uint32_t  bitrate   = mpeg_bitrates[mVersion][mLayer][mBitrate] * 1000;
    80   uint32_t  samprate  = mpeg_srates[mVersion][mSampleRate];
    81   uint16_t  samples   = mpeg_frame_samples[mVersion][mLayer];
    82   uint8_t   slot_size = mpeg_slot_size[mLayer];
    84   // In-between calculations
    85   float     bps       = (float)samples / 8.0;
    86   float     fsize     = ( (bps * (float)bitrate) / (float)samprate )
    87     + ( (mPad) ? slot_size : 0 );
    89   // Frame sizes are truncated integers
    90   return (uint16_t)fsize;
    91 }
    93 /* END shameless copy and paste */
    96 /** MP3Parser methods **/
// Start with the write position at the first byte of the header buffer; the
// header bytes themselves are not initialised here.
MP3Parser::MP3Parser()
  : mCurrentChar(0)
{ }
// Discard any partially collected header: the next byte passed to
// ParseFrameLength() is stored at position 0 again.
void
MP3Parser::Reset()
{
  mCurrentChar = 0;
}
   108 uint16_t
   109 MP3Parser::ParseFrameLength(uint8_t ch)
   110 {
   111   mData.mRaw[mCurrentChar] = ch;
   113   MP3Frame &frame = mData.mFrame;
   115   // Validate MP3 header as we read. We can't mistake the start of an MP3 frame
   116   // for the middle of another frame due to the sync byte at the beginning
   117   // of the frame.
   119   // The only valid position for an all-high byte is the sync byte at the
   120   // beginning of the frame.
   121   if (ch == 0xff) {
   122     mCurrentChar = 0;
   123   }
   125   // Make sure the current byte is valid in context. If not, reset the parser.
   126   if (mCurrentChar == 2) {
   127     if (frame.mBitrate == 0x0f) {
   128       goto fail;
   129     }
   130   } else if (mCurrentChar == 1) {
   131     if (frame.mSync2 != 0x07
   132         || frame.mVersion == 0x01
   133         || frame.mLayer == 0x00) {
   134       goto fail;
   135     }
   136   }
   138   // The only valid character at the beginning of the header is 0xff. Fail if
   139   // it's different.
   140   if (mCurrentChar == 0 && frame.mSync1 != 0xff) {
   141     // Couldn't find the sync byte. Fail.
   142     return 0;
   143   }
   145   mCurrentChar++;
   146   MOZ_ASSERT(mCurrentChar <= sizeof(MP3Frame));
   148   // Don't have a full header yet.
   149   if (mCurrentChar < sizeof(MP3Frame)) {
   150     return 0;
   151   }
   153   // Woo, valid header. Return the length.
   154   mCurrentChar = 0;
   155   return frame.CalculateLength();
   157 fail:
   158   Reset();
   159   return 0;
   160 }
   162 uint32_t
   163 MP3Parser::GetSampleRate()
   164 {
   165   MP3Frame &frame = mData.mFrame;
   166   return mpeg_srates[frame.mVersion][frame.mSampleRate];
   167 }
   169 uint32_t
   170 MP3Parser::GetSamplesPerFrame()
   171 {
   172   MP3Frame &frame = mData.mFrame;
   173   return mpeg_frame_samples[frame.mVersion][frame.mLayer];
   174 }
   177 /** ID3Parser methods **/
// Magic bytes that ID3Parser::ParseChar() requires, in order, at the start of
// an ID3v2 header.
const char sID3Head[3] = { 'I', 'D', '3' };
// Number of header bytes ID3Parser must consume before IsParsed() is true.
const uint32_t ID3_HEADER_LENGTH = 10;
// Start with no header bytes consumed and a zero accumulated length.
ID3Parser::ID3Parser()
  : mCurrentChar(0)
  , mHeaderLength(0)
{ }
   187 void
   188 ID3Parser::Reset()
   189 {
   190   mCurrentChar = mHeaderLength = 0;
   191 }
   193 bool
   194 ID3Parser::ParseChar(char ch)
   195 {
   196   // First three bytes of an ID3v2 header must match the string "ID3".
   197   if (mCurrentChar < sizeof(sID3Head) / sizeof(*sID3Head)
   198       && ch != sID3Head[mCurrentChar]) {
   199     goto fail;
   200   }
   202   // The last four bytes of the header is a 28-bit unsigned integer with the
   203   // high bit of each byte unset.
   204   if (mCurrentChar >= 6 && mCurrentChar < ID3_HEADER_LENGTH) {
   205     if (ch & 0x80) {
   206       goto fail;
   207     } else {
   208       mHeaderLength <<= 7;
   209       mHeaderLength |= ch;
   210     }
   211   }
   213   mCurrentChar++;
   215   return IsParsed();
   217 fail:
   218   Reset();
   219   return false;
   220 }
   222 bool
   223 ID3Parser::IsParsed() const
   224 {
   225   return mCurrentChar >= ID3_HEADER_LENGTH;
   226 }
// Length accumulated by ParseChar() from the size bytes of the header.
// Only meaningful after IsParsed() has become true (asserted in debug
// builds); Reset() clears the value again.
uint32_t
ID3Parser::GetHeaderLength() const
{
  MOZ_ASSERT(IsParsed(),
             "Queried length of ID3 header before parsing finished.");
  return mHeaderLength;
}
   237 /** VBR header helper stuff **/
   239 // Helper function to find a VBR header in an MP3 frame.
   240 // Based on information from
   241 // http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header
// Four-character code "VBRI" packed most-significant byte first, for
// comparison against FROM_BIG_ENDIAN() reads of the frame buffer.
const uint32_t VBRI_TAG = FROM_BIG_ENDIAN("VBRI");
// Position in the buffered frame data at which FindNumVBRFrames() looks for
// the VBRI tag. NOTE(review): the subtraction of sizeof(MP3Frame) presumably
// reflects that the buffered data starts after the frame header - confirm.
const uint32_t VBRI_OFFSET = 32 - sizeof(MP3Frame);
// Position of the 32-bit big-endian frame count read by FindNumVBRFrames().
const uint32_t VBRI_FRAME_COUNT_OFFSET = VBRI_OFFSET + 14;
// The buffered frame must be longer than this before any VBRI field is read.
const uint32_t VBRI_MIN_FRAME_SIZE = VBRI_OFFSET + 26;

// Four-character code "Xing" packed most-significant byte first.
const uint32_t XING_TAG = FROM_BIG_ENDIAN("Xing");
// Bits of the 32-bit flags word that follows the Xing tag. Only
// XING_HAS_NUM_FRAMES is consulted in this file.
enum XingFlags {
  XING_HAS_NUM_FRAMES = 0x01,
  XING_HAS_NUM_BYTES = 0x02,
  XING_HAS_TOC = 0x04,
  XING_HAS_VBR_SCALE = 0x08
};
   256 static int64_t
   257 ParseXing(const char *aBuffer)
   258 {
   259   uint32_t flags = FROM_BIG_ENDIAN(aBuffer + 4);
   261   if (!(flags & XING_HAS_NUM_FRAMES)) {
   262     NS_WARNING("VBR file without frame count. Duration estimation likely to "
   263                "be totally wrong.");
   264     return -1;
   265   }
   267   int64_t numFrames = -1;
   268   if (flags & XING_HAS_NUM_FRAMES) {
   269     numFrames = FROM_BIG_ENDIAN(aBuffer + 8);
   270   }
   272   return numFrames;
   273 }
   275 static int64_t
   276 FindNumVBRFrames(const nsAutoCString& aFrame)
   277 {
   278   const char *buffer = aFrame.get();
   279   const char *bufferEnd = aFrame.get() + aFrame.Length();
   281   // VBRI header is nice and well-defined; let's try to find that first.
   282   if (aFrame.Length() > VBRI_MIN_FRAME_SIZE &&
   283       FROM_BIG_ENDIAN(buffer + VBRI_OFFSET) == VBRI_TAG) {
   284     return FROM_BIG_ENDIAN(buffer + VBRI_FRAME_COUNT_OFFSET);
   285   }
   287   // We have to search for the Xing header as its position can change.
   288   for (; buffer + sizeof(XING_TAG) < bufferEnd; buffer++) {
   289     if (FROM_BIG_ENDIAN(buffer) == XING_TAG) {
   290       return ParseXing(buffer);
   291     }
   292   }
   294   return -1;
   295 }
   298 /** MP3FrameParser methods **/
// Upper bound on the number of non-ID3 bytes MP3FrameParser::Parse() will
// consume without finding an MP3 frame before it marks the stream NOT_MP3.
// Bytes belonging to ID3v2 tags are tracked separately (mTotalID3Size) and
// subtracted before comparing against this limit, so large tags do not count.
static const uint32_t MAX_SKIPPED_BYTES = 4096;

// Nominal number of audio samples per frame.
// NOTE(review): this constant is not referenced anywhere else in this file;
// duration estimation uses the per-stream mSamplesPerFrame looked up from
// mpeg_frame_samples, whose entries are not all 1152. Possibly dead - confirm.
static const uint32_t SAMPLES_PER_FRAME = 1152;

// NOTE(review): not referenced in this file; the code uses sizeof(MP3Frame)
// wherever it needs the header size. Possibly dead - confirm.
enum {
  MP3_HEADER_LENGTH   = 4,
};
   315 MP3FrameParser::MP3FrameParser(int64_t aLength)
   316 : mLock("MP3FrameParser.mLock"),
   317   mTotalID3Size(0),
   318   mTotalFrameSize(0),
   319   mFrameCount(0),
   320   mOffset(0),
   321   mLength(aLength),
   322   mMP3Offset(-1),
   323   mSamplesPerSecond(0),
   324   mFirstFrameEnd(-1),
   325   mIsMP3(MAYBE_MP3)
   326 { }
   328 nsresult MP3FrameParser::ParseBuffer(const uint8_t* aBuffer,
   329                                      uint32_t aLength,
   330                                      int64_t aStreamOffset,
   331                                      uint32_t* aOutBytesRead)
   332 {
   333   // Iterate forwards over the buffer, looking for ID3 tag, or MP3
   334   // Frame headers.
   335   const uint8_t *buffer = aBuffer;
   336   const uint8_t *bufferEnd = aBuffer + aLength;
   338   // If we haven't found any MP3 frame data yet, there might be ID3 headers
   339   // we can skip over.
   340   if (mMP3Offset < 0) {
   341     for (const uint8_t *ch = buffer; ch < bufferEnd; ch++) {
   342       if (mID3Parser.ParseChar(*ch)) {
   343         // Found an ID3 header. We don't care about the body of the header, so
   344         // just skip past.
   345         buffer = ch + mID3Parser.GetHeaderLength() - (ID3_HEADER_LENGTH - 1);
   346         ch = buffer;
   348         mTotalID3Size += mID3Parser.GetHeaderLength();
   350         // Yes, this is an MP3!
   351         mIsMP3 = DEFINITELY_MP3;
   353         mID3Parser.Reset();
   354       }
   355     }
   356   }
   358   // The first MP3 frame in a variable bitrate stream can contain metadata
   359   // for duration estimation and seeking, so we buffer that first frame here.
   360   if (aStreamOffset < mFirstFrameEnd) {
   361     uint64_t copyLen = std::min((int64_t)aLength, mFirstFrameEnd - aStreamOffset);
   362     mFirstFrame.Append((const char *)buffer, copyLen);
   363     buffer += copyLen;
   364   }
   366   while (buffer < bufferEnd) {
   367     uint16_t frameLen = mMP3Parser.ParseFrameLength(*buffer);
   369     if (frameLen) {
   370       // We've found an MP3 frame!
   371       // This is the first frame (and the only one we'll bother parsing), so:
   372       // * Mark this stream as MP3;
   373       // * Store the offset at which the MP3 data started; and
   374       // * Start buffering the frame, as it might contain handy metadata.
   376       // We're now sure this is an MP3 stream.
   377       mIsMP3 = DEFINITELY_MP3;
   379       // We need to know these to convert the number of frames in the stream
   380       // to the length of the stream in seconds.
   381       mSamplesPerSecond = mMP3Parser.GetSampleRate();
   382       mSamplesPerFrame = mMP3Parser.GetSamplesPerFrame();
   384       // If the stream has a constant bitrate, we should only need the length
   385       // of the first frame and the length (in bytes) of the stream to
   386       // estimate the length (in seconds).
   387       mTotalFrameSize += frameLen;
   388       mFrameCount++;
   390       // If |mMP3Offset| isn't set then this is the first MP3 frame we have
   391       // seen in the stream, which is useful for duration estimation.
   392       if (mMP3Offset > -1) {
   393         uint16_t skip = frameLen - sizeof(MP3Frame);
   394         buffer += skip ? skip : 1;
   395         continue;
   396       }
   398       // Remember the offset of the MP3 stream.
   399       // We're at the last byte of an MP3Frame, so MP3 data started
   400       // sizeof(MP3Frame) - 1 bytes ago.
   401       mMP3Offset = aStreamOffset
   402         + (buffer - aBuffer)
   403         - (sizeof(MP3Frame) - 1);
   405       buffer++;
   407       // If the stream has a variable bitrate, the first frame has metadata
   408       // we need for duration estimation and seeking. Start buffering it so we
   409       // can parse it later.
   410       mFirstFrameEnd = mMP3Offset + frameLen;
   411       uint64_t currOffset = buffer - aBuffer + aStreamOffset;
   412       uint64_t copyLen = std::min(mFirstFrameEnd - currOffset,
   413                                   (uint64_t)(bufferEnd - buffer));
   414       mFirstFrame.Append((const char *)buffer, copyLen);
   416       buffer += copyLen;
   418     } else {
   419       // Nothing to see here. Move along.
   420       buffer++;
   421     }
   422   }
   424   *aOutBytesRead = buffer - aBuffer;
   426   if (mFirstFrameEnd > -1 && mFirstFrameEnd <= aStreamOffset + buffer - aBuffer) {
   427     // We have our whole first frame. Try to find a VBR header.
   428     mNumFrames = FindNumVBRFrames(mFirstFrame);
   429     mFirstFrameEnd = -1;
   430   }
   432   return NS_OK;
   433 }
   435 void MP3FrameParser::Parse(const char* aBuffer, uint32_t aLength, uint64_t aOffset)
   436 {
   437   MutexAutoLock mon(mLock);
   439   if (HasExactDuration()) {
   440     // We know the duration; nothing to do here.
   441     return;
   442   }
   444   const uint8_t* buffer = reinterpret_cast<const uint8_t*>(aBuffer);
   445   int32_t length = aLength;
   446   uint64_t offset = aOffset;
   448   // Got some data we have seen already. Skip forward to what we need.
   449   if (aOffset < mOffset) {
   450     buffer += mOffset - aOffset;
   451     length -= mOffset - aOffset;
   452     offset = mOffset;
   454     if (length <= 0) {
   455       return;
   456     }
   457   }
   459   // If there is a discontinuity in the input stream, reset the state of the
   460   // parsers so we don't get any partial headers.
   461   if (mOffset < aOffset) {
   462     if (!mID3Parser.IsParsed()) {
   463       // Only reset this if it hasn't finished yet.
   464       mID3Parser.Reset();
   465     }
   467     if (mFirstFrameEnd > -1) {
   468       NS_WARNING("Discontinuity in input while buffering first frame.");
   469       mFirstFrameEnd = -1;
   470     }
   472     mMP3Parser.Reset();
   473   }
   475   uint32_t bytesRead = 0;
   476   if (NS_FAILED(ParseBuffer(buffer,
   477                             length,
   478                             offset,
   479                             &bytesRead))) {
   480     return;
   481   }
   483   MOZ_ASSERT(length <= (int)bytesRead, "All bytes should have been consumed");
   485   // Update next data offset
   486   mOffset = offset + bytesRead;
   488   // If we've parsed lots of data and we still have nothing, just give up.
   489   // We don't count ID3 headers towards the skipped bytes count, as MP3 files
   490   // can have massive ID3 sections.
   491   if (!mID3Parser.IsParsed() && mMP3Offset < 0 &&
   492       mOffset - mTotalID3Size > MAX_SKIPPED_BYTES) {
   493     mIsMP3 = NOT_MP3;
   494   }
   495 }
   497 int64_t MP3FrameParser::GetDuration()
   498 {
   499   MutexAutoLock mon(mLock);
   501   if (!ParsedHeaders() || !mSamplesPerSecond) {
   502     // Not a single frame decoded yet.
   503     return -1;
   504   }
   506   MOZ_ASSERT(mFrameCount > 0 && mTotalFrameSize > 0,
   507              "Frame parser should have seen at least one MP3 frame of positive length.");
   509   if (!mFrameCount || !mTotalFrameSize) {
   510     // This should never happen.
   511     return -1;
   512   }
   514   double frames;
   515   if (mNumFrames < 0) {
   516     // Estimate the number of frames in the stream based on the average frame
   517     // size and the length of the MP3 file.
   518     double frameSize = (double)mTotalFrameSize / mFrameCount;
   519     frames = (double)(mLength - mMP3Offset) / frameSize;
   520   } else {
   521     // We know the exact number of frames from the VBR header.
   522     frames = mNumFrames;
   523   }
   525   // The duration of each frame is constant over a given stream.
   526   double usPerFrame = USECS_PER_S * mSamplesPerFrame / mSamplesPerSecond;
   528   return frames * usPerFrame;
   529 }
   531 int64_t MP3FrameParser::GetMP3Offset()
   532 {
   533   MutexAutoLock mon(mLock);
   534   return mMP3Offset;
   535 }
   537 bool MP3FrameParser::ParsedHeaders()
   538 {
   539   // We have seen both the beginning and the end of the first MP3 frame in the
   540   // stream.
   541   return mMP3Offset > -1 && mFirstFrameEnd < 0;
   542 }
   544 bool MP3FrameParser::HasExactDuration()
   545 {
   546   return ParsedHeaders() && mNumFrames > -1;
   547 }
   549 bool MP3FrameParser::NeedsData()
   550 {
   551   // If we don't know the duration exactly then either:
   552   //  - we're still waiting for a VBR header; or
   553   //  - we look at all frames to constantly update our duration estimate.
   554   return IsMP3() && !HasExactDuration();
   555 }
   557 }

mercurial