michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim:set ts=2 sw=2 sts=2 et cindent: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include michael@0: michael@0: #include "nsMemory.h" michael@0: #include "MP3FrameParser.h" michael@0: #include "VideoUtils.h" michael@0: michael@0: michael@0: #define FROM_BIG_ENDIAN(X) ((uint32_t)((uint8_t)(X)[0] << 24 | (uint8_t)(X)[1] << 16 | \ michael@0: (uint8_t)(X)[2] << 8 | (uint8_t)(X)[3])) michael@0: michael@0: michael@0: namespace mozilla { michael@0: michael@0: /* michael@0: * Following code taken from http://www.hydrogenaudio.org/forums/index.php?showtopic=85125 michael@0: * with permission from the author, Nick Wallette . michael@0: */ michael@0: michael@0: /* BEGIN shameless copy and paste */ michael@0: michael@0: // Bitrates - use [version][layer][bitrate] michael@0: const uint16_t mpeg_bitrates[4][4][16] = { michael@0: { // Version 2.5 michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved michael@0: { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3 michael@0: { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2 michael@0: { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1 michael@0: }, michael@0: { // Reserved michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Invalid michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // Invalid michael@0: }, michael@0: { // Version 2 michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved michael@0: { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 3 michael@0: { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 }, // Layer 2 michael@0: { 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 } // Layer 1 michael@0: }, michael@0: { // Version 1 michael@0: { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // Reserved michael@0: { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0 }, // Layer 3 michael@0: { 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0 }, // Layer 2 michael@0: { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 }, // Layer 1 michael@0: } michael@0: }; michael@0: michael@0: // Sample rates - use [version][srate] michael@0: const uint16_t mpeg_srates[4][4] = { michael@0: { 11025, 12000, 8000, 0 }, // MPEG 2.5 michael@0: { 0, 0, 0, 0 }, // Reserved michael@0: { 22050, 24000, 16000, 0 }, // MPEG 2 michael@0: { 44100, 48000, 32000, 0 } // MPEG 1 michael@0: }; michael@0: michael@0: // Samples per frame - use [version][layer] michael@0: const uint16_t mpeg_frame_samples[4][4] = { michael@0: // Rsvd 3 2 1 < Layer v Version michael@0: { 0, 576, 1152, 384 }, // 2.5 michael@0: { 0, 0, 0, 0 }, // Reserved michael@0: { 0, 576, 1152, 384 }, // 2 michael@0: { 0, 1152, 1152, 384 } // 1 michael@0: }; michael@0: michael@0: // Slot size (MPEG unit of measurement) - use [layer] michael@0: const uint8_t mpeg_slot_size[4] = { 0, 1, 1, 4 }; // Rsvd, 3, 2, 1 michael@0: michael@0: uint16_t michael@0: MP3Frame::CalculateLength() michael@0: { michael@0: // Lookup real values of these fields michael@0: uint32_t bitrate = mpeg_bitrates[mVersion][mLayer][mBitrate] * 1000; michael@0: uint32_t samprate = mpeg_srates[mVersion][mSampleRate]; michael@0: uint16_t samples = mpeg_frame_samples[mVersion][mLayer]; michael@0: uint8_t slot_size = mpeg_slot_size[mLayer]; michael@0: michael@0: // In-between calculations michael@0: float bps = (float)samples / 8.0; michael@0: float fsize = ( (bps * (float)bitrate) / (float)samprate ) michael@0: + ( (mPad) ? slot_size : 0 ); michael@0: michael@0: // Frame sizes are truncated integers michael@0: return (uint16_t)fsize; michael@0: } michael@0: michael@0: /* END shameless copy and paste */ michael@0: michael@0: michael@0: /** MP3Parser methods **/ michael@0: michael@0: MP3Parser::MP3Parser() michael@0: : mCurrentChar(0) michael@0: { } michael@0: michael@0: void michael@0: MP3Parser::Reset() michael@0: { michael@0: mCurrentChar = 0; michael@0: } michael@0: michael@0: uint16_t michael@0: MP3Parser::ParseFrameLength(uint8_t ch) michael@0: { michael@0: mData.mRaw[mCurrentChar] = ch; michael@0: michael@0: MP3Frame &frame = mData.mFrame; michael@0: michael@0: // Validate MP3 header as we read. We can't mistake the start of an MP3 frame michael@0: // for the middle of another frame due to the sync byte at the beginning michael@0: // of the frame. michael@0: michael@0: // The only valid position for an all-high byte is the sync byte at the michael@0: // beginning of the frame. michael@0: if (ch == 0xff) { michael@0: mCurrentChar = 0; michael@0: } michael@0: michael@0: // Make sure the current byte is valid in context. If not, reset the parser. michael@0: if (mCurrentChar == 2) { michael@0: if (frame.mBitrate == 0x0f) { michael@0: goto fail; michael@0: } michael@0: } else if (mCurrentChar == 1) { michael@0: if (frame.mSync2 != 0x07 michael@0: || frame.mVersion == 0x01 michael@0: || frame.mLayer == 0x00) { michael@0: goto fail; michael@0: } michael@0: } michael@0: michael@0: // The only valid character at the beginning of the header is 0xff. Fail if michael@0: // it's different. michael@0: if (mCurrentChar == 0 && frame.mSync1 != 0xff) { michael@0: // Couldn't find the sync byte. Fail. michael@0: return 0; michael@0: } michael@0: michael@0: mCurrentChar++; michael@0: MOZ_ASSERT(mCurrentChar <= sizeof(MP3Frame)); michael@0: michael@0: // Don't have a full header yet. michael@0: if (mCurrentChar < sizeof(MP3Frame)) { michael@0: return 0; michael@0: } michael@0: michael@0: // Woo, valid header. Return the length. michael@0: mCurrentChar = 0; michael@0: return frame.CalculateLength(); michael@0: michael@0: fail: michael@0: Reset(); michael@0: return 0; michael@0: } michael@0: michael@0: uint32_t michael@0: MP3Parser::GetSampleRate() michael@0: { michael@0: MP3Frame &frame = mData.mFrame; michael@0: return mpeg_srates[frame.mVersion][frame.mSampleRate]; michael@0: } michael@0: michael@0: uint32_t michael@0: MP3Parser::GetSamplesPerFrame() michael@0: { michael@0: MP3Frame &frame = mData.mFrame; michael@0: return mpeg_frame_samples[frame.mVersion][frame.mLayer]; michael@0: } michael@0: michael@0: michael@0: /** ID3Parser methods **/ michael@0: michael@0: const char sID3Head[3] = { 'I', 'D', '3' }; michael@0: const uint32_t ID3_HEADER_LENGTH = 10; michael@0: michael@0: ID3Parser::ID3Parser() michael@0: : mCurrentChar(0) michael@0: , mHeaderLength(0) michael@0: { } michael@0: michael@0: void michael@0: ID3Parser::Reset() michael@0: { michael@0: mCurrentChar = mHeaderLength = 0; michael@0: } michael@0: michael@0: bool michael@0: ID3Parser::ParseChar(char ch) michael@0: { michael@0: // First three bytes of an ID3v2 header must match the string "ID3". michael@0: if (mCurrentChar < sizeof(sID3Head) / sizeof(*sID3Head) michael@0: && ch != sID3Head[mCurrentChar]) { michael@0: goto fail; michael@0: } michael@0: michael@0: // The last four bytes of the header is a 28-bit unsigned integer with the michael@0: // high bit of each byte unset. michael@0: if (mCurrentChar >= 6 && mCurrentChar < ID3_HEADER_LENGTH) { michael@0: if (ch & 0x80) { michael@0: goto fail; michael@0: } else { michael@0: mHeaderLength <<= 7; michael@0: mHeaderLength |= ch; michael@0: } michael@0: } michael@0: michael@0: mCurrentChar++; michael@0: michael@0: return IsParsed(); michael@0: michael@0: fail: michael@0: Reset(); michael@0: return false; michael@0: } michael@0: michael@0: bool michael@0: ID3Parser::IsParsed() const michael@0: { michael@0: return mCurrentChar >= ID3_HEADER_LENGTH; michael@0: } michael@0: michael@0: uint32_t michael@0: ID3Parser::GetHeaderLength() const michael@0: { michael@0: MOZ_ASSERT(IsParsed(), michael@0: "Queried length of ID3 header before parsing finished."); michael@0: return mHeaderLength; michael@0: } michael@0: michael@0: michael@0: /** VBR header helper stuff **/ michael@0: michael@0: // Helper function to find a VBR header in an MP3 frame. michael@0: // Based on information from michael@0: // http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header michael@0: michael@0: const uint32_t VBRI_TAG = FROM_BIG_ENDIAN("VBRI"); michael@0: const uint32_t VBRI_OFFSET = 32 - sizeof(MP3Frame); michael@0: const uint32_t VBRI_FRAME_COUNT_OFFSET = VBRI_OFFSET + 14; michael@0: const uint32_t VBRI_MIN_FRAME_SIZE = VBRI_OFFSET + 26; michael@0: michael@0: const uint32_t XING_TAG = FROM_BIG_ENDIAN("Xing"); michael@0: enum XingFlags { michael@0: XING_HAS_NUM_FRAMES = 0x01, michael@0: XING_HAS_NUM_BYTES = 0x02, michael@0: XING_HAS_TOC = 0x04, michael@0: XING_HAS_VBR_SCALE = 0x08 michael@0: }; michael@0: michael@0: static int64_t michael@0: ParseXing(const char *aBuffer) michael@0: { michael@0: uint32_t flags = FROM_BIG_ENDIAN(aBuffer + 4); michael@0: michael@0: if (!(flags & XING_HAS_NUM_FRAMES)) { michael@0: NS_WARNING("VBR file without frame count. Duration estimation likely to " michael@0: "be totally wrong."); michael@0: return -1; michael@0: } michael@0: michael@0: int64_t numFrames = -1; michael@0: if (flags & XING_HAS_NUM_FRAMES) { michael@0: numFrames = FROM_BIG_ENDIAN(aBuffer + 8); michael@0: } michael@0: michael@0: return numFrames; michael@0: } michael@0: michael@0: static int64_t michael@0: FindNumVBRFrames(const nsAutoCString& aFrame) michael@0: { michael@0: const char *buffer = aFrame.get(); michael@0: const char *bufferEnd = aFrame.get() + aFrame.Length(); michael@0: michael@0: // VBRI header is nice and well-defined; let's try to find that first. michael@0: if (aFrame.Length() > VBRI_MIN_FRAME_SIZE && michael@0: FROM_BIG_ENDIAN(buffer + VBRI_OFFSET) == VBRI_TAG) { michael@0: return FROM_BIG_ENDIAN(buffer + VBRI_FRAME_COUNT_OFFSET); michael@0: } michael@0: michael@0: // We have to search for the Xing header as its position can change. michael@0: for (; buffer + sizeof(XING_TAG) < bufferEnd; buffer++) { michael@0: if (FROM_BIG_ENDIAN(buffer) == XING_TAG) { michael@0: return ParseXing(buffer); michael@0: } michael@0: } michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: michael@0: /** MP3FrameParser methods **/ michael@0: michael@0: // Some MP3's have large ID3v2 tags, up to 150KB, so we allow lots of michael@0: // skipped bytes to be read, just in case, before we give up and assume michael@0: // we're not parsing an MP3 stream. michael@0: static const uint32_t MAX_SKIPPED_BYTES = 4096; michael@0: michael@0: // The number of audio samples per MP3 frame. This is constant over all MP3 michael@0: // streams. With this constant, the stream's sample rate, and an estimated michael@0: // number of frames in the stream, we can estimate the stream's duration michael@0: // fairly accurately. michael@0: static const uint32_t SAMPLES_PER_FRAME = 1152; michael@0: michael@0: enum { michael@0: MP3_HEADER_LENGTH = 4, michael@0: }; michael@0: michael@0: MP3FrameParser::MP3FrameParser(int64_t aLength) michael@0: : mLock("MP3FrameParser.mLock"), michael@0: mTotalID3Size(0), michael@0: mTotalFrameSize(0), michael@0: mFrameCount(0), michael@0: mOffset(0), michael@0: mLength(aLength), michael@0: mMP3Offset(-1), michael@0: mSamplesPerSecond(0), michael@0: mFirstFrameEnd(-1), michael@0: mIsMP3(MAYBE_MP3) michael@0: { } michael@0: michael@0: nsresult MP3FrameParser::ParseBuffer(const uint8_t* aBuffer, michael@0: uint32_t aLength, michael@0: int64_t aStreamOffset, michael@0: uint32_t* aOutBytesRead) michael@0: { michael@0: // Iterate forwards over the buffer, looking for ID3 tag, or MP3 michael@0: // Frame headers. michael@0: const uint8_t *buffer = aBuffer; michael@0: const uint8_t *bufferEnd = aBuffer + aLength; michael@0: michael@0: // If we haven't found any MP3 frame data yet, there might be ID3 headers michael@0: // we can skip over. michael@0: if (mMP3Offset < 0) { michael@0: for (const uint8_t *ch = buffer; ch < bufferEnd; ch++) { michael@0: if (mID3Parser.ParseChar(*ch)) { michael@0: // Found an ID3 header. We don't care about the body of the header, so michael@0: // just skip past. michael@0: buffer = ch + mID3Parser.GetHeaderLength() - (ID3_HEADER_LENGTH - 1); michael@0: ch = buffer; michael@0: michael@0: mTotalID3Size += mID3Parser.GetHeaderLength(); michael@0: michael@0: // Yes, this is an MP3! michael@0: mIsMP3 = DEFINITELY_MP3; michael@0: michael@0: mID3Parser.Reset(); michael@0: } michael@0: } michael@0: } michael@0: michael@0: // The first MP3 frame in a variable bitrate stream can contain metadata michael@0: // for duration estimation and seeking, so we buffer that first frame here. michael@0: if (aStreamOffset < mFirstFrameEnd) { michael@0: uint64_t copyLen = std::min((int64_t)aLength, mFirstFrameEnd - aStreamOffset); michael@0: mFirstFrame.Append((const char *)buffer, copyLen); michael@0: buffer += copyLen; michael@0: } michael@0: michael@0: while (buffer < bufferEnd) { michael@0: uint16_t frameLen = mMP3Parser.ParseFrameLength(*buffer); michael@0: michael@0: if (frameLen) { michael@0: // We've found an MP3 frame! michael@0: // This is the first frame (and the only one we'll bother parsing), so: michael@0: // * Mark this stream as MP3; michael@0: // * Store the offset at which the MP3 data started; and michael@0: // * Start buffering the frame, as it might contain handy metadata. michael@0: michael@0: // We're now sure this is an MP3 stream. michael@0: mIsMP3 = DEFINITELY_MP3; michael@0: michael@0: // We need to know these to convert the number of frames in the stream michael@0: // to the length of the stream in seconds. michael@0: mSamplesPerSecond = mMP3Parser.GetSampleRate(); michael@0: mSamplesPerFrame = mMP3Parser.GetSamplesPerFrame(); michael@0: michael@0: // If the stream has a constant bitrate, we should only need the length michael@0: // of the first frame and the length (in bytes) of the stream to michael@0: // estimate the length (in seconds). michael@0: mTotalFrameSize += frameLen; michael@0: mFrameCount++; michael@0: michael@0: // If |mMP3Offset| isn't set then this is the first MP3 frame we have michael@0: // seen in the stream, which is useful for duration estimation. michael@0: if (mMP3Offset > -1) { michael@0: uint16_t skip = frameLen - sizeof(MP3Frame); michael@0: buffer += skip ? skip : 1; michael@0: continue; michael@0: } michael@0: michael@0: // Remember the offset of the MP3 stream. michael@0: // We're at the last byte of an MP3Frame, so MP3 data started michael@0: // sizeof(MP3Frame) - 1 bytes ago. michael@0: mMP3Offset = aStreamOffset michael@0: + (buffer - aBuffer) michael@0: - (sizeof(MP3Frame) - 1); michael@0: michael@0: buffer++; michael@0: michael@0: // If the stream has a variable bitrate, the first frame has metadata michael@0: // we need for duration estimation and seeking. Start buffering it so we michael@0: // can parse it later. michael@0: mFirstFrameEnd = mMP3Offset + frameLen; michael@0: uint64_t currOffset = buffer - aBuffer + aStreamOffset; michael@0: uint64_t copyLen = std::min(mFirstFrameEnd - currOffset, michael@0: (uint64_t)(bufferEnd - buffer)); michael@0: mFirstFrame.Append((const char *)buffer, copyLen); michael@0: michael@0: buffer += copyLen; michael@0: michael@0: } else { michael@0: // Nothing to see here. Move along. michael@0: buffer++; michael@0: } michael@0: } michael@0: michael@0: *aOutBytesRead = buffer - aBuffer; michael@0: michael@0: if (mFirstFrameEnd > -1 && mFirstFrameEnd <= aStreamOffset + buffer - aBuffer) { michael@0: // We have our whole first frame. Try to find a VBR header. michael@0: mNumFrames = FindNumVBRFrames(mFirstFrame); michael@0: mFirstFrameEnd = -1; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: void MP3FrameParser::Parse(const char* aBuffer, uint32_t aLength, uint64_t aOffset) michael@0: { michael@0: MutexAutoLock mon(mLock); michael@0: michael@0: if (HasExactDuration()) { michael@0: // We know the duration; nothing to do here. michael@0: return; michael@0: } michael@0: michael@0: const uint8_t* buffer = reinterpret_cast(aBuffer); michael@0: int32_t length = aLength; michael@0: uint64_t offset = aOffset; michael@0: michael@0: // Got some data we have seen already. Skip forward to what we need. michael@0: if (aOffset < mOffset) { michael@0: buffer += mOffset - aOffset; michael@0: length -= mOffset - aOffset; michael@0: offset = mOffset; michael@0: michael@0: if (length <= 0) { michael@0: return; michael@0: } michael@0: } michael@0: michael@0: // If there is a discontinuity in the input stream, reset the state of the michael@0: // parsers so we don't get any partial headers. michael@0: if (mOffset < aOffset) { michael@0: if (!mID3Parser.IsParsed()) { michael@0: // Only reset this if it hasn't finished yet. michael@0: mID3Parser.Reset(); michael@0: } michael@0: michael@0: if (mFirstFrameEnd > -1) { michael@0: NS_WARNING("Discontinuity in input while buffering first frame."); michael@0: mFirstFrameEnd = -1; michael@0: } michael@0: michael@0: mMP3Parser.Reset(); michael@0: } michael@0: michael@0: uint32_t bytesRead = 0; michael@0: if (NS_FAILED(ParseBuffer(buffer, michael@0: length, michael@0: offset, michael@0: &bytesRead))) { michael@0: return; michael@0: } michael@0: michael@0: MOZ_ASSERT(length <= (int)bytesRead, "All bytes should have been consumed"); michael@0: michael@0: // Update next data offset michael@0: mOffset = offset + bytesRead; michael@0: michael@0: // If we've parsed lots of data and we still have nothing, just give up. michael@0: // We don't count ID3 headers towards the skipped bytes count, as MP3 files michael@0: // can have massive ID3 sections. michael@0: if (!mID3Parser.IsParsed() && mMP3Offset < 0 && michael@0: mOffset - mTotalID3Size > MAX_SKIPPED_BYTES) { michael@0: mIsMP3 = NOT_MP3; michael@0: } michael@0: } michael@0: michael@0: int64_t MP3FrameParser::GetDuration() michael@0: { michael@0: MutexAutoLock mon(mLock); michael@0: michael@0: if (!ParsedHeaders() || !mSamplesPerSecond) { michael@0: // Not a single frame decoded yet. michael@0: return -1; michael@0: } michael@0: michael@0: MOZ_ASSERT(mFrameCount > 0 && mTotalFrameSize > 0, michael@0: "Frame parser should have seen at least one MP3 frame of positive length."); michael@0: michael@0: if (!mFrameCount || !mTotalFrameSize) { michael@0: // This should never happen. michael@0: return -1; michael@0: } michael@0: michael@0: double frames; michael@0: if (mNumFrames < 0) { michael@0: // Estimate the number of frames in the stream based on the average frame michael@0: // size and the length of the MP3 file. michael@0: double frameSize = (double)mTotalFrameSize / mFrameCount; michael@0: frames = (double)(mLength - mMP3Offset) / frameSize; michael@0: } else { michael@0: // We know the exact number of frames from the VBR header. michael@0: frames = mNumFrames; michael@0: } michael@0: michael@0: // The duration of each frame is constant over a given stream. michael@0: double usPerFrame = USECS_PER_S * mSamplesPerFrame / mSamplesPerSecond; michael@0: michael@0: return frames * usPerFrame; michael@0: } michael@0: michael@0: int64_t MP3FrameParser::GetMP3Offset() michael@0: { michael@0: MutexAutoLock mon(mLock); michael@0: return mMP3Offset; michael@0: } michael@0: michael@0: bool MP3FrameParser::ParsedHeaders() michael@0: { michael@0: // We have seen both the beginning and the end of the first MP3 frame in the michael@0: // stream. michael@0: return mMP3Offset > -1 && mFirstFrameEnd < 0; michael@0: } michael@0: michael@0: bool MP3FrameParser::HasExactDuration() michael@0: { michael@0: return ParsedHeaders() && mNumFrames > -1; michael@0: } michael@0: michael@0: bool MP3FrameParser::NeedsData() michael@0: { michael@0: // If we don't know the duration exactly then either: michael@0: // - we're still waiting for a VBR header; or michael@0: // - we look at all frames to constantly update our duration estimate. michael@0: return IsMP3() && !HasExactDuration(); michael@0: } michael@0: michael@0: }