michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim:set ts=2 sw=2 sts=2 et cindent: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: #include "nsError.h" michael@0: #include "MediaDecoderStateMachine.h" michael@0: #include "AbstractMediaDecoder.h" michael@0: #include "MediaResource.h" michael@0: #include "WebMReader.h" michael@0: #include "WebMBufferedParser.h" michael@0: #include "mozilla/dom/TimeRanges.h" michael@0: #include "VorbisUtils.h" michael@0: #include "gfx2DGlue.h" michael@0: michael@0: #include michael@0: michael@0: #define VPX_DONT_DEFINE_STDINT_TYPES michael@0: #include "vpx/vp8dx.h" michael@0: #include "vpx/vpx_decoder.h" michael@0: michael@0: #include "OggReader.h" michael@0: michael@0: using mozilla::NesteggPacketHolder; michael@0: michael@0: template <> michael@0: class nsAutoRefTraits : public nsPointerRefTraits michael@0: { michael@0: public: michael@0: static void Release(NesteggPacketHolder* aHolder) { delete aHolder; } michael@0: }; michael@0: michael@0: namespace mozilla { michael@0: michael@0: using namespace gfx; michael@0: using namespace layers; michael@0: michael@0: // Un-comment to enable logging of seek bisections. michael@0: //#define SEEK_LOGGING michael@0: michael@0: #ifdef PR_LOGGING michael@0: extern PRLogModuleInfo* gMediaDecoderLog; michael@0: PRLogModuleInfo* gNesteggLog; michael@0: #define LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg) michael@0: #ifdef SEEK_LOGGING michael@0: #define SEEK_LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg) michael@0: #else michael@0: #define SEEK_LOG(type, msg) michael@0: #endif michael@0: #else michael@0: #define LOG(type, msg) michael@0: #define SEEK_LOG(type, msg) michael@0: #endif michael@0: michael@0: static const unsigned NS_PER_USEC = 1000; michael@0: static const double NS_PER_S = 1e9; michael@0: michael@0: // Functions for reading and seeking using MediaResource required for michael@0: // nestegg_io. The 'user data' passed to these functions is the michael@0: // decoder from which the media resource is obtained. michael@0: static int webm_read(void *aBuffer, size_t aLength, void *aUserData) michael@0: { michael@0: NS_ASSERTION(aUserData, "aUserData must point to a valid AbstractMediaDecoder"); michael@0: AbstractMediaDecoder* decoder = reinterpret_cast(aUserData); michael@0: MediaResource* resource = decoder->GetResource(); michael@0: NS_ASSERTION(resource, "Decoder has no media resource"); michael@0: michael@0: nsresult rv = NS_OK; michael@0: bool eof = false; michael@0: michael@0: char *p = static_cast(aBuffer); michael@0: while (NS_SUCCEEDED(rv) && aLength > 0) { michael@0: uint32_t bytes = 0; michael@0: rv = resource->Read(p, aLength, &bytes); michael@0: if (bytes == 0) { michael@0: eof = true; michael@0: break; michael@0: } michael@0: aLength -= bytes; michael@0: p += bytes; michael@0: } michael@0: michael@0: return NS_FAILED(rv) ? -1 : eof ? 0 : 1; michael@0: } michael@0: michael@0: static int webm_seek(int64_t aOffset, int aWhence, void *aUserData) michael@0: { michael@0: NS_ASSERTION(aUserData, "aUserData must point to a valid AbstractMediaDecoder"); michael@0: AbstractMediaDecoder* decoder = reinterpret_cast(aUserData); michael@0: MediaResource* resource = decoder->GetResource(); michael@0: NS_ASSERTION(resource, "Decoder has no media resource"); michael@0: nsresult rv = resource->Seek(aWhence, aOffset); michael@0: return NS_SUCCEEDED(rv) ? 0 : -1; michael@0: } michael@0: michael@0: static int64_t webm_tell(void *aUserData) michael@0: { michael@0: NS_ASSERTION(aUserData, "aUserData must point to a valid AbstractMediaDecoder"); michael@0: AbstractMediaDecoder* decoder = reinterpret_cast(aUserData); michael@0: MediaResource* resource = decoder->GetResource(); michael@0: NS_ASSERTION(resource, "Decoder has no media resource"); michael@0: return resource->Tell(); michael@0: } michael@0: michael@0: static void webm_log(nestegg * context, michael@0: unsigned int severity, michael@0: char const * format, ...) michael@0: { michael@0: #ifdef PR_LOGGING michael@0: va_list args; michael@0: char msg[256]; michael@0: const char * sevStr; michael@0: michael@0: switch(severity) { michael@0: case NESTEGG_LOG_DEBUG: michael@0: sevStr = "DBG"; michael@0: break; michael@0: case NESTEGG_LOG_INFO: michael@0: sevStr = "INF"; michael@0: break; michael@0: case NESTEGG_LOG_WARNING: michael@0: sevStr = "WRN"; michael@0: break; michael@0: case NESTEGG_LOG_ERROR: michael@0: sevStr = "ERR"; michael@0: break; michael@0: case NESTEGG_LOG_CRITICAL: michael@0: sevStr = "CRT"; michael@0: break; michael@0: default: michael@0: sevStr = "UNK"; michael@0: break; michael@0: } michael@0: michael@0: va_start(args, format); michael@0: michael@0: PR_snprintf(msg, sizeof(msg), "%p [Nestegg-%s] ", context, sevStr); michael@0: PR_vsnprintf(msg+strlen(msg), sizeof(msg)-strlen(msg), format, args); michael@0: PR_LOG(gNesteggLog, PR_LOG_DEBUG, (msg)); michael@0: michael@0: va_end(args); michael@0: #endif michael@0: } michael@0: michael@0: WebMReader::WebMReader(AbstractMediaDecoder* aDecoder) michael@0: : MediaDecoderReader(aDecoder), michael@0: mContext(nullptr), michael@0: mPacketCount(0), michael@0: mChannels(0), michael@0: #ifdef MOZ_OPUS michael@0: mOpusParser(nullptr), michael@0: mOpusDecoder(nullptr), michael@0: mSkip(0), michael@0: mSeekPreroll(0), michael@0: #endif michael@0: mVideoTrack(0), michael@0: mAudioTrack(0), michael@0: mAudioStartUsec(-1), michael@0: mAudioFrames(0), michael@0: mAudioCodec(-1), michael@0: mVideoCodec(-1), michael@0: mHasVideo(false), michael@0: mHasAudio(false) michael@0: { michael@0: MOZ_COUNT_CTOR(WebMReader); michael@0: #ifdef PR_LOGGING michael@0: if (!gNesteggLog) { michael@0: gNesteggLog = PR_NewLogModule("Nestegg"); michael@0: } michael@0: #endif michael@0: // Zero these member vars to avoid crashes in VP8 destroy and Vorbis clear michael@0: // functions when destructor is called before |Init|. michael@0: memset(&mVPX, 0, sizeof(vpx_codec_ctx_t)); michael@0: memset(&mVorbisBlock, 0, sizeof(vorbis_block)); michael@0: memset(&mVorbisDsp, 0, sizeof(vorbis_dsp_state)); michael@0: memset(&mVorbisInfo, 0, sizeof(vorbis_info)); michael@0: memset(&mVorbisComment, 0, sizeof(vorbis_comment)); michael@0: } michael@0: michael@0: WebMReader::~WebMReader() michael@0: { michael@0: Cleanup(); michael@0: michael@0: mVideoPackets.Reset(); michael@0: mAudioPackets.Reset(); michael@0: michael@0: vpx_codec_destroy(&mVPX); michael@0: michael@0: vorbis_block_clear(&mVorbisBlock); michael@0: vorbis_dsp_clear(&mVorbisDsp); michael@0: vorbis_info_clear(&mVorbisInfo); michael@0: vorbis_comment_clear(&mVorbisComment); michael@0: michael@0: if (mOpusDecoder) { michael@0: opus_multistream_decoder_destroy(mOpusDecoder); michael@0: mOpusDecoder = nullptr; michael@0: } michael@0: michael@0: MOZ_COUNT_DTOR(WebMReader); michael@0: } michael@0: michael@0: nsresult WebMReader::Init(MediaDecoderReader* aCloneDonor) michael@0: { michael@0: michael@0: vorbis_info_init(&mVorbisInfo); michael@0: vorbis_comment_init(&mVorbisComment); michael@0: memset(&mVorbisDsp, 0, sizeof(vorbis_dsp_state)); michael@0: memset(&mVorbisBlock, 0, sizeof(vorbis_block)); michael@0: michael@0: if (aCloneDonor) { michael@0: mBufferedState = static_cast(aCloneDonor)->mBufferedState; michael@0: } else { michael@0: mBufferedState = new WebMBufferedState; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult WebMReader::ResetDecode() michael@0: { michael@0: mAudioFrames = 0; michael@0: mAudioStartUsec = -1; michael@0: nsresult res = NS_OK; michael@0: if (NS_FAILED(MediaDecoderReader::ResetDecode())) { michael@0: res = NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: if (mAudioCodec == NESTEGG_CODEC_VORBIS) { michael@0: // Ignore failed results from vorbis_synthesis_restart. They michael@0: // aren't fatal and it fails when ResetDecode is called at a michael@0: // time when no vorbis data has been read. michael@0: vorbis_synthesis_restart(&mVorbisDsp); michael@0: #ifdef MOZ_OPUS michael@0: } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { michael@0: if (mOpusDecoder) { michael@0: // Reset the decoder. michael@0: opus_multistream_decoder_ctl(mOpusDecoder, OPUS_RESET_STATE); michael@0: mSkip = mOpusParser->mPreSkip; michael@0: } michael@0: #endif michael@0: } michael@0: michael@0: mVideoPackets.Reset(); michael@0: mAudioPackets.Reset(); michael@0: michael@0: return res; michael@0: } michael@0: michael@0: void WebMReader::Cleanup() michael@0: { michael@0: if (mContext) { michael@0: nestegg_destroy(mContext); michael@0: mContext = nullptr; michael@0: } michael@0: } michael@0: michael@0: nsresult WebMReader::ReadMetadata(MediaInfo* aInfo, michael@0: MetadataTags** aTags) michael@0: { michael@0: NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread."); michael@0: michael@0: nestegg_io io; michael@0: io.read = webm_read; michael@0: io.seek = webm_seek; michael@0: io.tell = webm_tell; michael@0: io.userdata = mDecoder; michael@0: int64_t maxOffset = -1; michael@0: int r = nestegg_init(&mContext, io, &webm_log, maxOffset); michael@0: if (r == -1) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: uint64_t duration = 0; michael@0: r = nestegg_duration(mContext, &duration); michael@0: if (r == 0) { michael@0: ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor()); michael@0: mDecoder->SetMediaDuration(duration / NS_PER_USEC); michael@0: } michael@0: michael@0: unsigned int ntracks = 0; michael@0: r = nestegg_track_count(mContext, &ntracks); michael@0: if (r == -1) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: for (uint32_t track = 0; track < ntracks; ++track) { michael@0: int id = nestegg_track_codec_id(mContext, track); michael@0: if (id == -1) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: int type = nestegg_track_type(mContext, track); michael@0: if (!mHasVideo && type == NESTEGG_TRACK_VIDEO) { michael@0: nestegg_video_params params; michael@0: r = nestegg_track_video_params(mContext, track, ¶ms); michael@0: if (r == -1) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: vpx_codec_iface_t* dx = nullptr; michael@0: mVideoCodec = nestegg_track_codec_id(mContext, track); michael@0: if (mVideoCodec == NESTEGG_CODEC_VP8) { michael@0: dx = vpx_codec_vp8_dx(); michael@0: } else if (mVideoCodec == NESTEGG_CODEC_VP9) { michael@0: dx = vpx_codec_vp9_dx(); michael@0: } michael@0: if (!dx || vpx_codec_dec_init(&mVPX, dx, nullptr, 0)) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: // Picture region, taking into account cropping, before scaling michael@0: // to the display size. michael@0: nsIntRect pictureRect(params.crop_left, michael@0: params.crop_top, michael@0: params.width - (params.crop_right + params.crop_left), michael@0: params.height - (params.crop_bottom + params.crop_top)); michael@0: michael@0: // If the cropping data appears invalid then use the frame data michael@0: if (pictureRect.width <= 0 || michael@0: pictureRect.height <= 0 || michael@0: pictureRect.x < 0 || michael@0: pictureRect.y < 0) michael@0: { michael@0: pictureRect.x = 0; michael@0: pictureRect.y = 0; michael@0: pictureRect.width = params.width; michael@0: pictureRect.height = params.height; michael@0: } michael@0: michael@0: // Validate the container-reported frame and pictureRect sizes. This ensures michael@0: // that our video frame creation code doesn't overflow. michael@0: nsIntSize displaySize(params.display_width, params.display_height); michael@0: nsIntSize frameSize(params.width, params.height); michael@0: if (!IsValidVideoRegion(frameSize, pictureRect, displaySize)) { michael@0: // Video track's frame sizes will overflow. Ignore the video track. michael@0: continue; michael@0: } michael@0: michael@0: mVideoTrack = track; michael@0: mHasVideo = true; michael@0: mInfo.mVideo.mHasVideo = true; michael@0: michael@0: mInfo.mVideo.mDisplay = displaySize; michael@0: mPicture = pictureRect; michael@0: mInitialFrame = frameSize; michael@0: michael@0: switch (params.stereo_mode) { michael@0: case NESTEGG_VIDEO_MONO: michael@0: mInfo.mVideo.mStereoMode = StereoMode::MONO; michael@0: break; michael@0: case NESTEGG_VIDEO_STEREO_LEFT_RIGHT: michael@0: mInfo.mVideo.mStereoMode = StereoMode::LEFT_RIGHT; michael@0: break; michael@0: case NESTEGG_VIDEO_STEREO_BOTTOM_TOP: michael@0: mInfo.mVideo.mStereoMode = StereoMode::BOTTOM_TOP; michael@0: break; michael@0: case NESTEGG_VIDEO_STEREO_TOP_BOTTOM: michael@0: mInfo.mVideo.mStereoMode = StereoMode::TOP_BOTTOM; michael@0: break; michael@0: case NESTEGG_VIDEO_STEREO_RIGHT_LEFT: michael@0: mInfo.mVideo.mStereoMode = StereoMode::RIGHT_LEFT; michael@0: break; michael@0: } michael@0: } michael@0: else if (!mHasAudio && type == NESTEGG_TRACK_AUDIO) { michael@0: nestegg_audio_params params; michael@0: r = nestegg_track_audio_params(mContext, track, ¶ms); michael@0: if (r == -1) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: mAudioTrack = track; michael@0: mHasAudio = true; michael@0: mInfo.mAudio.mHasAudio = true; michael@0: mAudioCodec = nestegg_track_codec_id(mContext, track); michael@0: mCodecDelay = params.codec_delay / NS_PER_USEC; michael@0: michael@0: if (mAudioCodec == NESTEGG_CODEC_VORBIS) { michael@0: // Get the Vorbis header data michael@0: unsigned int nheaders = 0; michael@0: r = nestegg_track_codec_data_count(mContext, track, &nheaders); michael@0: if (r == -1 || nheaders != 3) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: for (uint32_t header = 0; header < nheaders; ++header) { michael@0: unsigned char* data = 0; michael@0: size_t length = 0; michael@0: michael@0: r = nestegg_track_codec_data(mContext, track, header, &data, &length); michael@0: if (r == -1) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0); michael@0: michael@0: r = vorbis_synthesis_headerin(&mVorbisInfo, michael@0: &mVorbisComment, michael@0: &opacket); michael@0: if (r != 0) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: } michael@0: michael@0: r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo); michael@0: if (r != 0) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock); michael@0: if (r != 0) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: mInfo.mAudio.mRate = mVorbisDsp.vi->rate; michael@0: mInfo.mAudio.mChannels = mVorbisDsp.vi->channels; michael@0: mChannels = mInfo.mAudio.mChannels; michael@0: #ifdef MOZ_OPUS michael@0: } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { michael@0: unsigned char* data = 0; michael@0: size_t length = 0; michael@0: r = nestegg_track_codec_data(mContext, track, 0, &data, &length); michael@0: if (r == -1) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: mOpusParser = new OpusParser; michael@0: if (!mOpusParser->DecodeHeader(data, length)) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: if (!InitOpusDecoder()) { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: if (static_cast(mCodecDelay) != FramesToUsecs(mOpusParser->mPreSkip, mOpusParser->mRate).value()) { michael@0: LOG(PR_LOG_WARNING, michael@0: ("Invalid Opus header: CodecDelay and pre-skip do not match!\n")); michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: mInfo.mAudio.mRate = mOpusParser->mRate; michael@0: michael@0: mInfo.mAudio.mChannels = mOpusParser->mChannels; michael@0: mChannels = mInfo.mAudio.mChannels; michael@0: mSeekPreroll = params.seek_preroll; michael@0: #endif michael@0: } else { michael@0: Cleanup(); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // We can't seek in buffered regions if we have no cues. michael@0: mDecoder->SetMediaSeekable(nestegg_has_cues(mContext) == 1); michael@0: michael@0: *aInfo = mInfo; michael@0: michael@0: *aTags = nullptr; michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: #ifdef MOZ_OPUS michael@0: bool WebMReader::InitOpusDecoder() michael@0: { michael@0: int r; michael@0: michael@0: NS_ASSERTION(mOpusDecoder == nullptr, "leaking OpusDecoder"); michael@0: michael@0: mOpusDecoder = opus_multistream_decoder_create(mOpusParser->mRate, michael@0: mOpusParser->mChannels, michael@0: mOpusParser->mStreams, michael@0: mOpusParser->mCoupledStreams, michael@0: mOpusParser->mMappingTable, michael@0: &r); michael@0: mSkip = mOpusParser->mPreSkip; michael@0: michael@0: return r == OPUS_OK; michael@0: } michael@0: #endif michael@0: michael@0: ogg_packet WebMReader::InitOggPacket(unsigned char* aData, michael@0: size_t aLength, michael@0: bool aBOS, michael@0: bool aEOS, michael@0: int64_t aGranulepos) michael@0: { michael@0: ogg_packet packet; michael@0: packet.packet = aData; michael@0: packet.bytes = aLength; michael@0: packet.b_o_s = aBOS; michael@0: packet.e_o_s = aEOS; michael@0: packet.granulepos = aGranulepos; michael@0: packet.packetno = mPacketCount++; michael@0: return packet; michael@0: } michael@0: michael@0: bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset) michael@0: { michael@0: NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread."); michael@0: michael@0: int r = 0; michael@0: unsigned int count = 0; michael@0: r = nestegg_packet_count(aPacket, &count); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: michael@0: uint64_t tstamp = 0; michael@0: r = nestegg_packet_tstamp(aPacket, &tstamp); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: michael@0: const uint32_t rate = mInfo.mAudio.mRate; michael@0: uint64_t tstamp_usecs = tstamp / NS_PER_USEC; michael@0: if (mAudioStartUsec == -1) { michael@0: // This is the first audio chunk. Assume the start time of our decode michael@0: // is the start of this chunk. michael@0: mAudioStartUsec = tstamp_usecs; michael@0: } michael@0: // If there's a gap between the start of this audio chunk and the end of michael@0: // the previous audio chunk, we need to increment the packet count so that michael@0: // the vorbis decode doesn't use data from before the gap to help decode michael@0: // from after the gap. michael@0: CheckedInt64 tstamp_frames = UsecsToFrames(tstamp_usecs, rate); michael@0: CheckedInt64 decoded_frames = UsecsToFrames(mAudioStartUsec, rate); michael@0: if (!tstamp_frames.isValid() || !decoded_frames.isValid()) { michael@0: NS_WARNING("Int overflow converting WebM times to frames"); michael@0: return false; michael@0: } michael@0: decoded_frames += mAudioFrames; michael@0: if (!decoded_frames.isValid()) { michael@0: NS_WARNING("Int overflow adding decoded_frames"); michael@0: return false; michael@0: } michael@0: if (tstamp_frames.value() > decoded_frames.value()) { michael@0: #ifdef DEBUG michael@0: CheckedInt64 usecs = FramesToUsecs(tstamp_frames.value() - decoded_frames.value(), rate); michael@0: LOG(PR_LOG_DEBUG, ("WebMReader detected gap of %lld, %lld frames, in audio stream\n", michael@0: usecs.isValid() ? usecs.value() : -1, michael@0: tstamp_frames.value() - decoded_frames.value())); michael@0: #endif michael@0: mPacketCount++; michael@0: mAudioStartUsec = tstamp_usecs; michael@0: mAudioFrames = 0; michael@0: } michael@0: michael@0: int32_t total_frames = 0; michael@0: for (uint32_t i = 0; i < count; ++i) { michael@0: unsigned char* data; michael@0: size_t length; michael@0: r = nestegg_packet_data(aPacket, i, &data, &length); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: if (mAudioCodec == NESTEGG_CODEC_VORBIS) { michael@0: ogg_packet opacket = InitOggPacket(data, length, false, false, -1); michael@0: michael@0: if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) { michael@0: return false; michael@0: } michael@0: michael@0: if (vorbis_synthesis_blockin(&mVorbisDsp, michael@0: &mVorbisBlock) != 0) { michael@0: return false; michael@0: } michael@0: michael@0: VorbisPCMValue** pcm = 0; michael@0: int32_t frames = 0; michael@0: while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) { michael@0: nsAutoArrayPtr buffer(new AudioDataValue[frames * mChannels]); michael@0: for (uint32_t j = 0; j < mChannels; ++j) { michael@0: VorbisPCMValue* channel = pcm[j]; michael@0: for (uint32_t i = 0; i < uint32_t(frames); ++i) { michael@0: buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]); michael@0: } michael@0: } michael@0: michael@0: CheckedInt64 duration = FramesToUsecs(frames, rate); michael@0: if (!duration.isValid()) { michael@0: NS_WARNING("Int overflow converting WebM audio duration"); michael@0: return false; michael@0: } michael@0: CheckedInt64 total_duration = FramesToUsecs(total_frames, rate); michael@0: if (!total_duration.isValid()) { michael@0: NS_WARNING("Int overflow converting WebM audio total_duration"); michael@0: return false; michael@0: } michael@0: michael@0: CheckedInt64 time = total_duration + tstamp_usecs; michael@0: if (!time.isValid()) { michael@0: NS_WARNING("Int overflow adding total_duration and tstamp_usecs"); michael@0: return false; michael@0: }; michael@0: michael@0: total_frames += frames; michael@0: AudioQueue().Push(new AudioData(aOffset, michael@0: time.value(), michael@0: duration.value(), michael@0: frames, michael@0: buffer.forget(), michael@0: mChannels)); michael@0: mAudioFrames += frames; michael@0: if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) { michael@0: return false; michael@0: } michael@0: } michael@0: } else if (mAudioCodec == NESTEGG_CODEC_OPUS) { michael@0: #ifdef MOZ_OPUS michael@0: uint32_t channels = mOpusParser->mChannels; michael@0: michael@0: // Maximum value is 63*2880, so there's no chance of overflow. michael@0: int32_t frames_number = opus_packet_get_nb_frames(data, length); michael@0: michael@0: if (frames_number <= 0) michael@0: return false; // Invalid packet header. michael@0: int32_t samples = opus_packet_get_samples_per_frame(data, michael@0: (opus_int32) rate); michael@0: int32_t frames = frames_number*samples; michael@0: michael@0: // A valid Opus packet must be between 2.5 and 120 ms long. michael@0: if (frames < 120 || frames > 5760) michael@0: return false; michael@0: nsAutoArrayPtr buffer(new AudioDataValue[frames * channels]); michael@0: michael@0: // Decode to the appropriate sample type. michael@0: #ifdef MOZ_SAMPLE_TYPE_FLOAT32 michael@0: int ret = opus_multistream_decode_float(mOpusDecoder, michael@0: data, length, michael@0: buffer, frames, false); michael@0: #else michael@0: int ret = opus_multistream_decode(mOpusDecoder, michael@0: data, length, michael@0: buffer, frames, false); michael@0: #endif michael@0: if (ret < 0) michael@0: return false; michael@0: NS_ASSERTION(ret == frames, "Opus decoded too few audio samples"); michael@0: CheckedInt64 startTime = tstamp_usecs; michael@0: michael@0: // Trim the initial frames while the decoder is settling. michael@0: if (mSkip > 0) { michael@0: int32_t skipFrames = std::min(mSkip, frames); michael@0: if (skipFrames == frames) { michael@0: // discard the whole packet michael@0: mSkip -= frames; michael@0: LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames" michael@0: " (whole packet)", frames)); michael@0: return true; michael@0: } michael@0: int32_t keepFrames = frames - skipFrames; michael@0: if (keepFrames < 0) { michael@0: NS_WARNING("Int overflow in keepFrames"); michael@0: return false; michael@0: } michael@0: int samples = keepFrames * channels; michael@0: if (samples < 0) { michael@0: NS_WARNING("Int overflow in samples"); michael@0: return false; michael@0: } michael@0: nsAutoArrayPtr trimBuffer(new AudioDataValue[samples]); michael@0: for (int i = 0; i < samples; i++) michael@0: trimBuffer[i] = buffer[skipFrames*channels + i]; michael@0: startTime = startTime + FramesToUsecs(skipFrames, rate); michael@0: frames = keepFrames; michael@0: buffer = trimBuffer; michael@0: michael@0: mSkip -= skipFrames; michael@0: LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames)); michael@0: } michael@0: michael@0: int64_t discardPadding = 0; michael@0: r = nestegg_packet_discard_padding(aPacket, &discardPadding); michael@0: if (discardPadding > 0) { michael@0: CheckedInt64 discardFrames = UsecsToFrames(discardPadding * NS_PER_USEC, rate); michael@0: if (!discardFrames.isValid()) { michael@0: NS_WARNING("Int overflow in DiscardPadding"); michael@0: return false; michael@0: } michael@0: int32_t keepFrames = frames - discardFrames.value(); michael@0: if (keepFrames > 0) { michael@0: int samples = keepFrames * channels; michael@0: if (samples < 0) { michael@0: NS_WARNING("Int overflow in samples"); michael@0: return false; michael@0: } michael@0: nsAutoArrayPtr trimBuffer(new AudioDataValue[samples]); michael@0: for (int i = 0; i < samples; i++) michael@0: trimBuffer[i] = buffer[i]; michael@0: frames = keepFrames; michael@0: buffer = trimBuffer; michael@0: } else { michael@0: LOG(PR_LOG_DEBUG, ("Opus decoder discarding whole packet" michael@0: " ( %d frames) as padding", frames)); michael@0: return true; michael@0: } michael@0: } michael@0: michael@0: // Apply the header gain if one was specified. michael@0: #ifdef MOZ_SAMPLE_TYPE_FLOAT32 michael@0: if (mOpusParser->mGain != 1.0f) { michael@0: float gain = mOpusParser->mGain; michael@0: int samples = frames * channels; michael@0: for (int i = 0; i < samples; i++) { michael@0: buffer[i] *= gain; michael@0: } michael@0: } michael@0: #else michael@0: if (mOpusParser->mGain_Q16 != 65536) { michael@0: int64_t gain_Q16 = mOpusParser->mGain_Q16; michael@0: int samples = frames * channels; michael@0: for (int i = 0; i < samples; i++) { michael@0: int32_t val = static_cast((gain_Q16*buffer[i] + 32768)>>16); michael@0: buffer[i] = static_cast(MOZ_CLIP_TO_15(val)); michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: // No channel mapping for more than 8 channels. michael@0: if (channels > 8) { michael@0: return false; michael@0: } michael@0: michael@0: CheckedInt64 duration = FramesToUsecs(frames, rate); michael@0: if (!duration.isValid()) { michael@0: NS_WARNING("Int overflow converting WebM audio duration"); michael@0: return false; michael@0: } michael@0: CheckedInt64 time = startTime - mCodecDelay; michael@0: if (!time.isValid()) { michael@0: NS_WARNING("Int overflow shifting tstamp by codec delay"); michael@0: return false; michael@0: }; michael@0: AudioQueue().Push(new AudioData(mDecoder->GetResource()->Tell(), michael@0: time.value(), michael@0: duration.value(), michael@0: frames, michael@0: buffer.forget(), michael@0: mChannels)); michael@0: michael@0: mAudioFrames += frames; michael@0: #else michael@0: return false; michael@0: #endif /* MOZ_OPUS */ michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: nsReturnRef WebMReader::NextPacket(TrackType aTrackType) michael@0: { michael@0: // The packet queue that packets will be pushed on if they michael@0: // are not the type we are interested in. michael@0: WebMPacketQueue& otherPackets = michael@0: aTrackType == VIDEO ? mAudioPackets : mVideoPackets; michael@0: michael@0: // The packet queue for the type that we are interested in. michael@0: WebMPacketQueue &packets = michael@0: aTrackType == VIDEO ? mVideoPackets : mAudioPackets; michael@0: michael@0: // Flag to indicate that we do need to playback these types of michael@0: // packets. michael@0: bool hasType = aTrackType == VIDEO ? mHasVideo : mHasAudio; michael@0: michael@0: // Flag to indicate that we do need to playback the other type michael@0: // of track. michael@0: bool hasOtherType = aTrackType == VIDEO ? mHasAudio : mHasVideo; michael@0: michael@0: // Track we are interested in michael@0: uint32_t ourTrack = aTrackType == VIDEO ? mVideoTrack : mAudioTrack; michael@0: michael@0: // Value of other track michael@0: uint32_t otherTrack = aTrackType == VIDEO ? mAudioTrack : mVideoTrack; michael@0: michael@0: nsAutoRef holder; michael@0: michael@0: if (packets.GetSize() > 0) { michael@0: holder.own(packets.PopFront()); michael@0: } else { michael@0: // Keep reading packets until we find a packet michael@0: // for the track we want. michael@0: do { michael@0: nestegg_packet* packet; michael@0: int r = nestegg_read_packet(mContext, &packet); michael@0: if (r <= 0) { michael@0: return nsReturnRef(); michael@0: } michael@0: int64_t offset = mDecoder->GetResource()->Tell(); michael@0: holder.own(new NesteggPacketHolder(packet, offset)); michael@0: michael@0: unsigned int track = 0; michael@0: r = nestegg_packet_track(packet, &track); michael@0: if (r == -1) { michael@0: return nsReturnRef(); michael@0: } michael@0: michael@0: if (hasOtherType && otherTrack == track) { michael@0: // Save the packet for when we want these packets michael@0: otherPackets.Push(holder.disown()); michael@0: continue; michael@0: } michael@0: michael@0: // The packet is for the track we want to play michael@0: if (hasType && ourTrack == track) { michael@0: break; michael@0: } michael@0: } while (true); michael@0: } michael@0: michael@0: return holder.out(); michael@0: } michael@0: michael@0: bool WebMReader::DecodeAudioData() michael@0: { michael@0: NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread."); michael@0: michael@0: nsAutoRef holder(NextPacket(AUDIO)); michael@0: if (!holder) { michael@0: return false; michael@0: } michael@0: michael@0: return DecodeAudioPacket(holder->mPacket, holder->mOffset); michael@0: } michael@0: michael@0: bool WebMReader::DecodeVideoFrame(bool &aKeyframeSkip, michael@0: int64_t aTimeThreshold) michael@0: { michael@0: NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread."); michael@0: michael@0: // Record number of frames decoded and parsed. Automatically update the michael@0: // stats counters using the AutoNotifyDecoded stack-based class. michael@0: uint32_t parsed = 0, decoded = 0; michael@0: AbstractMediaDecoder::AutoNotifyDecoded autoNotify(mDecoder, parsed, decoded); michael@0: michael@0: nsAutoRef holder(NextPacket(VIDEO)); michael@0: if (!holder) { michael@0: return false; michael@0: } michael@0: michael@0: nestegg_packet* packet = holder->mPacket; michael@0: unsigned int track = 0; michael@0: int r = nestegg_packet_track(packet, &track); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: michael@0: unsigned int count = 0; michael@0: r = nestegg_packet_count(packet, &count); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: michael@0: uint64_t tstamp = 0; michael@0: r = nestegg_packet_tstamp(packet, &tstamp); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: michael@0: // The end time of this frame is the start time of the next frame. Fetch michael@0: // the timestamp of the next packet for this track. If we've reached the michael@0: // end of the resource, use the file's duration as the end time of this michael@0: // video frame. michael@0: uint64_t next_tstamp = 0; michael@0: nsAutoRef next_holder(NextPacket(VIDEO)); michael@0: if (next_holder) { michael@0: r = nestegg_packet_tstamp(next_holder->mPacket, &next_tstamp); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: PushVideoPacket(next_holder.disown()); michael@0: } else { michael@0: ReentrantMonitorAutoEnter decoderMon(mDecoder->GetReentrantMonitor()); michael@0: int64_t endTime = mDecoder->GetEndMediaTime(); michael@0: if (endTime == -1) { michael@0: return false; michael@0: } michael@0: next_tstamp = endTime * NS_PER_USEC; michael@0: } michael@0: michael@0: int64_t tstamp_usecs = tstamp / NS_PER_USEC; michael@0: for (uint32_t i = 0; i < count; ++i) { michael@0: unsigned char* data; michael@0: size_t length; michael@0: r = nestegg_packet_data(packet, i, &data, &length); michael@0: if (r == -1) { michael@0: return false; michael@0: } michael@0: michael@0: vpx_codec_stream_info_t si; michael@0: memset(&si, 0, sizeof(si)); michael@0: si.sz = sizeof(si); michael@0: if (mVideoCodec == NESTEGG_CODEC_VP8) { michael@0: vpx_codec_peek_stream_info(vpx_codec_vp8_dx(), data, length, &si); michael@0: } else if (mVideoCodec == NESTEGG_CODEC_VP9) { michael@0: vpx_codec_peek_stream_info(vpx_codec_vp9_dx(), data, length, &si); michael@0: } michael@0: if (aKeyframeSkip && (!si.is_kf || tstamp_usecs < aTimeThreshold)) { michael@0: // Skipping to next keyframe... michael@0: parsed++; // Assume 1 frame per chunk. michael@0: continue; michael@0: } michael@0: michael@0: if (aKeyframeSkip && si.is_kf) { michael@0: aKeyframeSkip = false; michael@0: } michael@0: michael@0: if (vpx_codec_decode(&mVPX, data, length, nullptr, 0)) { michael@0: return false; michael@0: } michael@0: michael@0: // If the timestamp of the video frame is less than michael@0: // the time threshold required then it is not added michael@0: // to the video queue and won't be displayed. michael@0: if (tstamp_usecs < aTimeThreshold) { michael@0: parsed++; // Assume 1 frame per chunk. michael@0: continue; michael@0: } michael@0: michael@0: vpx_codec_iter_t iter = nullptr; michael@0: vpx_image_t *img; michael@0: michael@0: while ((img = vpx_codec_get_frame(&mVPX, &iter))) { michael@0: NS_ASSERTION(img->fmt == IMG_FMT_I420, "WebM image format is not I420"); michael@0: michael@0: // Chroma shifts are rounded down as per the decoding examples in the VP8 SDK michael@0: VideoData::YCbCrBuffer b; michael@0: b.mPlanes[0].mData = img->planes[0]; michael@0: b.mPlanes[0].mStride = img->stride[0]; michael@0: b.mPlanes[0].mHeight = img->d_h; michael@0: b.mPlanes[0].mWidth = img->d_w; michael@0: b.mPlanes[0].mOffset = b.mPlanes[0].mSkip = 0; michael@0: michael@0: b.mPlanes[1].mData = img->planes[1]; michael@0: b.mPlanes[1].mStride = img->stride[1]; michael@0: b.mPlanes[1].mHeight = (img->d_h + 1) >> img->y_chroma_shift; michael@0: b.mPlanes[1].mWidth = (img->d_w + 1) >> img->x_chroma_shift; michael@0: b.mPlanes[1].mOffset = b.mPlanes[1].mSkip = 0; michael@0: michael@0: b.mPlanes[2].mData = img->planes[2]; michael@0: b.mPlanes[2].mStride = img->stride[2]; michael@0: b.mPlanes[2].mHeight = (img->d_h + 1) >> img->y_chroma_shift; michael@0: b.mPlanes[2].mWidth = (img->d_w + 1) >> img->x_chroma_shift; michael@0: b.mPlanes[2].mOffset = b.mPlanes[2].mSkip = 0; michael@0: michael@0: IntRect picture = ToIntRect(mPicture); michael@0: if (img->d_w != static_cast(mInitialFrame.width) || michael@0: img->d_h != static_cast(mInitialFrame.height)) { michael@0: // Frame size is different from what the container reports. This is legal michael@0: // in WebM, and we will preserve the ratio of the crop rectangle as it michael@0: // was reported relative to the picture size reported by the container. michael@0: picture.x = (mPicture.x * img->d_w) / mInitialFrame.width; michael@0: picture.y = (mPicture.y * img->d_h) / mInitialFrame.height; michael@0: picture.width = (img->d_w * mPicture.width) / mInitialFrame.width; michael@0: picture.height = (img->d_h * mPicture.height) / mInitialFrame.height; michael@0: } michael@0: michael@0: VideoData *v = VideoData::Create(mInfo.mVideo, michael@0: mDecoder->GetImageContainer(), michael@0: holder->mOffset, michael@0: tstamp_usecs, michael@0: (next_tstamp / NS_PER_USEC) - tstamp_usecs, michael@0: b, michael@0: si.is_kf, michael@0: -1, michael@0: picture); michael@0: if (!v) { michael@0: return false; michael@0: } michael@0: parsed++; michael@0: decoded++; michael@0: NS_ASSERTION(decoded <= parsed, michael@0: "Expect only 1 frame per chunk per packet in WebM..."); michael@0: VideoQueue().Push(v); michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: void michael@0: WebMReader::PushVideoPacket(NesteggPacketHolder* aItem) michael@0: { michael@0: mVideoPackets.PushFront(aItem); michael@0: } michael@0: michael@0: nsresult WebMReader::Seek(int64_t aTarget, int64_t aStartTime, int64_t aEndTime, michael@0: int64_t aCurrentTime) michael@0: { michael@0: NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread."); michael@0: michael@0: LOG(PR_LOG_DEBUG, ("Reader [%p] for Decoder [%p]: About to seek to %fs", michael@0: this, mDecoder, aTarget/1000000.0)); michael@0: if (NS_FAILED(ResetDecode())) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: uint32_t trackToSeek = mHasVideo ? mVideoTrack : mAudioTrack; michael@0: uint64_t target = aTarget * NS_PER_USEC; michael@0: if (mSeekPreroll) { michael@0: target = std::max(static_cast(aStartTime * NS_PER_USEC), target - mSeekPreroll); michael@0: } michael@0: int r = nestegg_track_seek(mContext, trackToSeek, target); michael@0: if (r != 0) { michael@0: // Try seeking directly based on cluster information in memory. michael@0: int64_t offset = 0; michael@0: bool rv = mBufferedState->GetOffsetForTime((aTarget - aStartTime)/NS_PER_USEC, &offset); michael@0: if (!rv) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: r = nestegg_offset_seek(mContext, offset); michael@0: if (r != 0) { michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: } michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult WebMReader::GetBuffered(dom::TimeRanges* aBuffered, int64_t aStartTime) michael@0: { michael@0: MediaResource* resource = mDecoder->GetResource(); michael@0: michael@0: uint64_t timecodeScale; michael@0: if (!mContext || nestegg_tstamp_scale(mContext, &timecodeScale) == -1) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: // Special case completely cached files. This also handles local files. michael@0: bool isFullyCached = resource->IsDataCachedToEndOfResource(0); michael@0: if (isFullyCached) { michael@0: uint64_t duration = 0; michael@0: if (nestegg_duration(mContext, &duration) == 0) { michael@0: aBuffered->Add(0, duration / NS_PER_S); michael@0: } michael@0: } michael@0: michael@0: uint32_t bufferedLength = 0; michael@0: aBuffered->GetLength(&bufferedLength); michael@0: michael@0: // Either we the file is not fully cached, or we couldn't find a duration in michael@0: // the WebM bitstream. michael@0: if (!isFullyCached || !bufferedLength) { michael@0: MediaResource* resource = mDecoder->GetResource(); michael@0: nsTArray ranges; michael@0: nsresult res = resource->GetCachedRanges(ranges); michael@0: NS_ENSURE_SUCCESS(res, res); michael@0: michael@0: for (uint32_t index = 0; index < ranges.Length(); index++) { michael@0: uint64_t start, end; michael@0: bool rv = mBufferedState->CalculateBufferedForRange(ranges[index].mStart, michael@0: ranges[index].mEnd, michael@0: &start, &end); michael@0: if (rv) { michael@0: double startTime = start * timecodeScale / NS_PER_S - aStartTime; michael@0: double endTime = end * timecodeScale / NS_PER_S - aStartTime; michael@0: // If this range extends to the end of the file, the true end time michael@0: // is the file's duration. michael@0: if (resource->IsDataCachedToEndOfResource(ranges[index].mStart)) { michael@0: uint64_t duration = 0; michael@0: if (nestegg_duration(mContext, &duration) == 0) { michael@0: endTime = duration / NS_PER_S; michael@0: } michael@0: } michael@0: michael@0: aBuffered->Add(startTime, endTime); michael@0: } michael@0: } michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: void WebMReader::NotifyDataArrived(const char* aBuffer, uint32_t aLength, int64_t aOffset) michael@0: { michael@0: mBufferedState->NotifyDataArrived(aBuffer, aLength, aOffset); michael@0: } michael@0: michael@0: } // namespace mozilla