michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim:set ts=2 sw=2 sts=2 et cindent: */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include "WMFReader.h"
michael@0: #include "WMFDecoder.h"
michael@0: #include "WMFUtils.h"
michael@0: #include "WMFByteStream.h"
michael@0: #include "WMFSourceReaderCallback.h"
michael@0: #include "mozilla/ArrayUtils.h"
michael@0: #include "mozilla/dom/TimeRanges.h"
michael@0: #include "mozilla/dom/HTMLMediaElement.h"
michael@0: #include "mozilla/Preferences.h"
michael@0: #include "DXVA2Manager.h"
michael@0: #include "ImageContainer.h"
michael@0: #include "Layers.h"
michael@0: #include "mozilla/layers/LayersTypes.h"
michael@0: 
michael@0: #ifndef MOZ_SAMPLE_TYPE_FLOAT32
michael@0: #error We expect 32bit float audio samples on desktop for the Windows Media Foundation media backend.
michael@0: #endif
michael@0: 
michael@0: #include "MediaDecoder.h"
michael@0: #include "VideoUtils.h"
michael@0: #include "gfx2DGlue.h"
michael@0: 
michael@0: using namespace mozilla::gfx;
michael@0: using mozilla::layers::Image;
michael@0: using mozilla::layers::LayerManager;
michael@0: using mozilla::layers::LayersBackend;
michael@0: 
michael@0: namespace mozilla {
michael@0: 
michael@0: // DECODER_LOG(fmt, ...) writes a debug-level message to the externally
michael@0: // defined gMediaDecoderLog module. Without PR_LOGGING it expands to nothing.
michael@0: #if defined(PR_LOGGING)
michael@0: extern PRLogModuleInfo* gMediaDecoderLog;
michael@0: #define DECODER_LOG(...) PR_LOG(gMediaDecoderLog, PR_LOG_DEBUG, (__VA_ARGS__))
michael@0: #else
michael@0: #define DECODER_LOG(...)
michael@0: #endif
michael@0: 
michael@0: // Uncomment to enable verbose per-sample logging.
michael@0: //#define LOG_SAMPLE_DECODE 1
michael@0: 
michael@0: // Builds a reader for aDecoder. All stream parameters start zeroed, no audio
michael@0: // or video stream is marked present, hardware acceleration is off, and the
michael@0: // audio position is flagged to be recaptured from the first decoded sample.
michael@0: // MP3 support is read once from WMFDecoder::IsMP3Supported().
michael@0: WMFReader::WMFReader(AbstractMediaDecoder* aDecoder)
michael@0:   : MediaDecoderReader(aDecoder)
michael@0:   , mSourceReader(nullptr)
michael@0:   , mAudioChannels(0)
michael@0:   , mAudioBytesPerSample(0)
michael@0:   , mAudioRate(0)
michael@0:   , mVideoWidth(0)
michael@0:   , mVideoHeight(0)
michael@0:   , mVideoStride(0)
michael@0:   , mAudioFrameSum(0)
michael@0:   , mAudioFrameOffset(0)
michael@0:   , mHasAudio(false)
michael@0:   , mHasVideo(false)
michael@0:   , mUseHwAccel(false)
michael@0:   , mMustRecaptureAudioPosition(true)
michael@0:   , mIsMP3Enabled(WMFDecoder::IsMP3Supported())
michael@0:   , mCOMInitialized(false)
michael@0: {
michael@0:   // Construction is expected to happen on the main thread only.
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Must be on main thread.");
michael@0:   MOZ_COUNT_CTOR(WMFReader);
michael@0: }
michael@0: 
michael@0: // Tears the reader down: shuts down the byte stream (if one was created) and
michael@0: // then Media Foundation itself. Failures are only reported through debug
michael@0: // assertions.
michael@0: WMFReader::~WMFReader()
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Must be on main thread.");
michael@0: 
michael@0:   // Ordering matters: the byte stream has to be shut down before MFShutdown
michael@0:   // is called, otherwise unlocking the byte stream's work queue trips
michael@0:   // assertion failures.
michael@0:   if (mByteStream) {
michael@0:     DebugOnly<nsresult> streamResult = mByteStream->Shutdown();
michael@0:     NS_ASSERTION(NS_SUCCEEDED(streamResult), "Failed to shutdown WMFByteStream");
michael@0:   }
michael@0: 
michael@0:   DebugOnly<HRESULT> shutdownResult = wmf::MFShutdown();
michael@0:   NS_ASSERTION(SUCCEEDED(shutdownResult), "MFShutdown failed");
michael@0: 
michael@0:   MOZ_COUNT_DTOR(WMFReader);
michael@0: }
michael@0: 
michael@0: // Tries to set up DXVA2 hardware decoding. Returns true only when the
michael@0: // "media.windows-media-foundation.use-dxva" pref is on, the media element's
michael@0: // document is composited by a D3D9/D3D10/D3D11 layer manager, and a
michael@0: // DXVA2Manager could be created (stored in mDXVA2Manager).
michael@0: bool
michael@0: WMFReader::InitializeDXVA()
michael@0: {
michael@0:   const bool dxvaPrefEnabled =
michael@0:     Preferences::GetBool("media.windows-media-foundation.use-dxva", false);
michael@0:   if (!dxvaPrefEnabled) {
michael@0:     return false;
michael@0:   }
michael@0:   MOZ_ASSERT(mDecoder->GetImageContainer());
michael@0: 
michael@0:   // DXVA only pays off with a D3D layer manager. With any other backend the
michael@0:   // decoded frames must be read back from GPU to CPU memory, which grinds
michael@0:   // painting to a halt and makes playback performance *worse*. So walk from
michael@0:   // the decoder to its element's layer manager and inspect the backend.
michael@0:   MediaDecoderOwner* decoderOwner = mDecoder->GetOwner();
michael@0:   NS_ENSURE_TRUE(decoderOwner, false);
michael@0: 
michael@0:   dom::HTMLMediaElement* mediaElement = decoderOwner->GetMediaElement();
michael@0:   NS_ENSURE_TRUE(mediaElement, false);
michael@0: 
michael@0:   nsRefPtr<LayerManager> manager =
michael@0:     nsContentUtils::LayerManagerForDocument(mediaElement->OwnerDoc());
michael@0:   NS_ENSURE_TRUE(manager, false);
michael@0: 
michael@0:   const LayersBackend compositor = manager->GetCompositorBackendType();
michael@0:   const bool isD3DCompositor = compositor == LayersBackend::LAYERS_D3D9 ||
michael@0:                                compositor == LayersBackend::LAYERS_D3D10 ||
michael@0:                                compositor == LayersBackend::LAYERS_D3D11;
michael@0:   if (!isD3DCompositor) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   mDXVA2Manager = DXVA2Manager::Create();
michael@0: 
michael@0:   return mDXVA2Manager != nullptr;
michael@0: }
michael@0: 
michael@0: nsresult
michael@0: WMFReader::Init(MediaDecoderReader* aCloneDonor)
michael@0: {
michael@0:   NS_ASSERTION(NS_IsMainThread(), "Must be on main thread.");
michael@0: 
michael@0:   nsresult rv = WMFDecoder::LoadDLLs();
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   if (FAILED(wmf::MFStartup())) {
michael@0:     NS_WARNING("Failed to initialize Windows Media Foundation");
michael@0:     return NS_ERROR_FAILURE;
michael@0:   }
michael@0: 
michael@0:   mSourceReaderCallback = new WMFSourceReaderCallback();
michael@0: 
michael@0:   // Must be created on main thread.
michael@0:   mByteStream = new WMFByteStream(mDecoder->GetResource(), mSourceReaderCallback);
michael@0:   rv = mByteStream->Init();
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   if (mDecoder->GetImageContainer() != nullptr &&
michael@0:       IsVideoContentType(mDecoder->GetResource()->GetContentType())) {
michael@0:     mUseHwAccel = InitializeDXVA();
michael@0:   } else {
michael@0:     mUseHwAccel = false;
michael@0:   }
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: // Reports whether an audio stream was successfully configured (mHasAudio).
michael@0: // Expected to be called on the decode thread.
michael@0: bool
michael@0: WMFReader::HasAudio()
michael@0: {
michael@0:   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
michael@0:   const bool audioConfigured = mHasAudio;
michael@0:   return audioConfigured;
michael@0: }
michael@0: 
michael@0: // Reports whether a video stream was successfully configured (mHasVideo).
michael@0: // Expected to be called on the decode thread.
michael@0: bool
michael@0: WMFReader::HasVideo()
michael@0: {
michael@0:   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
michael@0:   const bool videoConfigured = mHasVideo;
michael@0:   return videoConfigured;
michael@0: }
michael@0: 
michael@0: // Configures stream aStreamIndex of aReader to produce aOutputSubType.
michael@0: // The stream's native subtype must be one of the aNumAllowedInSubTypes
michael@0: // entries of aAllowedInSubTypes; if it is not, E_FAIL is returned before any
michael@0: // media type is set. Returns E_POINTER for a null reader or list, otherwise
michael@0: // the HRESULT of the first Media Foundation call that fails, or the result
michael@0: // of SetCurrentMediaType.
michael@0: static HRESULT
michael@0: ConfigureSourceReaderStream(IMFSourceReader *aReader,
michael@0:                             const DWORD aStreamIndex,
michael@0:                             const GUID& aOutputSubType,
michael@0:                             const GUID* aAllowedInSubTypes,
michael@0:                             const uint32_t aNumAllowedInSubTypes)
michael@0: {
michael@0:   NS_ENSURE_TRUE(aReader, E_POINTER);
michael@0:   NS_ENSURE_TRUE(aAllowedInSubTypes, E_POINTER);
michael@0: 
michael@0:   // Look up the stream's native media type and read its subtype.
michael@0:   RefPtr<IMFMediaType> nativeType;
michael@0:   HRESULT hr = aReader->GetNativeMediaType(aStreamIndex, 0, byRef(nativeType));
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   GUID nativeSubType;
michael@0:   hr = nativeType->GetGUID(MF_MT_SUBTYPE, &nativeSubType);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   // Only decode input formats the caller explicitly allows.
michael@0:   bool allowed = false;
michael@0:   const GUID* const listEnd = aAllowedInSubTypes + aNumAllowedInSubTypes;
michael@0:   for (const GUID* candidate = aAllowedInSubTypes;
michael@0:        !allowed && candidate != listEnd;
michael@0:        ++candidate) {
michael@0:     allowed = (*candidate == nativeSubType);
michael@0:   }
michael@0:   if (!allowed) {
michael@0:     nsCString name = GetGUIDName(nativeSubType);
michael@0:     DECODER_LOG("ConfigureSourceReaderStream subType=%s is not allowed to be decoded", name.get());
michael@0:     return E_FAIL;
michael@0:   }
michael@0: 
michael@0:   // The output type keeps the native major type and swaps in the requested
michael@0:   // output subtype.
michael@0:   GUID nativeMajorType;
michael@0:   hr = nativeType->GetGUID(MF_MT_MAJOR_TYPE, &nativeMajorType);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   RefPtr<IMFMediaType> outputType;
michael@0:   hr = wmf::MFCreateMediaType(byRef(outputType));
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, nativeMajorType);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   hr = outputType->SetGUID(MF_MT_SUBTYPE, aOutputSubType);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   // Applying the output type can fail if the decoder can't produce it.
michael@0:   return aReader->SetCurrentMediaType(aStreamIndex, nullptr, outputType);
michael@0: }
michael@0: 
michael@0: // Retrieves the duration of the resource behind aReader, in microseconds.
michael@0: // On success returns S_OK and stores the duration in aOutDuration. On
michael@0: // failure aOutDuration is left untouched and the failing HRESULT is returned
michael@0: // (E_POINTER when aReader is null).
michael@0: HRESULT
michael@0: GetSourceReaderDuration(IMFSourceReader *aReader,
michael@0:                         int64_t& aOutDuration)
michael@0: {
michael@0:   // Reject a null reader up front instead of dereferencing it below.
michael@0:   NS_ENSURE_TRUE(aReader, E_POINTER);
michael@0: 
michael@0:   AutoPropVar var;
michael@0:   HRESULT hr = aReader->GetPresentationAttribute(MF_SOURCE_READER_MEDIASOURCE,
michael@0:                                                  MF_PD_DURATION,
michael@0:                                                  &var);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   // WMF stores duration in hundred nanosecond units.
michael@0:   int64_t duration_hns = 0;
michael@0:   hr = wmf::PropVariantToInt64(var, &duration_hns);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   aOutDuration = HNsToUsecs(duration_hns);
michael@0: 
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: // Queries the media source behind aReader for its characteristics and
michael@0: // reports in aOutCanSeek whether the MFMEDIASOURCE_CAN_SEEK flag is set.
michael@0: // Returns E_FAIL for a null reader, the failing HRESULT if a query fails
michael@0: // (aOutCanSeek is then left untouched), and S_OK otherwise.
michael@0: HRESULT
michael@0: GetSourceReaderCanSeek(IMFSourceReader* aReader, bool& aOutCanSeek)
michael@0: {
michael@0:   NS_ENSURE_TRUE(aReader, E_FAIL);
michael@0: 
michael@0:   AutoPropVar characteristics;
michael@0:   HRESULT hr =
michael@0:     aReader->GetPresentationAttribute(MF_SOURCE_READER_MEDIASOURCE,
michael@0:                                       MF_SOURCE_READER_MEDIASOURCE_CHARACTERISTICS,
michael@0:                                       &characteristics);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   ULONG sourceFlags = 0;
michael@0:   hr = wmf::PropVariantToUInt32(characteristics, &sourceFlags);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   const bool seekFlagSet =
michael@0:     (sourceFlags & MFMEDIASOURCE_CAN_SEEK) == MFMEDIASOURCE_CAN_SEEK;
michael@0:   aOutCanSeek = seekFlagSet;
michael@0: 
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: HRESULT
michael@0: WMFReader::ConfigureVideoFrameGeometry(IMFMediaType* aMediaType)
michael@0: {
michael@0:   NS_ENSURE_TRUE(aMediaType != nullptr, E_POINTER);
michael@0:   HRESULT hr;
michael@0: 
michael@0:   // Verify that the video subtype is what we expect it to be.
michael@0:   // When using hardware acceleration/DXVA2 the video format should
michael@0:   // be NV12, which is DXVA2's preferred format. For software decoding
michael@0:   // we use YV12, as that's easier for us to stick into our rendering
michael@0:   // pipeline than NV12. NV12 has interleaved UV samples, whereas YV12
michael@0:   // is a planar format.
michael@0:   GUID videoFormat;
michael@0:   hr = aMediaType->GetGUID(MF_MT_SUBTYPE, &videoFormat);
michael@0:   NS_ENSURE_TRUE(videoFormat == MFVideoFormat_NV12 || !mUseHwAccel, E_FAIL);
michael@0:   NS_ENSURE_TRUE(videoFormat == MFVideoFormat_YV12 || mUseHwAccel, E_FAIL);
michael@0: 
michael@0:   nsIntRect pictureRegion;
michael@0:   hr = GetPictureRegion(aMediaType, pictureRegion);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   UINT32 width = 0, height = 0;
michael@0:   hr = MFGetAttributeSize(aMediaType, MF_MT_FRAME_SIZE, &width, &height);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   uint32_t aspectNum = 0, aspectDenom = 0;
michael@0:   hr = MFGetAttributeRatio(aMediaType,
michael@0:                            MF_MT_PIXEL_ASPECT_RATIO,
michael@0:                            &aspectNum,
michael@0:                            &aspectDenom);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   // Calculate and validate the picture region and frame dimensions after
michael@0:   // scaling by the pixel aspect ratio.
michael@0:   nsIntSize frameSize = nsIntSize(width, height);
michael@0:   nsIntSize displaySize = nsIntSize(pictureRegion.width, pictureRegion.height);
michael@0:   ScaleDisplayByAspectRatio(displaySize, float(aspectNum) / float(aspectDenom));
michael@0:   if (!IsValidVideoRegion(frameSize, pictureRegion, displaySize)) {
michael@0:     // Video track's frame sizes will overflow. Ignore the video track.
michael@0:     return E_FAIL;
michael@0:   }
michael@0: 
michael@0:   // Success! Save state.
michael@0:   mInfo.mVideo.mDisplay = displaySize;
michael@0:   GetDefaultStride(aMediaType, &mVideoStride);
michael@0:   mVideoWidth = width;
michael@0:   mVideoHeight = height;
michael@0:   mPictureRegion = pictureRegion;
michael@0: 
michael@0:   DECODER_LOG("WMFReader frame geometry frame=(%u,%u) stride=%u picture=(%d, %d, %d, %d) display=(%d,%d) PAR=%d:%d",
michael@0:               width, height,
michael@0:               mVideoStride,
michael@0:               mPictureRegion.x, mPictureRegion.y, mPictureRegion.width, mPictureRegion.height,
michael@0:               displaySize.width, displaySize.height,
michael@0:               aspectNum, aspectDenom);
michael@0: 
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: // Configures decoding of the first video stream of mSourceReader.
michael@0: //  - No source reader or no video stream: nothing to do, returns S_OK.
michael@0: //  - No image container to render into: deselects the video stream and
michael@0: //    returns the result of that call.
michael@0: //  - Otherwise requests NV12 (hardware) or YV12 (software) output for H.264
michael@0: //    input, validates the frame geometry and marks video as present.
michael@0: // Returns the failing HRESULT if any configuration step fails.
michael@0: HRESULT
michael@0: WMFReader::ConfigureVideoDecoder()
michael@0: {
michael@0:   NS_ASSERTION(mSourceReader, "Must have a SourceReader before configuring decoders!");
michael@0: 
michael@0:   // Determine if we have video.
michael@0:   if (!mSourceReader ||
michael@0:       !SourceReaderHasStream(mSourceReader, MF_SOURCE_READER_FIRST_VIDEO_STREAM)) {
michael@0:     // No stream, no error.
michael@0:     return S_OK;
michael@0:   }
michael@0: 
michael@0:   if (!mDecoder->GetImageContainer()) {
michael@0:     // We can't display the video, so don't bother to decode; disable the stream.
michael@0:     return mSourceReader->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, FALSE);
michael@0:   }
michael@0: 
michael@0:   static const GUID MP4VideoTypes[] = {
michael@0:     MFVideoFormat_H264
michael@0:   };
michael@0:   HRESULT hr = ConfigureSourceReaderStream(mSourceReader,
michael@0:                                            MF_SOURCE_READER_FIRST_VIDEO_STREAM,
michael@0:                                            mUseHwAccel ? MFVideoFormat_NV12 : MFVideoFormat_YV12,
michael@0:                                            MP4VideoTypes,
michael@0:                                            ArrayLength(MP4VideoTypes));
michael@0:   if (FAILED(hr)) {
michael@0:     DECODER_LOG("Failed to configured video output");
michael@0:     return hr;
michael@0:   }
michael@0: 
michael@0:   RefPtr<IMFMediaType> mediaType;
michael@0:   hr = mSourceReader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM,
michael@0:                                           byRef(mediaType));
michael@0:   if (FAILED(hr)) {
michael@0:     NS_WARNING("Failed to get configured video media type");
michael@0:     return hr;
michael@0:   }
michael@0: 
michael@0:   // Capture the geometry result in hr. Returning the previous hr here would
michael@0:   // report the successful GetCurrentMediaType result, i.e. success, for a
michael@0:   // failed configuration.
michael@0:   hr = ConfigureVideoFrameGeometry(mediaType);
michael@0:   if (FAILED(hr)) {
michael@0:     NS_WARNING("Failed configured video frame dimensions");
michael@0:     return hr;
michael@0:   }
michael@0: 
michael@0:   DECODER_LOG("Successfully configured video stream");
michael@0: 
michael@0:   mHasVideo = mInfo.mVideo.mHasVideo = true;
michael@0: 
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: // Hands back the list of audio input subtypes this reader will decode.
michael@0: // *aCodecs receives a pointer to a function-static array and *aNumCodecs its
michael@0: // length. AAC is always listed; MP3 and the 'mp4a' subtype are added when
michael@0: // mIsMP3Enabled is set.
michael@0: void
michael@0: WMFReader::GetSupportedAudioCodecs(const GUID** aCodecs, uint32_t* aNumCodecs)
michael@0: {
michael@0:   MOZ_ASSERT(aCodecs);
michael@0:   MOZ_ASSERT(aNumCodecs);
michael@0: 
michael@0:   if (!mIsMP3Enabled) {
michael@0:     static const GUID aacOnly[] = {
michael@0:       MFAudioFormat_AAC
michael@0:     };
michael@0:     *aCodecs = aacOnly;
michael@0:     *aNumCodecs = ArrayLength(aacOnly);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Build the 'mp4a' subtype by patching the FOURCC into the MPEG-4 base GUID.
michael@0:   GUID mp4aSubType = MFMPEG4Format_Base;
michael@0:   mp4aSubType.Data1 = 0x6D703461;// FOURCC('m','p','4','a');
michael@0:   static const GUID aacAndMp3[] = {
michael@0:     MFAudioFormat_AAC,
michael@0:     MFAudioFormat_MP3,
michael@0:     mp4aSubType
michael@0:   };
michael@0:   *aCodecs = aacAndMp3;
michael@0:   *aNumCodecs = ArrayLength(aacAndMp3);
michael@0: }
michael@0: 
michael@0: // Configures decoding of the first audio stream of mSourceReader to float
michael@0: // PCM output and records its rate, channel count and sample size.
michael@0: //  - No source reader or no audio stream: nothing to do, returns S_OK.
michael@0: //  - Returns the failing HRESULT if the stream cannot be configured or its
michael@0: //    media type cannot be read.
michael@0: //  - Returns E_FAIL, without touching member state, if the configured type
michael@0: //    reports a zero rate, channel count or sample size.
michael@0: HRESULT
michael@0: WMFReader::ConfigureAudioDecoder()
michael@0: {
michael@0:   NS_ASSERTION(mSourceReader, "Must have a SourceReader before configuring decoders!");
michael@0: 
michael@0:   if (!mSourceReader ||
michael@0:       !SourceReaderHasStream(mSourceReader, MF_SOURCE_READER_FIRST_AUDIO_STREAM)) {
michael@0:     // No stream, no error.
michael@0:     return S_OK;
michael@0:   }
michael@0: 
michael@0:   const GUID* codecs = nullptr;
michael@0:   uint32_t numCodecs = 0;
michael@0:   GetSupportedAudioCodecs(&codecs, &numCodecs);
michael@0: 
michael@0:   HRESULT hr = ConfigureSourceReaderStream(mSourceReader,
michael@0:                                            MF_SOURCE_READER_FIRST_AUDIO_STREAM,
michael@0:                                            MFAudioFormat_Float,
michael@0:                                            codecs,
michael@0:                                            numCodecs);
michael@0:   if (FAILED(hr)) {
michael@0:     NS_WARNING("Failed to configure WMF Audio decoder for PCM output");
michael@0:     return hr;
michael@0:   }
michael@0: 
michael@0:   RefPtr<IMFMediaType> mediaType;
michael@0:   hr = mSourceReader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM,
michael@0:                                           byRef(mediaType));
michael@0:   if (FAILED(hr)) {
michael@0:     NS_WARNING("Failed to get configured audio media type");
michael@0:     return hr;
michael@0:   }
michael@0: 
michael@0:   const uint32_t rate =
michael@0:     MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_SAMPLES_PER_SECOND, 0);
michael@0:   const uint32_t channels =
michael@0:     MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_NUM_CHANNELS, 0);
michael@0:   const uint32_t bytesPerSample =
michael@0:     MFGetAttributeUINT32(mediaType, MF_MT_AUDIO_BITS_PER_SAMPLE, 16) / 8;
michael@0: 
michael@0:   // DecodeAudioData() divides by the sample size and the channel count, and
michael@0:   // timestamps are derived from the rate, so a zero in any of them cannot be
michael@0:   // decoded. Fail here rather than marking such a stream as present.
michael@0:   if (rate == 0 || channels == 0 || bytesPerSample == 0) {
michael@0:     NS_WARNING("WMF audio stream reports a zero rate, channel count or sample size");
michael@0:     return E_FAIL;
michael@0:   }
michael@0: 
michael@0:   mAudioRate = rate;
michael@0:   mAudioChannels = channels;
michael@0:   mAudioBytesPerSample = bytesPerSample;
michael@0: 
michael@0:   mInfo.mAudio.mChannels = mAudioChannels;
michael@0:   mInfo.mAudio.mRate = mAudioRate;
michael@0:   mHasAudio = mInfo.mAudio.mHasAudio = true;
michael@0: 
michael@0:   // The value logged is a byte count, so it is labelled as such.
michael@0:   DECODER_LOG("Successfully configured audio stream. rate=%u channels=%u bytesPerSample=%u",
michael@0:               mAudioRate, mAudioChannels, mAudioBytesPerSample);
michael@0: 
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: HRESULT
michael@0: WMFReader::CreateSourceReader()
michael@0: {
michael@0:   HRESULT hr;
michael@0: 
michael@0:   RefPtr<IMFAttributes> attr;
michael@0:   hr = wmf::MFCreateAttributes(byRef(attr), 1);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   hr = attr->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, mSourceReaderCallback);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   if (mUseHwAccel) {
michael@0:     hr = attr->SetUnknown(MF_SOURCE_READER_D3D_MANAGER,
michael@0:                           mDXVA2Manager->GetDXVADeviceManager());
michael@0:     if (FAILED(hr)) {
michael@0:       DECODER_LOG("Failed to set DXVA2 D3D Device manager on source reader attributes");
michael@0:       mUseHwAccel = false;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   hr = wmf::MFCreateSourceReaderFromByteStream(mByteStream, attr, byRef(mSourceReader));
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   hr = ConfigureVideoDecoder();
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   hr = ConfigureAudioDecoder();
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   if (mUseHwAccel && mInfo.mVideo.mHasVideo) {
michael@0:     RefPtr<IMFTransform> videoDecoder;
michael@0:     hr = mSourceReader->GetServiceForStream(MF_SOURCE_READER_FIRST_VIDEO_STREAM,
michael@0:                                             GUID_NULL,
michael@0:                                             IID_IMFTransform,
michael@0:                                             (void**)(IMFTransform**)(byRef(videoDecoder)));
michael@0: 
michael@0:     if (SUCCEEDED(hr)) {
michael@0:       ULONG_PTR manager = ULONG_PTR(mDXVA2Manager->GetDXVADeviceManager());
michael@0:       hr = videoDecoder->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER,
michael@0:                                         manager);
michael@0:       if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
michael@0:         // Ignore MF_E_TRANSFORM_TYPE_NOT_SET. Vista returns this here
michael@0:         // on some, perhaps all, video cards. This may be because activating
michael@0:         // DXVA changes the available output types. It seems to be safe to
michael@0:         // ignore this error.
michael@0:         hr = S_OK;
michael@0:       }
michael@0:     }
michael@0:     if (FAILED(hr)) {
michael@0:       DECODER_LOG("Failed to set DXVA2 D3D Device manager on decoder hr=0x%x", hr);
michael@0:       mUseHwAccel = false;
michael@0:     }
michael@0:   }
michael@0:   return hr;
michael@0: }
michael@0: 
michael@0: // Creates the source reader, configures the streams and publishes the
michael@0: // resulting media info. *aInfo receives a copy of mInfo and *aTags is set to
michael@0: // null (no tags are extracted). The duration, when available, is reported to
michael@0: // the decoder; the media is marked unseekable unless both a duration and the
michael@0: // seek capability are reported. Returns NS_ERROR_FAILURE when no source
michael@0: // reader could be created or no valid audio/video stream was configured.
michael@0: nsresult
michael@0: WMFReader::ReadMetadata(MediaInfo* aInfo,
michael@0:                         MetadataTags** aTags)
michael@0: {
michael@0:   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
michael@0: 
michael@0:   DECODER_LOG("WMFReader::ReadMetadata()");
michael@0: 
michael@0:   const bool attemptedDXVA = mUseHwAccel;
michael@0:   if (FAILED(CreateSourceReader())) {
michael@0:     mSourceReader = nullptr;
michael@0:     // If DXVA was requested but got switched off during the failed attempt,
michael@0:     // retry once without it. The IMFSourceReader must be recreated from
michael@0:     // scratch, as on some systems (AMD Radeon 3000) an existing reader
michael@0:     // cannot be reconfigured to not use DXVA after configuring DXVA failed.
michael@0:     // See bug 987127.
michael@0:     const bool dxvaWasDropped = attemptedDXVA && !mUseHwAccel;
michael@0:     if (dxvaWasDropped && FAILED(CreateSourceReader())) {
michael@0:       mSourceReader = nullptr;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (!mSourceReader) {
michael@0:     NS_WARNING("Failed to create IMFSourceReader");
michael@0:     return NS_ERROR_FAILURE;
michael@0:   }
michael@0: 
michael@0:   if (mInfo.HasVideo()) {
michael@0:     DECODER_LOG("Using DXVA: %s", (mUseHwAccel ? "Yes" : "No"));
michael@0:   }
michael@0: 
michael@0:   // Abort if both video and audio failed to initialize.
michael@0:   NS_ENSURE_TRUE(mInfo.HasValidMedia(), NS_ERROR_FAILURE);
michael@0: 
michael@0:   // Report the duration to the decoder if the reader can provide one.
michael@0:   int64_t durationUsecs = 0;
michael@0:   const HRESULT durationResult = GetSourceReaderDuration(mSourceReader, durationUsecs);
michael@0:   const bool haveDuration = SUCCEEDED(durationResult);
michael@0:   if (haveDuration) {
michael@0:     ReentrantMonitorAutoEnter mon(mDecoder->GetReentrantMonitor());
michael@0:     mDecoder->SetMediaEndTime(durationUsecs);
michael@0:   }
michael@0: 
michael@0:   // Seeking needs a duration *and* a reader that reports itself seekable.
michael@0:   // The seek query is skipped entirely when there is no duration.
michael@0:   bool readerCanSeek = false;
michael@0:   const bool seekable =
michael@0:     haveDuration &&
michael@0:     SUCCEEDED(GetSourceReaderCanSeek(mSourceReader, readerCanSeek)) &&
michael@0:     readerCanSeek;
michael@0:   if (!seekable) {
michael@0:     mDecoder->SetMediaSeekable(false);
michael@0:   }
michael@0: 
michael@0:   *aInfo = mInfo;
michael@0:   // Tags could be retrieved using techniques like the ones used here:
michael@0:   // http://blogs.msdn.com/b/mf/archive/2010/01/12/mfmediapropdump.aspx
michael@0:   *aTags = nullptr;
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: bool
michael@0: WMFReader::DecodeAudioData()
michael@0: {
michael@0:   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
michael@0: 
michael@0:   HRESULT hr;
michael@0:   hr = mSourceReader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM,
michael@0:                                  0, // control flags
michael@0:                                  0, // read stream index
michael@0:                                  nullptr,
michael@0:                                  nullptr,
michael@0:                                  nullptr);
michael@0: 
michael@0:   if (FAILED(hr)) {
michael@0:     DECODER_LOG("WMFReader::DecodeAudioData() ReadSample failed with hr=0x%x", hr);
michael@0:     // End the stream.
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   DWORD flags = 0;
michael@0:   LONGLONG timestampHns = 0;
michael@0:   RefPtr<IMFSample> sample;
michael@0:   hr = mSourceReaderCallback->Wait(&flags, &timestampHns, byRef(sample));
michael@0:   if (FAILED(hr) ||
michael@0:       (flags & MF_SOURCE_READERF_ERROR) ||
michael@0:       (flags & MF_SOURCE_READERF_ENDOFSTREAM) ||
michael@0:       (flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)) {
michael@0:     DECODER_LOG("WMFReader::DecodeAudioData() ReadSample failed with hr=0x%x flags=0x%x",
michael@0:                 hr, flags);
michael@0:     // End the stream.
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (!sample) {
michael@0:     // Not enough data? Try again...
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   RefPtr<IMFMediaBuffer> buffer;
michael@0:   hr = sample->ConvertToContiguousBuffer(byRef(buffer));
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), false);
michael@0: 
michael@0:   BYTE* data = nullptr; // Note: *data will be owned by the IMFMediaBuffer, we don't need to free it.
michael@0:   DWORD maxLength = 0, currentLength = 0;
michael@0:   hr = buffer->Lock(&data, &maxLength, &currentLength);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), false);
michael@0: 
michael@0:   uint32_t numFrames = currentLength / mAudioBytesPerSample / mAudioChannels;
michael@0:   NS_ASSERTION(sizeof(AudioDataValue) == mAudioBytesPerSample, "Size calculation is wrong");
michael@0:   nsAutoArrayPtr<AudioDataValue> pcmSamples(new AudioDataValue[numFrames * mAudioChannels]);
michael@0:   memcpy(pcmSamples.get(), data, currentLength);
michael@0:   buffer->Unlock();
michael@0: 
michael@0:   // We calculate the timestamp and the duration based on the number of audio
michael@0:   // frames we've already played. We don't trust the timestamp stored on the
michael@0:   // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?
michael@0: 
michael@0:   // If this sample block comes after a discontinuity (i.e. a gap or seek)
michael@0:   // reset the frame counters, and capture the timestamp. Future timestamps
michael@0:   // will be offset from this block's timestamp.
michael@0:   UINT32 discontinuity = false;
michael@0:   sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
michael@0:   if (mMustRecaptureAudioPosition || discontinuity) {
michael@0:     mAudioFrameSum = 0;
michael@0:     hr = HNsToFrames(timestampHns, mAudioRate, &mAudioFrameOffset);
michael@0:     NS_ENSURE_TRUE(SUCCEEDED(hr), false);
michael@0:     mMustRecaptureAudioPosition = false;
michael@0:   }
michael@0: 
michael@0:   int64_t timestamp;
michael@0:   hr = FramesToUsecs(mAudioFrameOffset + mAudioFrameSum, mAudioRate, &timestamp);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), false);
michael@0: 
michael@0:   mAudioFrameSum += numFrames;
michael@0: 
michael@0:   int64_t duration;
michael@0:   hr = FramesToUsecs(numFrames, mAudioRate, &duration);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), false);
michael@0: 
michael@0:   mAudioQueue.Push(new AudioData(mDecoder->GetResource()->Tell(),
michael@0:                                  timestamp,
michael@0:                                  duration,
michael@0:                                  numFrames,
michael@0:                                  pcmSamples.forget(),
michael@0:                                  mAudioChannels));
michael@0: 
michael@0:   #ifdef LOG_SAMPLE_DECODE
michael@0:   DECODER_LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
michael@0:               timestamp, duration, currentLength);
michael@0:   #endif
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // Wraps the YV12 pixel data of aSample in a VideoData (software decoding
michael@0: // path). The sample's buffer is locked while VideoData::Create runs and is
michael@0: // unlocked again before returning.
michael@0: // Returns E_POINTER for null arguments, or the failing HRESULT if the buffer
michael@0: // cannot be obtained or locked. On S_OK *aOutVideoData holds whatever
michael@0: // VideoData::Create returned; on failure it is null once the argument checks
michael@0: // have passed.
michael@0: HRESULT
michael@0: WMFReader::CreateBasicVideoFrame(IMFSample* aSample,
michael@0:                                  int64_t aTimestampUsecs,
michael@0:                                  int64_t aDurationUsecs,
michael@0:                                  int64_t aOffsetBytes,
michael@0:                                  VideoData** aOutVideoData)
michael@0: {
michael@0:   NS_ENSURE_TRUE(aSample, E_POINTER);
michael@0:   NS_ENSURE_TRUE(aOutVideoData, E_POINTER);
michael@0: 
michael@0:   *aOutVideoData = nullptr;
michael@0: 
michael@0:   // Must convert to contiguous buffer to use IMD2DBuffer interface.
michael@0:   RefPtr<IMFMediaBuffer> mediaBuffer;
michael@0:   HRESULT hr = aSample->ConvertToContiguousBuffer(byRef(mediaBuffer));
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0: 
michael@0:   // Prefer IMF2DBuffer when the buffer offers it (apparently more efficient,
michael@0:   // but only some systems - Windows 8? - support it); otherwise lock through
michael@0:   // plain IMFMediaBuffer and use the stride recorded at configuration time.
michael@0:   BYTE* pixels = nullptr;
michael@0:   LONG rowStride = 0;
michael@0:   RefPtr<IMF2DBuffer> buffer2D;
michael@0:   hr = mediaBuffer->QueryInterface(static_cast<IMF2DBuffer**>(byRef(buffer2D)));
michael@0:   if (FAILED(hr)) {
michael@0:     hr = mediaBuffer->Lock(&pixels, nullptr, nullptr);
michael@0:     NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0:     rowStride = mVideoStride;
michael@0:   } else {
michael@0:     hr = buffer2D->Lock2D(&pixels, &rowStride);
michael@0:     NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0:   }
michael@0: 
michael@0:   // The V and U planes are stored 16-row-aligned, so the row count is padded
michael@0:   // up to a multiple of 16 when computing where each plane starts.
michael@0:   const uint32_t rowsPastAlignment = mVideoHeight % 16;
michael@0:   uint32_t padding = 0;
michael@0:   if (rowsPastAlignment != 0) {
michael@0:     padding = 16 - rowsPastAlignment;
michael@0:   }
michael@0:   uint32_t lumaBytes = rowStride * (mVideoHeight + padding);
michael@0:   uint32_t chromaBytes = rowStride * (mVideoHeight + padding) / 4;
michael@0:   uint32_t chromaStride = (rowStride + 1) / 2;
michael@0:   uint32_t chromaHeight = (mVideoHeight + 1) / 2;
michael@0:   uint32_t chromaWidth = (mVideoWidth + 1) / 2;
michael@0: 
michael@0:   // YV12, planar format: [YYYY....][VVVV....][UUUU....]
michael@0:   // i.e., Y, then V, then U.
michael@0:   VideoData::YCbCrBuffer frame;
michael@0: 
michael@0:   // Plane 0: Y (Y'), full resolution, at the start of the buffer.
michael@0:   frame.mPlanes[0].mData = pixels;
michael@0:   frame.mPlanes[0].mStride = rowStride;
michael@0:   frame.mPlanes[0].mHeight = mVideoHeight;
michael@0:   frame.mPlanes[0].mWidth = mVideoWidth;
michael@0:   frame.mPlanes[0].mOffset = 0;
michael@0:   frame.mPlanes[0].mSkip = 0;
michael@0: 
michael@0:   // Planes 1 (U/Cb) and 2 (V/Cr) share the same half-resolution geometry
michael@0:   // and differ only in where their data starts.
michael@0:   for (uint32_t plane = 1; plane <= 2; ++plane) {
michael@0:     frame.mPlanes[plane].mStride = chromaStride;
michael@0:     frame.mPlanes[plane].mHeight = chromaHeight;
michael@0:     frame.mPlanes[plane].mWidth = chromaWidth;
michael@0:     frame.mPlanes[plane].mOffset = 0;
michael@0:     frame.mPlanes[plane].mSkip = 0;
michael@0:   }
michael@0:   // V follows Y directly; U follows V.
michael@0:   frame.mPlanes[1].mData = pixels + lumaBytes + chromaBytes;
michael@0:   frame.mPlanes[2].mData = pixels + lumaBytes;
michael@0: 
michael@0:   VideoData *videoData = VideoData::Create(mInfo.mVideo,
michael@0:                                            mDecoder->GetImageContainer(),
michael@0:                                            aOffsetBytes,
michael@0:                                            aTimestampUsecs,
michael@0:                                            aDurationUsecs,
michael@0:                                            frame,
michael@0:                                            false,
michael@0:                                            -1,
michael@0:                                            ToIntRect(mPictureRegion));
michael@0: 
michael@0:   // Release the lock through the same interface that took it.
michael@0:   if (buffer2D) {
michael@0:     buffer2D->Unlock2D();
michael@0:   } else {
michael@0:     mediaBuffer->Unlock();
michael@0:   }
michael@0: 
michael@0:   *aOutVideoData = videoData;
michael@0: 
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: // Builds a VideoData for aSample on the DXVA2 path: the sample is copied
michael@0: // into an Image by mDXVA2Manager->CopyToImage(), and that Image is then
michael@0: // handed over to VideoData::CreateFromImage().
michael@0: //
michael@0: // Returns E_POINTER if aSample or aOutVideoData is null, E_ABORT if there is
michael@0: // no DXVA2 manager or hardware acceleration is off, the HRESULT from
michael@0: // CopyToImage() if the copy fails, and E_FAIL if no image or no VideoData
michael@0: // was produced. Once the precondition checks have passed, *aOutVideoData is
michael@0: // null on every failure path and points at the new frame on S_OK.
michael@0: HRESULT
michael@0: WMFReader::CreateD3DVideoFrame(IMFSample* aSample,
michael@0:                                int64_t aTimestampUsecs,
michael@0:                                int64_t aDurationUsecs,
michael@0:                                int64_t aOffsetBytes,
michael@0:                                VideoData** aOutVideoData)
michael@0: {
michael@0:   // Validate the arguments first, then the hardware-decode state.
michael@0:   NS_ENSURE_TRUE(aSample, E_POINTER);
michael@0:   NS_ENSURE_TRUE(aOutVideoData, E_POINTER);
michael@0:   NS_ENSURE_TRUE(mDXVA2Manager, E_ABORT);
michael@0:   NS_ENSURE_TRUE(mUseHwAccel, E_ABORT);
michael@0: 
michael@0:   // Clear the out-param up front so early returns below leave it null.
michael@0:   *aOutVideoData = nullptr;
michael@0: 
michael@0:   // Copy the picture region of the sample into an Image.
michael@0:   nsRefPtr<Image> image;
michael@0:   HRESULT hr =
michael@0:     mDXVA2Manager->CopyToImage(aSample,
michael@0:                                mPictureRegion,
michael@0:                                mDecoder->GetImageContainer(),
michael@0:                                getter_AddRefs(image));
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
michael@0:   NS_ENSURE_TRUE(image, E_FAIL);
michael@0: 
michael@0:   // Wrap the image; image.forget() transfers our reference to the callee.
michael@0:   VideoData* v =
michael@0:     VideoData::CreateFromImage(mInfo.mVideo,
michael@0:                                mDecoder->GetImageContainer(),
michael@0:                                aOffsetBytes,
michael@0:                                aTimestampUsecs,
michael@0:                                aDurationUsecs,
michael@0:                                image.forget(),
michael@0:                                false,
michael@0:                                -1,
michael@0:                                ToIntRect(mPictureRegion));
michael@0:   NS_ENSURE_TRUE(v, E_FAIL);
michael@0: 
michael@0:   *aOutVideoData = v;
michael@0:   return S_OK;
michael@0: }
michael@0: 
michael@0: bool
michael@0: WMFReader::DecodeVideoFrame(bool &aKeyframeSkip,
michael@0:                             int64_t aTimeThreshold)
michael@0: {
michael@0:   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
michael@0: 
michael@0:   // Record number of frames decoded and parsed. Automatically update the
michael@0:   // stats counters using the AutoNotifyDecoded stack-based class.
michael@0:   uint32_t parsed = 0, decoded = 0;
michael@0:   AbstractMediaDecoder::AutoNotifyDecoded autoNotify(mDecoder, parsed, decoded);
michael@0: 
michael@0:   HRESULT hr;
michael@0: 
michael@0:   hr = mSourceReader->ReadSample(MF_SOURCE_READER_FIRST_VIDEO_STREAM,
michael@0:                                  0, // control flags
michael@0:                                  0, // read stream index
michael@0:                                  nullptr,
michael@0:                                  nullptr,
michael@0:                                  nullptr);
michael@0:   if (FAILED(hr)) {
michael@0:     DECODER_LOG("WMFReader::DecodeVideoData() ReadSample failed with hr=0x%x", hr);
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   DWORD flags = 0;
michael@0:   LONGLONG timestampHns = 0;
michael@0:   RefPtr<IMFSample> sample;
michael@0:   hr = mSourceReaderCallback->Wait(&flags, &timestampHns, byRef(sample));
michael@0: 
michael@0:   if (flags & MF_SOURCE_READERF_ERROR) {
michael@0:     NS_WARNING("WMFReader: Catastrophic failure reading video sample");
michael@0:     // Future ReadSample() calls will fail, so give up and report end of stream.
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (FAILED(hr)) {
michael@0:     // Unknown failure, ask caller to try again?
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   if (!sample) {
michael@0:     if ((flags & MF_SOURCE_READERF_ENDOFSTREAM)) {
michael@0:       DECODER_LOG("WMFReader; Null sample after video decode, at end of stream");
michael@0:       return false;
michael@0:     }
michael@0:     DECODER_LOG("WMFReader; Null sample after video decode. Maybe insufficient data...");
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   if ((flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)) {
michael@0:     DECODER_LOG("WMFReader: Video media type changed!");
michael@0:     RefPtr<IMFMediaType> mediaType;
michael@0:     hr = mSourceReader->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM,
michael@0:                                             byRef(mediaType));
michael@0:     if (FAILED(hr) ||
michael@0:         FAILED(ConfigureVideoFrameGeometry(mediaType))) {
michael@0:       NS_WARNING("Failed to reconfigure video media type");
michael@0:       return false;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   int64_t timestamp = HNsToUsecs(timestampHns);
michael@0:   if (timestamp < aTimeThreshold) {
michael@0:     return true;
michael@0:   }
michael@0:   int64_t offset = mDecoder->GetResource()->Tell();
michael@0:   int64_t duration = GetSampleDuration(sample);
michael@0: 
michael@0:   VideoData* v = nullptr;
michael@0:   if (mUseHwAccel) {
michael@0:     hr = CreateD3DVideoFrame(sample, timestamp, duration, offset, &v);
michael@0:   } else {
michael@0:     hr = CreateBasicVideoFrame(sample, timestamp, duration, offset, &v);
michael@0:   }
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr) && v, false);
michael@0: 
michael@0:   parsed++;
michael@0:   decoded++;
michael@0:   mVideoQueue.Push(v);
michael@0: 
michael@0:   #ifdef LOG_SAMPLE_DECODE
michael@0:   DECODER_LOG("Decoded video sample timestamp=%lld duration=%lld stride=%d height=%u flags=%u",
michael@0:               timestamp, duration, mVideoStride, mVideoHeight, flags);
michael@0:   #endif
michael@0: 
michael@0:   if ((flags & MF_SOURCE_READERF_ENDOFSTREAM)) {
michael@0:     // End of stream.
michael@0:     DECODER_LOG("End of video stream");
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // Moves the source reader to aTargetUs (microseconds). The decode state is
michael@0: // reset first via ResetDecode(), and its failure code is returned as-is.
michael@0: // Failure to build the position PROPVARIANT or to set the reader position
michael@0: // yields NS_ERROR_FAILURE. aStartTime, aEndTime and aCurrentTime are not
michael@0: // used by this implementation.
michael@0: nsresult
michael@0: WMFReader::Seek(int64_t aTargetUs,
michael@0:                 int64_t aStartTime,
michael@0:                 int64_t aEndTime,
michael@0:                 int64_t aCurrentTime)
michael@0: {
michael@0:   DECODER_LOG("WMFReader::Seek() %lld", aTargetUs);
michael@0: 
michael@0:   NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
michael@0: #ifdef DEBUG
michael@0:   // Debug-only sanity check that the source reader reports it can seek.
michael@0:   bool canSeek = false;
michael@0:   GetSourceReaderCanSeek(mSourceReader, canSeek);
michael@0:   NS_ASSERTION(canSeek, "WMFReader::Seek() should only be called if we can seek!");
michael@0: #endif
michael@0: 
michael@0:   nsresult rv = ResetDecode();
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   // WMF does not flag a discontinuity when seeking to time 0, so note here
michael@0:   // that the next decoded audio packet must recapture the audio frame
michael@0:   // offset and restart the frame sum.
michael@0:   mMustRecaptureAudioPosition = true;
michael@0: 
michael@0:   // The reader takes its position as a PROPVARIANT holding a 64-bit value,
michael@0:   // so convert the target with UsecsToHNs() and wrap it.
michael@0:   AutoPropVar position;
michael@0:   const LONGLONG targetHns = UsecsToHNs(aTargetUs);
michael@0:   HRESULT hr = InitPropVariantFromInt64(targetHns, &position);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), NS_ERROR_FAILURE);
michael@0: 
michael@0:   hr = mSourceReader->SetCurrentPosition(GUID_NULL, position);
michael@0:   NS_ENSURE_TRUE(SUCCEEDED(hr), NS_ERROR_FAILURE);
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: } // namespace mozilla