diff -r 000000000000 -r 6474c204b198 content/media/encoder/VP8TrackEncoder.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/content/media/encoder/VP8TrackEncoder.cpp Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,569 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VP8TrackEncoder.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "VideoUtils.h" +#include "prsystem.h" +#include "WebMWriter.h" +#include "libyuv.h" + +namespace mozilla { + +#ifdef PR_LOGGING +PRLogModuleInfo* gVP8TrackEncoderLog; +#define VP8LOG(msg, ...) PR_LOG(gVP8TrackEncoderLog, PR_LOG_DEBUG, \ + (msg, ##__VA_ARGS__)) +// Debug logging macro with object pointer and class name. +#else +#define VP8LOG(msg, ...) +#endif + +#define DEFAULT_BITRATE 2500 // in kbit/s +#define DEFAULT_ENCODE_FRAMERATE 30 + +using namespace mozilla::layers; + +VP8TrackEncoder::VP8TrackEncoder() + : VideoTrackEncoder() + , mEncodedFrameDuration(0) + , mEncodedTimestamp(0) + , mRemainingTicks(0) + , mVPXContext(new vpx_codec_ctx_t()) + , mVPXImageWrapper(new vpx_image_t()) +{ + MOZ_COUNT_CTOR(VP8TrackEncoder); +#ifdef PR_LOGGING + if (!gVP8TrackEncoderLog) { + gVP8TrackEncoderLog = PR_NewLogModule("VP8TrackEncoder"); + } +#endif +} + +VP8TrackEncoder::~VP8TrackEncoder() +{ + if (mInitialized) { + vpx_codec_destroy(mVPXContext); + } + + if (mVPXImageWrapper) { + vpx_img_free(mVPXImageWrapper); + } + MOZ_COUNT_DTOR(VP8TrackEncoder); +} + +nsresult +VP8TrackEncoder::Init(int32_t aWidth, int32_t aHeight, int32_t aDisplayWidth, + int32_t aDisplayHeight,TrackRate aTrackRate) +{ + if (aWidth < 1 || aHeight < 1 || aDisplayWidth < 1 || aDisplayHeight < 1 + || aTrackRate <= 0) { + return NS_ERROR_FAILURE; + } + + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + + mTrackRate = aTrackRate; + mEncodedFrameRate = DEFAULT_ENCODE_FRAMERATE; + mEncodedFrameDuration = mTrackRate / mEncodedFrameRate; + mFrameWidth = aWidth; + mFrameHeight = aHeight; + mDisplayWidth = aDisplayWidth; + mDisplayHeight = aDisplayHeight; + + // Encoder configuration structure. + vpx_codec_enc_cfg_t config; + memset(&config, 0, sizeof(vpx_codec_enc_cfg_t)); + if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config, 0)) { + return NS_ERROR_FAILURE; + } + + // Creating a wrapper to the image - setting image data to NULL. Actual + // pointer will be set in encode. Setting align to 1, as it is meaningless + // (actual memory is not allocated). + vpx_img_wrap(mVPXImageWrapper, IMG_FMT_I420, + mFrameWidth, mFrameHeight, 1, nullptr); + + config.g_w = mFrameWidth; + config.g_h = mFrameHeight; + // TODO: Maybe we should have various aFrameRate bitrate pair for each devices? + // or for different platform + config.rc_target_bitrate = DEFAULT_BITRATE; // in kbit/s + + // Setting the time base of the codec + config.g_timebase.num = 1; + config.g_timebase.den = mTrackRate; + + config.g_error_resilient = 0; + + config.g_lag_in_frames = 0; // 0- no frame lagging + + int32_t number_of_cores = PR_GetNumberOfProcessors(); + if (mFrameWidth * mFrameHeight > 1280 * 960 && number_of_cores >= 6) { + config.g_threads = 3; // 3 threads for 1080p. + } else if (mFrameWidth * mFrameHeight > 640 * 480 && number_of_cores >= 3) { + config.g_threads = 2; // 2 threads for qHD/HD. + } else { + config.g_threads = 1; // 1 thread for VGA or less + } + + // rate control settings + config.rc_dropframe_thresh = 0; + config.rc_end_usage = VPX_CBR; + config.g_pass = VPX_RC_ONE_PASS; + config.rc_resize_allowed = 1; + config.rc_undershoot_pct = 100; + config.rc_overshoot_pct = 15; + config.rc_buf_initial_sz = 500; + config.rc_buf_optimal_sz = 600; + config.rc_buf_sz = 1000; + + config.kf_mode = VPX_KF_AUTO; + // Ensure that we can output one I-frame per second. + config.kf_max_dist = mEncodedFrameRate; + + vpx_codec_flags_t flags = 0; + flags |= VPX_CODEC_USE_OUTPUT_PARTITION; + if (vpx_codec_enc_init(mVPXContext, vpx_codec_vp8_cx(), &config, flags)) { + return NS_ERROR_FAILURE; + } + + vpx_codec_control(mVPXContext, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(mVPXContext, VP8E_SET_CPUUSED, -6); + vpx_codec_control(mVPXContext, VP8E_SET_TOKEN_PARTITIONS, + VP8_ONE_TOKENPARTITION); + + mInitialized = true; + mon.NotifyAll(); + + return NS_OK; +} + +already_AddRefed +VP8TrackEncoder::GetMetadata() +{ + { + // Wait if mEncoder is not initialized. + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + while (!mCanceled && !mInitialized) { + mon.Wait(); + } + } + + if (mCanceled || mEncodingComplete) { + return nullptr; + } + + nsRefPtr meta = new VP8Metadata(); + meta->mWidth = mFrameWidth; + meta->mHeight = mFrameHeight; + meta->mDisplayWidth = mDisplayWidth; + meta->mDisplayHeight = mDisplayHeight; + meta->mEncodedFrameRate = mEncodedFrameRate; + + return meta.forget(); +} + +nsresult +VP8TrackEncoder::GetEncodedPartitions(EncodedFrameContainer& aData) +{ + vpx_codec_iter_t iter = nullptr; + EncodedFrame::FrameType frameType = EncodedFrame::VP8_P_FRAME; + nsTArray frameData; + nsresult rv; + const vpx_codec_cx_pkt_t *pkt = nullptr; + while ((pkt = vpx_codec_get_cx_data(mVPXContext, &iter)) != nullptr) { + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: { + // Copy the encoded data from libvpx to frameData + frameData.AppendElements((uint8_t*)pkt->data.frame.buf, + pkt->data.frame.sz); + break; + } + default: { + break; + } + } + // End of frame + if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) { + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + frameType = EncodedFrame::VP8_I_FRAME; + } + break; + } + } + + if (!frameData.IsEmpty() && + (pkt->data.frame.pts == mEncodedTimestamp)) { + // Copy the encoded data to aData. + EncodedFrame* videoData = new EncodedFrame(); + videoData->SetFrameType(frameType); + // Convert the timestamp and duration to Usecs. + CheckedInt64 timestamp = FramesToUsecs(mEncodedTimestamp, mTrackRate); + if (timestamp.isValid()) { + videoData->SetTimeStamp( + (uint64_t)FramesToUsecs(mEncodedTimestamp, mTrackRate).value()); + } + CheckedInt64 duration = FramesToUsecs(pkt->data.frame.duration, mTrackRate); + if (duration.isValid()) { + videoData->SetDuration( + (uint64_t)FramesToUsecs(pkt->data.frame.duration, mTrackRate).value()); + } + rv = videoData->SwapInFrameData(frameData); + NS_ENSURE_SUCCESS(rv, rv); + VP8LOG("GetEncodedPartitions TimeStamp %lld Duration %lld\n", + videoData->GetTimeStamp(), videoData->GetDuration()); + VP8LOG("frameType %d\n", videoData->GetFrameType()); + aData.AppendEncodedFrame(videoData); + } + + return NS_OK; +} + +void VP8TrackEncoder::PrepareMutedFrame() +{ + if (mMuteFrame.IsEmpty()) { + CreateMutedFrame(&mMuteFrame); + } + + uint32_t yPlaneSize = mFrameWidth * mFrameHeight; + uint32_t halfWidth = (mFrameWidth + 1) / 2; + uint32_t halfHeight = (mFrameHeight + 1) / 2; + uint32_t uvPlaneSize = halfWidth * halfHeight; + + MOZ_ASSERT(mMuteFrame.Length() >= (yPlaneSize + uvPlaneSize * 2)); + uint8_t *y = mMuteFrame.Elements(); + uint8_t *cb = mMuteFrame.Elements() + yPlaneSize; + uint8_t *cr = mMuteFrame.Elements() + yPlaneSize + uvPlaneSize; + + mVPXImageWrapper->planes[PLANE_Y] = y; + mVPXImageWrapper->planes[PLANE_U] = cb; + mVPXImageWrapper->planes[PLANE_V] = cr; + mVPXImageWrapper->stride[VPX_PLANE_Y] = mFrameWidth; + mVPXImageWrapper->stride[VPX_PLANE_U] = halfWidth; + mVPXImageWrapper->stride[VPX_PLANE_V] = halfWidth; +} + +static bool isYUV420(const PlanarYCbCrImage::Data *aData) +{ + if (aData->mYSize == aData->mCbCrSize * 2) { + return true; + } + return false; +} + +static bool isYUV422(const PlanarYCbCrImage::Data *aData) +{ + if ((aData->mYSize.width == aData->mCbCrSize.width * 2) && + (aData->mYSize.height == aData->mCbCrSize.height)) { + return true; + } + return false; +} + +static bool isYUV444(const PlanarYCbCrImage::Data *aData) +{ + if (aData->mYSize == aData->mCbCrSize) { + return true; + } + return false; +} + +nsresult VP8TrackEncoder::PrepareRawFrame(VideoChunk &aChunk) +{ + if (aChunk.mFrame.GetForceBlack() || aChunk.IsNull()) { + PrepareMutedFrame(); + } else { + Image* img = aChunk.mFrame.GetImage(); + ImageFormat format = img->GetFormat(); + if (format != ImageFormat::PLANAR_YCBCR) { + VP8LOG("Unsupported video format\n"); + return NS_ERROR_FAILURE; + } + + // Cast away constness b/c some of the accessors are non-const + PlanarYCbCrImage* yuv = + const_cast(static_cast(img)); + // Big-time assumption here that this is all contiguous data coming + // from getUserMedia or other sources. + MOZ_ASSERT(yuv); + if (!yuv->IsValid()) { + NS_WARNING("PlanarYCbCrImage is not valid"); + return NS_ERROR_FAILURE; + } + const PlanarYCbCrImage::Data *data = yuv->GetData(); + + if (isYUV420(data) && !data->mCbSkip) { // 420 planar + mVPXImageWrapper->planes[PLANE_Y] = data->mYChannel; + mVPXImageWrapper->planes[PLANE_U] = data->mCbChannel; + mVPXImageWrapper->planes[PLANE_V] = data->mCrChannel; + mVPXImageWrapper->stride[VPX_PLANE_Y] = data->mYStride; + mVPXImageWrapper->stride[VPX_PLANE_U] = data->mCbCrStride; + mVPXImageWrapper->stride[VPX_PLANE_V] = data->mCbCrStride; + } else { + uint32_t yPlaneSize = mFrameWidth * mFrameHeight; + uint32_t halfWidth = (mFrameWidth + 1) / 2; + uint32_t halfHeight = (mFrameHeight + 1) / 2; + uint32_t uvPlaneSize = halfWidth * halfHeight; + if (mI420Frame.IsEmpty()) { + mI420Frame.SetLength(yPlaneSize + uvPlaneSize * 2); + } + + MOZ_ASSERT(mI420Frame.Length() >= (yPlaneSize + uvPlaneSize * 2)); + uint8_t *y = mI420Frame.Elements(); + uint8_t *cb = mI420Frame.Elements() + yPlaneSize; + uint8_t *cr = mI420Frame.Elements() + yPlaneSize + uvPlaneSize; + + if (isYUV420(data) && data->mCbSkip) { + // If mCbSkip is set, we assume it's nv12 or nv21. + if (data->mCbChannel < data->mCrChannel) { // nv12 + libyuv::NV12ToI420(data->mYChannel, data->mYStride, + data->mCbChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + } else { // nv21 + libyuv::NV21ToI420(data->mYChannel, data->mYStride, + data->mCrChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + } + } else if (isYUV444(data) && !data->mCbSkip) { + libyuv::I444ToI420(data->mYChannel, data->mYStride, + data->mCbChannel, data->mCbCrStride, + data->mCrChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + } else if (isYUV422(data) && !data->mCbSkip) { + libyuv::I422ToI420(data->mYChannel, data->mYStride, + data->mCbChannel, data->mCbCrStride, + data->mCrChannel, data->mCbCrStride, + y, mFrameWidth, + cb, halfWidth, + cr, halfWidth, + mFrameWidth, mFrameHeight); + } else { + VP8LOG("Unsupported planar format\n"); + return NS_ERROR_NOT_IMPLEMENTED; + } + + mVPXImageWrapper->planes[PLANE_Y] = y; + mVPXImageWrapper->planes[PLANE_U] = cb; + mVPXImageWrapper->planes[PLANE_V] = cr; + mVPXImageWrapper->stride[VPX_PLANE_Y] = mFrameWidth; + mVPXImageWrapper->stride[VPX_PLANE_U] = halfWidth; + mVPXImageWrapper->stride[VPX_PLANE_V] = halfWidth; + } + } + return NS_OK; +} + +// These two define value used in GetNextEncodeOperation to determine the +// EncodeOperation for next target frame. +#define I_FRAME_RATIO (0.5) +#define SKIP_FRAME_RATIO (0.75) + +/** + * Compares the elapsed time from the beginning of GetEncodedTrack and + * the processed frame duration in mSourceSegment + * in order to set the nextEncodeOperation for next target frame. + */ +VP8TrackEncoder::EncodeOperation +VP8TrackEncoder::GetNextEncodeOperation(TimeDuration aTimeElapsed, + TrackTicks aProcessedDuration) +{ + int64_t durationInUsec = + FramesToUsecs(aProcessedDuration + mEncodedFrameDuration, + mTrackRate).value(); + if (aTimeElapsed.ToMicroseconds() > (durationInUsec * SKIP_FRAME_RATIO)) { + // The encoder is too slow. + // We should skip next frame to consume the mSourceSegment. + return SKIP_FRAME; + } else if (aTimeElapsed.ToMicroseconds() > (durationInUsec * I_FRAME_RATIO)) { + // The encoder is a little slow. + // We force the encoder to encode an I-frame to accelerate. + return ENCODE_I_FRAME; + } else { + return ENCODE_NORMAL_FRAME; + } +} + +TrackTicks +VP8TrackEncoder::CalculateRemainingTicks(TrackTicks aDurationCopied, + TrackTicks aEncodedDuration) +{ + return mRemainingTicks + aEncodedDuration - aDurationCopied; +} + +// Try to extend the encodedDuration as long as possible if the target frame +// has a long duration. +TrackTicks +VP8TrackEncoder::CalculateEncodedDuration(TrackTicks aDurationCopied) +{ + TrackTicks temp64 = aDurationCopied; + TrackTicks encodedDuration = mEncodedFrameDuration; + temp64 -= mRemainingTicks; + while (temp64 > mEncodedFrameDuration) { + temp64 -= mEncodedFrameDuration; + encodedDuration += mEncodedFrameDuration; + } + return encodedDuration; +} + +/** + * Encoding flow in GetEncodedTrack(): + * 1: Check the mInitialized state and the packet duration. + * 2: Move the data from mRawSegment to mSourceSegment. + * 3: Encode the video chunks in mSourceSegment in a for-loop. + * 3.1: Pick the video chunk by mRemainingTicks. + * 3.2: Calculate the encoding duration for the parameter of vpx_codec_encode(). + * The encoding duration is a multiple of mEncodedFrameDuration. + * 3.3: Setup the video chunk to mVPXImageWrapper by PrepareRawFrame(). + * 3.4: Send frame into vp8 encoder by vpx_codec_encode(). + * 3.5: Get the output frame from encoder by calling GetEncodedPartitions(). + * 3.6: Calculate the mRemainingTicks for next target frame. + * 3.7: Set the nextEncodeOperation for the next target frame. + * There is a heuristic: If the frame duration we have processed in + * mSourceSegment is 100ms, means that we can't spend more than 100ms to + * encode it. + * 4. Remove the encoded chunks in mSourceSegment after for-loop. + * + * Ex1: Input frame rate is 100 => input frame duration is 10ms for each. + * mEncodedFrameRate is 30 => output frame duration is 33ms. + * In this case, the frame duration in mSourceSegment will be: + * 1st : 0~10ms + * 2nd : 10~20ms + * 3rd : 20~30ms + * 4th : 30~40ms + * ... + * The VP8 encoder will take the 1st and 4th frames to encode. At beginning + * mRemainingTicks is 0 for 1st frame, then the mRemainingTicks is set + * to 23 to pick the 4th frame. (mEncodedFrameDuration - 1st frame duration) + * + * Ex2: Input frame rate is 25 => frame duration is 40ms for each. + * mEncodedFrameRate is 30 => output frame duration is 33ms. + * In this case, the frame duration in mSourceSegment will be: + * 1st : 0~40ms + * 2nd : 40~80ms + * 3rd : 80~120ms + * 4th : 120~160ms + * ... + * Because the input frame duration is 40ms larger than 33ms, so the first + * encoded frame duration will be 66ms by calling CalculateEncodedDuration. + * And the mRemainingTicks will be set to 26 + * (CalculateRemainingTicks 0+66-40) in order to pick the next frame(2nd) + * in mSourceSegment. + */ +nsresult +VP8TrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData) +{ + { + // Move all the samples from mRawSegment to mSourceSegment. We only hold + // the monitor in this block. + ReentrantMonitorAutoEnter mon(mReentrantMonitor); + // Wait if mEncoder is not initialized, or when not enough raw data, but is + // not the end of stream nor is being canceled. + while (!mCanceled && (!mInitialized || + (mRawSegment.GetDuration() + mSourceSegment.GetDuration() < + mEncodedFrameDuration && !mEndOfStream))) { + mon.Wait(); + } + if (mCanceled || mEncodingComplete) { + return NS_ERROR_FAILURE; + } + mSourceSegment.AppendFrom(&mRawSegment); + } + + VideoSegment::ChunkIterator iter(mSourceSegment); + TrackTicks durationCopied = 0; + TrackTicks totalProcessedDuration = 0; + TimeStamp timebase = TimeStamp::Now(); + EncodeOperation nextEncodeOperation = ENCODE_NORMAL_FRAME; + + for (; !iter.IsEnded(); iter.Next()) { + VideoChunk &chunk = *iter; + // Accumulate chunk's duration to durationCopied until it reaches + // mRemainingTicks. + durationCopied += chunk.GetDuration(); + MOZ_ASSERT(mRemainingTicks <= mEncodedFrameDuration); + VP8LOG("durationCopied %lld mRemainingTicks %lld\n", + durationCopied, mRemainingTicks); + if (durationCopied >= mRemainingTicks) { + VP8LOG("nextEncodeOperation is %d\n",nextEncodeOperation); + // Calculate encodedDuration for this target frame. + TrackTicks encodedDuration = CalculateEncodedDuration(durationCopied); + + // Encode frame. + if (nextEncodeOperation != SKIP_FRAME) { + nsresult rv = PrepareRawFrame(chunk); + NS_ENSURE_SUCCESS(rv, NS_ERROR_FAILURE); + + // Encode the data with VP8 encoder + int flags = (nextEncodeOperation == ENCODE_NORMAL_FRAME) ? + 0 : VPX_EFLAG_FORCE_KF; + if (vpx_codec_encode(mVPXContext, mVPXImageWrapper, mEncodedTimestamp, + (unsigned long)encodedDuration, flags, + VPX_DL_REALTIME)) { + return NS_ERROR_FAILURE; + } + // Get the encoded data from VP8 encoder. + GetEncodedPartitions(aData); + } else { + // SKIP_FRAME + // Extend the duration of the last encoded data in aData + // because this frame will be skip. + nsRefPtr last = nullptr; + last = aData.GetEncodedFrames().LastElement(); + if (last) { + last->SetDuration(last->GetDuration() + encodedDuration); + } + } + // Move forward the mEncodedTimestamp. + mEncodedTimestamp += encodedDuration; + totalProcessedDuration += durationCopied; + // Calculate mRemainingTicks for next target frame. + mRemainingTicks = CalculateRemainingTicks(durationCopied, + encodedDuration); + + // Check the remain data is enough for next target frame. + if (mSourceSegment.GetDuration() - totalProcessedDuration + >= mEncodedFrameDuration) { + TimeDuration elapsedTime = TimeStamp::Now() - timebase; + nextEncodeOperation = GetNextEncodeOperation(elapsedTime, + totalProcessedDuration); + // Reset durationCopied for next iteration. + durationCopied = 0; + } else { + // Process done, there is no enough data left for next iteration, + // break the for-loop. + break; + } + } + } + // Remove the chunks we have processed. + mSourceSegment.RemoveLeading(totalProcessedDuration); + VP8LOG("RemoveLeading %lld\n",totalProcessedDuration); + + // End of stream, pull the rest frames in encoder. + if (mEndOfStream) { + VP8LOG("mEndOfStream is true\n"); + mEncodingComplete = true; + if (vpx_codec_encode(mVPXContext, nullptr, mEncodedTimestamp, + mEncodedFrameDuration, 0, VPX_DL_REALTIME)) { + return NS_ERROR_FAILURE; + } + GetEncodedPartitions(aData); + } + + return NS_OK ; +} + +} // namespace mozilla