1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/media/ogg/OggCodecState.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1408 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim:set ts=2 sw=2 sts=2 et cindent: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include <string.h> 1.11 + 1.12 +#include "mozilla/DebugOnly.h" 1.13 +#include "mozilla/Endian.h" 1.14 +#include <stdint.h> 1.15 + 1.16 +#include "nsDebug.h" 1.17 +#include "MediaDecoderReader.h" 1.18 +#include "OggCodecState.h" 1.19 +#include "OggDecoder.h" 1.20 +#include "nsISupportsImpl.h" 1.21 +#include "VideoUtils.h" 1.22 +#include <algorithm> 1.23 + 1.24 +// On Android JellyBean, the hardware.h header redefines version_major and 1.25 +// version_minor, which breaks our build. See: 1.26 +// https://bugzilla.mozilla.org/show_bug.cgi?id=912702#c6 1.27 +#ifdef MOZ_WIDGET_GONK 1.28 +#ifdef version_major 1.29 +#undef version_major 1.30 +#endif 1.31 +#ifdef version_minor 1.32 +#undef version_minor 1.33 +#endif 1.34 +#endif 1.35 + 1.36 +namespace mozilla { 1.37 + 1.38 +#ifdef PR_LOGGING 1.39 +extern PRLogModuleInfo* gMediaDecoderLog; 1.40 +#define LOG(type, msg) PR_LOG(gMediaDecoderLog, type, msg) 1.41 +#else 1.42 +#define LOG(type, msg) 1.43 +#endif 1.44 + 1.45 +/** Decoder base class for Ogg-encapsulated streams. */ 1.46 +OggCodecState* 1.47 +OggCodecState::Create(ogg_page* aPage) 1.48 +{ 1.49 + NS_ASSERTION(ogg_page_bos(aPage), "Only call on BOS page!"); 1.50 + nsAutoPtr<OggCodecState> codecState; 1.51 + if (aPage->body_len > 6 && memcmp(aPage->body+1, "theora", 6) == 0) { 1.52 + codecState = new TheoraState(aPage); 1.53 + } else if (aPage->body_len > 6 && memcmp(aPage->body+1, "vorbis", 6) == 0) { 1.54 + codecState = new VorbisState(aPage); 1.55 +#ifdef MOZ_OPUS 1.56 + } else if (aPage->body_len > 8 && memcmp(aPage->body, "OpusHead", 8) == 0) { 1.57 + codecState = new OpusState(aPage); 1.58 +#endif 1.59 + } else if (aPage->body_len > 8 && memcmp(aPage->body, "fishead\0", 8) == 0) { 1.60 + codecState = new SkeletonState(aPage); 1.61 + } else { 1.62 + codecState = new OggCodecState(aPage, false); 1.63 + } 1.64 + return codecState->OggCodecState::Init() ? codecState.forget() : nullptr; 1.65 +} 1.66 + 1.67 +OggCodecState::OggCodecState(ogg_page* aBosPage, bool aActive) : 1.68 + mPacketCount(0), 1.69 + mSerial(ogg_page_serialno(aBosPage)), 1.70 + mActive(aActive), 1.71 + mDoneReadingHeaders(!aActive) 1.72 +{ 1.73 + MOZ_COUNT_CTOR(OggCodecState); 1.74 + memset(&mState, 0, sizeof(ogg_stream_state)); 1.75 +} 1.76 + 1.77 +OggCodecState::~OggCodecState() { 1.78 + MOZ_COUNT_DTOR(OggCodecState); 1.79 + Reset(); 1.80 +#ifdef DEBUG 1.81 + int ret = 1.82 +#endif 1.83 + ogg_stream_clear(&mState); 1.84 + NS_ASSERTION(ret == 0, "ogg_stream_clear failed"); 1.85 +} 1.86 + 1.87 +nsresult OggCodecState::Reset() { 1.88 + if (ogg_stream_reset(&mState) != 0) { 1.89 + return NS_ERROR_FAILURE; 1.90 + } 1.91 + mPackets.Erase(); 1.92 + ClearUnstamped(); 1.93 + return NS_OK; 1.94 +} 1.95 + 1.96 +void OggCodecState::ClearUnstamped() 1.97 +{ 1.98 + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { 1.99 + OggCodecState::ReleasePacket(mUnstamped[i]); 1.100 + } 1.101 + mUnstamped.Clear(); 1.102 +} 1.103 + 1.104 +bool OggCodecState::Init() { 1.105 + int ret = ogg_stream_init(&mState, mSerial); 1.106 + return ret == 0; 1.107 +} 1.108 + 1.109 +bool OggCodecState::IsValidVorbisTagName(nsCString& aName) 1.110 +{ 1.111 + // Tag names must consist of ASCII 0x20 through 0x7D, 1.112 + // excluding 0x3D '=' which is the separator. 1.113 + uint32_t length = aName.Length(); 1.114 + const char* data = aName.Data(); 1.115 + for (uint32_t i = 0; i < length; i++) { 1.116 + if (data[i] < 0x20 || data[i] > 0x7D || data[i] == '=') { 1.117 + return false; 1.118 + } 1.119 + } 1.120 + return true; 1.121 +} 1.122 + 1.123 +bool OggCodecState::AddVorbisComment(MetadataTags* aTags, 1.124 + const char* aComment, 1.125 + uint32_t aLength) 1.126 +{ 1.127 + const char* div = (const char*)memchr(aComment, '=', aLength); 1.128 + if (!div) { 1.129 + LOG(PR_LOG_DEBUG, ("Skipping comment: no separator")); 1.130 + return false; 1.131 + } 1.132 + nsCString key = nsCString(aComment, div-aComment); 1.133 + if (!IsValidVorbisTagName(key)) { 1.134 + LOG(PR_LOG_DEBUG, ("Skipping comment: invalid tag name")); 1.135 + return false; 1.136 + } 1.137 + uint32_t valueLength = aLength - (div-aComment); 1.138 + nsCString value = nsCString(div + 1, valueLength); 1.139 + if (!IsUTF8(value)) { 1.140 + LOG(PR_LOG_DEBUG, ("Skipping comment: invalid UTF-8 in value")); 1.141 + return false; 1.142 + } 1.143 + aTags->Put(key, value); 1.144 + return true; 1.145 +} 1.146 + 1.147 +void VorbisState::RecordVorbisPacketSamples(ogg_packet* aPacket, 1.148 + long aSamples) 1.149 +{ 1.150 +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 1.151 + mVorbisPacketSamples[aPacket] = aSamples; 1.152 +#endif 1.153 +} 1.154 + 1.155 +void VorbisState::ValidateVorbisPacketSamples(ogg_packet* aPacket, 1.156 + long aSamples) 1.157 +{ 1.158 +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 1.159 + NS_ASSERTION(mVorbisPacketSamples[aPacket] == aSamples, 1.160 + "Decoded samples for Vorbis packet don't match expected!"); 1.161 + mVorbisPacketSamples.erase(aPacket); 1.162 +#endif 1.163 +} 1.164 + 1.165 +void VorbisState::AssertHasRecordedPacketSamples(ogg_packet* aPacket) 1.166 +{ 1.167 +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 1.168 + NS_ASSERTION(mVorbisPacketSamples.count(aPacket) == 1, 1.169 + "Must have recorded packet samples"); 1.170 +#endif 1.171 +} 1.172 + 1.173 +static ogg_packet* Clone(ogg_packet* aPacket) { 1.174 + ogg_packet* p = new ogg_packet(); 1.175 + memcpy(p, aPacket, sizeof(ogg_packet)); 1.176 + p->packet = new unsigned char[p->bytes]; 1.177 + memcpy(p->packet, aPacket->packet, p->bytes); 1.178 + return p; 1.179 +} 1.180 + 1.181 +void OggCodecState::ReleasePacket(ogg_packet* aPacket) { 1.182 + if (aPacket) 1.183 + delete [] aPacket->packet; 1.184 + delete aPacket; 1.185 +} 1.186 + 1.187 +void OggPacketQueue::Append(ogg_packet* aPacket) { 1.188 + nsDeque::Push(aPacket); 1.189 +} 1.190 + 1.191 +ogg_packet* OggCodecState::PacketOut() { 1.192 + if (mPackets.IsEmpty()) { 1.193 + return nullptr; 1.194 + } 1.195 + return mPackets.PopFront(); 1.196 +} 1.197 + 1.198 +nsresult OggCodecState::PageIn(ogg_page* aPage) { 1.199 + if (!mActive) 1.200 + return NS_OK; 1.201 + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, 1.202 + "Page must be for this stream!"); 1.203 + if (ogg_stream_pagein(&mState, aPage) == -1) 1.204 + return NS_ERROR_FAILURE; 1.205 + int r; 1.206 + do { 1.207 + ogg_packet packet; 1.208 + r = ogg_stream_packetout(&mState, &packet); 1.209 + if (r == 1) { 1.210 + mPackets.Append(Clone(&packet)); 1.211 + } 1.212 + } while (r != 0); 1.213 + if (ogg_stream_check(&mState)) { 1.214 + NS_WARNING("Unrecoverable error in ogg_stream_packetout"); 1.215 + return NS_ERROR_FAILURE; 1.216 + } 1.217 + return NS_OK; 1.218 +} 1.219 + 1.220 +nsresult OggCodecState::PacketOutUntilGranulepos(bool& aFoundGranulepos) { 1.221 + int r; 1.222 + aFoundGranulepos = false; 1.223 + // Extract packets from the sync state until either no more packets 1.224 + // come out, or we get a data packet with non -1 granulepos. 1.225 + do { 1.226 + ogg_packet packet; 1.227 + r = ogg_stream_packetout(&mState, &packet); 1.228 + if (r == 1) { 1.229 + ogg_packet* clone = Clone(&packet); 1.230 + if (IsHeader(&packet)) { 1.231 + // Header packets go straight into the packet queue. 1.232 + mPackets.Append(clone); 1.233 + } else { 1.234 + // We buffer data packets until we encounter a granulepos. We'll 1.235 + // then use the granulepos to figure out the granulepos of the 1.236 + // preceeding packets. 1.237 + mUnstamped.AppendElement(clone); 1.238 + aFoundGranulepos = packet.granulepos > 0; 1.239 + } 1.240 + } 1.241 + } while (r != 0 && !aFoundGranulepos); 1.242 + if (ogg_stream_check(&mState)) { 1.243 + NS_WARNING("Unrecoverable error in ogg_stream_packetout"); 1.244 + return NS_ERROR_FAILURE; 1.245 + } 1.246 + return NS_OK; 1.247 +} 1.248 + 1.249 +TheoraState::TheoraState(ogg_page* aBosPage) : 1.250 + OggCodecState(aBosPage, true), 1.251 + mSetup(0), 1.252 + mCtx(0), 1.253 + mPixelAspectRatio(0) 1.254 +{ 1.255 + MOZ_COUNT_CTOR(TheoraState); 1.256 + th_info_init(&mInfo); 1.257 + th_comment_init(&mComment); 1.258 +} 1.259 + 1.260 +TheoraState::~TheoraState() { 1.261 + MOZ_COUNT_DTOR(TheoraState); 1.262 + th_setup_free(mSetup); 1.263 + th_decode_free(mCtx); 1.264 + th_comment_clear(&mComment); 1.265 + th_info_clear(&mInfo); 1.266 +} 1.267 + 1.268 +bool TheoraState::Init() { 1.269 + if (!mActive) 1.270 + return false; 1.271 + 1.272 + int64_t n = mInfo.aspect_numerator; 1.273 + int64_t d = mInfo.aspect_denominator; 1.274 + 1.275 + mPixelAspectRatio = (n == 0 || d == 0) ? 1.276 + 1.0f : static_cast<float>(n) / static_cast<float>(d); 1.277 + 1.278 + // Ensure the frame and picture regions aren't larger than our prescribed 1.279 + // maximum, or zero sized. 1.280 + nsIntSize frame(mInfo.frame_width, mInfo.frame_height); 1.281 + nsIntRect picture(mInfo.pic_x, mInfo.pic_y, mInfo.pic_width, mInfo.pic_height); 1.282 + if (!IsValidVideoRegion(frame, picture, frame)) { 1.283 + return mActive = false; 1.284 + } 1.285 + 1.286 + mCtx = th_decode_alloc(&mInfo, mSetup); 1.287 + if (mCtx == nullptr) { 1.288 + return mActive = false; 1.289 + } 1.290 + 1.291 + return true; 1.292 +} 1.293 + 1.294 +bool 1.295 +TheoraState::DecodeHeader(ogg_packet* aPacket) 1.296 +{ 1.297 + nsAutoRef<ogg_packet> autoRelease(aPacket); 1.298 + mPacketCount++; 1.299 + int ret = th_decode_headerin(&mInfo, 1.300 + &mComment, 1.301 + &mSetup, 1.302 + aPacket); 1.303 + 1.304 + // We must determine when we've read the last header packet. 1.305 + // th_decode_headerin() does not tell us when it's read the last header, so 1.306 + // we must keep track of the headers externally. 1.307 + // 1.308 + // There are 3 header packets, the Identification, Comment, and Setup 1.309 + // headers, which must be in that order. If they're out of order, the file 1.310 + // is invalid. If we've successfully read a header, and it's the setup 1.311 + // header, then we're done reading headers. The first byte of each packet 1.312 + // determines it's type as follows: 1.313 + // 0x80 -> Identification header 1.314 + // 0x81 -> Comment header 1.315 + // 0x82 -> Setup header 1.316 + // See http://www.theora.org/doc/Theora.pdf Chapter 6, "Bitstream Headers", 1.317 + // for more details of the Ogg/Theora containment scheme. 1.318 + bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x82; 1.319 + if (ret < 0 || mPacketCount > 3) { 1.320 + // We've received an error, or the first three packets weren't valid 1.321 + // header packets. Assume bad input. 1.322 + // Our caller will deactivate the bitstream. 1.323 + return false; 1.324 + } else if (ret > 0 && isSetupHeader && mPacketCount == 3) { 1.325 + // Successfully read the three header packets. 1.326 + mDoneReadingHeaders = true; 1.327 + } 1.328 + return true; 1.329 +} 1.330 + 1.331 +int64_t 1.332 +TheoraState::Time(int64_t granulepos) { 1.333 + if (!mActive) { 1.334 + return -1; 1.335 + } 1.336 + return TheoraState::Time(&mInfo, granulepos); 1.337 +} 1.338 + 1.339 +bool 1.340 +TheoraState::IsHeader(ogg_packet* aPacket) { 1.341 + return th_packet_isheader(aPacket); 1.342 +} 1.343 + 1.344 +# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \ 1.345 + (((_info)->version_major>(_maj)||(_info)->version_major==(_maj))&& \ 1.346 + (((_info)->version_minor>(_min)||(_info)->version_minor==(_min))&& \ 1.347 + (_info)->version_subminor>=(_sub))) 1.348 + 1.349 +int64_t TheoraState::Time(th_info* aInfo, int64_t aGranulepos) 1.350 +{ 1.351 + if (aGranulepos < 0 || aInfo->fps_numerator == 0) { 1.352 + return -1; 1.353 + } 1.354 + // Implementation of th_granule_frame inlined here to operate 1.355 + // on the th_info structure instead of the theora_state. 1.356 + int shift = aInfo->keyframe_granule_shift; 1.357 + ogg_int64_t iframe = aGranulepos >> shift; 1.358 + ogg_int64_t pframe = aGranulepos - (iframe << shift); 1.359 + int64_t frameno = iframe + pframe - TH_VERSION_CHECK(aInfo, 3, 2, 1); 1.360 + CheckedInt64 t = ((CheckedInt64(frameno) + 1) * USECS_PER_S) * aInfo->fps_denominator; 1.361 + if (!t.isValid()) 1.362 + return -1; 1.363 + t /= aInfo->fps_numerator; 1.364 + return t.isValid() ? t.value() : -1; 1.365 +} 1.366 + 1.367 +int64_t TheoraState::StartTime(int64_t granulepos) { 1.368 + if (granulepos < 0 || !mActive || mInfo.fps_numerator == 0) { 1.369 + return -1; 1.370 + } 1.371 + CheckedInt64 t = (CheckedInt64(th_granule_frame(mCtx, granulepos)) * USECS_PER_S) * mInfo.fps_denominator; 1.372 + if (!t.isValid()) 1.373 + return -1; 1.374 + return t.value() / mInfo.fps_numerator; 1.375 +} 1.376 + 1.377 +int64_t 1.378 +TheoraState::MaxKeyframeOffset() 1.379 +{ 1.380 + // Determine the maximum time in microseconds by which a key frame could 1.381 + // offset for the theora bitstream. Theora granulepos encode time as: 1.382 + // ((key_frame_number << granule_shift) + frame_offset). 1.383 + // Therefore the maximum possible time by which any frame could be offset 1.384 + // from a keyframe is the duration of (1 << granule_shift) - 1) frames. 1.385 + int64_t frameDuration; 1.386 + 1.387 + // Max number of frames keyframe could possibly be offset. 1.388 + int64_t keyframeDiff = (1 << mInfo.keyframe_granule_shift) - 1; 1.389 + 1.390 + // Length of frame in usecs. 1.391 + frameDuration = (mInfo.fps_denominator * USECS_PER_S) / mInfo.fps_numerator; 1.392 + 1.393 + // Total time in usecs keyframe can be offset from any given frame. 1.394 + return frameDuration * keyframeDiff; 1.395 +} 1.396 + 1.397 +nsresult 1.398 +TheoraState::PageIn(ogg_page* aPage) 1.399 +{ 1.400 + if (!mActive) 1.401 + return NS_OK; 1.402 + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, 1.403 + "Page must be for this stream!"); 1.404 + if (ogg_stream_pagein(&mState, aPage) == -1) 1.405 + return NS_ERROR_FAILURE; 1.406 + bool foundGp; 1.407 + nsresult res = PacketOutUntilGranulepos(foundGp); 1.408 + if (NS_FAILED(res)) 1.409 + return res; 1.410 + if (foundGp && mDoneReadingHeaders) { 1.411 + // We've found a packet with a granulepos, and we've loaded our metadata 1.412 + // and initialized our decoder. Determine granulepos of buffered packets. 1.413 + ReconstructTheoraGranulepos(); 1.414 + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { 1.415 + ogg_packet* packet = mUnstamped[i]; 1.416 +#ifdef DEBUG 1.417 + NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); 1.418 + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); 1.419 +#endif 1.420 + mPackets.Append(packet); 1.421 + } 1.422 + mUnstamped.Clear(); 1.423 + } 1.424 + return NS_OK; 1.425 +} 1.426 + 1.427 +// Returns 1 if the Theora info struct is decoding a media of Theora 1.428 +// version (maj,min,sub) or later, otherwise returns 0. 1.429 +int 1.430 +TheoraVersion(th_info* info, 1.431 + unsigned char maj, 1.432 + unsigned char min, 1.433 + unsigned char sub) 1.434 +{ 1.435 + ogg_uint32_t ver = (maj << 16) + (min << 8) + sub; 1.436 + ogg_uint32_t th_ver = (info->version_major << 16) + 1.437 + (info->version_minor << 8) + 1.438 + info->version_subminor; 1.439 + return (th_ver >= ver) ? 1 : 0; 1.440 +} 1.441 + 1.442 +void TheoraState::ReconstructTheoraGranulepos() 1.443 +{ 1.444 + if (mUnstamped.Length() == 0) { 1.445 + return; 1.446 + } 1.447 + ogg_int64_t lastGranulepos = mUnstamped[mUnstamped.Length() - 1]->granulepos; 1.448 + NS_ASSERTION(lastGranulepos != -1, "Must know last granulepos"); 1.449 + 1.450 + // Reconstruct the granulepos (and thus timestamps) of the decoded 1.451 + // frames. Granulepos are stored as ((keyframe<<shift)+offset). We 1.452 + // know the granulepos of the last frame in the list, so we can infer 1.453 + // the granulepos of the intermediate frames using their frame numbers. 1.454 + ogg_int64_t shift = mInfo.keyframe_granule_shift; 1.455 + ogg_int64_t version_3_2_1 = TheoraVersion(&mInfo,3,2,1); 1.456 + ogg_int64_t lastFrame = th_granule_frame(mCtx, 1.457 + lastGranulepos) + version_3_2_1; 1.458 + ogg_int64_t firstFrame = lastFrame - mUnstamped.Length() + 1; 1.459 + 1.460 + // Until we encounter a keyframe, we'll assume that the "keyframe" 1.461 + // segment of the granulepos is the first frame, or if that causes 1.462 + // the "offset" segment to overflow, we assume the required 1.463 + // keyframe is maximumally offset. Until we encounter a keyframe 1.464 + // the granulepos will probably be wrong, but we can't decode the 1.465 + // frame anyway (since we don't have its keyframe) so it doesn't really 1.466 + // matter. 1.467 + ogg_int64_t keyframe = lastGranulepos >> shift; 1.468 + 1.469 + // The lastFrame, firstFrame, keyframe variables, as well as the frame 1.470 + // variable in the loop below, store the frame number for Theora 1.471 + // version >= 3.2.1 streams, and store the frame index for Theora 1.472 + // version < 3.2.1 streams. 1.473 + for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { 1.474 + ogg_int64_t frame = firstFrame + i; 1.475 + ogg_int64_t granulepos; 1.476 + ogg_packet* packet = mUnstamped[i]; 1.477 + bool isKeyframe = th_packet_iskeyframe(packet) == 1; 1.478 + 1.479 + if (isKeyframe) { 1.480 + granulepos = frame << shift; 1.481 + keyframe = frame; 1.482 + } else if (frame >= keyframe && 1.483 + frame - keyframe < ((ogg_int64_t)1 << shift)) 1.484 + { 1.485 + // (frame - keyframe) won't overflow the "offset" segment of the 1.486 + // granulepos, so it's safe to calculate the granulepos. 1.487 + granulepos = (keyframe << shift) + (frame - keyframe); 1.488 + } else { 1.489 + // (frame - keyframeno) will overflow the "offset" segment of the 1.490 + // granulepos, so we take "keyframe" to be the max possible offset 1.491 + // frame instead. 1.492 + ogg_int64_t k = std::max(frame - (((ogg_int64_t)1 << shift) - 1), version_3_2_1); 1.493 + granulepos = (k << shift) + (frame - k); 1.494 + } 1.495 + // Theora 3.2.1+ granulepos store frame number [1..N], so granulepos 1.496 + // should be > 0. 1.497 + // Theora 3.2.0 granulepos store the frame index [0..(N-1)], so 1.498 + // granulepos should be >= 0. 1.499 + NS_ASSERTION(granulepos >= version_3_2_1, 1.500 + "Invalid granulepos for Theora version"); 1.501 + 1.502 + // Check that the frame's granule number is one more than the 1.503 + // previous frame's. 1.504 + NS_ASSERTION(i == 0 || 1.505 + th_granule_frame(mCtx, granulepos) == 1.506 + th_granule_frame(mCtx, mUnstamped[i-1]->granulepos) + 1, 1.507 + "Granulepos calculation is incorrect!"); 1.508 + 1.509 + packet->granulepos = granulepos; 1.510 + } 1.511 + 1.512 + // Check that the second to last frame's granule number is one less than 1.513 + // the last frame's (the known granule number). If not our granulepos 1.514 + // recovery missed a beat. 1.515 + NS_ASSERTION(mUnstamped.Length() < 2 || 1.516 + th_granule_frame(mCtx, mUnstamped[mUnstamped.Length()-2]->granulepos) + 1 == 1.517 + th_granule_frame(mCtx, lastGranulepos), 1.518 + "Granulepos recovery should catch up with packet->granulepos!"); 1.519 +} 1.520 + 1.521 +nsresult VorbisState::Reset() 1.522 +{ 1.523 + nsresult res = NS_OK; 1.524 + if (mActive && vorbis_synthesis_restart(&mDsp) != 0) { 1.525 + res = NS_ERROR_FAILURE; 1.526 + } 1.527 + if (NS_FAILED(OggCodecState::Reset())) { 1.528 + return NS_ERROR_FAILURE; 1.529 + } 1.530 + 1.531 + mGranulepos = 0; 1.532 + mPrevVorbisBlockSize = 0; 1.533 + 1.534 + return res; 1.535 +} 1.536 + 1.537 +VorbisState::VorbisState(ogg_page* aBosPage) : 1.538 + OggCodecState(aBosPage, true), 1.539 + mPrevVorbisBlockSize(0), 1.540 + mGranulepos(0) 1.541 +{ 1.542 + MOZ_COUNT_CTOR(VorbisState); 1.543 + vorbis_info_init(&mInfo); 1.544 + vorbis_comment_init(&mComment); 1.545 + memset(&mDsp, 0, sizeof(vorbis_dsp_state)); 1.546 + memset(&mBlock, 0, sizeof(vorbis_block)); 1.547 +} 1.548 + 1.549 +VorbisState::~VorbisState() { 1.550 + MOZ_COUNT_DTOR(VorbisState); 1.551 + Reset(); 1.552 + vorbis_block_clear(&mBlock); 1.553 + vorbis_dsp_clear(&mDsp); 1.554 + vorbis_info_clear(&mInfo); 1.555 + vorbis_comment_clear(&mComment); 1.556 +} 1.557 + 1.558 +bool VorbisState::DecodeHeader(ogg_packet* aPacket) { 1.559 + nsAutoRef<ogg_packet> autoRelease(aPacket); 1.560 + mPacketCount++; 1.561 + int ret = vorbis_synthesis_headerin(&mInfo, 1.562 + &mComment, 1.563 + aPacket); 1.564 + // We must determine when we've read the last header packet. 1.565 + // vorbis_synthesis_headerin() does not tell us when it's read the last 1.566 + // header, so we must keep track of the headers externally. 1.567 + // 1.568 + // There are 3 header packets, the Identification, Comment, and Setup 1.569 + // headers, which must be in that order. If they're out of order, the file 1.570 + // is invalid. If we've successfully read a header, and it's the setup 1.571 + // header, then we're done reading headers. The first byte of each packet 1.572 + // determines it's type as follows: 1.573 + // 0x1 -> Identification header 1.574 + // 0x3 -> Comment header 1.575 + // 0x5 -> Setup header 1.576 + // For more details of the Vorbis/Ogg containment scheme, see the Vorbis I 1.577 + // Specification, Chapter 4, Codec Setup and Packet Decode: 1.578 + // http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-580004 1.579 + 1.580 + bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x5; 1.581 + 1.582 + if (ret < 0 || mPacketCount > 3) { 1.583 + // We've received an error, or the first three packets weren't valid 1.584 + // header packets. Assume bad input. Our caller will deactivate the 1.585 + // bitstream. 1.586 + return false; 1.587 + } else if (ret == 0 && isSetupHeader && mPacketCount == 3) { 1.588 + // Successfully read the three header packets. 1.589 + // The bitstream remains active. 1.590 + mDoneReadingHeaders = true; 1.591 + } 1.592 + return true; 1.593 +} 1.594 + 1.595 +bool VorbisState::Init() 1.596 +{ 1.597 + if (!mActive) 1.598 + return false; 1.599 + 1.600 + int ret = vorbis_synthesis_init(&mDsp, &mInfo); 1.601 + if (ret != 0) { 1.602 + NS_WARNING("vorbis_synthesis_init() failed initializing vorbis bitstream"); 1.603 + return mActive = false; 1.604 + } 1.605 + ret = vorbis_block_init(&mDsp, &mBlock); 1.606 + if (ret != 0) { 1.607 + NS_WARNING("vorbis_block_init() failed initializing vorbis bitstream"); 1.608 + if (mActive) { 1.609 + vorbis_dsp_clear(&mDsp); 1.610 + } 1.611 + return mActive = false; 1.612 + } 1.613 + return true; 1.614 +} 1.615 + 1.616 +int64_t VorbisState::Time(int64_t granulepos) 1.617 +{ 1.618 + if (!mActive) { 1.619 + return -1; 1.620 + } 1.621 + 1.622 + return VorbisState::Time(&mInfo, granulepos); 1.623 +} 1.624 + 1.625 +int64_t VorbisState::Time(vorbis_info* aInfo, int64_t aGranulepos) 1.626 +{ 1.627 + if (aGranulepos == -1 || aInfo->rate == 0) { 1.628 + return -1; 1.629 + } 1.630 + CheckedInt64 t = CheckedInt64(aGranulepos) * USECS_PER_S; 1.631 + if (!t.isValid()) 1.632 + t = 0; 1.633 + return t.value() / aInfo->rate; 1.634 +} 1.635 + 1.636 +bool 1.637 +VorbisState::IsHeader(ogg_packet* aPacket) 1.638 +{ 1.639 + // The first byte in each Vorbis header packet is either 0x01, 0x03, or 0x05, 1.640 + // i.e. the first bit is odd. Audio data packets have their first bit as 0x0. 1.641 + // Any packet with its first bit set cannot be a data packet, it's a 1.642 + // (possibly invalid) header packet. 1.643 + // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-610004.2.1 1.644 + return aPacket->bytes > 0 ? (aPacket->packet[0] & 0x1) : false; 1.645 +} 1.646 + 1.647 +MetadataTags* 1.648 +VorbisState::GetTags() 1.649 +{ 1.650 + MetadataTags* tags; 1.651 + NS_ASSERTION(mComment.user_comments, "no vorbis comment strings!"); 1.652 + NS_ASSERTION(mComment.comment_lengths, "no vorbis comment lengths!"); 1.653 + tags = new MetadataTags; 1.654 + for (int i = 0; i < mComment.comments; i++) { 1.655 + AddVorbisComment(tags, mComment.user_comments[i], 1.656 + mComment.comment_lengths[i]); 1.657 + } 1.658 + return tags; 1.659 +} 1.660 + 1.661 +nsresult 1.662 +VorbisState::PageIn(ogg_page* aPage) 1.663 +{ 1.664 + if (!mActive) 1.665 + return NS_OK; 1.666 + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, 1.667 + "Page must be for this stream!"); 1.668 + if (ogg_stream_pagein(&mState, aPage) == -1) 1.669 + return NS_ERROR_FAILURE; 1.670 + bool foundGp; 1.671 + nsresult res = PacketOutUntilGranulepos(foundGp); 1.672 + if (NS_FAILED(res)) 1.673 + return res; 1.674 + if (foundGp && mDoneReadingHeaders) { 1.675 + // We've found a packet with a granulepos, and we've loaded our metadata 1.676 + // and initialized our decoder. Determine granulepos of buffered packets. 1.677 + ReconstructVorbisGranulepos(); 1.678 + for (uint32_t i = 0; i < mUnstamped.Length(); ++i) { 1.679 + ogg_packet* packet = mUnstamped[i]; 1.680 + AssertHasRecordedPacketSamples(packet); 1.681 + NS_ASSERTION(!IsHeader(packet), "Don't try to recover header packet gp"); 1.682 + NS_ASSERTION(packet->granulepos != -1, "Packet must have gp by now"); 1.683 + mPackets.Append(packet); 1.684 + } 1.685 + mUnstamped.Clear(); 1.686 + } 1.687 + return NS_OK; 1.688 +} 1.689 + 1.690 +nsresult VorbisState::ReconstructVorbisGranulepos() 1.691 +{ 1.692 + // The number of samples in a Vorbis packet is: 1.693 + // window_blocksize(previous_packet)/4+window_blocksize(current_packet)/4 1.694 + // See: http://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-230001.3.2 1.695 + // So we maintain mPrevVorbisBlockSize, the block size of the last packet 1.696 + // encountered. We also maintain mGranulepos, which is the granulepos of 1.697 + // the last encountered packet. This enables us to give granulepos to 1.698 + // packets when the last packet in mUnstamped doesn't have a granulepos 1.699 + // (for example if the stream was truncated). 1.700 + // 1.701 + // We validate our prediction of the number of samples decoded when 1.702 + // VALIDATE_VORBIS_SAMPLE_CALCULATION is defined by recording the predicted 1.703 + // number of samples, and verifing we extract that many when decoding 1.704 + // each packet. 1.705 + 1.706 + NS_ASSERTION(mUnstamped.Length() > 0, "Length must be > 0"); 1.707 + ogg_packet* last = mUnstamped[mUnstamped.Length()-1]; 1.708 + NS_ASSERTION(last->e_o_s || last->granulepos >= 0, 1.709 + "Must know last granulepos!"); 1.710 + if (mUnstamped.Length() == 1) { 1.711 + ogg_packet* packet = mUnstamped[0]; 1.712 + long blockSize = vorbis_packet_blocksize(&mInfo, packet); 1.713 + if (blockSize < 0) { 1.714 + // On failure vorbis_packet_blocksize returns < 0. If we've got 1.715 + // a bad packet, we just assume that decode will have to skip this 1.716 + // packet, i.e. assume 0 samples are decodable from this packet. 1.717 + blockSize = 0; 1.718 + mPrevVorbisBlockSize = 0; 1.719 + } 1.720 + long samples = mPrevVorbisBlockSize / 4 + blockSize / 4; 1.721 + mPrevVorbisBlockSize = blockSize; 1.722 + if (packet->granulepos == -1) { 1.723 + packet->granulepos = mGranulepos + samples; 1.724 + } 1.725 + 1.726 + // Account for a partial last frame 1.727 + if (packet->e_o_s && packet->granulepos >= mGranulepos) { 1.728 + samples = packet->granulepos - mGranulepos; 1.729 + } 1.730 + 1.731 + mGranulepos = packet->granulepos; 1.732 + RecordVorbisPacketSamples(packet, samples); 1.733 + return NS_OK; 1.734 + } 1.735 + 1.736 + bool unknownGranulepos = last->granulepos == -1; 1.737 + int totalSamples = 0; 1.738 + for (int32_t i = mUnstamped.Length() - 1; i > 0; i--) { 1.739 + ogg_packet* packet = mUnstamped[i]; 1.740 + ogg_packet* prev = mUnstamped[i-1]; 1.741 + ogg_int64_t granulepos = packet->granulepos; 1.742 + NS_ASSERTION(granulepos != -1, "Must know granulepos!"); 1.743 + long prevBlockSize = vorbis_packet_blocksize(&mInfo, prev); 1.744 + long blockSize = vorbis_packet_blocksize(&mInfo, packet); 1.745 + 1.746 + if (blockSize < 0 || prevBlockSize < 0) { 1.747 + // On failure vorbis_packet_blocksize returns < 0. If we've got 1.748 + // a bad packet, we just assume that decode will have to skip this 1.749 + // packet, i.e. assume 0 samples are decodable from this packet. 1.750 + blockSize = 0; 1.751 + prevBlockSize = 0; 1.752 + } 1.753 + 1.754 + long samples = prevBlockSize / 4 + blockSize / 4; 1.755 + totalSamples += samples; 1.756 + prev->granulepos = granulepos - samples; 1.757 + RecordVorbisPacketSamples(packet, samples); 1.758 + } 1.759 + 1.760 + if (unknownGranulepos) { 1.761 + for (uint32_t i = 0; i < mUnstamped.Length(); i++) { 1.762 + ogg_packet* packet = mUnstamped[i]; 1.763 + packet->granulepos += mGranulepos + totalSamples + 1; 1.764 + } 1.765 + } 1.766 + 1.767 + ogg_packet* first = mUnstamped[0]; 1.768 + long blockSize = vorbis_packet_blocksize(&mInfo, first); 1.769 + if (blockSize < 0) { 1.770 + mPrevVorbisBlockSize = 0; 1.771 + blockSize = 0; 1.772 + } 1.773 + 1.774 + long samples = (mPrevVorbisBlockSize == 0) ? 0 : 1.775 + mPrevVorbisBlockSize / 4 + blockSize / 4; 1.776 + int64_t start = first->granulepos - samples; 1.777 + RecordVorbisPacketSamples(first, samples); 1.778 + 1.779 + if (last->e_o_s && start < mGranulepos) { 1.780 + // We've calculated that there are more samples in this page than its 1.781 + // granulepos claims, and it's the last page in the stream. This is legal, 1.782 + // and we will need to prune the trailing samples when we come to decode it. 1.783 + // We must correct the timestamps so that they follow the last Vorbis page's 1.784 + // samples. 1.785 + int64_t pruned = mGranulepos - start; 1.786 + for (uint32_t i = 0; i < mUnstamped.Length() - 1; i++) { 1.787 + mUnstamped[i]->granulepos += pruned; 1.788 + } 1.789 +#ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION 1.790 + mVorbisPacketSamples[last] -= pruned; 1.791 +#endif 1.792 + } 1.793 + 1.794 + mPrevVorbisBlockSize = vorbis_packet_blocksize(&mInfo, last); 1.795 + mPrevVorbisBlockSize = std::max(static_cast<long>(0), mPrevVorbisBlockSize); 1.796 + mGranulepos = last->granulepos; 1.797 + 1.798 + return NS_OK; 1.799 +} 1.800 + 1.801 +#ifdef MOZ_OPUS 1.802 +OpusState::OpusState(ogg_page* aBosPage) : 1.803 + OggCodecState(aBosPage, true), 1.804 + mParser(nullptr), 1.805 + mDecoder(nullptr), 1.806 + mSkip(0), 1.807 + mPrevPacketGranulepos(0), 1.808 + mPrevPageGranulepos(0) 1.809 +{ 1.810 + MOZ_COUNT_CTOR(OpusState); 1.811 +} 1.812 + 1.813 +OpusState::~OpusState() { 1.814 + MOZ_COUNT_DTOR(OpusState); 1.815 + Reset(); 1.816 + 1.817 + if (mDecoder) { 1.818 + opus_multistream_decoder_destroy(mDecoder); 1.819 + mDecoder = nullptr; 1.820 + } 1.821 +} 1.822 + 1.823 +nsresult OpusState::Reset() 1.824 +{ 1.825 + return Reset(false); 1.826 +} 1.827 + 1.828 +nsresult OpusState::Reset(bool aStart) 1.829 +{ 1.830 + nsresult res = NS_OK; 1.831 + 1.832 + if (mActive && mDecoder) { 1.833 + // Reset the decoder. 1.834 + opus_multistream_decoder_ctl(mDecoder, OPUS_RESET_STATE); 1.835 + // Let the seek logic handle pre-roll if we're not seeking to the start. 1.836 + mSkip = aStart ? mParser->mPreSkip : 0; 1.837 + // This lets us distinguish the first page being the last page vs. just 1.838 + // not having processed the previous page when we encounter the last page. 1.839 + mPrevPageGranulepos = aStart ? 0 : -1; 1.840 + mPrevPacketGranulepos = aStart ? 0 : -1; 1.841 + } 1.842 + 1.843 + // Clear queued data. 1.844 + if (NS_FAILED(OggCodecState::Reset())) { 1.845 + return NS_ERROR_FAILURE; 1.846 + } 1.847 + 1.848 + LOG(PR_LOG_DEBUG, ("Opus decoder reset, to skip %d", mSkip)); 1.849 + 1.850 + return res; 1.851 +} 1.852 + 1.853 +bool OpusState::Init(void) 1.854 +{ 1.855 + if (!mActive) 1.856 + return false; 1.857 + 1.858 + int error; 1.859 + 1.860 + NS_ASSERTION(mDecoder == nullptr, "leaking OpusDecoder"); 1.861 + 1.862 + mDecoder = opus_multistream_decoder_create(mParser->mRate, 1.863 + mParser->mChannels, 1.864 + mParser->mStreams, 1.865 + mParser->mCoupledStreams, 1.866 + mParser->mMappingTable, 1.867 + &error); 1.868 + 1.869 + mSkip = mParser->mPreSkip; 1.870 + 1.871 + LOG(PR_LOG_DEBUG, ("Opus decoder init, to skip %d", mSkip)); 1.872 + 1.873 + return error == OPUS_OK; 1.874 +} 1.875 + 1.876 +bool OpusState::DecodeHeader(ogg_packet* aPacket) 1.877 +{ 1.878 + nsAutoRef<ogg_packet> autoRelease(aPacket); 1.879 + switch(mPacketCount++) { 1.880 + // Parse the id header. 1.881 + case 0: { 1.882 + mParser = new OpusParser; 1.883 + if(!mParser->DecodeHeader(aPacket->packet, aPacket->bytes)) { 1.884 + return false; 1.885 + } 1.886 + mRate = mParser->mRate; 1.887 + mChannels = mParser->mChannels; 1.888 + mPreSkip = mParser->mPreSkip; 1.889 +#ifdef MOZ_SAMPLE_TYPE_FLOAT32 1.890 + mGain = mParser->mGain; 1.891 +#else 1.892 + mGain_Q16 = mParser->mGain_Q16; 1.893 +#endif 1.894 + } 1.895 + break; 1.896 + 1.897 + // Parse the metadata header. 1.898 + case 1: { 1.899 + if(!mParser->DecodeTags(aPacket->packet, aPacket->bytes)) { 1.900 + return false; 1.901 + } 1.902 + } 1.903 + break; 1.904 + 1.905 + // We made it to the first data packet (which includes reconstructing 1.906 + // timestamps for it in PageIn). Success! 1.907 + default: { 1.908 + mDoneReadingHeaders = true; 1.909 + // Put it back on the queue so we can decode it. 1.910 + mPackets.PushFront(autoRelease.disown()); 1.911 + } 1.912 + break; 1.913 + } 1.914 + return true; 1.915 +} 1.916 + 1.917 +/* Construct and return a tags hashmap from our internal array */ 1.918 +MetadataTags* OpusState::GetTags() 1.919 +{ 1.920 + MetadataTags* tags; 1.921 + 1.922 + tags = new MetadataTags; 1.923 + for (uint32_t i = 0; i < mParser->mTags.Length(); i++) { 1.924 + AddVorbisComment(tags, mParser->mTags[i].Data(), mParser->mTags[i].Length()); 1.925 + } 1.926 + 1.927 + return tags; 1.928 +} 1.929 + 1.930 +/* Return the timestamp (in microseconds) equivalent to a granulepos. */ 1.931 +int64_t OpusState::Time(int64_t aGranulepos) 1.932 +{ 1.933 + if (!mActive) 1.934 + return -1; 1.935 + 1.936 + return Time(mParser->mPreSkip, aGranulepos); 1.937 +} 1.938 + 1.939 +int64_t OpusState::Time(int aPreSkip, int64_t aGranulepos) 1.940 +{ 1.941 + if (aGranulepos < 0) 1.942 + return -1; 1.943 + 1.944 + // Ogg Opus always runs at a granule rate of 48 kHz. 1.945 + CheckedInt64 t = CheckedInt64(aGranulepos - aPreSkip) * USECS_PER_S; 1.946 + return t.isValid() ? t.value() / 48000 : -1; 1.947 +} 1.948 + 1.949 +bool OpusState::IsHeader(ogg_packet* aPacket) 1.950 +{ 1.951 + return aPacket->bytes >= 16 && 1.952 + (!memcmp(aPacket->packet, "OpusHead", 8) || 1.953 + !memcmp(aPacket->packet, "OpusTags", 8)); 1.954 +} 1.955 + 1.956 +nsresult OpusState::PageIn(ogg_page* aPage) 1.957 +{ 1.958 + if (!mActive) 1.959 + return NS_OK; 1.960 + NS_ASSERTION(static_cast<uint32_t>(ogg_page_serialno(aPage)) == mSerial, 1.961 + "Page must be for this stream!"); 1.962 + if (ogg_stream_pagein(&mState, aPage) == -1) 1.963 + return NS_ERROR_FAILURE; 1.964 + 1.965 + bool haveGranulepos; 1.966 + nsresult rv = PacketOutUntilGranulepos(haveGranulepos); 1.967 + if (NS_FAILED(rv) || !haveGranulepos || mPacketCount < 2) 1.968 + return rv; 1.969 + if(!ReconstructOpusGranulepos()) 1.970 + return NS_ERROR_FAILURE; 1.971 + for (uint32_t i = 0; i < mUnstamped.Length(); i++) { 1.972 + ogg_packet* packet = mUnstamped[i]; 1.973 + NS_ASSERTION(!IsHeader(packet), "Don't try to play a header packet"); 1.974 + NS_ASSERTION(packet->granulepos != -1, "Packet should have a granulepos"); 1.975 + mPackets.Append(packet); 1.976 + } 1.977 + mUnstamped.Clear(); 1.978 + return NS_OK; 1.979 +} 1.980 + 1.981 +// Helper method to return the change in granule position due to an Opus packet 1.982 +// (as distinct from the number of samples in the packet, which depends on the 1.983 +// decoder rate). It should work with a multistream Opus file, and continue to 1.984 +// work should we ever allow the decoder to decode at a rate other than 48 kHz. 1.985 +// It even works before we've created the actual Opus decoder. 1.986 +static int GetOpusDeltaGP(ogg_packet* packet) 1.987 +{ 1.988 + int nframes; 1.989 + nframes = opus_packet_get_nb_frames(packet->packet, packet->bytes); 1.990 + if (nframes > 0) { 1.991 + return nframes*opus_packet_get_samples_per_frame(packet->packet, 48000); 1.992 + } 1.993 + NS_WARNING("Invalid Opus packet."); 1.994 + return nframes; 1.995 +} 1.996 + 1.997 +bool OpusState::ReconstructOpusGranulepos(void) 1.998 +{ 1.999 + NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); 1.1000 + ogg_packet* last = mUnstamped[mUnstamped.Length()-1]; 1.1001 + NS_ASSERTION(last->e_o_s || last->granulepos > 0, 1.1002 + "Must know last granulepos!"); 1.1003 + int64_t gp; 1.1004 + // If this is the last page, and we've seen at least one previous page (or 1.1005 + // this is the first page)... 1.1006 + if (last->e_o_s) { 1.1007 + if (mPrevPageGranulepos != -1) { 1.1008 + // If this file only has one page and the final granule position is 1.1009 + // smaller than the pre-skip amount, we MUST reject the stream. 1.1010 + if (!mDoneReadingHeaders && last->granulepos < mPreSkip) 1.1011 + return false; 1.1012 + int64_t last_gp = last->granulepos; 1.1013 + gp = mPrevPageGranulepos; 1.1014 + // Loop through the packets forwards, adding the current packet's 1.1015 + // duration to the previous granulepos to get the value for the 1.1016 + // current packet. 1.1017 + for (uint32_t i = 0; i < mUnstamped.Length() - 1; ++i) { 1.1018 + ogg_packet* packet = mUnstamped[i]; 1.1019 + int offset = GetOpusDeltaGP(packet); 1.1020 + // Check for error (negative offset) and overflow. 1.1021 + if (offset >= 0 && gp <= INT64_MAX - offset) { 1.1022 + gp += offset; 1.1023 + if (gp >= last_gp) { 1.1024 + NS_WARNING("Opus end trimming removed more than a full packet."); 1.1025 + // We were asked to remove a full packet's worth of data or more. 1.1026 + // Encoders SHOULD NOT produce streams like this, but we'll handle 1.1027 + // it for them anyway. 1.1028 + gp = last_gp; 1.1029 + for (uint32_t j = i+1; j < mUnstamped.Length(); ++j) { 1.1030 + OggCodecState::ReleasePacket(mUnstamped[j]); 1.1031 + } 1.1032 + mUnstamped.RemoveElementsAt(i+1, mUnstamped.Length() - (i+1)); 1.1033 + last = packet; 1.1034 + last->e_o_s = 1; 1.1035 + } 1.1036 + } 1.1037 + packet->granulepos = gp; 1.1038 + } 1.1039 + mPrevPageGranulepos = last_gp; 1.1040 + return true; 1.1041 + } else { 1.1042 + NS_WARNING("No previous granule position to use for Opus end trimming."); 1.1043 + // If we don't have a previous granule position, fall through. 1.1044 + // We simply won't trim any samples from the end. 1.1045 + // TODO: Are we guaranteed to have seen a previous page if there is one? 1.1046 + } 1.1047 + } 1.1048 + 1.1049 + gp = last->granulepos; 1.1050 + // Loop through the packets backwards, subtracting the next 1.1051 + // packet's duration from its granulepos to get the value 1.1052 + // for the current packet. 1.1053 + for (uint32_t i = mUnstamped.Length() - 1; i > 0; i--) { 1.1054 + int offset = GetOpusDeltaGP(mUnstamped[i]); 1.1055 + // Check for error (negative offset) and overflow. 1.1056 + if (offset >= 0) { 1.1057 + if (offset <= gp) { 1.1058 + gp -= offset; 1.1059 + } else { 1.1060 + // If the granule position of the first data page is smaller than the 1.1061 + // number of decodable audio samples on that page, then we MUST reject 1.1062 + // the stream. 1.1063 + if (!mDoneReadingHeaders) 1.1064 + return false; 1.1065 + // It's too late to reject the stream. 1.1066 + // If we get here, this almost certainly means the file has screwed-up 1.1067 + // timestamps somewhere after the first page. 1.1068 + NS_WARNING("Clamping negative Opus granulepos to zero."); 1.1069 + gp = 0; 1.1070 + } 1.1071 + } 1.1072 + mUnstamped[i - 1]->granulepos = gp; 1.1073 + } 1.1074 + 1.1075 + // Check to make sure the first granule position is at least as large as the 1.1076 + // total number of samples decodable from the first page with completed 1.1077 + // packets. This requires looking at the duration of the first packet, too. 1.1078 + // We MUST reject such streams. 1.1079 + if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0]) > gp) 1.1080 + return false; 1.1081 + mPrevPageGranulepos = last->granulepos; 1.1082 + return true; 1.1083 +} 1.1084 +#endif /* MOZ_OPUS */ 1.1085 + 1.1086 +SkeletonState::SkeletonState(ogg_page* aBosPage) : 1.1087 + OggCodecState(aBosPage, true), 1.1088 + mVersion(0), 1.1089 + mPresentationTime(0), 1.1090 + mLength(0) 1.1091 +{ 1.1092 + MOZ_COUNT_CTOR(SkeletonState); 1.1093 +} 1.1094 + 1.1095 +SkeletonState::~SkeletonState() 1.1096 +{ 1.1097 + MOZ_COUNT_DTOR(SkeletonState); 1.1098 +} 1.1099 + 1.1100 +// Support for Ogg Skeleton 4.0, as per specification at: 1.1101 +// http://wiki.xiph.org/Ogg_Skeleton_4 1.1102 + 1.1103 +// Minimum length in bytes of a Skeleton header packet. 1.1104 +static const long SKELETON_MIN_HEADER_LEN = 28; 1.1105 +static const long SKELETON_4_0_MIN_HEADER_LEN = 80; 1.1106 + 1.1107 +// Minimum length in bytes of a Skeleton 4.0 index packet. 1.1108 +static const long SKELETON_4_0_MIN_INDEX_LEN = 42; 1.1109 + 1.1110 +// Minimum possible size of a compressed index keypoint. 1.1111 +static const size_t MIN_KEY_POINT_SIZE = 2; 1.1112 + 1.1113 +// Byte offset of the major and minor version numbers in the 1.1114 +// Ogg Skeleton 4.0 header packet. 1.1115 +static const size_t SKELETON_VERSION_MAJOR_OFFSET = 8; 1.1116 +static const size_t SKELETON_VERSION_MINOR_OFFSET = 10; 1.1117 + 1.1118 +// Byte-offsets of the presentation time numerator and denominator 1.1119 +static const size_t SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET = 12; 1.1120 +static const size_t SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET = 20; 1.1121 + 1.1122 +// Byte-offsets of the length of file field in the Skeleton 4.0 header packet. 1.1123 +static const size_t SKELETON_FILE_LENGTH_OFFSET = 64; 1.1124 + 1.1125 +// Byte-offsets of the fields in the Skeleton index packet. 1.1126 +static const size_t INDEX_SERIALNO_OFFSET = 6; 1.1127 +static const size_t INDEX_NUM_KEYPOINTS_OFFSET = 10; 1.1128 +static const size_t INDEX_TIME_DENOM_OFFSET = 18; 1.1129 +static const size_t INDEX_FIRST_NUMER_OFFSET = 26; 1.1130 +static const size_t INDEX_LAST_NUMER_OFFSET = 34; 1.1131 +static const size_t INDEX_KEYPOINT_OFFSET = 42; 1.1132 + 1.1133 +static bool IsSkeletonBOS(ogg_packet* aPacket) 1.1134 +{ 1.1135 + return aPacket->bytes >= SKELETON_MIN_HEADER_LEN && 1.1136 + memcmp(reinterpret_cast<char*>(aPacket->packet), "fishead", 8) == 0; 1.1137 +} 1.1138 + 1.1139 +static bool IsSkeletonIndex(ogg_packet* aPacket) 1.1140 +{ 1.1141 + return aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN && 1.1142 + memcmp(reinterpret_cast<char*>(aPacket->packet), "index", 5) == 0; 1.1143 +} 1.1144 + 1.1145 +// Reads a variable length encoded integer at p. Will not read 1.1146 +// past aLimit. Returns pointer to character after end of integer. 1.1147 +static const unsigned char* ReadVariableLengthInt(const unsigned char* p, 1.1148 + const unsigned char* aLimit, 1.1149 + int64_t& n) 1.1150 +{ 1.1151 + int shift = 0; 1.1152 + int64_t byte = 0; 1.1153 + n = 0; 1.1154 + while (p < aLimit && 1.1155 + (byte & 0x80) != 0x80 && 1.1156 + shift < 57) 1.1157 + { 1.1158 + byte = static_cast<int64_t>(*p); 1.1159 + n |= ((byte & 0x7f) << shift); 1.1160 + shift += 7; 1.1161 + p++; 1.1162 + } 1.1163 + return p; 1.1164 +} 1.1165 + 1.1166 +bool SkeletonState::DecodeIndex(ogg_packet* aPacket) 1.1167 +{ 1.1168 + NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN, 1.1169 + "Index must be at least minimum size"); 1.1170 + if (!mActive) { 1.1171 + return false; 1.1172 + } 1.1173 + 1.1174 + uint32_t serialno = LittleEndian::readUint32(aPacket->packet + INDEX_SERIALNO_OFFSET); 1.1175 + int64_t numKeyPoints = LittleEndian::readInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET); 1.1176 + 1.1177 + int64_t endTime = 0, startTime = 0; 1.1178 + const unsigned char* p = aPacket->packet; 1.1179 + 1.1180 + int64_t timeDenom = LittleEndian::readInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET); 1.1181 + if (timeDenom == 0) { 1.1182 + LOG(PR_LOG_DEBUG, ("Ogg Skeleton Index packet for stream %u has 0 " 1.1183 + "timestamp denominator.", serialno)); 1.1184 + return (mActive = false); 1.1185 + } 1.1186 + 1.1187 + // Extract the start time. 1.1188 + CheckedInt64 t = CheckedInt64(LittleEndian::readInt64(p + INDEX_FIRST_NUMER_OFFSET)) * USECS_PER_S; 1.1189 + if (!t.isValid()) { 1.1190 + return (mActive = false); 1.1191 + } else { 1.1192 + startTime = t.value() / timeDenom; 1.1193 + } 1.1194 + 1.1195 + // Extract the end time. 1.1196 + t = LittleEndian::readInt64(p + INDEX_LAST_NUMER_OFFSET) * USECS_PER_S; 1.1197 + if (!t.isValid()) { 1.1198 + return (mActive = false); 1.1199 + } else { 1.1200 + endTime = t.value() / timeDenom; 1.1201 + } 1.1202 + 1.1203 + // Check the numKeyPoints value read, ensure we're not going to run out of 1.1204 + // memory while trying to decode the index packet. 1.1205 + CheckedInt64 minPacketSize = (CheckedInt64(numKeyPoints) * MIN_KEY_POINT_SIZE) + INDEX_KEYPOINT_OFFSET; 1.1206 + if (!minPacketSize.isValid()) 1.1207 + { 1.1208 + return (mActive = false); 1.1209 + } 1.1210 + 1.1211 + int64_t sizeofIndex = aPacket->bytes - INDEX_KEYPOINT_OFFSET; 1.1212 + int64_t maxNumKeyPoints = sizeofIndex / MIN_KEY_POINT_SIZE; 1.1213 + if (aPacket->bytes < minPacketSize.value() || 1.1214 + numKeyPoints > maxNumKeyPoints || 1.1215 + numKeyPoints < 0) 1.1216 + { 1.1217 + // Packet size is less than the theoretical minimum size, or the packet is 1.1218 + // claiming to store more keypoints than it's capable of storing. This means 1.1219 + // that the numKeyPoints field is too large or small for the packet to 1.1220 + // possibly contain as many packets as it claims to, so the numKeyPoints 1.1221 + // field is possibly malicious. Don't try decoding this index, we may run 1.1222 + // out of memory. 1.1223 + LOG(PR_LOG_DEBUG, ("Possibly malicious number of key points reported " 1.1224 + "(%lld) in index packet for stream %u.", 1.1225 + numKeyPoints, 1.1226 + serialno)); 1.1227 + return (mActive = false); 1.1228 + } 1.1229 + 1.1230 + nsAutoPtr<nsKeyFrameIndex> keyPoints(new nsKeyFrameIndex(startTime, endTime)); 1.1231 + 1.1232 + p = aPacket->packet + INDEX_KEYPOINT_OFFSET; 1.1233 + const unsigned char* limit = aPacket->packet + aPacket->bytes; 1.1234 + int64_t numKeyPointsRead = 0; 1.1235 + CheckedInt64 offset = 0; 1.1236 + CheckedInt64 time = 0; 1.1237 + while (p < limit && 1.1238 + numKeyPointsRead < numKeyPoints) 1.1239 + { 1.1240 + int64_t delta = 0; 1.1241 + p = ReadVariableLengthInt(p, limit, delta); 1.1242 + offset += delta; 1.1243 + if (p == limit || 1.1244 + !offset.isValid() || 1.1245 + offset.value() > mLength || 1.1246 + offset.value() < 0) 1.1247 + { 1.1248 + return (mActive = false); 1.1249 + } 1.1250 + p = ReadVariableLengthInt(p, limit, delta); 1.1251 + time += delta; 1.1252 + if (!time.isValid() || 1.1253 + time.value() > endTime || 1.1254 + time.value() < startTime) 1.1255 + { 1.1256 + return (mActive = false); 1.1257 + } 1.1258 + CheckedInt64 timeUsecs = time * USECS_PER_S; 1.1259 + if (!timeUsecs.isValid()) 1.1260 + return mActive = false; 1.1261 + timeUsecs /= timeDenom; 1.1262 + keyPoints->Add(offset.value(), timeUsecs.value()); 1.1263 + numKeyPointsRead++; 1.1264 + } 1.1265 + 1.1266 + int32_t keyPointsRead = keyPoints->Length(); 1.1267 + if (keyPointsRead > 0) { 1.1268 + mIndex.Put(serialno, keyPoints.forget()); 1.1269 + } 1.1270 + 1.1271 + LOG(PR_LOG_DEBUG, ("Loaded %d keypoints for Skeleton on stream %u", 1.1272 + keyPointsRead, serialno)); 1.1273 + return true; 1.1274 +} 1.1275 + 1.1276 +nsresult SkeletonState::IndexedSeekTargetForTrack(uint32_t aSerialno, 1.1277 + int64_t aTarget, 1.1278 + nsKeyPoint& aResult) 1.1279 +{ 1.1280 + nsKeyFrameIndex* index = nullptr; 1.1281 + mIndex.Get(aSerialno, &index); 1.1282 + 1.1283 + if (!index || 1.1284 + index->Length() == 0 || 1.1285 + aTarget < index->mStartTime || 1.1286 + aTarget > index->mEndTime) 1.1287 + { 1.1288 + return NS_ERROR_FAILURE; 1.1289 + } 1.1290 + 1.1291 + // Binary search to find the last key point with time less than target. 1.1292 + int start = 0; 1.1293 + int end = index->Length() - 1; 1.1294 + while (end > start) { 1.1295 + int mid = start + ((end - start + 1) >> 1); 1.1296 + if (index->Get(mid).mTime == aTarget) { 1.1297 + start = mid; 1.1298 + break; 1.1299 + } else if (index->Get(mid).mTime < aTarget) { 1.1300 + start = mid; 1.1301 + } else { 1.1302 + end = mid - 1; 1.1303 + } 1.1304 + } 1.1305 + 1.1306 + aResult = index->Get(start); 1.1307 + NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target"); 1.1308 + return NS_OK; 1.1309 +} 1.1310 + 1.1311 +nsresult SkeletonState::IndexedSeekTarget(int64_t aTarget, 1.1312 + nsTArray<uint32_t>& aTracks, 1.1313 + nsSeekTarget& aResult) 1.1314 +{ 1.1315 + if (!mActive || mVersion < SKELETON_VERSION(4,0)) { 1.1316 + return NS_ERROR_FAILURE; 1.1317 + } 1.1318 + // Loop over all requested tracks' indexes, and get the keypoint for that 1.1319 + // seek target. Record the keypoint with the lowest offset, this will be 1.1320 + // our seek result. User must seek to the one with lowest offset to ensure we 1.1321 + // pass "keyframes" on all tracks when we decode forwards to the seek target. 1.1322 + nsSeekTarget r; 1.1323 + for (uint32_t i=0; i<aTracks.Length(); i++) { 1.1324 + nsKeyPoint k; 1.1325 + if (NS_SUCCEEDED(IndexedSeekTargetForTrack(aTracks[i], aTarget, k)) && 1.1326 + k.mOffset < r.mKeyPoint.mOffset) 1.1327 + { 1.1328 + r.mKeyPoint = k; 1.1329 + r.mSerial = aTracks[i]; 1.1330 + } 1.1331 + } 1.1332 + if (r.IsNull()) { 1.1333 + return NS_ERROR_FAILURE; 1.1334 + } 1.1335 + LOG(PR_LOG_DEBUG, ("Indexed seek target for time %lld is offset %lld", 1.1336 + aTarget, r.mKeyPoint.mOffset)); 1.1337 + aResult = r; 1.1338 + return NS_OK; 1.1339 +} 1.1340 + 1.1341 +nsresult SkeletonState::GetDuration(const nsTArray<uint32_t>& aTracks, 1.1342 + int64_t& aDuration) 1.1343 +{ 1.1344 + if (!mActive || 1.1345 + mVersion < SKELETON_VERSION(4,0) || 1.1346 + !HasIndex() || 1.1347 + aTracks.Length() == 0) 1.1348 + { 1.1349 + return NS_ERROR_FAILURE; 1.1350 + } 1.1351 + int64_t endTime = INT64_MIN; 1.1352 + int64_t startTime = INT64_MAX; 1.1353 + for (uint32_t i=0; i<aTracks.Length(); i++) { 1.1354 + nsKeyFrameIndex* index = nullptr; 1.1355 + mIndex.Get(aTracks[i], &index); 1.1356 + if (!index) { 1.1357 + // Can't get the timestamps for one of the required tracks, fail. 1.1358 + return NS_ERROR_FAILURE; 1.1359 + } 1.1360 + if (index->mEndTime > endTime) { 1.1361 + endTime = index->mEndTime; 1.1362 + } 1.1363 + if (index->mStartTime < startTime) { 1.1364 + startTime = index->mStartTime; 1.1365 + } 1.1366 + } 1.1367 + NS_ASSERTION(endTime > startTime, "Duration must be positive"); 1.1368 + CheckedInt64 duration = CheckedInt64(endTime) - startTime; 1.1369 + aDuration = duration.isValid() ? duration.value() : 0; 1.1370 + return duration.isValid() ? NS_OK : NS_ERROR_FAILURE; 1.1371 +} 1.1372 + 1.1373 +bool SkeletonState::DecodeHeader(ogg_packet* aPacket) 1.1374 +{ 1.1375 + nsAutoRef<ogg_packet> autoRelease(aPacket); 1.1376 + if (IsSkeletonBOS(aPacket)) { 1.1377 + uint16_t verMajor = LittleEndian::readUint16(aPacket->packet + SKELETON_VERSION_MAJOR_OFFSET); 1.1378 + uint16_t verMinor = LittleEndian::readUint16(aPacket->packet + SKELETON_VERSION_MINOR_OFFSET); 1.1379 + 1.1380 + // Read the presentation time. We read this before the version check as the 1.1381 + // presentation time exists in all versions. 1.1382 + int64_t n = LittleEndian::readInt64(aPacket->packet + SKELETON_PRESENTATION_TIME_NUMERATOR_OFFSET); 1.1383 + int64_t d = LittleEndian::readInt64(aPacket->packet + SKELETON_PRESENTATION_TIME_DENOMINATOR_OFFSET); 1.1384 + mPresentationTime = d == 0 ? 0 : (static_cast<float>(n) / static_cast<float>(d)) * USECS_PER_S; 1.1385 + 1.1386 + mVersion = SKELETON_VERSION(verMajor, verMinor); 1.1387 + // We can only care to parse Skeleton version 4.0+. 1.1388 + if (mVersion < SKELETON_VERSION(4,0) || 1.1389 + mVersion >= SKELETON_VERSION(5,0) || 1.1390 + aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) 1.1391 + return false; 1.1392 + 1.1393 + // Extract the segment length. 1.1394 + mLength = LittleEndian::readInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET); 1.1395 + 1.1396 + LOG(PR_LOG_DEBUG, ("Skeleton segment length: %lld", mLength)); 1.1397 + 1.1398 + // Initialize the serialno-to-index map. 1.1399 + return true; 1.1400 + } else if (IsSkeletonIndex(aPacket) && mVersion >= SKELETON_VERSION(4,0)) { 1.1401 + return DecodeIndex(aPacket); 1.1402 + } else if (aPacket->e_o_s) { 1.1403 + mDoneReadingHeaders = true; 1.1404 + return true; 1.1405 + } 1.1406 + return true; 1.1407 +} 1.1408 + 1.1409 + 1.1410 +} // namespace mozilla 1.1411 +