1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/media/webspeech/synth/nsSpeechTask.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,520 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim:set ts=2 sw=2 sts=2 et cindent: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "AudioSegment.h" 1.11 +#include "nsSpeechTask.h" 1.12 +#include "SpeechSynthesis.h" 1.13 + 1.14 +// GetCurrentTime is defined in winbase.h as zero argument macro forwarding to 1.15 +// GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime(). 1.16 +#ifdef GetCurrentTime 1.17 +#undef GetCurrentTime 1.18 +#endif 1.19 + 1.20 +#undef LOG 1.21 +#ifdef PR_LOGGING 1.22 +extern PRLogModuleInfo* GetSpeechSynthLog(); 1.23 +#define LOG(type, msg) PR_LOG(GetSpeechSynthLog(), type, msg) 1.24 +#else 1.25 +#define LOG(type, msg) 1.26 +#endif 1.27 + 1.28 +namespace mozilla { 1.29 +namespace dom { 1.30 + 1.31 +class SynthStreamListener : public MediaStreamListener 1.32 +{ 1.33 +public: 1.34 + SynthStreamListener(nsSpeechTask* aSpeechTask) : 1.35 + mSpeechTask(aSpeechTask), 1.36 + mStarted(false) 1.37 + { 1.38 + } 1.39 + 1.40 + void DoNotifyStarted() 1.41 + { 1.42 + if (mSpeechTask) { 1.43 + mSpeechTask->DispatchStartImpl(); 1.44 + } 1.45 + } 1.46 + 1.47 + void DoNotifyFinished() 1.48 + { 1.49 + if (mSpeechTask) { 1.50 + mSpeechTask->DispatchEndImpl(mSpeechTask->GetCurrentTime(), 1.51 + mSpeechTask->GetCurrentCharOffset()); 1.52 + } 1.53 + } 1.54 + 1.55 + virtual void NotifyFinished(MediaStreamGraph* aGraph) 1.56 + { 1.57 + nsCOMPtr<nsIRunnable> event = 1.58 + NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyFinished); 1.59 + aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget()); 1.60 + } 1.61 + 1.62 + virtual void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) 1.63 + { 1.64 + if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) { 1.65 + mStarted = true; 1.66 + nsCOMPtr<nsIRunnable> event = 1.67 + NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyStarted); 1.68 + aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget()); 1.69 + } 1.70 + } 1.71 + 1.72 + virtual void NotifyRemoved(MediaStreamGraph* aGraph) 1.73 + { 1.74 + mSpeechTask = nullptr; 1.75 + } 1.76 + 1.77 +private: 1.78 + // Raw pointer; if we exist, the stream exists, 1.79 + // and 'mSpeechTask' exclusively owns it and therefor exists as well. 1.80 + nsSpeechTask* mSpeechTask; 1.81 + 1.82 + bool mStarted; 1.83 +}; 1.84 + 1.85 +// nsSpeechTask 1.86 + 1.87 +NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance); 1.88 + 1.89 +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask) 1.90 + NS_INTERFACE_MAP_ENTRY(nsISpeechTask) 1.91 + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask) 1.92 +NS_INTERFACE_MAP_END 1.93 + 1.94 +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) 1.95 +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) 1.96 + 1.97 +nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance) 1.98 + : mUtterance(aUtterance) 1.99 + , mCallback(nullptr) 1.100 + , mIndirectAudio(false) 1.101 +{ 1.102 + mText = aUtterance->mText; 1.103 + mVolume = aUtterance->Volume(); 1.104 +} 1.105 + 1.106 +nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) 1.107 + : mUtterance(nullptr) 1.108 + , mVolume(aVolume) 1.109 + , mText(aText) 1.110 + , mCallback(nullptr) 1.111 + , mIndirectAudio(false) 1.112 +{ 1.113 +} 1.114 + 1.115 +nsSpeechTask::~nsSpeechTask() 1.116 +{ 1.117 + if (mStream) { 1.118 + if (!mStream->IsDestroyed()) { 1.119 + mStream->Destroy(); 1.120 + } 1.121 + 1.122 + mStream = nullptr; 1.123 + } 1.124 +} 1.125 + 1.126 +NS_IMETHODIMP 1.127 +nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback, 1.128 + uint32_t aChannels, uint32_t aRate, uint8_t argc) 1.129 +{ 1.130 + MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); 1.131 + 1.132 + LOG(PR_LOG_DEBUG, ("nsSpeechTask::Setup")); 1.133 + 1.134 + mCallback = aCallback; 1.135 + 1.136 + if (argc < 2) { 1.137 + return NS_OK; 1.138 + } 1.139 + 1.140 + if (mIndirectAudio) { 1.141 + NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services."); 1.142 + } 1.143 + 1.144 + // XXX: Is there setup overhead here that hurtls latency? 1.145 + mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr); 1.146 + mStream->AddListener(new SynthStreamListener(this)); 1.147 + 1.148 + // XXX: Support more than one channel 1.149 + NS_ENSURE_TRUE(aChannels == 1, NS_ERROR_FAILURE); 1.150 + 1.151 + mChannels = aChannels; 1.152 + 1.153 + AudioSegment* segment = new AudioSegment(); 1.154 + mStream->AddTrack(1, aRate, 0, segment); 1.155 + mStream->AddAudioOutput(this); 1.156 + mStream->SetAudioOutputVolume(this, mVolume); 1.157 + 1.158 + return NS_OK; 1.159 +} 1.160 + 1.161 +NS_IMETHODIMP 1.162 +nsSpeechTask::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks, 1.163 + JSContext* aCx) 1.164 +{ 1.165 + MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); 1.166 + 1.167 + NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE); 1.168 + NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE); 1.169 + NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE); 1.170 + 1.171 + if (mIndirectAudio) { 1.172 + NS_WARNING("Can't call SendAudio from an indirect audio speech service."); 1.173 + return NS_ERROR_FAILURE; 1.174 + } 1.175 + 1.176 + JS::Rooted<JSObject*> darray(aCx, &aData.toObject()); 1.177 + JSAutoCompartment ac(aCx, darray); 1.178 + 1.179 + JS::Rooted<JSObject*> tsrc(aCx, nullptr); 1.180 + 1.181 + // Allow either Int16Array or plain JS Array 1.182 + if (JS_IsInt16Array(darray)) { 1.183 + tsrc = darray; 1.184 + } else if (JS_IsArrayObject(aCx, darray)) { 1.185 + tsrc = JS_NewInt16ArrayFromArray(aCx, darray); 1.186 + } 1.187 + 1.188 + if (!tsrc) { 1.189 + return NS_ERROR_DOM_TYPE_MISMATCH_ERR; 1.190 + } 1.191 + 1.192 + SendAudioImpl(JS_GetInt16ArrayData(tsrc), 1.193 + JS_GetTypedArrayLength(tsrc)); 1.194 + 1.195 + return NS_OK; 1.196 +} 1.197 + 1.198 +NS_IMETHODIMP 1.199 +nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen) 1.200 +{ 1.201 + MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); 1.202 + 1.203 + NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE); 1.204 + NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE); 1.205 + NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE); 1.206 + 1.207 + if (mIndirectAudio) { 1.208 + NS_WARNING("Can't call SendAudio from an indirect audio speech service."); 1.209 + return NS_ERROR_FAILURE; 1.210 + } 1.211 + 1.212 + SendAudioImpl(aData, aDataLen); 1.213 + 1.214 + return NS_OK; 1.215 +} 1.216 + 1.217 +void 1.218 +nsSpeechTask::SendAudioImpl(int16_t* aData, uint32_t aDataLen) 1.219 +{ 1.220 + if (aDataLen == 0) { 1.221 + mStream->EndAllTrackAndFinish(); 1.222 + return; 1.223 + } 1.224 + 1.225 + nsRefPtr<mozilla::SharedBuffer> samples = 1.226 + SharedBuffer::Create(aDataLen * sizeof(int16_t)); 1.227 + int16_t* frames = static_cast<int16_t*>(samples->Data()); 1.228 + 1.229 + for (uint32_t i = 0; i < aDataLen; i++) { 1.230 + frames[i] = aData[i]; 1.231 + } 1.232 + 1.233 + AudioSegment segment; 1.234 + nsAutoTArray<const int16_t*, 1> channelData; 1.235 + channelData.AppendElement(frames); 1.236 + segment.AppendFrames(samples.forget(), channelData, aDataLen); 1.237 + mStream->AppendToTrack(1, &segment); 1.238 + mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX); 1.239 +} 1.240 + 1.241 +NS_IMETHODIMP 1.242 +nsSpeechTask::DispatchStart() 1.243 +{ 1.244 + if (!mIndirectAudio) { 1.245 + NS_WARNING("Can't call DispatchStart() from a direct audio speech service"); 1.246 + return NS_ERROR_FAILURE; 1.247 + } 1.248 + 1.249 + return DispatchStartImpl(); 1.250 +} 1.251 + 1.252 +nsresult 1.253 +nsSpeechTask::DispatchStartImpl() 1.254 +{ 1.255 + LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchStart")); 1.256 + 1.257 + MOZ_ASSERT(mUtterance); 1.258 + NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING, 1.259 + NS_ERROR_NOT_AVAILABLE); 1.260 + 1.261 + mUtterance->mState = SpeechSynthesisUtterance::STATE_SPEAKING; 1.262 + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("start"), 0, 0, 1.263 + NS_LITERAL_STRING("")); 1.264 + 1.265 + return NS_OK; 1.266 +} 1.267 + 1.268 +NS_IMETHODIMP 1.269 +nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) 1.270 +{ 1.271 + if (!mIndirectAudio) { 1.272 + NS_WARNING("Can't call DispatchEnd() from a direct audio speech service"); 1.273 + return NS_ERROR_FAILURE; 1.274 + } 1.275 + 1.276 + return DispatchEndImpl(aElapsedTime, aCharIndex); 1.277 +} 1.278 + 1.279 +nsresult 1.280 +nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) 1.281 +{ 1.282 + LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchEnd\n")); 1.283 + 1.284 + MOZ_ASSERT(mUtterance); 1.285 + NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, 1.286 + NS_ERROR_NOT_AVAILABLE); 1.287 + 1.288 + // XXX: This should not be here, but it prevents a crash in MSG. 1.289 + if (mStream) { 1.290 + mStream->Destroy(); 1.291 + } 1.292 + 1.293 + nsRefPtr<SpeechSynthesisUtterance> utterance = mUtterance; 1.294 + 1.295 + if (mSpeechSynthesis) { 1.296 + mSpeechSynthesis->OnEnd(this); 1.297 + } 1.298 + 1.299 + if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) { 1.300 + utterance->mState = SpeechSynthesisUtterance::STATE_NONE; 1.301 + } else { 1.302 + utterance->mState = SpeechSynthesisUtterance::STATE_ENDED; 1.303 + utterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("end"), 1.304 + aCharIndex, aElapsedTime, 1.305 + EmptyString()); 1.306 + } 1.307 + 1.308 + return NS_OK; 1.309 +} 1.310 + 1.311 +NS_IMETHODIMP 1.312 +nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) 1.313 +{ 1.314 + if (!mIndirectAudio) { 1.315 + NS_WARNING("Can't call DispatchPause() from a direct audio speech service"); 1.316 + return NS_ERROR_FAILURE; 1.317 + } 1.318 + 1.319 + return DispatchPauseImpl(aElapsedTime, aCharIndex); 1.320 +} 1.321 + 1.322 +nsresult 1.323 +nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) 1.324 +{ 1.325 + LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchPause")); 1.326 + MOZ_ASSERT(mUtterance); 1.327 + NS_ENSURE_FALSE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE); 1.328 + NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, 1.329 + NS_ERROR_NOT_AVAILABLE); 1.330 + 1.331 + mUtterance->mPaused = true; 1.332 + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"), 1.333 + aCharIndex, aElapsedTime, 1.334 + NS_LITERAL_STRING("")); 1.335 + return NS_OK; 1.336 +} 1.337 + 1.338 +NS_IMETHODIMP 1.339 +nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) 1.340 +{ 1.341 + if (!mIndirectAudio) { 1.342 + NS_WARNING("Can't call DispatchResume() from a direct audio speech service"); 1.343 + return NS_ERROR_FAILURE; 1.344 + } 1.345 + 1.346 + return DispatchResumeImpl(aElapsedTime, aCharIndex); 1.347 +} 1.348 + 1.349 +nsresult 1.350 +nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) 1.351 +{ 1.352 + LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchResume")); 1.353 + MOZ_ASSERT(mUtterance); 1.354 + NS_ENSURE_TRUE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE); 1.355 + NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, 1.356 + NS_ERROR_NOT_AVAILABLE); 1.357 + 1.358 + mUtterance->mPaused = false; 1.359 + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"), 1.360 + aCharIndex, aElapsedTime, 1.361 + NS_LITERAL_STRING("")); 1.362 + return NS_OK; 1.363 +} 1.364 + 1.365 +NS_IMETHODIMP 1.366 +nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) 1.367 +{ 1.368 + if (!mIndirectAudio) { 1.369 + NS_WARNING("Can't call DispatchError() from a direct audio speech service"); 1.370 + return NS_ERROR_FAILURE; 1.371 + } 1.372 + 1.373 + return DispatchErrorImpl(aElapsedTime, aCharIndex); 1.374 +} 1.375 + 1.376 +nsresult 1.377 +nsSpeechTask::DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) 1.378 +{ 1.379 + MOZ_ASSERT(mUtterance); 1.380 + NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, 1.381 + NS_ERROR_NOT_AVAILABLE); 1.382 + 1.383 + mUtterance->mState = SpeechSynthesisUtterance::STATE_ENDED; 1.384 + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("error"), 1.385 + aCharIndex, aElapsedTime, 1.386 + NS_LITERAL_STRING("")); 1.387 + return NS_OK; 1.388 +} 1.389 + 1.390 +NS_IMETHODIMP 1.391 +nsSpeechTask::DispatchBoundary(const nsAString& aName, 1.392 + float aElapsedTime, uint32_t aCharIndex) 1.393 +{ 1.394 + if (!mIndirectAudio) { 1.395 + NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service"); 1.396 + return NS_ERROR_FAILURE; 1.397 + } 1.398 + 1.399 + return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex); 1.400 +} 1.401 + 1.402 +nsresult 1.403 +nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, 1.404 + float aElapsedTime, uint32_t aCharIndex) 1.405 +{ 1.406 + MOZ_ASSERT(mUtterance); 1.407 + NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING, 1.408 + NS_ERROR_NOT_AVAILABLE); 1.409 + 1.410 + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("boundary"), 1.411 + aCharIndex, aElapsedTime, 1.412 + aName); 1.413 + return NS_OK; 1.414 +} 1.415 + 1.416 +NS_IMETHODIMP 1.417 +nsSpeechTask::DispatchMark(const nsAString& aName, 1.418 + float aElapsedTime, uint32_t aCharIndex) 1.419 +{ 1.420 + if (!mIndirectAudio) { 1.421 + NS_WARNING("Can't call DispatchMark() from a direct audio speech service"); 1.422 + return NS_ERROR_FAILURE; 1.423 + } 1.424 + 1.425 + return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); 1.426 +} 1.427 + 1.428 +nsresult 1.429 +nsSpeechTask::DispatchMarkImpl(const nsAString& aName, 1.430 + float aElapsedTime, uint32_t aCharIndex) 1.431 +{ 1.432 + MOZ_ASSERT(mUtterance); 1.433 + NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING, 1.434 + NS_ERROR_NOT_AVAILABLE); 1.435 + 1.436 + mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("mark"), 1.437 + aCharIndex, aElapsedTime, 1.438 + aName); 1.439 + return NS_OK; 1.440 +} 1.441 + 1.442 +void 1.443 +nsSpeechTask::Pause() 1.444 +{ 1.445 + MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); 1.446 + 1.447 + if (mUtterance->IsPaused() || 1.448 + mUtterance->GetState() == SpeechSynthesisUtterance::STATE_ENDED) { 1.449 + return; 1.450 + } 1.451 + 1.452 + if (mCallback) { 1.453 + DebugOnly<nsresult> rv = mCallback->OnPause(); 1.454 + NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); 1.455 + } 1.456 + 1.457 + if (mStream) { 1.458 + mStream->ChangeExplicitBlockerCount(1); 1.459 + } 1.460 + 1.461 + DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset()); 1.462 +} 1.463 + 1.464 +void 1.465 +nsSpeechTask::Resume() 1.466 +{ 1.467 + MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); 1.468 + 1.469 + if (!mUtterance->IsPaused()) { 1.470 + return; 1.471 + } 1.472 + 1.473 + if (mCallback) { 1.474 + DebugOnly<nsresult> rv = mCallback->OnResume(); 1.475 + NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onResume() callback"); 1.476 + } 1.477 + 1.478 + if (mStream) { 1.479 + mStream->ChangeExplicitBlockerCount(-1); 1.480 + } 1.481 + 1.482 + DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset()); 1.483 +} 1.484 + 1.485 +void 1.486 +nsSpeechTask::Cancel() 1.487 +{ 1.488 + MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); 1.489 + 1.490 + LOG(PR_LOG_DEBUG, ("nsSpeechTask::Cancel")); 1.491 + 1.492 + if (mCallback) { 1.493 + DebugOnly<nsresult> rv = mCallback->OnCancel(); 1.494 + NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onCancel() callback"); 1.495 + } 1.496 + 1.497 + if (mStream) { 1.498 + mStream->ChangeExplicitBlockerCount(1); 1.499 + } 1.500 + 1.501 + DispatchEndImpl(GetCurrentTime(), GetCurrentCharOffset()); 1.502 +} 1.503 + 1.504 +float 1.505 +nsSpeechTask::GetCurrentTime() 1.506 +{ 1.507 + return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0; 1.508 +} 1.509 + 1.510 +uint32_t 1.511 +nsSpeechTask::GetCurrentCharOffset() 1.512 +{ 1.513 + return mStream && mStream->IsFinished() ? mText.Length() : 0; 1.514 +} 1.515 + 1.516 +void 1.517 +nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) 1.518 +{ 1.519 + mSpeechSynthesis = aSpeechSynthesis; 1.520 +} 1.521 + 1.522 +} // namespace dom 1.523 +} // namespace mozilla