michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim:set ts=2 sw=2 sts=2 et cindent: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "AudioSegment.h" michael@0: #include "nsSpeechTask.h" michael@0: #include "SpeechSynthesis.h" michael@0: michael@0: // GetCurrentTime is defined in winbase.h as zero argument macro forwarding to michael@0: // GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime(). michael@0: #ifdef GetCurrentTime michael@0: #undef GetCurrentTime michael@0: #endif michael@0: michael@0: #undef LOG michael@0: #ifdef PR_LOGGING michael@0: extern PRLogModuleInfo* GetSpeechSynthLog(); michael@0: #define LOG(type, msg) PR_LOG(GetSpeechSynthLog(), type, msg) michael@0: #else michael@0: #define LOG(type, msg) michael@0: #endif michael@0: michael@0: namespace mozilla { michael@0: namespace dom { michael@0: michael@0: class SynthStreamListener : public MediaStreamListener michael@0: { michael@0: public: michael@0: SynthStreamListener(nsSpeechTask* aSpeechTask) : michael@0: mSpeechTask(aSpeechTask), michael@0: mStarted(false) michael@0: { michael@0: } michael@0: michael@0: void DoNotifyStarted() michael@0: { michael@0: if (mSpeechTask) { michael@0: mSpeechTask->DispatchStartImpl(); michael@0: } michael@0: } michael@0: michael@0: void DoNotifyFinished() michael@0: { michael@0: if (mSpeechTask) { michael@0: mSpeechTask->DispatchEndImpl(mSpeechTask->GetCurrentTime(), michael@0: mSpeechTask->GetCurrentCharOffset()); michael@0: } michael@0: } michael@0: michael@0: virtual void NotifyFinished(MediaStreamGraph* aGraph) michael@0: { michael@0: nsCOMPtr event = michael@0: NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyFinished); michael@0: aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget()); michael@0: } michael@0: michael@0: virtual void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) michael@0: { michael@0: if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) { michael@0: mStarted = true; michael@0: nsCOMPtr event = michael@0: NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyStarted); michael@0: aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget()); michael@0: } michael@0: } michael@0: michael@0: virtual void NotifyRemoved(MediaStreamGraph* aGraph) michael@0: { michael@0: mSpeechTask = nullptr; michael@0: } michael@0: michael@0: private: michael@0: // Raw pointer; if we exist, the stream exists, michael@0: // and 'mSpeechTask' exclusively owns it and therefor exists as well. michael@0: nsSpeechTask* mSpeechTask; michael@0: michael@0: bool mStarted; michael@0: }; michael@0: michael@0: // nsSpeechTask michael@0: michael@0: NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance); michael@0: michael@0: NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask) michael@0: NS_INTERFACE_MAP_ENTRY(nsISpeechTask) michael@0: NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask) michael@0: NS_INTERFACE_MAP_END michael@0: michael@0: NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) michael@0: NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) michael@0: michael@0: nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance) michael@0: : mUtterance(aUtterance) michael@0: , mCallback(nullptr) michael@0: , mIndirectAudio(false) michael@0: { michael@0: mText = aUtterance->mText; michael@0: mVolume = aUtterance->Volume(); michael@0: } michael@0: michael@0: nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) michael@0: : mUtterance(nullptr) michael@0: , mVolume(aVolume) michael@0: , mText(aText) michael@0: , mCallback(nullptr) michael@0: , mIndirectAudio(false) michael@0: { michael@0: } michael@0: michael@0: nsSpeechTask::~nsSpeechTask() michael@0: { michael@0: if (mStream) { michael@0: if (!mStream->IsDestroyed()) { michael@0: mStream->Destroy(); michael@0: } michael@0: michael@0: mStream = nullptr; michael@0: } michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback, michael@0: uint32_t aChannels, uint32_t aRate, uint8_t argc) michael@0: { michael@0: MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); michael@0: michael@0: LOG(PR_LOG_DEBUG, ("nsSpeechTask::Setup")); michael@0: michael@0: mCallback = aCallback; michael@0: michael@0: if (argc < 2) { michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (mIndirectAudio) { michael@0: NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services."); michael@0: } michael@0: michael@0: // XXX: Is there setup overhead here that hurtls latency? michael@0: mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr); michael@0: mStream->AddListener(new SynthStreamListener(this)); michael@0: michael@0: // XXX: Support more than one channel michael@0: NS_ENSURE_TRUE(aChannels == 1, NS_ERROR_FAILURE); michael@0: michael@0: mChannels = aChannels; michael@0: michael@0: AudioSegment* segment = new AudioSegment(); michael@0: mStream->AddTrack(1, aRate, 0, segment); michael@0: mStream->AddAudioOutput(this); michael@0: mStream->SetAudioOutputVolume(this, mVolume); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::SendAudio(JS::Handle aData, JS::Handle aLandmarks, michael@0: JSContext* aCx) michael@0: { michael@0: MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); michael@0: michael@0: NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE); michael@0: NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE); michael@0: NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE); michael@0: michael@0: if (mIndirectAudio) { michael@0: NS_WARNING("Can't call SendAudio from an indirect audio speech service."); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: JS::Rooted darray(aCx, &aData.toObject()); michael@0: JSAutoCompartment ac(aCx, darray); michael@0: michael@0: JS::Rooted tsrc(aCx, nullptr); michael@0: michael@0: // Allow either Int16Array or plain JS Array michael@0: if (JS_IsInt16Array(darray)) { michael@0: tsrc = darray; michael@0: } else if (JS_IsArrayObject(aCx, darray)) { michael@0: tsrc = JS_NewInt16ArrayFromArray(aCx, darray); michael@0: } michael@0: michael@0: if (!tsrc) { michael@0: return NS_ERROR_DOM_TYPE_MISMATCH_ERR; michael@0: } michael@0: michael@0: SendAudioImpl(JS_GetInt16ArrayData(tsrc), michael@0: JS_GetTypedArrayLength(tsrc)); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen) michael@0: { michael@0: MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); michael@0: michael@0: NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE); michael@0: NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE); michael@0: NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE); michael@0: michael@0: if (mIndirectAudio) { michael@0: NS_WARNING("Can't call SendAudio from an indirect audio speech service."); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: SendAudioImpl(aData, aDataLen); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsSpeechTask::SendAudioImpl(int16_t* aData, uint32_t aDataLen) michael@0: { michael@0: if (aDataLen == 0) { michael@0: mStream->EndAllTrackAndFinish(); michael@0: return; michael@0: } michael@0: michael@0: nsRefPtr samples = michael@0: SharedBuffer::Create(aDataLen * sizeof(int16_t)); michael@0: int16_t* frames = static_cast(samples->Data()); michael@0: michael@0: for (uint32_t i = 0; i < aDataLen; i++) { michael@0: frames[i] = aData[i]; michael@0: } michael@0: michael@0: AudioSegment segment; michael@0: nsAutoTArray channelData; michael@0: channelData.AppendElement(frames); michael@0: segment.AppendFrames(samples.forget(), channelData, aDataLen); michael@0: mStream->AppendToTrack(1, &segment); michael@0: mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchStart() michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchStart() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchStartImpl(); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchStartImpl() michael@0: { michael@0: LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchStart")); michael@0: michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: mUtterance->mState = SpeechSynthesisUtterance::STATE_SPEAKING; michael@0: mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("start"), 0, 0, michael@0: NS_LITERAL_STRING("")); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchEnd() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchEndImpl(aElapsedTime, aCharIndex); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchEnd\n")); michael@0: michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: // XXX: This should not be here, but it prevents a crash in MSG. michael@0: if (mStream) { michael@0: mStream->Destroy(); michael@0: } michael@0: michael@0: nsRefPtr utterance = mUtterance; michael@0: michael@0: if (mSpeechSynthesis) { michael@0: mSpeechSynthesis->OnEnd(this); michael@0: } michael@0: michael@0: if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) { michael@0: utterance->mState = SpeechSynthesisUtterance::STATE_NONE; michael@0: } else { michael@0: utterance->mState = SpeechSynthesisUtterance::STATE_ENDED; michael@0: utterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("end"), michael@0: aCharIndex, aElapsedTime, michael@0: EmptyString()); michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchPause() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchPauseImpl(aElapsedTime, aCharIndex); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchPause")); michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_FALSE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE); michael@0: NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: mUtterance->mPaused = true; michael@0: mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"), michael@0: aCharIndex, aElapsedTime, michael@0: NS_LITERAL_STRING("")); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchResume() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchResumeImpl(aElapsedTime, aCharIndex); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchResume")); michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_TRUE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE); michael@0: NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: mUtterance->mPaused = false; michael@0: mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"), michael@0: aCharIndex, aElapsedTime, michael@0: NS_LITERAL_STRING("")); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchError() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchErrorImpl(aElapsedTime, aCharIndex); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: mUtterance->mState = SpeechSynthesisUtterance::STATE_ENDED; michael@0: mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("error"), michael@0: aCharIndex, aElapsedTime, michael@0: NS_LITERAL_STRING("")); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchBoundary(const nsAString& aName, michael@0: float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, michael@0: float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("boundary"), michael@0: aCharIndex, aElapsedTime, michael@0: aName); michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsSpeechTask::DispatchMark(const nsAString& aName, michael@0: float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: if (!mIndirectAudio) { michael@0: NS_WARNING("Can't call DispatchMark() from a direct audio speech service"); michael@0: return NS_ERROR_FAILURE; michael@0: } michael@0: michael@0: return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); michael@0: } michael@0: michael@0: nsresult michael@0: nsSpeechTask::DispatchMarkImpl(const nsAString& aName, michael@0: float aElapsedTime, uint32_t aCharIndex) michael@0: { michael@0: MOZ_ASSERT(mUtterance); michael@0: NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING, michael@0: NS_ERROR_NOT_AVAILABLE); michael@0: michael@0: mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("mark"), michael@0: aCharIndex, aElapsedTime, michael@0: aName); michael@0: return NS_OK; michael@0: } michael@0: michael@0: void michael@0: nsSpeechTask::Pause() michael@0: { michael@0: MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); michael@0: michael@0: if (mUtterance->IsPaused() || michael@0: mUtterance->GetState() == SpeechSynthesisUtterance::STATE_ENDED) { michael@0: return; michael@0: } michael@0: michael@0: if (mCallback) { michael@0: DebugOnly rv = mCallback->OnPause(); michael@0: NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); michael@0: } michael@0: michael@0: if (mStream) { michael@0: mStream->ChangeExplicitBlockerCount(1); michael@0: } michael@0: michael@0: DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset()); michael@0: } michael@0: michael@0: void michael@0: nsSpeechTask::Resume() michael@0: { michael@0: MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); michael@0: michael@0: if (!mUtterance->IsPaused()) { michael@0: return; michael@0: } michael@0: michael@0: if (mCallback) { michael@0: DebugOnly rv = mCallback->OnResume(); michael@0: NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onResume() callback"); michael@0: } michael@0: michael@0: if (mStream) { michael@0: mStream->ChangeExplicitBlockerCount(-1); michael@0: } michael@0: michael@0: DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset()); michael@0: } michael@0: michael@0: void michael@0: nsSpeechTask::Cancel() michael@0: { michael@0: MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); michael@0: michael@0: LOG(PR_LOG_DEBUG, ("nsSpeechTask::Cancel")); michael@0: michael@0: if (mCallback) { michael@0: DebugOnly rv = mCallback->OnCancel(); michael@0: NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onCancel() callback"); michael@0: } michael@0: michael@0: if (mStream) { michael@0: mStream->ChangeExplicitBlockerCount(1); michael@0: } michael@0: michael@0: DispatchEndImpl(GetCurrentTime(), GetCurrentCharOffset()); michael@0: } michael@0: michael@0: float michael@0: nsSpeechTask::GetCurrentTime() michael@0: { michael@0: return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0; michael@0: } michael@0: michael@0: uint32_t michael@0: nsSpeechTask::GetCurrentCharOffset() michael@0: { michael@0: return mStream && mStream->IsFinished() ? mText.Length() : 0; michael@0: } michael@0: michael@0: void michael@0: nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) michael@0: { michael@0: mSpeechSynthesis = aSpeechSynthesis; michael@0: } michael@0: michael@0: } // namespace dom michael@0: } // namespace mozilla