content/media/webspeech/synth/nsSpeechTask.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701 by complying with the disk-avoidance requirement
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include "AudioSegment.h"
     8 #include "nsSpeechTask.h"
     9 #include "SpeechSynthesis.h"
    // winbase.h defines GetCurrentTime as a zero-argument macro that forwards to
    // GetTickCount(). Left in place it would clash with
    // nsSpeechTask::GetCurrentTime(), so discard the macro when it is defined.
    #if defined(GetCurrentTime)
    #  undef GetCurrentTime
    #endif
    // File-local LOG(type, msg) helper. Any earlier definition is discarded
    // first. With PR_LOGGING it forwards to PR_LOG using the module returned by
    // GetSpeechSynthLog(); without it, LOG expands to nothing.
    #undef LOG
    #if defined(PR_LOGGING)
    extern PRLogModuleInfo* GetSpeechSynthLog();
    #  define LOG(type, msg) PR_LOG(GetSpeechSynthLog(), type, msg)
    #else
    #  define LOG(type, msg)
    #endif
    25 namespace mozilla {
    26 namespace dom {
    28 class SynthStreamListener : public MediaStreamListener
    29 {
    30 public:
    31   SynthStreamListener(nsSpeechTask* aSpeechTask) :
    32     mSpeechTask(aSpeechTask),
    33     mStarted(false)
    34   {
    35   }
    37   void DoNotifyStarted()
    38   {
    39     if (mSpeechTask) {
    40       mSpeechTask->DispatchStartImpl();
    41     }
    42   }
    44   void DoNotifyFinished()
    45   {
    46     if (mSpeechTask) {
    47       mSpeechTask->DispatchEndImpl(mSpeechTask->GetCurrentTime(),
    48                                    mSpeechTask->GetCurrentCharOffset());
    49     }
    50   }
    52   virtual void NotifyFinished(MediaStreamGraph* aGraph)
    53   {
    54     nsCOMPtr<nsIRunnable> event =
    55       NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyFinished);
    56     aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget());
    57   }
    59   virtual void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked)
    60   {
    61     if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) {
    62       mStarted = true;
    63       nsCOMPtr<nsIRunnable> event =
    64         NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyStarted);
    65       aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget());
    66     }
    67   }
    69   virtual void NotifyRemoved(MediaStreamGraph* aGraph)
    70   {
    71     mSpeechTask = nullptr;
    72   }
    74 private:
    75   // Raw pointer; if we exist, the stream exists,
    76   // and 'mSpeechTask' exclusively owns it and therefor exists as well.
    77   nsSpeechTask* mSpeechTask;
    79   bool mStarted;
    80 };
    82 // nsSpeechTask
    84 NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance);
    86 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
    87   NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
    88   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
    89 NS_INTERFACE_MAP_END
    91 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
    92 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
    94 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
    95   : mUtterance(aUtterance)
    96   , mCallback(nullptr)
    97   , mIndirectAudio(false)
    98 {
    99   mText = aUtterance->mText;
   100   mVolume = aUtterance->Volume();
   101 }
   103 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
   104   : mUtterance(nullptr)
   105   , mVolume(aVolume)
   106   , mText(aText)
   107   , mCallback(nullptr)
   108   , mIndirectAudio(false)
   109 {
   110 }
   112 nsSpeechTask::~nsSpeechTask()
   113 {
   114   if (mStream) {
   115     if (!mStream->IsDestroyed()) {
   116       mStream->Destroy();
   117     }
   119     mStream = nullptr;
   120   }
   121 }
   123 NS_IMETHODIMP
   124 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,
   125                     uint32_t aChannels, uint32_t aRate, uint8_t argc)
   126 {
   127   MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default);
   129   LOG(PR_LOG_DEBUG, ("nsSpeechTask::Setup"));
   131   mCallback = aCallback;
   133   if (argc < 2) {
   134     return NS_OK;
   135   }
   137   if (mIndirectAudio) {
   138     NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
   139   }
   141   // XXX: Is there setup overhead here that hurtls latency?
   142   mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr);
   143   mStream->AddListener(new SynthStreamListener(this));
   145   // XXX: Support more than one channel
   146   NS_ENSURE_TRUE(aChannels == 1, NS_ERROR_FAILURE);
   148   mChannels = aChannels;
   150   AudioSegment* segment = new AudioSegment();
   151   mStream->AddTrack(1, aRate, 0, segment);
   152   mStream->AddAudioOutput(this);
   153   mStream->SetAudioOutputVolume(this, mVolume);
   155   return NS_OK;
   156 }
   158 NS_IMETHODIMP
   159 nsSpeechTask::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
   160                         JSContext* aCx)
   161 {
   162   MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default);
   164   NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE);
   165   NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE);
   166   NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE);
   168   if (mIndirectAudio) {
   169     NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
   170     return NS_ERROR_FAILURE;
   171   }
   173   JS::Rooted<JSObject*> darray(aCx, &aData.toObject());
   174   JSAutoCompartment ac(aCx, darray);
   176   JS::Rooted<JSObject*> tsrc(aCx, nullptr);
   178   // Allow either Int16Array or plain JS Array
   179   if (JS_IsInt16Array(darray)) {
   180     tsrc = darray;
   181   } else if (JS_IsArrayObject(aCx, darray)) {
   182     tsrc = JS_NewInt16ArrayFromArray(aCx, darray);
   183   }
   185   if (!tsrc) {
   186     return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
   187   }
   189   SendAudioImpl(JS_GetInt16ArrayData(tsrc),
   190                 JS_GetTypedArrayLength(tsrc));
   192   return NS_OK;
   193 }
   195 NS_IMETHODIMP
   196 nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen)
   197 {
   198   MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default);
   200   NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE);
   201   NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE);
   202   NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE);
   204   if (mIndirectAudio) {
   205     NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
   206     return NS_ERROR_FAILURE;
   207   }
   209   SendAudioImpl(aData, aDataLen);
   211   return NS_OK;
   212 }
   214 void
   215 nsSpeechTask::SendAudioImpl(int16_t* aData, uint32_t aDataLen)
   216 {
   217   if (aDataLen == 0) {
   218     mStream->EndAllTrackAndFinish();
   219     return;
   220   }
   222   nsRefPtr<mozilla::SharedBuffer> samples =
   223     SharedBuffer::Create(aDataLen * sizeof(int16_t));
   224   int16_t* frames = static_cast<int16_t*>(samples->Data());
   226   for (uint32_t i = 0; i < aDataLen; i++) {
   227     frames[i] = aData[i];
   228   }
   230   AudioSegment segment;
   231   nsAutoTArray<const int16_t*, 1> channelData;
   232   channelData.AppendElement(frames);
   233   segment.AppendFrames(samples.forget(), channelData, aDataLen);
   234   mStream->AppendToTrack(1, &segment);
   235   mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX);
   236 }
   238 NS_IMETHODIMP
   239 nsSpeechTask::DispatchStart()
   240 {
   241   if (!mIndirectAudio) {
   242     NS_WARNING("Can't call DispatchStart() from a direct audio speech service");
   243     return NS_ERROR_FAILURE;
   244   }
   246   return DispatchStartImpl();
   247 }
   249 nsresult
   250 nsSpeechTask::DispatchStartImpl()
   251 {
   252   LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchStart"));
   254   MOZ_ASSERT(mUtterance);
   255   NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING,
   256                  NS_ERROR_NOT_AVAILABLE);
   258   mUtterance->mState = SpeechSynthesisUtterance::STATE_SPEAKING;
   259   mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("start"), 0, 0,
   260                                            NS_LITERAL_STRING(""));
   262   return NS_OK;
   263 }
   265 NS_IMETHODIMP
   266 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex)
   267 {
   268   if (!mIndirectAudio) {
   269     NS_WARNING("Can't call DispatchEnd() from a direct audio speech service");
   270     return NS_ERROR_FAILURE;
   271   }
   273   return DispatchEndImpl(aElapsedTime, aCharIndex);
   274 }
   276 nsresult
   277 nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
   278 {
   279   LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchEnd\n"));
   281   MOZ_ASSERT(mUtterance);
   282   NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED,
   283                   NS_ERROR_NOT_AVAILABLE);
   285   // XXX: This should not be here, but it prevents a crash in MSG.
   286   if (mStream) {
   287     mStream->Destroy();
   288   }
   290   nsRefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
   292   if (mSpeechSynthesis) {
   293     mSpeechSynthesis->OnEnd(this);
   294   }
   296   if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) {
   297     utterance->mState = SpeechSynthesisUtterance::STATE_NONE;
   298   } else {
   299     utterance->mState = SpeechSynthesisUtterance::STATE_ENDED;
   300     utterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("end"),
   301                                             aCharIndex, aElapsedTime,
   302                                             EmptyString());
   303   }
   305   return NS_OK;
   306 }
   308 NS_IMETHODIMP
   309 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex)
   310 {
   311   if (!mIndirectAudio) {
   312     NS_WARNING("Can't call DispatchPause() from a direct audio speech service");
   313     return NS_ERROR_FAILURE;
   314   }
   316   return DispatchPauseImpl(aElapsedTime, aCharIndex);
   317 }
   319 nsresult
   320 nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex)
   321 {
   322   LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchPause"));
   323   MOZ_ASSERT(mUtterance);
   324   NS_ENSURE_FALSE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE);
   325   NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED,
   326                   NS_ERROR_NOT_AVAILABLE);
   328   mUtterance->mPaused = true;
   329   mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"),
   330                                            aCharIndex, aElapsedTime,
   331                                            NS_LITERAL_STRING(""));
   332   return NS_OK;
   333 }
   335 NS_IMETHODIMP
   336 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex)
   337 {
   338   if (!mIndirectAudio) {
   339     NS_WARNING("Can't call DispatchResume() from a direct audio speech service");
   340     return NS_ERROR_FAILURE;
   341   }
   343   return DispatchResumeImpl(aElapsedTime, aCharIndex);
   344 }
   346 nsresult
   347 nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex)
   348 {
   349   LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchResume"));
   350   MOZ_ASSERT(mUtterance);
   351   NS_ENSURE_TRUE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE);
   352   NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED,
   353                   NS_ERROR_NOT_AVAILABLE);
   355   mUtterance->mPaused = false;
   356   mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
   357                                            aCharIndex, aElapsedTime,
   358                                            NS_LITERAL_STRING(""));
   359   return NS_OK;
   360 }
   362 NS_IMETHODIMP
   363 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
   364 {
   365   if (!mIndirectAudio) {
   366     NS_WARNING("Can't call DispatchError() from a direct audio speech service");
   367     return NS_ERROR_FAILURE;
   368   }
   370   return DispatchErrorImpl(aElapsedTime, aCharIndex);
   371 }
   373 nsresult
   374 nsSpeechTask::DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex)
   375 {
   376   MOZ_ASSERT(mUtterance);
   377   NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED,
   378                   NS_ERROR_NOT_AVAILABLE);
   380   mUtterance->mState = SpeechSynthesisUtterance::STATE_ENDED;
   381   mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("error"),
   382                                            aCharIndex, aElapsedTime,
   383                                            NS_LITERAL_STRING(""));
   384   return NS_OK;
   385 }
   387 NS_IMETHODIMP
   388 nsSpeechTask::DispatchBoundary(const nsAString& aName,
   389                                float aElapsedTime, uint32_t aCharIndex)
   390 {
   391   if (!mIndirectAudio) {
   392     NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service");
   393     return NS_ERROR_FAILURE;
   394   }
   396   return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex);
   397 }
   399 nsresult
   400 nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
   401                                    float aElapsedTime, uint32_t aCharIndex)
   402 {
   403   MOZ_ASSERT(mUtterance);
   404   NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING,
   405                  NS_ERROR_NOT_AVAILABLE);
   407   mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("boundary"),
   408                                            aCharIndex, aElapsedTime,
   409                                            aName);
   410   return NS_OK;
   411 }
   413 NS_IMETHODIMP
   414 nsSpeechTask::DispatchMark(const nsAString& aName,
   415                            float aElapsedTime, uint32_t aCharIndex)
   416 {
   417   if (!mIndirectAudio) {
   418     NS_WARNING("Can't call DispatchMark() from a direct audio speech service");
   419     return NS_ERROR_FAILURE;
   420   }
   422   return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
   423 }
   425 nsresult
   426 nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
   427                                float aElapsedTime, uint32_t aCharIndex)
   428 {
   429   MOZ_ASSERT(mUtterance);
   430   NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING,
   431                  NS_ERROR_NOT_AVAILABLE);
   433   mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("mark"),
   434                                            aCharIndex, aElapsedTime,
   435                                            aName);
   436   return NS_OK;
   437 }
   439 void
   440 nsSpeechTask::Pause()
   441 {
   442   MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default);
   444   if (mUtterance->IsPaused() ||
   445       mUtterance->GetState() == SpeechSynthesisUtterance::STATE_ENDED) {
   446     return;
   447   }
   449   if (mCallback) {
   450     DebugOnly<nsresult> rv = mCallback->OnPause();
   451     NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
   452   }
   454   if (mStream) {
   455     mStream->ChangeExplicitBlockerCount(1);
   456   }
   458   DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset());
   459 }
   461 void
   462 nsSpeechTask::Resume()
   463 {
   464   MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default);
   466   if (!mUtterance->IsPaused()) {
   467     return;
   468   }
   470   if (mCallback) {
   471     DebugOnly<nsresult> rv = mCallback->OnResume();
   472     NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onResume() callback");
   473   }
   475   if (mStream) {
   476     mStream->ChangeExplicitBlockerCount(-1);
   477   }
   479   DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset());
   480 }
   482 void
   483 nsSpeechTask::Cancel()
   484 {
   485   MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default);
   487   LOG(PR_LOG_DEBUG, ("nsSpeechTask::Cancel"));
   489   if (mCallback) {
   490     DebugOnly<nsresult> rv = mCallback->OnCancel();
   491     NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onCancel() callback");
   492   }
   494   if (mStream) {
   495     mStream->ChangeExplicitBlockerCount(1);
   496   }
   498   DispatchEndImpl(GetCurrentTime(), GetCurrentCharOffset());
   499 }
   501 float
   502 nsSpeechTask::GetCurrentTime()
   503 {
   504   return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0;
   505 }
   507 uint32_t
   508 nsSpeechTask::GetCurrentCharOffset()
   509 {
   510   return mStream && mStream->IsFinished() ? mText.Length() : 0;
   511 }
   513 void
   514 nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis)
   515 {
   516   mSpeechSynthesis = aSpeechSynthesis;
   517 }
   519 } // namespace dom
   520 } // namespace mozilla

mercurial