|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
#include "AudioSegment.h"
#include "nsSpeechTask.h"
#include "SpeechSynthesis.h"

#include <string.h>  // memcpy
|
10 |
|
11 // GetCurrentTime is defined in winbase.h as zero argument macro forwarding to |
|
12 // GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime(). |
|
13 #ifdef GetCurrentTime |
|
14 #undef GetCurrentTime |
|
15 #endif |
|
16 |
|
17 #undef LOG |
|
18 #ifdef PR_LOGGING |
|
19 extern PRLogModuleInfo* GetSpeechSynthLog(); |
|
20 #define LOG(type, msg) PR_LOG(GetSpeechSynthLog(), type, msg) |
|
21 #else |
|
22 #define LOG(type, msg) |
|
23 #endif |
|
24 |
|
25 namespace mozilla { |
|
26 namespace dom { |
|
27 |
|
28 class SynthStreamListener : public MediaStreamListener |
|
29 { |
|
30 public: |
|
31 SynthStreamListener(nsSpeechTask* aSpeechTask) : |
|
32 mSpeechTask(aSpeechTask), |
|
33 mStarted(false) |
|
34 { |
|
35 } |
|
36 |
|
37 void DoNotifyStarted() |
|
38 { |
|
39 if (mSpeechTask) { |
|
40 mSpeechTask->DispatchStartImpl(); |
|
41 } |
|
42 } |
|
43 |
|
44 void DoNotifyFinished() |
|
45 { |
|
46 if (mSpeechTask) { |
|
47 mSpeechTask->DispatchEndImpl(mSpeechTask->GetCurrentTime(), |
|
48 mSpeechTask->GetCurrentCharOffset()); |
|
49 } |
|
50 } |
|
51 |
|
52 virtual void NotifyFinished(MediaStreamGraph* aGraph) |
|
53 { |
|
54 nsCOMPtr<nsIRunnable> event = |
|
55 NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyFinished); |
|
56 aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget()); |
|
57 } |
|
58 |
|
59 virtual void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) |
|
60 { |
|
61 if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) { |
|
62 mStarted = true; |
|
63 nsCOMPtr<nsIRunnable> event = |
|
64 NS_NewRunnableMethod(this, &SynthStreamListener::DoNotifyStarted); |
|
65 aGraph->DispatchToMainThreadAfterStreamStateUpdate(event.forget()); |
|
66 } |
|
67 } |
|
68 |
|
69 virtual void NotifyRemoved(MediaStreamGraph* aGraph) |
|
70 { |
|
71 mSpeechTask = nullptr; |
|
72 } |
|
73 |
|
74 private: |
|
75 // Raw pointer; if we exist, the stream exists, |
|
76 // and 'mSpeechTask' exclusively owns it and therefor exists as well. |
|
77 nsSpeechTask* mSpeechTask; |
|
78 |
|
79 bool mStarted; |
|
80 }; |
|
81 |
|
82 // nsSpeechTask |
|
83 |
|
84 NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance); |
|
85 |
|
86 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask) |
|
87 NS_INTERFACE_MAP_ENTRY(nsISpeechTask) |
|
88 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask) |
|
89 NS_INTERFACE_MAP_END |
|
90 |
|
91 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) |
|
92 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) |
|
93 |
|
94 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance) |
|
95 : mUtterance(aUtterance) |
|
96 , mCallback(nullptr) |
|
97 , mIndirectAudio(false) |
|
98 { |
|
99 mText = aUtterance->mText; |
|
100 mVolume = aUtterance->Volume(); |
|
101 } |
|
102 |
|
103 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) |
|
104 : mUtterance(nullptr) |
|
105 , mVolume(aVolume) |
|
106 , mText(aText) |
|
107 , mCallback(nullptr) |
|
108 , mIndirectAudio(false) |
|
109 { |
|
110 } |
|
111 |
|
112 nsSpeechTask::~nsSpeechTask() |
|
113 { |
|
114 if (mStream) { |
|
115 if (!mStream->IsDestroyed()) { |
|
116 mStream->Destroy(); |
|
117 } |
|
118 |
|
119 mStream = nullptr; |
|
120 } |
|
121 } |
|
122 |
|
123 NS_IMETHODIMP |
|
124 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback, |
|
125 uint32_t aChannels, uint32_t aRate, uint8_t argc) |
|
126 { |
|
127 MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); |
|
128 |
|
129 LOG(PR_LOG_DEBUG, ("nsSpeechTask::Setup")); |
|
130 |
|
131 mCallback = aCallback; |
|
132 |
|
133 if (argc < 2) { |
|
134 return NS_OK; |
|
135 } |
|
136 |
|
137 if (mIndirectAudio) { |
|
138 NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services."); |
|
139 } |
|
140 |
|
141 // XXX: Is there setup overhead here that hurtls latency? |
|
142 mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr); |
|
143 mStream->AddListener(new SynthStreamListener(this)); |
|
144 |
|
145 // XXX: Support more than one channel |
|
146 NS_ENSURE_TRUE(aChannels == 1, NS_ERROR_FAILURE); |
|
147 |
|
148 mChannels = aChannels; |
|
149 |
|
150 AudioSegment* segment = new AudioSegment(); |
|
151 mStream->AddTrack(1, aRate, 0, segment); |
|
152 mStream->AddAudioOutput(this); |
|
153 mStream->SetAudioOutputVolume(this, mVolume); |
|
154 |
|
155 return NS_OK; |
|
156 } |
|
157 |
|
158 NS_IMETHODIMP |
|
159 nsSpeechTask::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks, |
|
160 JSContext* aCx) |
|
161 { |
|
162 MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); |
|
163 |
|
164 NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE); |
|
165 NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE); |
|
166 NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE); |
|
167 |
|
168 if (mIndirectAudio) { |
|
169 NS_WARNING("Can't call SendAudio from an indirect audio speech service."); |
|
170 return NS_ERROR_FAILURE; |
|
171 } |
|
172 |
|
173 JS::Rooted<JSObject*> darray(aCx, &aData.toObject()); |
|
174 JSAutoCompartment ac(aCx, darray); |
|
175 |
|
176 JS::Rooted<JSObject*> tsrc(aCx, nullptr); |
|
177 |
|
178 // Allow either Int16Array or plain JS Array |
|
179 if (JS_IsInt16Array(darray)) { |
|
180 tsrc = darray; |
|
181 } else if (JS_IsArrayObject(aCx, darray)) { |
|
182 tsrc = JS_NewInt16ArrayFromArray(aCx, darray); |
|
183 } |
|
184 |
|
185 if (!tsrc) { |
|
186 return NS_ERROR_DOM_TYPE_MISMATCH_ERR; |
|
187 } |
|
188 |
|
189 SendAudioImpl(JS_GetInt16ArrayData(tsrc), |
|
190 JS_GetTypedArrayLength(tsrc)); |
|
191 |
|
192 return NS_OK; |
|
193 } |
|
194 |
|
195 NS_IMETHODIMP |
|
196 nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen) |
|
197 { |
|
198 MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); |
|
199 |
|
200 NS_ENSURE_TRUE(mStream, NS_ERROR_NOT_AVAILABLE); |
|
201 NS_ENSURE_FALSE(mStream->IsDestroyed(), NS_ERROR_NOT_AVAILABLE); |
|
202 NS_ENSURE_TRUE(mChannels, NS_ERROR_FAILURE); |
|
203 |
|
204 if (mIndirectAudio) { |
|
205 NS_WARNING("Can't call SendAudio from an indirect audio speech service."); |
|
206 return NS_ERROR_FAILURE; |
|
207 } |
|
208 |
|
209 SendAudioImpl(aData, aDataLen); |
|
210 |
|
211 return NS_OK; |
|
212 } |
|
213 |
|
214 void |
|
215 nsSpeechTask::SendAudioImpl(int16_t* aData, uint32_t aDataLen) |
|
216 { |
|
217 if (aDataLen == 0) { |
|
218 mStream->EndAllTrackAndFinish(); |
|
219 return; |
|
220 } |
|
221 |
|
222 nsRefPtr<mozilla::SharedBuffer> samples = |
|
223 SharedBuffer::Create(aDataLen * sizeof(int16_t)); |
|
224 int16_t* frames = static_cast<int16_t*>(samples->Data()); |
|
225 |
|
226 for (uint32_t i = 0; i < aDataLen; i++) { |
|
227 frames[i] = aData[i]; |
|
228 } |
|
229 |
|
230 AudioSegment segment; |
|
231 nsAutoTArray<const int16_t*, 1> channelData; |
|
232 channelData.AppendElement(frames); |
|
233 segment.AppendFrames(samples.forget(), channelData, aDataLen); |
|
234 mStream->AppendToTrack(1, &segment); |
|
235 mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX); |
|
236 } |
|
237 |
|
238 NS_IMETHODIMP |
|
239 nsSpeechTask::DispatchStart() |
|
240 { |
|
241 if (!mIndirectAudio) { |
|
242 NS_WARNING("Can't call DispatchStart() from a direct audio speech service"); |
|
243 return NS_ERROR_FAILURE; |
|
244 } |
|
245 |
|
246 return DispatchStartImpl(); |
|
247 } |
|
248 |
|
249 nsresult |
|
250 nsSpeechTask::DispatchStartImpl() |
|
251 { |
|
252 LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchStart")); |
|
253 |
|
254 MOZ_ASSERT(mUtterance); |
|
255 NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING, |
|
256 NS_ERROR_NOT_AVAILABLE); |
|
257 |
|
258 mUtterance->mState = SpeechSynthesisUtterance::STATE_SPEAKING; |
|
259 mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("start"), 0, 0, |
|
260 NS_LITERAL_STRING("")); |
|
261 |
|
262 return NS_OK; |
|
263 } |
|
264 |
|
265 NS_IMETHODIMP |
|
266 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) |
|
267 { |
|
268 if (!mIndirectAudio) { |
|
269 NS_WARNING("Can't call DispatchEnd() from a direct audio speech service"); |
|
270 return NS_ERROR_FAILURE; |
|
271 } |
|
272 |
|
273 return DispatchEndImpl(aElapsedTime, aCharIndex); |
|
274 } |
|
275 |
|
276 nsresult |
|
277 nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) |
|
278 { |
|
279 LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchEnd\n")); |
|
280 |
|
281 MOZ_ASSERT(mUtterance); |
|
282 NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, |
|
283 NS_ERROR_NOT_AVAILABLE); |
|
284 |
|
285 // XXX: This should not be here, but it prevents a crash in MSG. |
|
286 if (mStream) { |
|
287 mStream->Destroy(); |
|
288 } |
|
289 |
|
290 nsRefPtr<SpeechSynthesisUtterance> utterance = mUtterance; |
|
291 |
|
292 if (mSpeechSynthesis) { |
|
293 mSpeechSynthesis->OnEnd(this); |
|
294 } |
|
295 |
|
296 if (utterance->mState == SpeechSynthesisUtterance::STATE_PENDING) { |
|
297 utterance->mState = SpeechSynthesisUtterance::STATE_NONE; |
|
298 } else { |
|
299 utterance->mState = SpeechSynthesisUtterance::STATE_ENDED; |
|
300 utterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("end"), |
|
301 aCharIndex, aElapsedTime, |
|
302 EmptyString()); |
|
303 } |
|
304 |
|
305 return NS_OK; |
|
306 } |
|
307 |
|
308 NS_IMETHODIMP |
|
309 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) |
|
310 { |
|
311 if (!mIndirectAudio) { |
|
312 NS_WARNING("Can't call DispatchPause() from a direct audio speech service"); |
|
313 return NS_ERROR_FAILURE; |
|
314 } |
|
315 |
|
316 return DispatchPauseImpl(aElapsedTime, aCharIndex); |
|
317 } |
|
318 |
|
319 nsresult |
|
320 nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) |
|
321 { |
|
322 LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchPause")); |
|
323 MOZ_ASSERT(mUtterance); |
|
324 NS_ENSURE_FALSE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE); |
|
325 NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, |
|
326 NS_ERROR_NOT_AVAILABLE); |
|
327 |
|
328 mUtterance->mPaused = true; |
|
329 mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"), |
|
330 aCharIndex, aElapsedTime, |
|
331 NS_LITERAL_STRING("")); |
|
332 return NS_OK; |
|
333 } |
|
334 |
|
335 NS_IMETHODIMP |
|
336 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) |
|
337 { |
|
338 if (!mIndirectAudio) { |
|
339 NS_WARNING("Can't call DispatchResume() from a direct audio speech service"); |
|
340 return NS_ERROR_FAILURE; |
|
341 } |
|
342 |
|
343 return DispatchResumeImpl(aElapsedTime, aCharIndex); |
|
344 } |
|
345 |
|
346 nsresult |
|
347 nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) |
|
348 { |
|
349 LOG(PR_LOG_DEBUG, ("nsSpeechTask::DispatchResume")); |
|
350 MOZ_ASSERT(mUtterance); |
|
351 NS_ENSURE_TRUE(mUtterance->mPaused, NS_ERROR_NOT_AVAILABLE); |
|
352 NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, |
|
353 NS_ERROR_NOT_AVAILABLE); |
|
354 |
|
355 mUtterance->mPaused = false; |
|
356 mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"), |
|
357 aCharIndex, aElapsedTime, |
|
358 NS_LITERAL_STRING("")); |
|
359 return NS_OK; |
|
360 } |
|
361 |
|
362 NS_IMETHODIMP |
|
363 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) |
|
364 { |
|
365 if (!mIndirectAudio) { |
|
366 NS_WARNING("Can't call DispatchError() from a direct audio speech service"); |
|
367 return NS_ERROR_FAILURE; |
|
368 } |
|
369 |
|
370 return DispatchErrorImpl(aElapsedTime, aCharIndex); |
|
371 } |
|
372 |
|
373 nsresult |
|
374 nsSpeechTask::DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) |
|
375 { |
|
376 MOZ_ASSERT(mUtterance); |
|
377 NS_ENSURE_FALSE(mUtterance->mState == SpeechSynthesisUtterance::STATE_ENDED, |
|
378 NS_ERROR_NOT_AVAILABLE); |
|
379 |
|
380 mUtterance->mState = SpeechSynthesisUtterance::STATE_ENDED; |
|
381 mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("error"), |
|
382 aCharIndex, aElapsedTime, |
|
383 NS_LITERAL_STRING("")); |
|
384 return NS_OK; |
|
385 } |
|
386 |
|
387 NS_IMETHODIMP |
|
388 nsSpeechTask::DispatchBoundary(const nsAString& aName, |
|
389 float aElapsedTime, uint32_t aCharIndex) |
|
390 { |
|
391 if (!mIndirectAudio) { |
|
392 NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service"); |
|
393 return NS_ERROR_FAILURE; |
|
394 } |
|
395 |
|
396 return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex); |
|
397 } |
|
398 |
|
399 nsresult |
|
400 nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, |
|
401 float aElapsedTime, uint32_t aCharIndex) |
|
402 { |
|
403 MOZ_ASSERT(mUtterance); |
|
404 NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING, |
|
405 NS_ERROR_NOT_AVAILABLE); |
|
406 |
|
407 mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("boundary"), |
|
408 aCharIndex, aElapsedTime, |
|
409 aName); |
|
410 return NS_OK; |
|
411 } |
|
412 |
|
413 NS_IMETHODIMP |
|
414 nsSpeechTask::DispatchMark(const nsAString& aName, |
|
415 float aElapsedTime, uint32_t aCharIndex) |
|
416 { |
|
417 if (!mIndirectAudio) { |
|
418 NS_WARNING("Can't call DispatchMark() from a direct audio speech service"); |
|
419 return NS_ERROR_FAILURE; |
|
420 } |
|
421 |
|
422 return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); |
|
423 } |
|
424 |
|
425 nsresult |
|
426 nsSpeechTask::DispatchMarkImpl(const nsAString& aName, |
|
427 float aElapsedTime, uint32_t aCharIndex) |
|
428 { |
|
429 MOZ_ASSERT(mUtterance); |
|
430 NS_ENSURE_TRUE(mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING, |
|
431 NS_ERROR_NOT_AVAILABLE); |
|
432 |
|
433 mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("mark"), |
|
434 aCharIndex, aElapsedTime, |
|
435 aName); |
|
436 return NS_OK; |
|
437 } |
|
438 |
|
439 void |
|
440 nsSpeechTask::Pause() |
|
441 { |
|
442 MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); |
|
443 |
|
444 if (mUtterance->IsPaused() || |
|
445 mUtterance->GetState() == SpeechSynthesisUtterance::STATE_ENDED) { |
|
446 return; |
|
447 } |
|
448 |
|
449 if (mCallback) { |
|
450 DebugOnly<nsresult> rv = mCallback->OnPause(); |
|
451 NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); |
|
452 } |
|
453 |
|
454 if (mStream) { |
|
455 mStream->ChangeExplicitBlockerCount(1); |
|
456 } |
|
457 |
|
458 DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset()); |
|
459 } |
|
460 |
|
461 void |
|
462 nsSpeechTask::Resume() |
|
463 { |
|
464 MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); |
|
465 |
|
466 if (!mUtterance->IsPaused()) { |
|
467 return; |
|
468 } |
|
469 |
|
470 if (mCallback) { |
|
471 DebugOnly<nsresult> rv = mCallback->OnResume(); |
|
472 NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onResume() callback"); |
|
473 } |
|
474 |
|
475 if (mStream) { |
|
476 mStream->ChangeExplicitBlockerCount(-1); |
|
477 } |
|
478 |
|
479 DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset()); |
|
480 } |
|
481 |
|
482 void |
|
483 nsSpeechTask::Cancel() |
|
484 { |
|
485 MOZ_ASSERT(XRE_GetProcessType() == GeckoProcessType_Default); |
|
486 |
|
487 LOG(PR_LOG_DEBUG, ("nsSpeechTask::Cancel")); |
|
488 |
|
489 if (mCallback) { |
|
490 DebugOnly<nsresult> rv = mCallback->OnCancel(); |
|
491 NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Unable to call onCancel() callback"); |
|
492 } |
|
493 |
|
494 if (mStream) { |
|
495 mStream->ChangeExplicitBlockerCount(1); |
|
496 } |
|
497 |
|
498 DispatchEndImpl(GetCurrentTime(), GetCurrentCharOffset()); |
|
499 } |
|
500 |
|
501 float |
|
502 nsSpeechTask::GetCurrentTime() |
|
503 { |
|
504 return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0; |
|
505 } |
|
506 |
|
507 uint32_t |
|
508 nsSpeechTask::GetCurrentCharOffset() |
|
509 { |
|
510 return mStream && mStream->IsFinished() ? mText.Length() : 0; |
|
511 } |
|
512 |
|
513 void |
|
514 nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) |
|
515 { |
|
516 mSpeechSynthesis = aSpeechSynthesis; |
|
517 } |
|
518 |
|
519 } // namespace dom |
|
520 } // namespace mozilla |