|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #ifndef mozilla_dom_SpeechRecognition_h |
|
8 #define mozilla_dom_SpeechRecognition_h |
|
9 |
|
10 #include "mozilla/Attributes.h" |
|
11 #include "mozilla/DOMEventTargetHelper.h" |
|
12 #include "nsCOMPtr.h" |
|
13 #include "nsString.h" |
|
14 #include "nsWrapperCache.h" |
|
15 #include "nsTArray.h" |
|
16 #include "js/TypeDecls.h" |
|
17 |
|
18 #include "nsIDOMNavigatorUserMedia.h" |
|
19 #include "nsITimer.h" |
|
20 #include "MediaEngine.h" |
|
21 #include "MediaStreamGraph.h" |
|
22 #include "AudioSegment.h" |
|
23 #include "mozilla/WeakPtr.h" |
|
24 #include "mozilla/Preferences.h" |
|
25 |
|
26 #include "SpeechGrammarList.h" |
|
27 #include "SpeechRecognitionResultList.h" |
|
28 #include "SpeechStreamListener.h" |
|
29 #include "nsISpeechRecognitionService.h" |
|
30 #include "endpointer.h" |
|
31 |
|
32 #include "mozilla/dom/SpeechRecognitionError.h" |
|
33 |
|
34 class nsIDOMWindow; |
|
35 |
|
36 namespace mozilla { |
|
37 |
|
38 namespace dom { |
|
39 |
|
40 #define TEST_PREFERENCE_ENABLE "media.webspeech.test.enable" |
|
41 #define TEST_PREFERENCE_FAKE_FSM_EVENTS "media.webspeech.test.fake_fsm_events" |
|
42 #define TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE "media.webspeech.test.fake_recognition_service" |
|
43 #define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent" |
|
44 #define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End" |
|
45 |
|
46 class GlobalObject; |
|
47 class SpeechEvent; |
|
48 |
|
49 #ifdef PR_LOGGING |
|
50 PRLogModuleInfo* GetSpeechRecognitionLog(); |
|
51 #define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__)) |
|
52 #else |
|
53 #define SR_LOG(...) |
|
54 #endif |
|
55 |
|
/**
 * DOM implementation of the Web Speech API's SpeechRecognition interface.
 *
 * Combines three roles visible in this header:
 *  - the WebIDL-facing attribute accessors and Start/Stop/Abort methods,
 *  - an internal finite state machine (FSMState) driven by SpeechEvents,
 *  - audio plumbing that buffers captured samples and feeds them to the
 *    endpointer and the pluggable nsISpeechRecognitionService.
 *
 * Inherits SupportsWeakPtr so collaborators can observe it without
 * extending its lifetime; also an nsIObserver (used, among other things,
 * for the test-event topics defined above — see ProcessTestEventRequest).
 */
class SpeechRecognition MOZ_FINAL : public DOMEventTargetHelper,
                                    public nsIObserver,
                                    public SupportsWeakPtr<SpeechRecognition>
{
public:
  MOZ_DECLARE_REFCOUNTED_TYPENAME(SpeechRecognition)

  // NOTE(review): single-argument constructor is not `explicit`; presumably
  // only ever invoked via Constructor() below — confirm before relying on it.
  SpeechRecognition(nsPIDOMWindow* aOwnerWindow);
  virtual ~SpeechRecognition() {};

  NS_DECL_ISUPPORTS_INHERITED
  NS_DECL_NSIOBSERVER

  nsISupports* GetParentObject() const;

  virtual JSObject* WrapObject(JSContext* aCx) MOZ_OVERRIDE;

  // WebIDL constructor entry point.
  static already_AddRefed<SpeechRecognition>
  Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);

  // --- WebIDL attribute accessors (grammars, lang, continuous,
  // interimResults, maxAlternatives, serviceURI). All report failure
  // through aRv rather than a return code. ---
  already_AddRefed<SpeechGrammarList> GetGrammars(ErrorResult& aRv) const;

  void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv);

  void GetLang(nsString& aRetVal, ErrorResult& aRv) const;

  void SetLang(const nsAString& aArg, ErrorResult& aRv);

  bool GetContinuous(ErrorResult& aRv) const;

  void SetContinuous(bool aArg, ErrorResult& aRv);

  bool GetInterimResults(ErrorResult& aRv) const;

  void SetInterimResults(bool aArg, ErrorResult& aRv);

  uint32_t GetMaxAlternatives(ErrorResult& aRv) const;

  void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv);

  void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;

  void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);

  // --- WebIDL methods controlling a recognition session. ---
  void Start(ErrorResult& aRv);

  void Stop();

  void Abort();

  // WebIDL event-handler attributes (onaudiostart, onresult, onerror, ...).
  IMPL_EVENT_HANDLER(audiostart)
  IMPL_EVENT_HANDLER(soundstart)
  IMPL_EVENT_HANDLER(speechstart)
  IMPL_EVENT_HANDLER(speechend)
  IMPL_EVENT_HANDLER(soundend)
  IMPL_EVENT_HANDLER(audioend)
  IMPL_EVENT_HANDLER(result)
  IMPL_EVENT_HANDLER(nomatch)
  IMPL_EVENT_HANDLER(error)
  IMPL_EVENT_HANDLER(start)
  IMPL_EVENT_HANDLER(end)

  // Inputs to the recognition state machine; a SpeechEvent carries one of
  // these (plus optional payloads) into ProcessEvent/Transition.
  enum EventType {
    EVENT_START,
    EVENT_STOP,
    EVENT_ABORT,
    EVENT_AUDIO_DATA,
    EVENT_AUDIO_ERROR,
    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
    EVENT_RECOGNITIONSERVICE_ERROR,
    EVENT_COUNT // sentinel: number of event types, not a real event
  };

  // Builds a SpeechRecognitionError with the given code/message and routes
  // it through the FSM as an event of type aErrorType.
  void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage);
  // Copies samples into mAudioSamplesBuffer; return value is presumably the
  // number of samples consumed/buffered — confirm against the .cpp.
  uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
  // Splits a raw sample run into mAudioSamplesPerChunk-sized SharedBuffers
  // appended to aResult; return value presumably counts leftover or produced
  // samples — confirm against the .cpp.
  uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<nsRefPtr<SharedBuffer>>& aResult);
  // Wraps buffered chunks into a heap-allocated AudioSegment (caller owns).
  AudioSegment* CreateAudioSegment(nsTArray<nsRefPtr<SharedBuffer>>& aChunks);
  // Entry point for captured audio; aProvider identifies the listener that
  // produced the data (see SpeechEvent::mProvider for why it is kept alive).
  void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);

  // Process-wide cache of the test-only prefs declared at the top of this
  // header. Init() is idempotent; the fake-FSM/fake-service prefs are only
  // consulted when the master test pref is enabled.
  static struct TestConfig
  {
  public:
    bool mEnableTests;
    bool mFakeFSMEvents;
    bool mFakeRecognitionService;

    void Init()
    {
      if (mInitialized) {
        return;
      }

      Preferences::AddBoolVarCache(&mEnableTests, TEST_PREFERENCE_ENABLE);

      if (mEnableTests) {
        Preferences::AddBoolVarCache(&mFakeFSMEvents, TEST_PREFERENCE_FAKE_FSM_EVENTS);
        Preferences::AddBoolVarCache(&mFakeRecognitionService, TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE);
      }

      mInitialized = true;
    }
  private:
    bool mInitialized;
  } mTestConfig;


  friend class SpeechEvent;
private:
  // States of the recognition session FSM.
  enum FSMState {
    STATE_IDLE,
    STATE_STARTING,
    STATE_ESTIMATING,
    STATE_WAITING_FOR_SPEECH,
    STATE_RECOGNIZING,
    STATE_WAITING_FOR_RESULT,
    STATE_COUNT // sentinel: number of states, not a real state
  };

  void SetState(FSMState state);
  // True when mCurrentState lies in [begin, end] — presumably inclusive and
  // reliant on the declaration order of FSMState; confirm against the .cpp.
  bool StateBetween(FSMState begin, FSMState end);

  // getUserMedia success callback: holds a strong ref back to the
  // recognition object until the media stream is delivered.
  class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK

    GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaSuccessCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  // getUserMedia failure callback; mirrors the success callback above.
  class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK

    GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaErrorCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
  NS_IMETHOD StopRecording();

  // Feeds one captured segment through the pipeline; return value presumably
  // reports samples/duration processed — confirm against the .cpp.
  uint32_t ProcessAudioSegment(AudioSegment* aSegment);
  void NotifyError(SpeechEvent* aEvent);

  // FSM driver: ProcessEvent validates/logs, Transition performs the
  // state-dependent action (one of the handlers below).
  void ProcessEvent(SpeechEvent* aEvent);
  void Transition(SpeechEvent* aEvent);

  // --- Per-state / per-event FSM action handlers. ---
  void Reset();
  void ResetAndEnd();
  void WaitForAudioData(SpeechEvent* aEvent);
  void StartedAudioCapture(SpeechEvent* aEvent);
  void StopRecordingAndRecognize(SpeechEvent* aEvent);
  void WaitForEstimation(SpeechEvent* aEvent);
  void DetectSpeech(SpeechEvent* aEvent);
  void WaitForSpeechEnd(SpeechEvent* aEvent);
  void NotifyFinalResult(SpeechEvent* aEvent);
  void DoNothing(SpeechEvent* aEvent);
  void AbortSilently(SpeechEvent* aEvent);
  void AbortError(SpeechEvent* aEvent);

  nsRefPtr<DOMMediaStream> mDOMStream;          // captured media stream
  nsRefPtr<SpeechStreamListener> mSpeechListener; // listener feeding us audio
  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService; // backend service

  // Resolves the contract ID of the recognition service to instantiate
  // (the fake one when mTestConfig.mFakeRecognitionService is set).
  void GetRecognitionServiceCID(nsACString& aResultCID);

  FSMState mCurrentState;

  // Voice-activity endpointer plus the number of samples used for its
  // noise/level estimation phase (STATE_ESTIMATING).
  Endpointer mEndpointer;
  uint32_t mEstimationSamples;

  uint32_t mAudioSamplesPerChunk;

  // buffer holds one chunk of mAudioSamplesPerChunk
  // samples before feeding it to mEndpointer
  nsRefPtr<SharedBuffer> mAudioSamplesBuffer;
  uint32_t mBufferedSamples; // valid samples currently in mAudioSamplesBuffer

  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
  bool mAborted; // set once Abort() is requested

  // Handles SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC observer
  // notifications when test mode is enabled.
  void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName);

  // Human-readable names for logging (see SR_LOG).
  const char* GetName(FSMState aId);
  const char* GetName(SpeechEvent* aId);
};
|
258 |
|
/**
 * Runnable that carries one SpeechRecognition::EventType (plus optional
 * payloads: audio, result list, or error) to the owning SpeechRecognition's
 * state machine. Run() — defined in the .cpp — is where the event is
 * delivered.
 */
class SpeechEvent : public nsRunnable
{
public:
  SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType)
    : mAudioSegment(0)
    , mRecognitionResultList(0)
    , mError(0)
    , mRecognition(aRecognition)
    , mType(aType)
  {
  }

  ~SpeechEvent();

  NS_IMETHOD Run() MOZ_OVERRIDE;

  // Payload for EVENT_AUDIO_DATA; raw pointer, so ownership/cleanup is
  // handled outside this header (presumably in ~SpeechEvent — confirm).
  AudioSegment* mAudioSegment;
  nsRefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
  nsRefPtr<SpeechRecognitionError> mError;

  friend class SpeechRecognition;
private:
  // Raw (non-owning) pointer to the target; the recognition object supports
  // weak observation (SupportsWeakPtr), and mProvider below is what keeps
  // the relevant objects alive for audio events.
  SpeechRecognition* mRecognition;

  // for AUDIO_DATA events, keep a reference to the provider
  // of the data (i.e., the SpeechStreamListener) to ensure it
  // is kept alive (and keeps SpeechRecognition alive) until this
  // event gets processed.
  nsRefPtr<MediaStreamListener> mProvider;
  SpeechRecognition::EventType mType;
};
|
289 |
|
290 } // namespace dom |
|
291 |
|
292 inline nsISupports* |
|
293 ToSupports(dom::SpeechRecognition* aRec) |
|
294 { |
|
295 return ToSupports(static_cast<DOMEventTargetHelper*>(aRec)); |
|
296 } |
|
297 } // namespace mozilla |
|
298 |
|
299 #endif |