/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_SpeechRecognition_h
#define mozilla_dom_SpeechRecognition_h

#include "mozilla/Attributes.h"
#include "mozilla/DOMEventTargetHelper.h"
#include "nsCOMPtr.h"
#include "nsString.h"
#include "nsWrapperCache.h"
#include "nsTArray.h"
#include "js/TypeDecls.h"

#include "nsIDOMNavigatorUserMedia.h"
#include "nsITimer.h"
#include "MediaEngine.h"
#include "MediaStreamGraph.h"
#include "AudioSegment.h"
#include "mozilla/WeakPtr.h"
#include "mozilla/Preferences.h"

#include "SpeechGrammarList.h"
#include "SpeechRecognitionResultList.h"
#include "SpeechStreamListener.h"
#include "nsISpeechRecognitionService.h"
#include "endpointer.h"

#include "mozilla/dom/SpeechRecognitionError.h"

class nsIDOMWindow;

namespace mozilla {

namespace dom {
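// Preferences and observer topics used by the test harness. Judging from
// the names and from how they are cached in TestConfig below, the first
// pref enables the test hooks at all, and the other two switch the
// implementation to fake FSM events and a fake recognition service
// (description inferred from this header, not verified against the .cpp).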
#define TEST_PREFERENCE_ENABLE "media.webspeech.test.enable"
#define TEST_PREFERENCE_FAKE_FSM_EVENTS "media.webspeech.test.fake_fsm_events"
#define TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE "media.webspeech.test.fake_recognition_service"
#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent"
#define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End"

class GlobalObject;
class SpeechEvent;

#ifdef PR_LOGGING
PRLogModuleInfo* GetSpeechRecognitionLog();
#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
#else
#define SR_LOG(...)
#endif
class SpeechRecognition MOZ_FINAL : public DOMEventTargetHelper,
                                    public nsIObserver,
                                    public SupportsWeakPtr<SpeechRecognition>
{
public:
  MOZ_DECLARE_REFCOUNTED_TYPENAME(SpeechRecognition)
  SpeechRecognition(nsPIDOMWindow* aOwnerWindow);
  virtual ~SpeechRecognition() {};

  NS_DECL_ISUPPORTS_INHERITED

  NS_DECL_NSIOBSERVER

  nsISupports* GetParentObject() const;

  virtual JSObject* WrapObject(JSContext* aCx) MOZ_OVERRIDE;

  static already_AddRefed<SpeechRecognition>
  Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);

  already_AddRefed<SpeechGrammarList> GetGrammars(ErrorResult& aRv) const;

  void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv);

  void GetLang(nsString& aRetVal, ErrorResult& aRv) const;

  void SetLang(const nsAString& aArg, ErrorResult& aRv);

  bool GetContinuous(ErrorResult& aRv) const;

  void SetContinuous(bool aArg, ErrorResult& aRv);

  bool GetInterimResults(ErrorResult& aRv) const;

  void SetInterimResults(bool aArg, ErrorResult& aRv);

  uint32_t GetMaxAlternatives(ErrorResult& aRv) const;

  void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv);

  void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;

  void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);
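  // Start() requests microphone access and begins capturing audio;
  // Stop() asks the recognition service for a final result on the audio
  // captured so far; Abort() tears the session down without a result.
  // (Summary inferred from the FSM event names below; the authoritative
  // behavior is in the .cpp.)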
  void Start(ErrorResult& aRv);

  void Stop();

  void Abort();
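  // Event handler IDL attributes for the Web Speech API events
  // (onaudiostart, onresult, onerror, ...); IMPL_EVENT_HANDLER expands to
  // the getter/setter pair for each of them.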
  IMPL_EVENT_HANDLER(audiostart)
  IMPL_EVENT_HANDLER(soundstart)
  IMPL_EVENT_HANDLER(speechstart)
  IMPL_EVENT_HANDLER(speechend)
  IMPL_EVENT_HANDLER(soundend)
  IMPL_EVENT_HANDLER(audioend)
  IMPL_EVENT_HANDLER(result)
  IMPL_EVENT_HANDLER(nomatch)
  IMPL_EVENT_HANDLER(error)
  IMPL_EVENT_HANDLER(start)
  IMPL_EVENT_HANDLER(end)
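  // Inputs to the recognition state machine. EVENT_START/STOP/ABORT come
  // from the DOM methods above, EVENT_AUDIO_DATA/AUDIO_ERROR from the
  // capture pipeline, and the EVENT_RECOGNITIONSERVICE_* events from the
  // nsISpeechRecognitionService backend (grouping inferred from the names).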
  enum EventType {
    EVENT_START,
    EVENT_STOP,
    EVENT_ABORT,
    EVENT_AUDIO_DATA,
    EVENT_AUDIO_ERROR,
    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
    EVENT_RECOGNITIONSERVICE_ERROR,
    EVENT_COUNT
  };
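  // DispatchError() raises a SpeechRecognitionError event with the given
  // code. The remaining helpers handle captured audio: FillSamplesBuffer()
  // accumulates incoming samples into mAudioSamplesBuffer until a chunk of
  // mAudioSamplesPerChunk is available, SplitSamplesBuffer() cuts a larger
  // buffer into chunk-sized SharedBuffers, CreateAudioSegment() wraps those
  // chunks, and FeedAudioData() forwards captured audio as EVENT_AUDIO_DATA
  // events. (Descriptions inferred from the signatures and member names in
  // this header.)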
  void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage);
  uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
  uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<nsRefPtr<SharedBuffer>>& aResult);
  AudioSegment* CreateAudioSegment(nsTArray<nsRefPtr<SharedBuffer>>& aChunks);
  void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);

  static struct TestConfig
  {
  public:
    bool mEnableTests;
    bool mFakeFSMEvents;
    bool mFakeRecognitionService;

    void Init()
    {
      if (mInitialized) {
        return;
      }

      Preferences::AddBoolVarCache(&mEnableTests, TEST_PREFERENCE_ENABLE);

      if (mEnableTests) {
        Preferences::AddBoolVarCache(&mFakeFSMEvents, TEST_PREFERENCE_FAKE_FSM_EVENTS);
        Preferences::AddBoolVarCache(&mFakeRecognitionService, TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE);
      }

      mInitialized = true;
    }
  private:
    bool mInitialized;
  } mTestConfig;

  friend class SpeechEvent;
private:
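  // Recognition session state machine. Roughly (inferred from the state
  // and event names; the transition table itself lives in the .cpp):
  //   STATE_IDLE -> STATE_STARTING after start(), while waiting for the
  //   media stream; -> STATE_ESTIMATING while the endpointer estimates
  //   ambient noise; -> STATE_WAITING_FOR_SPEECH until speech onset is
  //   detected; -> STATE_RECOGNIZING while audio is fed to the service;
  //   -> STATE_WAITING_FOR_RESULT after stop() or end of speech, until
  //   the service returns a final result.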
  enum FSMState {
    STATE_IDLE,
    STATE_STARTING,
    STATE_ESTIMATING,
    STATE_WAITING_FOR_SPEECH,
    STATE_RECOGNIZING,
    STATE_WAITING_FOR_RESULT,
    STATE_COUNT
  };

  void SetState(FSMState state);
  bool StateBetween(FSMState begin, FSMState end);
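  // Callbacks handed to the getUserMedia() implementation when Start()
  // requests the microphone: on success the resulting DOMMediaStream is
  // passed to StartRecording(), on failure the session is aborted with an
  // error (flow inferred from the declarations in this header).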
  class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK

    GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaSuccessCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK

    GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaErrorCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
  NS_IMETHOD StopRecording();

  uint32_t ProcessAudioSegment(AudioSegment* aSegment);
  void NotifyError(SpeechEvent* aEvent);

  void ProcessEvent(SpeechEvent* aEvent);
  void Transition(SpeechEvent* aEvent);
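  // One handler per (state, event) transition; ProcessEvent()/Transition()
  // dispatch to these based on mCurrentState and the event type
  // (assumption: the actual transition table is defined in the .cpp).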
  void Reset();
  void ResetAndEnd();
  void WaitForAudioData(SpeechEvent* aEvent);
  void StartedAudioCapture(SpeechEvent* aEvent);
  void StopRecordingAndRecognize(SpeechEvent* aEvent);
  void WaitForEstimation(SpeechEvent* aEvent);
  void DetectSpeech(SpeechEvent* aEvent);
  void WaitForSpeechEnd(SpeechEvent* aEvent);
  void NotifyFinalResult(SpeechEvent* aEvent);
  void DoNothing(SpeechEvent* aEvent);
  void AbortSilently(SpeechEvent* aEvent);
  void AbortError(SpeechEvent* aEvent);

  nsRefPtr<DOMMediaStream> mDOMStream;
  nsRefPtr<SpeechStreamListener> mSpeechListener;
  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;

  void GetRecognitionServiceCID(nsACString& aResultCID);

  FSMState mCurrentState;

  Endpointer mEndpointer;
  uint32_t mEstimationSamples;

  uint32_t mAudioSamplesPerChunk;

  // buffer holds one chunk of mAudioSamplesPerChunk
  // samples before feeding it to mEndpointer
  nsRefPtr<SharedBuffer> mAudioSamplesBuffer;
  uint32_t mBufferedSamples;

  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
  bool mAborted;

  void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName);

  const char* GetName(FSMState aId);
  const char* GetName(SpeechEvent* aId);
};

class SpeechEvent : public nsRunnable
{
public:
  SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType)
  : mAudioSegment(0)
  , mRecognitionResultList(0)
  , mError(0)
  , mRecognition(aRecognition)
  , mType(aType)
  {
  }

  ~SpeechEvent();

  NS_IMETHOD Run() MOZ_OVERRIDE;
  AudioSegment* mAudioSegment;
  nsRefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
  nsRefPtr<SpeechRecognitionError> mError;

  friend class SpeechRecognition;
private:
  SpeechRecognition* mRecognition;

  // for AUDIO_DATA events, keep a reference to the provider
  // of the data (i.e., the SpeechStreamListener) to ensure it
  // is kept alive (and keeps SpeechRecognition alive) until this
  // event gets processed.
  nsRefPtr<MediaStreamListener> mProvider;
  SpeechRecognition::EventType mType;
};
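// Illustrative sketch (not part of the original header): how an event is
// expected to reach the FSM. The field and enum names come from the
// classes above; the dispatch pattern is an assumption based on
// SpeechEvent being an nsRunnable whose Run() executes on the main thread:
//
//   nsRefPtr<SpeechEvent> event =
//     new SpeechEvent(recognition, SpeechRecognition::EVENT_AUDIO_DATA);
//   event->mAudioSegment = segment;   // audio chunk carried by this event
//   NS_DispatchToMainThread(event);   // Run() -> SpeechRecognition::ProcessEvent()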
} // namespace dom

inline nsISupports*
ToSupports(dom::SpeechRecognition* aRec)
{
  return ToSupports(static_cast<DOMEventTargetHelper*>(aRec));
}
} // namespace mozilla

#endif