michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim:set ts=2 sw=2 sts=2 et cindent: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef mozilla_dom_SpeechRecognition_h michael@0: #define mozilla_dom_SpeechRecognition_h michael@0: michael@0: #include "mozilla/Attributes.h" michael@0: #include "mozilla/DOMEventTargetHelper.h" michael@0: #include "nsCOMPtr.h" michael@0: #include "nsString.h" michael@0: #include "nsWrapperCache.h" michael@0: #include "nsTArray.h" michael@0: #include "js/TypeDecls.h" michael@0: michael@0: #include "nsIDOMNavigatorUserMedia.h" michael@0: #include "nsITimer.h" michael@0: #include "MediaEngine.h" michael@0: #include "MediaStreamGraph.h" michael@0: #include "AudioSegment.h" michael@0: #include "mozilla/WeakPtr.h" michael@0: #include "mozilla/Preferences.h" michael@0: michael@0: #include "SpeechGrammarList.h" michael@0: #include "SpeechRecognitionResultList.h" michael@0: #include "SpeechStreamListener.h" michael@0: #include "nsISpeechRecognitionService.h" michael@0: #include "endpointer.h" michael@0: michael@0: #include "mozilla/dom/SpeechRecognitionError.h" michael@0: michael@0: class nsIDOMWindow; michael@0: michael@0: namespace mozilla { michael@0: michael@0: namespace dom { michael@0: michael@0: #define TEST_PREFERENCE_ENABLE "media.webspeech.test.enable" michael@0: #define TEST_PREFERENCE_FAKE_FSM_EVENTS "media.webspeech.test.fake_fsm_events" michael@0: #define TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE "media.webspeech.test.fake_recognition_service" michael@0: #define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent" michael@0: #define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End" michael@0: michael@0: class GlobalObject; michael@0: class SpeechEvent; michael@0: michael@0: #ifdef PR_LOGGING michael@0: PRLogModuleInfo* GetSpeechRecognitionLog(); michael@0: #define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__)) michael@0: #else michael@0: #define SR_LOG(...) michael@0: #endif michael@0: michael@0: class SpeechRecognition MOZ_FINAL : public DOMEventTargetHelper, michael@0: public nsIObserver, michael@0: public SupportsWeakPtr michael@0: { michael@0: public: michael@0: MOZ_DECLARE_REFCOUNTED_TYPENAME(SpeechRecognition) michael@0: SpeechRecognition(nsPIDOMWindow* aOwnerWindow); michael@0: virtual ~SpeechRecognition() {}; michael@0: michael@0: NS_DECL_ISUPPORTS_INHERITED michael@0: michael@0: NS_DECL_NSIOBSERVER michael@0: michael@0: nsISupports* GetParentObject() const; michael@0: michael@0: virtual JSObject* WrapObject(JSContext* aCx) MOZ_OVERRIDE; michael@0: michael@0: static already_AddRefed michael@0: Constructor(const GlobalObject& aGlobal, ErrorResult& aRv); michael@0: michael@0: already_AddRefed GetGrammars(ErrorResult& aRv) const; michael@0: michael@0: void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv); michael@0: michael@0: void GetLang(nsString& aRetVal, ErrorResult& aRv) const; michael@0: michael@0: void SetLang(const nsAString& aArg, ErrorResult& aRv); michael@0: michael@0: bool GetContinuous(ErrorResult& aRv) const; michael@0: michael@0: void SetContinuous(bool aArg, ErrorResult& aRv); michael@0: michael@0: bool GetInterimResults(ErrorResult& aRv) const; michael@0: michael@0: void SetInterimResults(bool aArg, ErrorResult& aRv); michael@0: michael@0: uint32_t GetMaxAlternatives(ErrorResult& aRv) const; michael@0: michael@0: void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv); michael@0: michael@0: void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const; michael@0: michael@0: void SetServiceURI(const nsAString& aArg, ErrorResult& aRv); michael@0: michael@0: void Start(ErrorResult& aRv); michael@0: michael@0: void Stop(); michael@0: michael@0: void Abort(); michael@0: michael@0: IMPL_EVENT_HANDLER(audiostart) michael@0: IMPL_EVENT_HANDLER(soundstart) michael@0: IMPL_EVENT_HANDLER(speechstart) michael@0: IMPL_EVENT_HANDLER(speechend) michael@0: IMPL_EVENT_HANDLER(soundend) michael@0: IMPL_EVENT_HANDLER(audioend) michael@0: IMPL_EVENT_HANDLER(result) michael@0: IMPL_EVENT_HANDLER(nomatch) michael@0: IMPL_EVENT_HANDLER(error) michael@0: IMPL_EVENT_HANDLER(start) michael@0: IMPL_EVENT_HANDLER(end) michael@0: michael@0: enum EventType { michael@0: EVENT_START, michael@0: EVENT_STOP, michael@0: EVENT_ABORT, michael@0: EVENT_AUDIO_DATA, michael@0: EVENT_AUDIO_ERROR, michael@0: EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT, michael@0: EVENT_RECOGNITIONSERVICE_FINAL_RESULT, michael@0: EVENT_RECOGNITIONSERVICE_ERROR, michael@0: EVENT_COUNT michael@0: }; michael@0: michael@0: void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage); michael@0: uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount); michael@0: uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray>& aResult); michael@0: AudioSegment* CreateAudioSegment(nsTArray>& aChunks); michael@0: void FeedAudioData(already_AddRefed aSamples, uint32_t aDuration, MediaStreamListener* aProvider); michael@0: michael@0: static struct TestConfig michael@0: { michael@0: public: michael@0: bool mEnableTests; michael@0: bool mFakeFSMEvents; michael@0: bool mFakeRecognitionService; michael@0: michael@0: void Init() michael@0: { michael@0: if (mInitialized) { michael@0: return; michael@0: } michael@0: michael@0: Preferences::AddBoolVarCache(&mEnableTests, TEST_PREFERENCE_ENABLE); michael@0: michael@0: if (mEnableTests) { michael@0: Preferences::AddBoolVarCache(&mFakeFSMEvents, TEST_PREFERENCE_FAKE_FSM_EVENTS); michael@0: Preferences::AddBoolVarCache(&mFakeRecognitionService, TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE); michael@0: } michael@0: michael@0: mInitialized = true; michael@0: } michael@0: private: michael@0: bool mInitialized; michael@0: } mTestConfig; michael@0: michael@0: michael@0: friend class SpeechEvent; michael@0: private: michael@0: enum FSMState { michael@0: STATE_IDLE, michael@0: STATE_STARTING, michael@0: STATE_ESTIMATING, michael@0: STATE_WAITING_FOR_SPEECH, michael@0: STATE_RECOGNIZING, michael@0: STATE_WAITING_FOR_RESULT, michael@0: STATE_COUNT michael@0: }; michael@0: michael@0: void SetState(FSMState state); michael@0: bool StateBetween(FSMState begin, FSMState end); michael@0: michael@0: class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback michael@0: { michael@0: public: michael@0: NS_DECL_ISUPPORTS michael@0: NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK michael@0: michael@0: GetUserMediaSuccessCallback(SpeechRecognition* aRecognition) michael@0: : mRecognition(aRecognition) michael@0: {} michael@0: michael@0: virtual ~GetUserMediaSuccessCallback() {} michael@0: michael@0: private: michael@0: nsRefPtr mRecognition; michael@0: }; michael@0: michael@0: class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback michael@0: { michael@0: public: michael@0: NS_DECL_ISUPPORTS michael@0: NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK michael@0: michael@0: GetUserMediaErrorCallback(SpeechRecognition* aRecognition) michael@0: : mRecognition(aRecognition) michael@0: {} michael@0: michael@0: virtual ~GetUserMediaErrorCallback() {} michael@0: michael@0: private: michael@0: nsRefPtr mRecognition; michael@0: }; michael@0: michael@0: NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream); michael@0: NS_IMETHOD StopRecording(); michael@0: michael@0: uint32_t ProcessAudioSegment(AudioSegment* aSegment); michael@0: void NotifyError(SpeechEvent* aEvent); michael@0: michael@0: void ProcessEvent(SpeechEvent* aEvent); michael@0: void Transition(SpeechEvent* aEvent); michael@0: michael@0: void Reset(); michael@0: void ResetAndEnd(); michael@0: void WaitForAudioData(SpeechEvent* aEvent); michael@0: void StartedAudioCapture(SpeechEvent* aEvent); michael@0: void StopRecordingAndRecognize(SpeechEvent* aEvent); michael@0: void WaitForEstimation(SpeechEvent* aEvent); michael@0: void DetectSpeech(SpeechEvent* aEvent); michael@0: void WaitForSpeechEnd(SpeechEvent* aEvent); michael@0: void NotifyFinalResult(SpeechEvent* aEvent); michael@0: void DoNothing(SpeechEvent* aEvent); michael@0: void AbortSilently(SpeechEvent* aEvent); michael@0: void AbortError(SpeechEvent* aEvent); michael@0: michael@0: nsRefPtr mDOMStream; michael@0: nsRefPtr mSpeechListener; michael@0: nsCOMPtr mRecognitionService; michael@0: michael@0: void GetRecognitionServiceCID(nsACString& aResultCID); michael@0: michael@0: FSMState mCurrentState; michael@0: michael@0: Endpointer mEndpointer; michael@0: uint32_t mEstimationSamples; michael@0: michael@0: uint32_t mAudioSamplesPerChunk; michael@0: michael@0: // buffer holds one chunk of mAudioSamplesPerChunk michael@0: // samples before feeding it to mEndpointer michael@0: nsRefPtr mAudioSamplesBuffer; michael@0: uint32_t mBufferedSamples; michael@0: michael@0: nsCOMPtr mSpeechDetectionTimer; michael@0: bool mAborted; michael@0: michael@0: void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName); michael@0: michael@0: const char* GetName(FSMState aId); michael@0: const char* GetName(SpeechEvent* aId); michael@0: }; michael@0: michael@0: class SpeechEvent : public nsRunnable michael@0: { michael@0: public: michael@0: SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType) michael@0: : mAudioSegment(0) michael@0: , mRecognitionResultList(0) michael@0: , mError(0) michael@0: , mRecognition(aRecognition) michael@0: , mType(aType) michael@0: { michael@0: } michael@0: michael@0: ~SpeechEvent(); michael@0: michael@0: NS_IMETHOD Run() MOZ_OVERRIDE; michael@0: AudioSegment* mAudioSegment; michael@0: nsRefPtr mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff michael@0: nsRefPtr mError; michael@0: michael@0: friend class SpeechRecognition; michael@0: private: michael@0: SpeechRecognition* mRecognition; michael@0: michael@0: // for AUDIO_DATA events, keep a reference to the provider michael@0: // of the data (i.e., the SpeechStreamListener) to ensure it michael@0: // is kept alive (and keeps SpeechRecognition alive) until this michael@0: // event gets processed. michael@0: nsRefPtr mProvider; michael@0: SpeechRecognition::EventType mType; michael@0: }; michael@0: michael@0: } // namespace dom michael@0: michael@0: inline nsISupports* michael@0: ToSupports(dom::SpeechRecognition* aRec) michael@0: { michael@0: return ToSupports(static_cast(aRec)); michael@0: } michael@0: } // namespace mozilla michael@0: michael@0: #endif