/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef mozilla_dom_SpeechRecognition_h
#define mozilla_dom_SpeechRecognition_h

#include "mozilla/Attributes.h"
#include "mozilla/DOMEventTargetHelper.h"
#include "nsCOMPtr.h"
#include "nsString.h"
#include "nsWrapperCache.h"
#include "nsTArray.h"
#include "js/TypeDecls.h"

#include "nsIDOMNavigatorUserMedia.h"
#include "nsITimer.h"
#include "MediaEngine.h"
#include "MediaStreamGraph.h"
#include "AudioSegment.h"
#include "mozilla/WeakPtr.h"
#include "mozilla/Preferences.h"

#include "SpeechGrammarList.h"
#include "SpeechRecognitionResultList.h"
#include "SpeechStreamListener.h"
#include "nsISpeechRecognitionService.h"
#include "endpointer.h"

#include "mozilla/dom/SpeechRecognitionError.h"

class nsIDOMWindow;

namespace mozilla {

namespace dom {
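// Preferences and observer topics used by the test harness. Judging from
// the names and from how they are cached in TestConfig below, the first
// pref enables the test hooks at all, and the other two switch the
// implementation to fake FSM events and a fake recognition service
// (description inferred from this header, not verified against the .cpp).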
#define TEST_PREFERENCE_ENABLE "media.webspeech.test.enable"
#define TEST_PREFERENCE_FAKE_FSM_EVENTS "media.webspeech.test.fake_fsm_events"
#define TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE "media.webspeech.test.fake_recognition_service"
#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent"
#define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End"

class GlobalObject;
class SpeechEvent;

#ifdef PR_LOGGING
PRLogModuleInfo* GetSpeechRecognitionLog();
#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
#else
#define SR_LOG(...)
#endif
class SpeechRecognition MOZ_FINAL : public DOMEventTargetHelper,
                                    public nsIObserver,
                                    public SupportsWeakPtr<SpeechRecognition>
{
public:
  MOZ_DECLARE_REFCOUNTED_TYPENAME(SpeechRecognition)
  SpeechRecognition(nsPIDOMWindow* aOwnerWindow);
  virtual ~SpeechRecognition() {};

  NS_DECL_ISUPPORTS_INHERITED

  NS_DECL_NSIOBSERVER

  nsISupports* GetParentObject() const;

  virtual JSObject* WrapObject(JSContext* aCx) MOZ_OVERRIDE;

  static already_AddRefed<SpeechRecognition>
  Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);

  already_AddRefed<SpeechGrammarList> GetGrammars(ErrorResult& aRv) const;

  void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv);

  void GetLang(nsString& aRetVal, ErrorResult& aRv) const;

  void SetLang(const nsAString& aArg, ErrorResult& aRv);

  bool GetContinuous(ErrorResult& aRv) const;

  void SetContinuous(bool aArg, ErrorResult& aRv);

  bool GetInterimResults(ErrorResult& aRv) const;

  void SetInterimResults(bool aArg, ErrorResult& aRv);

  uint32_t GetMaxAlternatives(ErrorResult& aRv) const;

  void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv);

  void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;

  void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);
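  // Start() requests microphone access and begins capturing audio;
  // Stop() asks the recognition service for a final result on the audio
  // captured so far; Abort() tears the session down without a result.
  // (Summary inferred from the FSM event names below; the authoritative
  // behavior is in the .cpp.)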
  void Start(ErrorResult& aRv);

  void Stop();

  void Abort();
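  // Event handler IDL attributes for the Web Speech API events
  // (onaudiostart, onresult, onerror, ...); IMPL_EVENT_HANDLER expands to
  // the getter/setter pair for each of them.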
  IMPL_EVENT_HANDLER(audiostart)
  IMPL_EVENT_HANDLER(soundstart)
  IMPL_EVENT_HANDLER(speechstart)
  IMPL_EVENT_HANDLER(speechend)
  IMPL_EVENT_HANDLER(soundend)
  IMPL_EVENT_HANDLER(audioend)
  IMPL_EVENT_HANDLER(result)
  IMPL_EVENT_HANDLER(nomatch)
  IMPL_EVENT_HANDLER(error)
  IMPL_EVENT_HANDLER(start)
  IMPL_EVENT_HANDLER(end)
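  // Inputs to the recognition state machine. EVENT_START/STOP/ABORT come
  // from the DOM methods above, EVENT_AUDIO_DATA/AUDIO_ERROR from the
  // capture pipeline, and the EVENT_RECOGNITIONSERVICE_* events from the
  // nsISpeechRecognitionService backend (grouping inferred from the names).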
  enum EventType {
    EVENT_START,
    EVENT_STOP,
    EVENT_ABORT,
    EVENT_AUDIO_DATA,
    EVENT_AUDIO_ERROR,
    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
    EVENT_RECOGNITIONSERVICE_ERROR,
    EVENT_COUNT
  };
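  // DispatchError() raises a SpeechRecognitionError event with the given
  // code. The remaining helpers handle captured audio: FillSamplesBuffer()
  // accumulates incoming samples into mAudioSamplesBuffer until a chunk of
  // mAudioSamplesPerChunk is available, SplitSamplesBuffer() cuts a larger
  // buffer into chunk-sized SharedBuffers, CreateAudioSegment() wraps those
  // chunks, and FeedAudioData() forwards captured audio as EVENT_AUDIO_DATA
  // events. (Descriptions inferred from the signatures and member names in
  // this header.)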
  void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage);
  uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
  uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<nsRefPtr<SharedBuffer>>& aResult);
  AudioSegment* CreateAudioSegment(nsTArray<nsRefPtr<SharedBuffer>>& aChunks);
  void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);

  static struct TestConfig
  {
  public:
    bool mEnableTests;
    bool mFakeFSMEvents;
    bool mFakeRecognitionService;

    void Init()
    {
      if (mInitialized) {
        return;
      }

      Preferences::AddBoolVarCache(&mEnableTests, TEST_PREFERENCE_ENABLE);

      if (mEnableTests) {
        Preferences::AddBoolVarCache(&mFakeFSMEvents, TEST_PREFERENCE_FAKE_FSM_EVENTS);
        Preferences::AddBoolVarCache(&mFakeRecognitionService, TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE);
      }

      mInitialized = true;
    }
  private:
    bool mInitialized;
  } mTestConfig;

  friend class SpeechEvent;
private:
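  // Recognition session state machine. Roughly (inferred from the state
  // and event names; the transition table itself lives in the .cpp):
  //   STATE_IDLE -> STATE_STARTING after start(), while waiting for the
  //   media stream; -> STATE_ESTIMATING while the endpointer estimates
  //   ambient noise; -> STATE_WAITING_FOR_SPEECH until speech onset is
  //   detected; -> STATE_RECOGNIZING while audio is fed to the service;
  //   -> STATE_WAITING_FOR_RESULT after stop() or end of speech, until
  //   the service returns a final result.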
  enum FSMState {
    STATE_IDLE,
    STATE_STARTING,
    STATE_ESTIMATING,
    STATE_WAITING_FOR_SPEECH,
    STATE_RECOGNIZING,
    STATE_WAITING_FOR_RESULT,
    STATE_COUNT
  };

  void SetState(FSMState state);
  bool StateBetween(FSMState begin, FSMState end);
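  // Callbacks handed to the getUserMedia() implementation when Start()
  // requests the microphone: on success the resulting DOMMediaStream is
  // passed to StartRecording(), on failure the session is aborted with an
  // error (flow inferred from the declarations in this header).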
  class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK

    GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaSuccessCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK

    GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaErrorCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
  NS_IMETHOD StopRecording();

  uint32_t ProcessAudioSegment(AudioSegment* aSegment);
  void NotifyError(SpeechEvent* aEvent);

  void ProcessEvent(SpeechEvent* aEvent);
  void Transition(SpeechEvent* aEvent);
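  // One handler per (state, event) transition; ProcessEvent()/Transition()
  // dispatch to these based on mCurrentState and the event type
  // (assumption: the actual transition table is defined in the .cpp).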
  void Reset();
  void ResetAndEnd();
  void WaitForAudioData(SpeechEvent* aEvent);
  void StartedAudioCapture(SpeechEvent* aEvent);
  void StopRecordingAndRecognize(SpeechEvent* aEvent);
  void WaitForEstimation(SpeechEvent* aEvent);
  void DetectSpeech(SpeechEvent* aEvent);
  void WaitForSpeechEnd(SpeechEvent* aEvent);
  void NotifyFinalResult(SpeechEvent* aEvent);
  void DoNothing(SpeechEvent* aEvent);
  void AbortSilently(SpeechEvent* aEvent);
  void AbortError(SpeechEvent* aEvent);

  nsRefPtr<DOMMediaStream> mDOMStream;
  nsRefPtr<SpeechStreamListener> mSpeechListener;
  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;

  void GetRecognitionServiceCID(nsACString& aResultCID);

  FSMState mCurrentState;

  Endpointer mEndpointer;
  uint32_t mEstimationSamples;

  uint32_t mAudioSamplesPerChunk;

  // buffer holds one chunk of mAudioSamplesPerChunk
  // samples before feeding it to mEndpointer
  nsRefPtr<SharedBuffer> mAudioSamplesBuffer;
  uint32_t mBufferedSamples;

  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
  bool mAborted;

  void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName);

  const char* GetName(FSMState aId);
  const char* GetName(SpeechEvent* aId);
};

class SpeechEvent : public nsRunnable
{
public:
  SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType)
  : mAudioSegment(0)
  , mRecognitionResultList(0)
  , mError(0)
  , mRecognition(aRecognition)
  , mType(aType)
  {
  }

  ~SpeechEvent();

  NS_IMETHOD Run() MOZ_OVERRIDE;
  AudioSegment* mAudioSegment;
  nsRefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
  nsRefPtr<SpeechRecognitionError> mError;

  friend class SpeechRecognition;
private:
  SpeechRecognition* mRecognition;

  // for AUDIO_DATA events, keep a reference to the provider
  // of the data (i.e., the SpeechStreamListener) to ensure it
  // is kept alive (and keeps SpeechRecognition alive) until this
  // event gets processed.
  nsRefPtr<MediaStreamListener> mProvider;
  SpeechRecognition::EventType mType;
};
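// Illustrative sketch (not part of the original header): how an event is
// expected to reach the FSM. The field and enum names come from the
// classes above; the dispatch pattern is an assumption based on
// SpeechEvent being an nsRunnable whose Run() executes on the main thread:
//
//   nsRefPtr<SpeechEvent> event =
//     new SpeechEvent(recognition, SpeechRecognition::EVENT_AUDIO_DATA);
//   event->mAudioSegment = segment;   // audio chunk carried by this event
//   NS_DispatchToMainThread(event);   // Run() -> SpeechRecognition::ProcessEvent()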
} // namespace dom

inline nsISupports*
ToSupports(dom::SpeechRecognition* aRec)
{
  return ToSupports(static_cast<DOMEventTargetHelper*>(aRec));
}
} // namespace mozilla

#endif