content/media/webspeech/recognition/SpeechRecognition.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/media/webspeech/recognition/SpeechRecognition.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,299 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim:set ts=2 sw=2 sts=2 et cindent: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#ifndef mozilla_dom_SpeechRecognition_h
    1.11 +#define mozilla_dom_SpeechRecognition_h
    1.12 +
    1.13 +#include "mozilla/Attributes.h"
    1.14 +#include "mozilla/DOMEventTargetHelper.h"
    1.15 +#include "nsCOMPtr.h"
    1.16 +#include "nsString.h"
    1.17 +#include "nsWrapperCache.h"
    1.18 +#include "nsTArray.h"
    1.19 +#include "js/TypeDecls.h"
    1.20 +
    1.21 +#include "nsIDOMNavigatorUserMedia.h"
    1.22 +#include "nsITimer.h"
    1.23 +#include "MediaEngine.h"
    1.24 +#include "MediaStreamGraph.h"
    1.25 +#include "AudioSegment.h"
    1.26 +#include "mozilla/WeakPtr.h"
    1.27 +#include "mozilla/Preferences.h"
    1.28 +
    1.29 +#include "SpeechGrammarList.h"
    1.30 +#include "SpeechRecognitionResultList.h"
    1.31 +#include "SpeechStreamListener.h"
    1.32 +#include "nsISpeechRecognitionService.h"
    1.33 +#include "endpointer.h"
    1.34 +
    1.35 +#include "mozilla/dom/SpeechRecognitionError.h"
    1.36 +
    1.37 +class nsIDOMWindow;
    1.38 +
    1.39 +namespace mozilla {
    1.40 +
    1.41 +namespace dom {
    1.42 +
    1.43 +#define TEST_PREFERENCE_ENABLE "media.webspeech.test.enable"
    1.44 +#define TEST_PREFERENCE_FAKE_FSM_EVENTS "media.webspeech.test.fake_fsm_events"
    1.45 +#define TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE "media.webspeech.test.fake_recognition_service"
    1.46 +#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent"
    1.47 +#define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End"
    1.48 +
    1.49 +class GlobalObject;
    1.50 +class SpeechEvent;
    1.51 +
    1.52 +#ifdef PR_LOGGING
    1.53 +PRLogModuleInfo* GetSpeechRecognitionLog();
    1.54 +#define SR_LOG(...) PR_LOG(GetSpeechRecognitionLog(), PR_LOG_DEBUG, (__VA_ARGS__))
    1.55 +#else
    1.56 +#define SR_LOG(...)
    1.57 +#endif
    1.58 +
// DOM-facing SpeechRecognition object (Web Speech API). Captures microphone
// audio via getUserMedia, feeds fixed-size sample chunks through an
// Endpointer for speech detection, and hands audio to an
// nsISpeechRecognitionService for recognition. State changes are driven by
// SpeechEvent runnables dispatched through ProcessEvent/Transition.
class SpeechRecognition MOZ_FINAL : public DOMEventTargetHelper,
                                    public nsIObserver,
                                    public SupportsWeakPtr<SpeechRecognition>
{
public:
  MOZ_DECLARE_REFCOUNTED_TYPENAME(SpeechRecognition)
  SpeechRecognition(nsPIDOMWindow* aOwnerWindow);
  virtual ~SpeechRecognition() {};

  NS_DECL_ISUPPORTS_INHERITED

  NS_DECL_NSIOBSERVER

  nsISupports* GetParentObject() const;

  virtual JSObject* WrapObject(JSContext* aCx) MOZ_OVERRIDE;

  // WebIDL constructor entry point (new SpeechRecognition() from script).
  static already_AddRefed<SpeechRecognition>
  Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);

  // --- WebIDL attribute accessors ---

  already_AddRefed<SpeechGrammarList> GetGrammars(ErrorResult& aRv) const;

  void SetGrammars(mozilla::dom::SpeechGrammarList& aArg, ErrorResult& aRv);

  void GetLang(nsString& aRetVal, ErrorResult& aRv) const;

  void SetLang(const nsAString& aArg, ErrorResult& aRv);

  bool GetContinuous(ErrorResult& aRv) const;

  void SetContinuous(bool aArg, ErrorResult& aRv);

  bool GetInterimResults(ErrorResult& aRv) const;

  void SetInterimResults(bool aArg, ErrorResult& aRv);

  uint32_t GetMaxAlternatives(ErrorResult& aRv) const;

  void SetMaxAlternatives(uint32_t aArg, ErrorResult& aRv);

  void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;

  void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);

  // --- WebIDL methods ---

  void Start(ErrorResult& aRv);

  void Stop();

  void Abort();

  // WebIDL event handler attributes (onaudiostart, onresult, ...).
  IMPL_EVENT_HANDLER(audiostart)
  IMPL_EVENT_HANDLER(soundstart)
  IMPL_EVENT_HANDLER(speechstart)
  IMPL_EVENT_HANDLER(speechend)
  IMPL_EVENT_HANDLER(soundend)
  IMPL_EVENT_HANDLER(audioend)
  IMPL_EVENT_HANDLER(result)
  IMPL_EVENT_HANDLER(nomatch)
  IMPL_EVENT_HANDLER(error)
  IMPL_EVENT_HANDLER(start)
  IMPL_EVENT_HANDLER(end)

  // Inputs to the recognition state machine (see FSMState below).
  // EVENT_COUNT is a sentinel for iteration/array sizing, not a real event.
  enum EventType {
    EVENT_START,
    EVENT_STOP,
    EVENT_ABORT,
    EVENT_AUDIO_DATA,
    EVENT_AUDIO_ERROR,
    EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
    EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
    EVENT_RECOGNITIONSERVICE_ERROR,
    EVENT_COUNT
  };

  // Build and dispatch an error event of type aErrorType carrying
  // aErrorCode and aMessage.
  void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage);
  // Copy incoming samples into mAudioSamplesBuffer; returns the number of
  // samples consumed (presumably those that fit into the current chunk —
  // confirm against the .cpp).
  uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
  // Split aSamplesBuffer into SharedBuffer chunks appended to aResult;
  // return value semantics are defined in the .cpp — TODO confirm.
  uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<nsRefPtr<SharedBuffer>>& aResult);
  AudioSegment* CreateAudioSegment(nsTArray<nsRefPtr<SharedBuffer>>& aChunks);
  // Feed captured audio into the FSM as an EVENT_AUDIO_DATA event.
  // aProvider identifies the producer of the data (the SpeechStreamListener)
  // so it can be kept alive until the event is processed.
  void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);

  // Test-only switches cached from the media.webspeech.test.* preferences.
  // Static member: the single definition lives in the .cpp; Init() is
  // idempotent and only registers the fake-event/fake-service caches when
  // testing is enabled at all.
  static struct TestConfig
  {
  public:
    bool mEnableTests;
    bool mFakeFSMEvents;
    bool mFakeRecognitionService;

    void Init()
    {
      if (mInitialized) {
        return;
      }

      Preferences::AddBoolVarCache(&mEnableTests, TEST_PREFERENCE_ENABLE);

      if (mEnableTests) {
        Preferences::AddBoolVarCache(&mFakeFSMEvents, TEST_PREFERENCE_FAKE_FSM_EVENTS);
        Preferences::AddBoolVarCache(&mFakeRecognitionService, TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE);
      }

      mInitialized = true;
    }
  private:
    bool mInitialized;
  } mTestConfig;


  friend class SpeechEvent;
private:
  // States of the recognition FSM; STATE_COUNT is a sentinel.
  enum FSMState {
    STATE_IDLE,
    STATE_STARTING,
    STATE_ESTIMATING,
    STATE_WAITING_FOR_SPEECH,
    STATE_RECOGNIZING,
    STATE_WAITING_FOR_RESULT,
    STATE_COUNT
  };

  void SetState(FSMState state);
  // Whether mCurrentState lies between begin and end (inclusivity of the
  // bounds is defined in the .cpp — TODO confirm).
  bool StateBetween(FSMState begin, FSMState end);

  // getUserMedia success callback. Holds a strong reference back to the
  // recognition object so it stays alive until the stream is delivered.
  class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK

    GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaSuccessCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  // getUserMedia failure callback; mirrors the success callback above.
  class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
  {
  public:
    NS_DECL_ISUPPORTS
    NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK

    GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
      : mRecognition(aRecognition)
    {}

    virtual ~GetUserMediaErrorCallback() {}

  private:
    nsRefPtr<SpeechRecognition> mRecognition;
  };

  NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
  NS_IMETHOD StopRecording();

  uint32_t ProcessAudioSegment(AudioSegment* aSegment);
  void NotifyError(SpeechEvent* aEvent);

  // Event pump: ProcessEvent receives a SpeechEvent and Transition applies
  // the appropriate action for (mCurrentState, event type).
  void ProcessEvent(SpeechEvent* aEvent);
  void Transition(SpeechEvent* aEvent);

  // --- FSM action methods invoked from Transition ---
  void Reset();
  void ResetAndEnd();
  void WaitForAudioData(SpeechEvent* aEvent);
  void StartedAudioCapture(SpeechEvent* aEvent);
  void StopRecordingAndRecognize(SpeechEvent* aEvent);
  void WaitForEstimation(SpeechEvent* aEvent);
  void DetectSpeech(SpeechEvent* aEvent);
  void WaitForSpeechEnd(SpeechEvent* aEvent);
  void NotifyFinalResult(SpeechEvent* aEvent);
  void DoNothing(SpeechEvent* aEvent);
  void AbortSilently(SpeechEvent* aEvent);
  void AbortError(SpeechEvent* aEvent);

  nsRefPtr<DOMMediaStream> mDOMStream;
  nsRefPtr<SpeechStreamListener> mSpeechListener;
  nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;

  // Contract/class ID of the recognition service to instantiate
  // (presumably the fake test service when
  // mTestConfig.mFakeRecognitionService is set — verify in the .cpp).
  void GetRecognitionServiceCID(nsACString& aResultCID);

  FSMState mCurrentState;

  // Speech/silence detector fed with the buffered sample chunks.
  Endpointer mEndpointer;
  uint32_t mEstimationSamples;

  uint32_t mAudioSamplesPerChunk;

  // buffer holds one chunk of mAudioSamplesPerChunk
  // samples before feeding it to mEndpointer
  nsRefPtr<SharedBuffer> mAudioSamplesBuffer;
  uint32_t mBufferedSamples;

  nsCOMPtr<nsITimer> mSpeechDetectionTimer;
  bool mAborted;

  // Observer hook for SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC
  // notifications (test-only fake FSM events).
  void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName);

  // Human-readable names for SR_LOG debugging output.
  const char* GetName(FSMState aId);
  const char* GetName(SpeechEvent* aId);
};
   1.261 +
   1.262 +class SpeechEvent : public nsRunnable
   1.263 +{
   1.264 +public:
   1.265 +  SpeechEvent(SpeechRecognition* aRecognition, SpeechRecognition::EventType aType)
   1.266 +  : mAudioSegment(0)
   1.267 +  , mRecognitionResultList(0)
   1.268 +  , mError(0)
   1.269 +  , mRecognition(aRecognition)
   1.270 +  , mType(aType)
   1.271 +  {
   1.272 +  }
   1.273 +
   1.274 +  ~SpeechEvent();
   1.275 +
   1.276 +  NS_IMETHOD Run() MOZ_OVERRIDE;
   1.277 +  AudioSegment* mAudioSegment;
   1.278 +  nsRefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
   1.279 +  nsRefPtr<SpeechRecognitionError> mError;
   1.280 +
   1.281 +  friend class SpeechRecognition;
   1.282 +private:
   1.283 +  SpeechRecognition* mRecognition;
   1.284 +
   1.285 +  // for AUDIO_DATA events, keep a reference to the provider
   1.286 +  // of the data (i.e., the SpeechStreamListener) to ensure it
   1.287 +  // is kept alive (and keeps SpeechRecognition alive) until this
   1.288 +  // event gets processed.
   1.289 +  nsRefPtr<MediaStreamListener> mProvider;
   1.290 +  SpeechRecognition::EventType mType;
   1.291 +};
   1.292 +
   1.293 +} // namespace dom
   1.294 +
   1.295 +inline nsISupports*
   1.296 +ToSupports(dom::SpeechRecognition* aRec)
   1.297 +{
   1.298 +  return ToSupports(static_cast<DOMEventTargetHelper*>(aRec));
   1.299 +}
   1.300 +} // namespace mozilla
   1.301 +
   1.302 +#endif

mercurial