/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"

typedef unsigned short SpeechServiceType;

/**
 * A callback is implemented by the service. For direct audio services, it is
 * required to implement these, although the cancel method could be the most
 * useful one, e.g. for shutting down the speech resources.
 */
[scriptable, uuid(408251b0-1d7b-4876-888f-718859ce8c9d)]
interface nsISpeechTaskCallback : nsISupports
{
  /**
   * The user or application has paused the speech.
   */
  void onPause();

  /**
   * The user or application has resumed the speech.
   */
  void onResume();

  /**
   * The user or application has canceled the speech.
   */
  void onCancel();
};


/**
 * A task is associated with a single utterance. It is provided by the browser
 * to the service in the speak() method.
 */
[scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)]
interface nsISpeechTask : nsISupports
{
  /**
   * Prepare browser for speech.
   *
   * @param aCallback callback object for mid-speech operations.
   * @param aChannels number of audio channels. Only required in direct
   *                  audio services.
   * @param aRate     audio rate. Only required in direct audio services.
   */
  [optional_argc] void setup(in nsISpeechTaskCallback aCallback,
                             [optional] in uint32_t aChannels,
                             [optional] in uint32_t aRate);

  /**
   * Send audio data to browser.
   *
   * @param aData      an Int16Array with PCM-16 audio data.
   * @param aLandmarks an array of sample offset and landmark pairs.
   *                   Used for emitting boundary and mark events.
   */
  [implicit_jscontext]
  void sendAudio(in jsval aData, in jsval aLandmarks);

  [noscript]
  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);

  /**
   * Dispatch start event.
   */
  void dispatchStart();

  /**
   * Dispatch end event.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchEnd(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch pause event. Should not be called directly by service.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchPause(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch resume event. Should not be called directly by service.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchResume(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch error event.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchError(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch boundary event.
   *
   * @param aName        name of boundary, 'word' or 'sentence'.
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchBoundary(in DOMString aName, in float aElapsedTime,
                        in unsigned long aCharIndex);

  /**
   * Dispatch mark event.
   *
   * @param aName        mark identifier.
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchMark(in DOMString aName, in float aElapsedTime, in unsigned long aCharIndex);
};
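
/*
 * Illustrative sketch (not part of the interface contract): a direct audio
 * service pushes PCM-16 samples through the task it receives in speak().
 * Assuming the standard XPIDL C++ bindings for nsISpeechTask, a hypothetical
 * native service might drive the task roughly as follows. Names such as
 * MyDirectService, mCallback, kRate, samples and sampleCount are invented
 * for the example, and the trailing argument to Setup() is the argument
 * count added by [optional_argc].
 *
 *   NS_IMETHODIMP
 *   MyDirectService::Speak(const nsAString& aText, const nsAString& aUri,
 *                          float aRate, float aPitch, nsISpeechTask* aTask)
 *   {
 *     // Announce one channel of PCM-16 audio at the engine's sample rate.
 *     aTask->Setup(mCallback, 1, kRate, 3);
 *
 *     // Stream synthesized samples as they are produced.
 *     aTask->SendAudioNative(samples, sampleCount);
 *
 *     // An empty buffer signals the end of the utterance.
 *     aTask->SendAudioNative(nullptr, 0);
 *     return NS_OK;
 *   }
 */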

/**
 * The main interface of a speech synthesis service.
 *
 * A service's speak method could be implemented in two ways:
 *  1. Indirect audio - the service is responsible for outputting audio.
 *     The service calls the nsISpeechTask.dispatch* methods directly,
 *     starting with dispatchStart() and ending with dispatchEnd() or
 *     dispatchError().
 *
 *  2. Direct audio - the service provides us with PCM-16 data, and we
 *     output it. The service does not call the dispatch task methods
 *     directly. Instead, audio information is provided at setup(), and
 *     audio data is sent with sendAudio(). The utterance is terminated
 *     with an empty sendAudio().
 */
[scriptable, uuid(3952d388-050c-47ba-a70f-5fc1cadf1db0)]
interface nsISpeechService : nsISupports
{
  /**
   * Speak the given text using the voice identified by the given uri. See
   * W3C Speech API spec for information about pitch and rate.
   * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes
   *
   * @param aText  text to utter.
   * @param aUri   unique voice identifier.
   * @param aRate  rate to speak voice in.
   * @param aPitch pitch to speak voice in.
   * @param aTask  task instance for utterance, used for sending events or
   *               audio data back to the browser.
   */
  void speak(in DOMString aText, in DOMString aUri,
             in float aRate, in float aPitch,
             in nsISpeechTask aTask);

  const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1;
  const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2;

  readonly attribute SpeechServiceType serviceType;
};
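
/*
 * Illustrative sketch (not part of the interface contract): an indirect
 * audio service outputs audio itself and only reports progress through the
 * task's dispatch* methods, starting with dispatchStart() and finishing with
 * dispatchEnd() or dispatchError(). Assuming the standard XPIDL C++
 * bindings, a hypothetical implementation might look roughly like this;
 * MyIndirectService, mCallback, StartEngine(), elapsed and charIndex are
 * invented for the example.
 *
 *   NS_IMETHODIMP
 *   MyIndirectService::Speak(const nsAString& aText, const nsAString& aUri,
 *                            float aRate, float aPitch, nsISpeechTask* aTask)
 *   {
 *     // Only the callback is needed here; channels/rate are for direct audio.
 *     aTask->Setup(mCallback, 0, 0, 1);
 *
 *     aTask->DispatchStart();
 *     StartEngine(aText, aUri, aRate, aPitch);  // platform synthesis (hypothetical)
 *
 *     // Later, as the engine reports word boundaries and completion:
 *     //   aTask->DispatchBoundary(NS_LITERAL_STRING("word"), elapsed, charIndex);
 *     //   aTask->DispatchEnd(elapsed, charIndex);  // or DispatchError(...) on failure
 *     return NS_OK;
 *   }
 */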