content/media/webspeech/recognition/endpointer.h

Fri, 16 Jan 2015 04:50:19 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 04:50:19 +0100
branch
TOR_BUG_9701
changeset 13
44a2da4a2ab2
permissions
-rw-r--r--

Replace accessor implementation with direct member state manipulation, by
request https://trac.torproject.org/projects/tor/ticket/9701#comment:32

     1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
     2 //
     3 // Redistribution and use in source and binary forms, with or without
     4 // modification, are permitted provided that the following conditions are
     5 // met:
     6 //
     7 //    * Redistributions of source code must retain the above copyright
     8 // notice, this list of conditions and the following disclaimer.
     9 //    * Redistributions in binary form must reproduce the above
    10 // copyright notice, this list of conditions and the following disclaimer
    11 // in the documentation and/or other materials provided with the
    12 // distribution.
    13 //    * Neither the name of Google Inc. nor the names of its
    14 // contributors may be used to endorse or promote products derived from
    15 // this software without specific prior written permission.
    16 //
    17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
    30 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
    32 #include "energy_endpointer.h"
    34 namespace mozilla {
    36 struct AudioChunk;
    38 // A simple interface to the underlying energy-endpointer implementation. This
    39 // class lets callers provide audio as it is being recorded and lets them poll
    40 // to find when the user has stopped speaking.
    41 //
    42 // There are two events that may trigger the end of speech:
    43 //
    44 // speechInputPossiblyComplete event:
    45 //
    46 // Signals that silence/noise has been detected for a *short* amount of
    47 // time after some speech has been detected. It can be used for low latency
    48 // UI feedback. To disable it, set it to a large amount.
    49 //
    50 // speechInputComplete event:
    51 //
    52 // This event is intended to signal end of input and to stop recording.
    53 // The amount of time to wait after speech is set by
    54 // speech_input_complete_silence_length_ and optionally two other
    55 // parameters (see below).
    56 // This time can be held constant, or can change as more speech is detected.
    57 // In the latter case, the time changes after a set amount of time from the
    58 // *beginning* of speech.  This is motivated by the expectation that there
    59 // will be two distinct types of inputs: short search queries and longer
    60 // dictation style input.
    61 //
    62 // Three parameters are used to define the piecewise constant timeout function.
    63 // The timeout length is speech_input_complete_silence_length until
    64 // long_speech_length, when it changes to
    65 // long_speech_input_complete_silence_length.
    66 class Endpointer {
    67  public:
         // NOTE(review): sample_rate is presumably the audio sample rate in Hz;
         // the constructor body is not in this header -- confirm against the
         // implementation file.
    68   explicit Endpointer(int sample_rate);
    70   // Start the endpointer. This should be called at the beginning of a session.
    71   void StartSession();
    73   // Stop the endpointer.
    74   void EndSession();
    76   // Start environment estimation. Audio will be used for environment estimation
    77   // i.e. noise level estimation.
    78   void SetEnvironmentEstimationMode();
    80   // Start user input. This should be called when the user indicates start of
    81   // input, e.g. by pressing a button.
    82   void SetUserInputMode();
    84   // Process a segment of audio, which may be more than one frame.
    85   // The status of the last frame will be returned.
         // NOTE(review): rms_out looks like an out-parameter; whether it may be
         // null is not visible from this header -- confirm before relying on it.
    86   EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
    88   // Get the status of the endpointer.
         // NOTE(review): time_us looks like an out-parameter (presumably a time in
         // microseconds, per the _us naming) -- confirm against the implementation.
    89   EpStatus Status(int64_t *time_us);
    91   // Get the expected frame size for audio chunks. Audio chunks are expected
    92   // to contain a number of samples that is a multiple of this number, and extra
    93   // samples will be dropped.
    94   int32_t FrameSize() const {
    95     return frame_size_;
    96   }
    98   // Returns true if the endpointer detected reasonable audio levels above
    99   // background noise which could be user speech, false if not.
   100   bool DidStartReceivingSpeech() const {
   101     return speech_previously_detected_;
   102   }
         // Returns whatever energy_endpointer_.estimating_environment() reports.
   104   bool IsEstimatingEnvironment() const {
   105     return energy_endpointer_.estimating_environment();
   106   }
         // The four setters below store time_us, unchanged, into the member of the
         // matching name (with the _us_ suffix). They perform no validation.
   108   void set_speech_input_complete_silence_length(int64_t time_us) {
   109     speech_input_complete_silence_length_us_ = time_us;
   110   }
   112   void set_long_speech_input_complete_silence_length(int64_t time_us) {
   113     long_speech_input_complete_silence_length_us_ = time_us;
   114   }
   116   void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
   117     speech_input_possibly_complete_silence_length_us_ = time_us;
   118   }
   120   void set_long_speech_length(int64_t time_us) {
   121     long_speech_length_us_ = time_us;
   122   }
         // Returns the current value of the speech_input_complete_ flag.
   124   bool speech_input_complete() const {
   125     return speech_input_complete_;
   126   }
   128   // RMS background noise level in dB, as reported by energy_endpointer_.
   129   float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
   131  private:
   132   // Reset internal states. Helper method common to initial input utterance
   133   // and following input utterances.
   134   void Reset();
   136   // Minimum allowable length of speech input.
   137   int64_t speech_input_minimum_length_us_;
   139   // The speechInputPossiblyComplete event signals that silence/noise has been
   140   // detected for a *short* amount of time after some speech has been detected.
   141   // This property specifies the time period.
   142   int64_t speech_input_possibly_complete_silence_length_us_;
   144   // The speechInputComplete event signals that silence/noise has been
   145   // detected for a *long* amount of time after some speech has been detected.
   146   // This property specifies the time period.
   147   int64_t speech_input_complete_silence_length_us_;
   149   // Same as above, this specifies the required silence period after speech
   150   // detection. This period is used instead of
   151   // speech_input_complete_silence_length_us_ when the utterance is longer than
   152   // long_speech_length_us_. This parameter is optional.
   153   int64_t long_speech_input_complete_silence_length_us_;
   155   // The period of time after which the endpointer should consider
   156   // long_speech_input_complete_silence_length_us_ as a valid silence period
   157   // instead of speech_input_complete_silence_length_us_. This parameter is
   158   // optional.
   159   int64_t long_speech_length_us_;
   161   // First speech onset time, used in determination of speech complete timeout.
   162   int64_t speech_start_time_us_;
   164   // Most recent end time, used in determination of speech complete timeout.
   165   int64_t speech_end_time_us_;
         // NOTE(review): the next four members are not documented in this header and
         // are not touched by any inline method here; their exact roles are defined
         // by the implementation file.
   167   int64_t audio_frame_time_us_;
   168   EpStatus old_ep_status_;
   169   bool waiting_for_speech_possibly_complete_timeout_;
   170   bool waiting_for_speech_complete_timeout_;
         // Value returned by DidStartReceivingSpeech().
   171   bool speech_previously_detected_;
         // Value returned by speech_input_complete().
   172   bool speech_input_complete_;
         // Underlying energy-based endpointer; queried by IsEstimatingEnvironment()
         // and NoiseLevelDb().
   173   EnergyEndpointer energy_endpointer_;
         // NOTE(review): presumably holds the constructor's sample_rate argument --
         // confirm against the implementation file.
   174   int sample_rate_;
         // Value returned by FrameSize().
   175   int32_t frame_size_;
   176 };
   178 }  // namespace mozilla
   180 #endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

mercurial