content/media/webspeech/recognition/endpointer.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
michael@0 2 //
michael@0 3 // Redistribution and use in source and binary forms, with or without
michael@0 4 // modification, are permitted provided that the following conditions are
michael@0 5 // met:
michael@0 6 //
michael@0 7 // * Redistributions of source code must retain the above copyright
michael@0 8 // notice, this list of conditions and the following disclaimer.
michael@0 9 // * Redistributions in binary form must reproduce the above
michael@0 10 // copyright notice, this list of conditions and the following disclaimer
michael@0 11 // in the documentation and/or other materials provided with the
michael@0 12 // distribution.
michael@0 13 // * Neither the name of Google Inc. nor the names of its
michael@0 14 // contributors may be used to endorse or promote products derived from
michael@0 15 // this software without specific prior written permission.
michael@0 16 //
michael@0 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
michael@0 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
michael@0 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
michael@0 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
michael@0 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
michael@0 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
michael@0 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
michael@0 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
michael@0 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 28
michael@0 29 #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
michael@0 30 #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
michael@0 31
michael@0 32 #include "energy_endpointer.h"
michael@0 33
michael@0 34 namespace mozilla {
michael@0 35
michael@0 36 struct AudioChunk;  // Forward declaration; only used by reference in ProcessAudio().
michael@0 37
michael@0 38 // A simple interface to the underlying energy-endpointer implementation. This
michael@0 39 // class lets callers provide audio as it is being recorded and lets them poll
michael@0 40 // to find out when the user has stopped speaking.
michael@0 41 //
michael@0 42 // There are two events that may trigger the end of speech:
michael@0 43 //
michael@0 44 // speechInputPossiblyComplete event:
michael@0 45 //
michael@0 46 // Signals that silence/noise has been detected for a *short* amount of
michael@0 47 // time after some speech has been detected. It can be used for low-latency
michael@0 48 // UI feedback. To disable it, set its silence length to a large value.
michael@0 49 //
michael@0 50 // speechInputComplete event:
michael@0 51 //
michael@0 52 // This event is intended to signal end of input and to stop recording.
michael@0 53 // The amount of time to wait after speech is set by
michael@0 54 // speech_input_complete_silence_length_us_ and optionally two other
michael@0 55 // parameters (see below).
michael@0 56 // This time can be held constant, or can change as more speech is detected.
michael@0 57 // In the latter case, the time changes after a set amount of time from the
michael@0 58 // *beginning* of speech. This is motivated by the expectation that there
michael@0 59 // will be two distinct types of inputs: short search queries and longer
michael@0 60 // dictation style input.
michael@0 61 //
michael@0 62 // Three parameters are used to define the piecewise constant timeout function.
michael@0 63 // The timeout length is speech_input_complete_silence_length until
michael@0 64 // long_speech_length, when it changes to
michael@0 65 // long_speech_input_complete_silence_length.
michael@0 66 class Endpointer {
michael@0 67 public:
michael@0 68 explicit Endpointer(int sample_rate);
michael@0 69
michael@0 70 // Start the endpointer. This should be called at the beginning of a session.
michael@0 71 void StartSession();
michael@0 72
michael@0 73 // Stop the endpointer.
michael@0 74 void EndSession();
michael@0 75
michael@0 76 // Start environment estimation. Audio will be used for environment estimation
michael@0 77 // i.e. noise level estimation.
michael@0 78 void SetEnvironmentEstimationMode();
michael@0 79
michael@0 80 // Start user input. This should be called when the user indicates start of
michael@0 81 // input, e.g. by pressing a button.
michael@0 82 void SetUserInputMode();
michael@0 83
michael@0 84 // Process a segment of audio, which may be more than one frame.
michael@0 85 // The status of the last frame will be returned.
michael@0 86 EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
michael@0 87
michael@0 88 // Get the status of the endpointer. NOTE(review): time_us looks like an out-parameter; confirm in the implementation.
michael@0 89 EpStatus Status(int64_t *time_us);
michael@0 90
michael@0 91 // Get the expected frame size for audio chunks. Audio chunks are expected
michael@0 92 // to contain a number of samples that is a multiple of this number, and extra
michael@0 93 // samples will be dropped.
michael@0 94 int32_t FrameSize() const {
michael@0 95 return frame_size_;
michael@0 96 }
michael@0 97
michael@0 98 // Returns true if the endpointer detected reasonable audio levels above
michael@0 99 // background noise which could be user speech, false if not.
michael@0 100 bool DidStartReceivingSpeech() const {
michael@0 101 return speech_previously_detected_;
michael@0 102 }
michael@0 103
michael@0 104 bool IsEstimatingEnvironment() const {  // Forwards to the energy endpointer.
michael@0 105 return energy_endpointer_.estimating_environment();
michael@0 106 }
michael@0 107
michael@0 108 void set_speech_input_complete_silence_length(int64_t time_us) {
michael@0 109 speech_input_complete_silence_length_us_ = time_us;
michael@0 110 }
michael@0 111
michael@0 112 void set_long_speech_input_complete_silence_length(int64_t time_us) {
michael@0 113 long_speech_input_complete_silence_length_us_ = time_us;
michael@0 114 }
michael@0 115
michael@0 116 void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
michael@0 117 speech_input_possibly_complete_silence_length_us_ = time_us;
michael@0 118 }
michael@0 119
michael@0 120 void set_long_speech_length(int64_t time_us) {
michael@0 121 long_speech_length_us_ = time_us;
michael@0 122 }
michael@0 123
michael@0 124 bool speech_input_complete() const {
michael@0 125 return speech_input_complete_;
michael@0 126 }
michael@0 127
michael@0 128 // RMS background noise level in dB.
michael@0 129 float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
michael@0 130
michael@0 131 private:
michael@0 132 // Reset internal states. Helper method common to initial input utterance
michael@0 133 // and following input utterances.
michael@0 134 void Reset();
michael@0 135
michael@0 136 // Minimum allowable length of speech input.
michael@0 137 int64_t speech_input_minimum_length_us_;
michael@0 138
michael@0 139 // The speechInputPossiblyComplete event signals that silence/noise has been
michael@0 140 // detected for a *short* amount of time after some speech has been detected.
michael@0 141 // This property specifies the time period.
michael@0 142 int64_t speech_input_possibly_complete_silence_length_us_;
michael@0 143
michael@0 144 // The speechInputComplete event signals that silence/noise has been
michael@0 145 // detected for a *long* amount of time after some speech has been detected.
michael@0 146 // This property specifies the time period.
michael@0 147 int64_t speech_input_complete_silence_length_us_;
michael@0 148
michael@0 149 // Same as above, this specifies the required silence period after speech
michael@0 150 // detection. This period is used instead of
michael@0 151 // speech_input_complete_silence_length_us_ when the utterance is longer than
michael@0 152 // long_speech_length_us_. This parameter is optional.
michael@0 153 int64_t long_speech_input_complete_silence_length_us_;
michael@0 154
michael@0 155 // The period of time after which the endpointer should consider
michael@0 156 // long_speech_input_complete_silence_length_us_ as a valid silence period
michael@0 157 // instead of speech_input_complete_silence_length_us_. This parameter is
michael@0 158 // optional.
michael@0 159 int64_t long_speech_length_us_;
michael@0 160
michael@0 161 // First speech onset time, used in determination of speech complete timeout.
michael@0 162 int64_t speech_start_time_us_;
michael@0 163
michael@0 164 // Most recent end time, used in determination of speech complete timeout.
michael@0 165 int64_t speech_end_time_us_;
michael@0 166
michael@0 167 int64_t audio_frame_time_us_;
michael@0 168 EpStatus old_ep_status_;
michael@0 169 bool waiting_for_speech_possibly_complete_timeout_;
michael@0 170 bool waiting_for_speech_complete_timeout_;
michael@0 171 bool speech_previously_detected_;  // Returned by DidStartReceivingSpeech().
michael@0 172 bool speech_input_complete_;  // Returned by speech_input_complete().
michael@0 173 EnergyEndpointer energy_endpointer_;  // Underlying energy-based endpointer wrapped by this class.
michael@0 174 int sample_rate_;
michael@0 175 int32_t frame_size_;  // Returned by FrameSize().
michael@0 176 };
michael@0 177
michael@0 178 } // namespace mozilla
michael@0 179
michael@0 180 #endif // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

mercurial