|
1 /* |
|
2 * Copyright (C) 2012 Intel Inc. All rights reserved. |
|
3 * |
|
4 * Redistribution and use in source and binary forms, with or without |
|
5 * modification, are permitted provided that the following conditions |
|
6 * are met: |
|
7 * |
|
8 * 1. Redistributions of source code must retain the above copyright |
|
9 * notice, this list of conditions and the following disclaimer. |
|
10 * 2. Redistributions in binary form must reproduce the above copyright |
|
11 * notice, this list of conditions and the following disclaimer in the |
|
12 * documentation and/or other materials provided with the distribution. |
|
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of |
|
14 * its contributors may be used to endorse or promote products derived |
|
15 * from this software without specific prior written permission. |
|
16 * |
|
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
|
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
|
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include "DirectConvolver.h" |
|
30 #include "mozilla/PodOperations.h" |
|
31 |
|
32 using namespace mozilla; |
|
33 |
|
34 namespace WebCore { |
|
35 |
|
36 DirectConvolver::DirectConvolver(size_t inputBlockSize) |
|
37 : m_inputBlockSize(inputBlockSize) |
|
38 { |
|
39 m_buffer.SetLength(inputBlockSize * 2); |
|
40 PodZero(m_buffer.Elements(), inputBlockSize * 2); |
|
41 } |
|
42 |
|
43 void DirectConvolver::process(const nsTArray<float>* convolutionKernel, const float* sourceP, float* destP, size_t framesToProcess) |
|
44 { |
|
45 MOZ_ASSERT(framesToProcess == m_inputBlockSize); |
|
46 if (framesToProcess != m_inputBlockSize) |
|
47 return; |
|
48 |
|
49 // Only support kernelSize <= m_inputBlockSize |
|
50 size_t kernelSize = convolutionKernel->Length(); |
|
51 MOZ_ASSERT(kernelSize <= m_inputBlockSize); |
|
52 if (kernelSize > m_inputBlockSize) |
|
53 return; |
|
54 |
|
55 const float* kernelP = convolutionKernel->Elements(); |
|
56 |
|
57 // Sanity check |
|
58 bool isCopyGood = kernelP && sourceP && destP && m_buffer.Elements(); |
|
59 MOZ_ASSERT(isCopyGood); |
|
60 if (!isCopyGood) |
|
61 return; |
|
62 |
|
63 float* inputP = m_buffer.Elements() + m_inputBlockSize; |
|
64 |
|
65 // Copy samples to 2nd half of input buffer. |
|
66 memcpy(inputP, sourceP, sizeof(float) * framesToProcess); |
|
67 |
|
68 // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES. |
|
69 #define CONVOLVE_ONE_SAMPLE \ |
|
70 sum += inputP[i - j] * kernelP[j]; \ |
|
71 j++; |
|
72 |
|
73 size_t i = 0; |
|
74 while (i < framesToProcess) { |
|
75 size_t j = 0; |
|
76 float sum = 0; |
|
77 |
|
78 // FIXME: SSE optimization may be applied here. |
|
79 if (kernelSize == 32) { |
|
80 CONVOLVE_ONE_SAMPLE // 1 |
|
81 CONVOLVE_ONE_SAMPLE // 2 |
|
82 CONVOLVE_ONE_SAMPLE // 3 |
|
83 CONVOLVE_ONE_SAMPLE // 4 |
|
84 CONVOLVE_ONE_SAMPLE // 5 |
|
85 CONVOLVE_ONE_SAMPLE // 6 |
|
86 CONVOLVE_ONE_SAMPLE // 7 |
|
87 CONVOLVE_ONE_SAMPLE // 8 |
|
88 CONVOLVE_ONE_SAMPLE // 9 |
|
89 CONVOLVE_ONE_SAMPLE // 10 |
|
90 |
|
91 CONVOLVE_ONE_SAMPLE // 11 |
|
92 CONVOLVE_ONE_SAMPLE // 12 |
|
93 CONVOLVE_ONE_SAMPLE // 13 |
|
94 CONVOLVE_ONE_SAMPLE // 14 |
|
95 CONVOLVE_ONE_SAMPLE // 15 |
|
96 CONVOLVE_ONE_SAMPLE // 16 |
|
97 CONVOLVE_ONE_SAMPLE // 17 |
|
98 CONVOLVE_ONE_SAMPLE // 18 |
|
99 CONVOLVE_ONE_SAMPLE // 19 |
|
100 CONVOLVE_ONE_SAMPLE // 20 |
|
101 |
|
102 CONVOLVE_ONE_SAMPLE // 21 |
|
103 CONVOLVE_ONE_SAMPLE // 22 |
|
104 CONVOLVE_ONE_SAMPLE // 23 |
|
105 CONVOLVE_ONE_SAMPLE // 24 |
|
106 CONVOLVE_ONE_SAMPLE // 25 |
|
107 CONVOLVE_ONE_SAMPLE // 26 |
|
108 CONVOLVE_ONE_SAMPLE // 27 |
|
109 CONVOLVE_ONE_SAMPLE // 28 |
|
110 CONVOLVE_ONE_SAMPLE // 29 |
|
111 CONVOLVE_ONE_SAMPLE // 30 |
|
112 |
|
113 CONVOLVE_ONE_SAMPLE // 31 |
|
114 CONVOLVE_ONE_SAMPLE // 32 |
|
115 |
|
116 } else if (kernelSize == 64) { |
|
117 CONVOLVE_ONE_SAMPLE // 1 |
|
118 CONVOLVE_ONE_SAMPLE // 2 |
|
119 CONVOLVE_ONE_SAMPLE // 3 |
|
120 CONVOLVE_ONE_SAMPLE // 4 |
|
121 CONVOLVE_ONE_SAMPLE // 5 |
|
122 CONVOLVE_ONE_SAMPLE // 6 |
|
123 CONVOLVE_ONE_SAMPLE // 7 |
|
124 CONVOLVE_ONE_SAMPLE // 8 |
|
125 CONVOLVE_ONE_SAMPLE // 9 |
|
126 CONVOLVE_ONE_SAMPLE // 10 |
|
127 |
|
128 CONVOLVE_ONE_SAMPLE // 11 |
|
129 CONVOLVE_ONE_SAMPLE // 12 |
|
130 CONVOLVE_ONE_SAMPLE // 13 |
|
131 CONVOLVE_ONE_SAMPLE // 14 |
|
132 CONVOLVE_ONE_SAMPLE // 15 |
|
133 CONVOLVE_ONE_SAMPLE // 16 |
|
134 CONVOLVE_ONE_SAMPLE // 17 |
|
135 CONVOLVE_ONE_SAMPLE // 18 |
|
136 CONVOLVE_ONE_SAMPLE // 19 |
|
137 CONVOLVE_ONE_SAMPLE // 20 |
|
138 |
|
139 CONVOLVE_ONE_SAMPLE // 21 |
|
140 CONVOLVE_ONE_SAMPLE // 22 |
|
141 CONVOLVE_ONE_SAMPLE // 23 |
|
142 CONVOLVE_ONE_SAMPLE // 24 |
|
143 CONVOLVE_ONE_SAMPLE // 25 |
|
144 CONVOLVE_ONE_SAMPLE // 26 |
|
145 CONVOLVE_ONE_SAMPLE // 27 |
|
146 CONVOLVE_ONE_SAMPLE // 28 |
|
147 CONVOLVE_ONE_SAMPLE // 29 |
|
148 CONVOLVE_ONE_SAMPLE // 30 |
|
149 |
|
150 CONVOLVE_ONE_SAMPLE // 31 |
|
151 CONVOLVE_ONE_SAMPLE // 32 |
|
152 CONVOLVE_ONE_SAMPLE // 33 |
|
153 CONVOLVE_ONE_SAMPLE // 34 |
|
154 CONVOLVE_ONE_SAMPLE // 35 |
|
155 CONVOLVE_ONE_SAMPLE // 36 |
|
156 CONVOLVE_ONE_SAMPLE // 37 |
|
157 CONVOLVE_ONE_SAMPLE // 38 |
|
158 CONVOLVE_ONE_SAMPLE // 39 |
|
159 CONVOLVE_ONE_SAMPLE // 40 |
|
160 |
|
161 CONVOLVE_ONE_SAMPLE // 41 |
|
162 CONVOLVE_ONE_SAMPLE // 42 |
|
163 CONVOLVE_ONE_SAMPLE // 43 |
|
164 CONVOLVE_ONE_SAMPLE // 44 |
|
165 CONVOLVE_ONE_SAMPLE // 45 |
|
166 CONVOLVE_ONE_SAMPLE // 46 |
|
167 CONVOLVE_ONE_SAMPLE // 47 |
|
168 CONVOLVE_ONE_SAMPLE // 48 |
|
169 CONVOLVE_ONE_SAMPLE // 49 |
|
170 CONVOLVE_ONE_SAMPLE // 50 |
|
171 |
|
172 CONVOLVE_ONE_SAMPLE // 51 |
|
173 CONVOLVE_ONE_SAMPLE // 52 |
|
174 CONVOLVE_ONE_SAMPLE // 53 |
|
175 CONVOLVE_ONE_SAMPLE // 54 |
|
176 CONVOLVE_ONE_SAMPLE // 55 |
|
177 CONVOLVE_ONE_SAMPLE // 56 |
|
178 CONVOLVE_ONE_SAMPLE // 57 |
|
179 CONVOLVE_ONE_SAMPLE // 58 |
|
180 CONVOLVE_ONE_SAMPLE // 59 |
|
181 CONVOLVE_ONE_SAMPLE // 60 |
|
182 |
|
183 CONVOLVE_ONE_SAMPLE // 61 |
|
184 CONVOLVE_ONE_SAMPLE // 62 |
|
185 CONVOLVE_ONE_SAMPLE // 63 |
|
186 CONVOLVE_ONE_SAMPLE // 64 |
|
187 |
|
188 } else if (kernelSize == 128) { |
|
189 CONVOLVE_ONE_SAMPLE // 1 |
|
190 CONVOLVE_ONE_SAMPLE // 2 |
|
191 CONVOLVE_ONE_SAMPLE // 3 |
|
192 CONVOLVE_ONE_SAMPLE // 4 |
|
193 CONVOLVE_ONE_SAMPLE // 5 |
|
194 CONVOLVE_ONE_SAMPLE // 6 |
|
195 CONVOLVE_ONE_SAMPLE // 7 |
|
196 CONVOLVE_ONE_SAMPLE // 8 |
|
197 CONVOLVE_ONE_SAMPLE // 9 |
|
198 CONVOLVE_ONE_SAMPLE // 10 |
|
199 |
|
200 CONVOLVE_ONE_SAMPLE // 11 |
|
201 CONVOLVE_ONE_SAMPLE // 12 |
|
202 CONVOLVE_ONE_SAMPLE // 13 |
|
203 CONVOLVE_ONE_SAMPLE // 14 |
|
204 CONVOLVE_ONE_SAMPLE // 15 |
|
205 CONVOLVE_ONE_SAMPLE // 16 |
|
206 CONVOLVE_ONE_SAMPLE // 17 |
|
207 CONVOLVE_ONE_SAMPLE // 18 |
|
208 CONVOLVE_ONE_SAMPLE // 19 |
|
209 CONVOLVE_ONE_SAMPLE // 20 |
|
210 |
|
211 CONVOLVE_ONE_SAMPLE // 21 |
|
212 CONVOLVE_ONE_SAMPLE // 22 |
|
213 CONVOLVE_ONE_SAMPLE // 23 |
|
214 CONVOLVE_ONE_SAMPLE // 24 |
|
215 CONVOLVE_ONE_SAMPLE // 25 |
|
216 CONVOLVE_ONE_SAMPLE // 26 |
|
217 CONVOLVE_ONE_SAMPLE // 27 |
|
218 CONVOLVE_ONE_SAMPLE // 28 |
|
219 CONVOLVE_ONE_SAMPLE // 29 |
|
220 CONVOLVE_ONE_SAMPLE // 30 |
|
221 |
|
222 CONVOLVE_ONE_SAMPLE // 31 |
|
223 CONVOLVE_ONE_SAMPLE // 32 |
|
224 CONVOLVE_ONE_SAMPLE // 33 |
|
225 CONVOLVE_ONE_SAMPLE // 34 |
|
226 CONVOLVE_ONE_SAMPLE // 35 |
|
227 CONVOLVE_ONE_SAMPLE // 36 |
|
228 CONVOLVE_ONE_SAMPLE // 37 |
|
229 CONVOLVE_ONE_SAMPLE // 38 |
|
230 CONVOLVE_ONE_SAMPLE // 39 |
|
231 CONVOLVE_ONE_SAMPLE // 40 |
|
232 |
|
233 CONVOLVE_ONE_SAMPLE // 41 |
|
234 CONVOLVE_ONE_SAMPLE // 42 |
|
235 CONVOLVE_ONE_SAMPLE // 43 |
|
236 CONVOLVE_ONE_SAMPLE // 44 |
|
237 CONVOLVE_ONE_SAMPLE // 45 |
|
238 CONVOLVE_ONE_SAMPLE // 46 |
|
239 CONVOLVE_ONE_SAMPLE // 47 |
|
240 CONVOLVE_ONE_SAMPLE // 48 |
|
241 CONVOLVE_ONE_SAMPLE // 49 |
|
242 CONVOLVE_ONE_SAMPLE // 50 |
|
243 |
|
244 CONVOLVE_ONE_SAMPLE // 51 |
|
245 CONVOLVE_ONE_SAMPLE // 52 |
|
246 CONVOLVE_ONE_SAMPLE // 53 |
|
247 CONVOLVE_ONE_SAMPLE // 54 |
|
248 CONVOLVE_ONE_SAMPLE // 55 |
|
249 CONVOLVE_ONE_SAMPLE // 56 |
|
250 CONVOLVE_ONE_SAMPLE // 57 |
|
251 CONVOLVE_ONE_SAMPLE // 58 |
|
252 CONVOLVE_ONE_SAMPLE // 59 |
|
253 CONVOLVE_ONE_SAMPLE // 60 |
|
254 |
|
255 CONVOLVE_ONE_SAMPLE // 61 |
|
256 CONVOLVE_ONE_SAMPLE // 62 |
|
257 CONVOLVE_ONE_SAMPLE // 63 |
|
258 CONVOLVE_ONE_SAMPLE // 64 |
|
259 CONVOLVE_ONE_SAMPLE // 65 |
|
260 CONVOLVE_ONE_SAMPLE // 66 |
|
261 CONVOLVE_ONE_SAMPLE // 67 |
|
262 CONVOLVE_ONE_SAMPLE // 68 |
|
263 CONVOLVE_ONE_SAMPLE // 69 |
|
264 CONVOLVE_ONE_SAMPLE // 70 |
|
265 |
|
266 CONVOLVE_ONE_SAMPLE // 71 |
|
267 CONVOLVE_ONE_SAMPLE // 72 |
|
268 CONVOLVE_ONE_SAMPLE // 73 |
|
269 CONVOLVE_ONE_SAMPLE // 74 |
|
270 CONVOLVE_ONE_SAMPLE // 75 |
|
271 CONVOLVE_ONE_SAMPLE // 76 |
|
272 CONVOLVE_ONE_SAMPLE // 77 |
|
273 CONVOLVE_ONE_SAMPLE // 78 |
|
274 CONVOLVE_ONE_SAMPLE // 79 |
|
275 CONVOLVE_ONE_SAMPLE // 80 |
|
276 |
|
277 CONVOLVE_ONE_SAMPLE // 81 |
|
278 CONVOLVE_ONE_SAMPLE // 82 |
|
279 CONVOLVE_ONE_SAMPLE // 83 |
|
280 CONVOLVE_ONE_SAMPLE // 84 |
|
281 CONVOLVE_ONE_SAMPLE // 85 |
|
282 CONVOLVE_ONE_SAMPLE // 86 |
|
283 CONVOLVE_ONE_SAMPLE // 87 |
|
284 CONVOLVE_ONE_SAMPLE // 88 |
|
285 CONVOLVE_ONE_SAMPLE // 89 |
|
286 CONVOLVE_ONE_SAMPLE // 90 |
|
287 |
|
288 CONVOLVE_ONE_SAMPLE // 91 |
|
289 CONVOLVE_ONE_SAMPLE // 92 |
|
290 CONVOLVE_ONE_SAMPLE // 93 |
|
291 CONVOLVE_ONE_SAMPLE // 94 |
|
292 CONVOLVE_ONE_SAMPLE // 95 |
|
293 CONVOLVE_ONE_SAMPLE // 96 |
|
294 CONVOLVE_ONE_SAMPLE // 97 |
|
295 CONVOLVE_ONE_SAMPLE // 98 |
|
296 CONVOLVE_ONE_SAMPLE // 99 |
|
297 CONVOLVE_ONE_SAMPLE // 100 |
|
298 |
|
299 CONVOLVE_ONE_SAMPLE // 101 |
|
300 CONVOLVE_ONE_SAMPLE // 102 |
|
301 CONVOLVE_ONE_SAMPLE // 103 |
|
302 CONVOLVE_ONE_SAMPLE // 104 |
|
303 CONVOLVE_ONE_SAMPLE // 105 |
|
304 CONVOLVE_ONE_SAMPLE // 106 |
|
305 CONVOLVE_ONE_SAMPLE // 107 |
|
306 CONVOLVE_ONE_SAMPLE // 108 |
|
307 CONVOLVE_ONE_SAMPLE // 109 |
|
308 CONVOLVE_ONE_SAMPLE // 110 |
|
309 |
|
310 CONVOLVE_ONE_SAMPLE // 111 |
|
311 CONVOLVE_ONE_SAMPLE // 112 |
|
312 CONVOLVE_ONE_SAMPLE // 113 |
|
313 CONVOLVE_ONE_SAMPLE // 114 |
|
314 CONVOLVE_ONE_SAMPLE // 115 |
|
315 CONVOLVE_ONE_SAMPLE // 116 |
|
316 CONVOLVE_ONE_SAMPLE // 117 |
|
317 CONVOLVE_ONE_SAMPLE // 118 |
|
318 CONVOLVE_ONE_SAMPLE // 119 |
|
319 CONVOLVE_ONE_SAMPLE // 120 |
|
320 |
|
321 CONVOLVE_ONE_SAMPLE // 121 |
|
322 CONVOLVE_ONE_SAMPLE // 122 |
|
323 CONVOLVE_ONE_SAMPLE // 123 |
|
324 CONVOLVE_ONE_SAMPLE // 124 |
|
325 CONVOLVE_ONE_SAMPLE // 125 |
|
326 CONVOLVE_ONE_SAMPLE // 126 |
|
327 CONVOLVE_ONE_SAMPLE // 127 |
|
328 CONVOLVE_ONE_SAMPLE // 128 |
|
329 } else { |
|
330 while (j < kernelSize) { |
|
331 // Non-optimized using actual while loop. |
|
332 CONVOLVE_ONE_SAMPLE |
|
333 } |
|
334 } |
|
335 destP[i++] = sum; |
|
336 } |
|
337 |
|
338 // Copy 2nd half of input buffer to 1st half. |
|
339 memcpy(m_buffer.Elements(), inputP, sizeof(float) * framesToProcess); |
|
340 } |
|
341 |
|
342 void DirectConvolver::reset() |
|
343 { |
|
344 PodZero(m_buffer.Elements(), m_buffer.Length()); |
|
345 } |
|
346 |
|
347 } // namespace WebCore |