|
1 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
|
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "AudioNodeEngineNEON.h" |
|
7 #include <arm_neon.h> |
|
8 |
|
// ASSERT_ALIGNED(ptr): debug check that `ptr` sits on a 16-byte boundary.
// The check is compiled out unconditionally for now (see bug 921099), so the
// macro expands to nothing; the intended condition is kept under `#if 0`.
// The argument is parenthesized so that an expression such as `p + 1` is
// converted as a whole, and the expansion carries no trailing semicolon
// because every call site supplies its own.
//#ifdef DEBUG
#if 0 // see bug 921099
#define ASSERT_ALIGNED(ptr)                                                   \
  MOZ_ASSERT((((uintptr_t)(ptr) + 15) & ~0x0F) == (uintptr_t)(ptr),           \
             #ptr " has to be 16-byte aligned.")
#else
#define ASSERT_ALIGNED(ptr)
#endif

// ADDRESS_OF(array, index): address of element `index` of `array`, cast to
// float32_t* (the cast also drops any const qualifier on the element type).
// `array` is parenthesized so pointer expressions like `p + 1` index correctly.
#define ADDRESS_OF(array, index) ((float32_t*)&(array)[index])
|
19 |
|
20 namespace mozilla { |
|
// Accumulates a scaled copy of aInput into aOutput:
//   aOutput[k] += aInput[k] * aScale   for every k in [0, aSize).
//
// aInput  - source samples (read only).
// aScale  - gain applied to every source sample.
// aOutput - destination samples, updated in place.
// aSize   - number of floats to process.
//
// Whole groups of 16 floats are handled with four float32x4_t vectors per
// iteration; the remaining aSize % 16 samples go through a scalar loop.
void AudioBufferAddWithScale_NEON(const float* aInput,
                                  float aScale,
                                  float* aOutput,
                                  uint32_t aSize)
{
  ASSERT_ALIGNED(aInput);
  ASSERT_ALIGNED(aOutput);

  const float32x4_t gain = vmovq_n_f32(aScale);

  // Largest multiple of 16 that does not exceed aSize.
  const uint32_t vectorEnd = aSize - (aSize % 16);

  uint32_t pos = 0;
  for (; pos < vectorEnd; pos += 16) {
    const float* src = ADDRESS_OF(aInput, pos);
    float* dst = ADDRESS_OF(aOutput, pos);

    // Every load of this 16-float group is issued before the first store.
    const float32x4_t in0 = vld1q_f32(src);
    const float32x4_t in1 = vld1q_f32(src + 4);
    const float32x4_t in2 = vld1q_f32(src + 8);
    const float32x4_t in3 = vld1q_f32(src + 12);

    float32x4_t acc0 = vld1q_f32(dst);
    float32x4_t acc1 = vld1q_f32(dst + 4);
    float32x4_t acc2 = vld1q_f32(dst + 8);
    float32x4_t acc3 = vld1q_f32(dst + 12);

    // acc += in * gain, lane by lane.
    acc0 = vmlaq_f32(acc0, in0, gain);
    acc1 = vmlaq_f32(acc1, in1, gain);
    acc2 = vmlaq_f32(acc2, in2, gain);
    acc3 = vmlaq_f32(acc3, in3, gain);

    vst1q_f32(dst, acc0);
    vst1q_f32(dst + 4, acc1);
    vst1q_f32(dst + 8, acc2);
    vst1q_f32(dst + 12, acc3);
  }

  // Scalar tail for the samples that do not fill a 16-float group.
  for (; pos < aSize; ++pos) {
    aOutput[pos] += aInput[pos] * aScale;
  }
}
|
// Writes aInput multiplied by a constant gain into aOutput:
//   aOutput[k] = aInput[k] * aScale   for k in [0, WEBAUDIO_BLOCK_SIZE).
//
// aInput  - source block (read only).
// aScale  - gain applied to every sample.
// aOutput - destination block, overwritten.
//
// The loop advances 16 floats per iteration and has no scalar tail, so it
// covers exactly one block only when WEBAUDIO_BLOCK_SIZE is a multiple of 16.
void
AudioBlockCopyChannelWithScale_NEON(const float* aInput,
                                    float aScale,
                                    float* aOutput)
{
  ASSERT_ALIGNED(aInput);
  ASSERT_ALIGNED(aOutput);

  const float32x4_t gain = vmovq_n_f32(aScale);

  for (uint32_t pos = 0; pos < WEBAUDIO_BLOCK_SIZE; pos += 16) {
    const float* src = ADDRESS_OF(aInput, pos);
    float* dst = ADDRESS_OF(aOutput, pos);

    // Load the whole 16-float group before writing any of it back.
    const float32x4_t in0 = vld1q_f32(src);
    const float32x4_t in1 = vld1q_f32(src + 4);
    const float32x4_t in2 = vld1q_f32(src + 8);
    const float32x4_t in3 = vld1q_f32(src + 12);

    const float32x4_t scaled0 = vmulq_f32(in0, gain);
    const float32x4_t scaled1 = vmulq_f32(in1, gain);
    const float32x4_t scaled2 = vmulq_f32(in2, gain);
    const float32x4_t scaled3 = vmulq_f32(in3, gain);

    vst1q_f32(dst, scaled0);
    vst1q_f32(dst + 4, scaled1);
    vst1q_f32(dst + 8, scaled2);
    vst1q_f32(dst + 12, scaled3);
  }
}
|
91 |
|
// Writes the element-wise product of aInput and aScale into aOutput:
//   aOutput[k] = aInput[k] * aScale[k]   for k in [0, WEBAUDIO_BLOCK_SIZE).
//
// aInput  - source block (read only).
// aScale  - per-sample gain block (read only).
// aOutput - destination block, overwritten.
//
// The loop advances 16 floats per iteration and has no scalar tail, so it
// covers exactly one block only when WEBAUDIO_BLOCK_SIZE is a multiple of 16.
void
AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE],
                                    const float aScale[WEBAUDIO_BLOCK_SIZE],
                                    float aOutput[WEBAUDIO_BLOCK_SIZE])
{
  ASSERT_ALIGNED(aInput);
  ASSERT_ALIGNED(aScale);
  ASSERT_ALIGNED(aOutput);

  for (uint32_t pos = 0; pos < WEBAUDIO_BLOCK_SIZE; pos += 16) {
    const float* src = ADDRESS_OF(aInput, pos);
    const float* gains = ADDRESS_OF(aScale, pos);
    float* dst = ADDRESS_OF(aOutput, pos);

    // Samples first, then gains; every load precedes the first store.
    const float32x4_t in0 = vld1q_f32(src);
    const float32x4_t in1 = vld1q_f32(src + 4);
    const float32x4_t in2 = vld1q_f32(src + 8);
    const float32x4_t in3 = vld1q_f32(src + 12);

    const float32x4_t gain0 = vld1q_f32(gains);
    const float32x4_t gain1 = vld1q_f32(gains + 4);
    const float32x4_t gain2 = vld1q_f32(gains + 8);
    const float32x4_t gain3 = vld1q_f32(gains + 12);

    const float32x4_t scaled0 = vmulq_f32(in0, gain0);
    const float32x4_t scaled1 = vmulq_f32(in1, gain1);
    const float32x4_t scaled2 = vmulq_f32(in2, gain2);
    const float32x4_t scaled3 = vmulq_f32(in3, gain3);

    vst1q_f32(dst, scaled0);
    vst1q_f32(dst + 4, scaled1);
    vst1q_f32(dst + 8, scaled2);
    vst1q_f32(dst + 12, scaled3);
  }
}
|
127 |
|
// Multiplies a buffer by a constant gain, in place:
//   aBlock[k] *= aScale   for every k in [0, aSize).
//
// aBlock - samples to scale; read and overwritten.
// aScale - gain applied to every sample.
// aSize  - number of floats to process.
//
// Whole groups of 16 floats are handled with four float32x4_t vectors per
// iteration; the remaining aSize % 16 samples go through a scalar loop.
void
AudioBufferInPlaceScale_NEON(float* aBlock,
                             float aScale,
                             uint32_t aSize)
{
  ASSERT_ALIGNED(aBlock);

  const float32x4_t gain = vmovq_n_f32(aScale);

  // Largest multiple of 16 that does not exceed aSize.
  const uint32_t vectorEnd = aSize - (aSize % 16);

  uint32_t pos = 0;
  for (; pos < vectorEnd; pos += 16) {
    float* chunk = ADDRESS_OF(aBlock, pos);

    const float32x4_t in0 = vld1q_f32(chunk);
    const float32x4_t in1 = vld1q_f32(chunk + 4);
    const float32x4_t in2 = vld1q_f32(chunk + 8);
    const float32x4_t in3 = vld1q_f32(chunk + 12);

    const float32x4_t scaled0 = vmulq_f32(in0, gain);
    const float32x4_t scaled1 = vmulq_f32(in1, gain);
    const float32x4_t scaled2 = vmulq_f32(in2, gain);
    const float32x4_t scaled3 = vmulq_f32(in3, gain);

    vst1q_f32(chunk, scaled0);
    vst1q_f32(chunk + 4, scaled1);
    vst1q_f32(chunk + 8, scaled2);
    vst1q_f32(chunk + 12, scaled3);
  }

  // Scalar tail for the samples that do not fill a 16-float group.
  for (; pos < aSize; ++pos) {
    aBlock[pos] *= aScale;
  }
}
|
163 |
|
// Mixes one stereo block into another, for k in [0, WEBAUDIO_BLOCK_SIZE):
//
//   aIsOnTheLeft == true:
//     aOutputL[k] = aInputL[k] + aInputR[k] * aGainL
//     aOutputR[k] = aInputR[k] * aGainR
//   aIsOnTheLeft == false:
//     aOutputL[k] = aInputL[k] * aGainL
//     aOutputR[k] = aInputR[k] + aInputL[k] * aGainR
//
// aInputL / aInputR   - source channels (read only).
// aGainL / aGainR     - gains used as shown above.
// aIsOnTheLeft        - selects which of the two formulas is applied.
// aOutputL / aOutputR - destination channels, overwritten.
//
// Each iteration handles 8 floats per channel and there is no scalar tail, so
// the loops cover exactly one block only when WEBAUDIO_BLOCK_SIZE is a
// multiple of 8.
void
AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
                                 const float aInputR[WEBAUDIO_BLOCK_SIZE],
                                 float aGainL, float aGainR, bool aIsOnTheLeft,
                                 float aOutputL[WEBAUDIO_BLOCK_SIZE],
                                 float aOutputR[WEBAUDIO_BLOCK_SIZE])
{
  ASSERT_ALIGNED(aInputL);
  ASSERT_ALIGNED(aInputR);
  ASSERT_ALIGNED(aOutputL);
  ASSERT_ALIGNED(aOutputR);

  const float32x4_t gainL = vmovq_n_f32(aGainL);
  const float32x4_t gainR = vmovq_n_f32(aGainR);

  // The branch is taken once, outside the sample loops.
  if (!aIsOnTheLeft) {
    for (uint32_t pos = 0; pos < WEBAUDIO_BLOCK_SIZE; pos += 8) {
      const float* srcL = ADDRESS_OF(aInputL, pos);
      const float* srcR = ADDRESS_OF(aInputR, pos);
      float* dstL = ADDRESS_OF(aOutputL, pos);
      float* dstR = ADDRESS_OF(aOutputR, pos);

      // Both channels are fully loaded before anything is written back.
      const float32x4_t left0 = vld1q_f32(srcL);
      const float32x4_t left1 = vld1q_f32(srcL + 4);
      const float32x4_t right0 = vld1q_f32(srcR);
      const float32x4_t right1 = vld1q_f32(srcR + 4);

      // Left is attenuated; right receives itself plus scaled left.
      const float32x4_t outL0 = vmulq_f32(left0, gainL);
      const float32x4_t outL1 = vmulq_f32(left1, gainL);
      const float32x4_t outR0 = vmlaq_f32(right0, left0, gainR);
      const float32x4_t outR1 = vmlaq_f32(right1, left1, gainR);

      vst1q_f32(dstL, outL0);
      vst1q_f32(dstL + 4, outL1);
      vst1q_f32(dstR, outR0);
      vst1q_f32(dstR + 4, outR1);
    }
    return;
  }

  for (uint32_t pos = 0; pos < WEBAUDIO_BLOCK_SIZE; pos += 8) {
    const float* srcL = ADDRESS_OF(aInputL, pos);
    const float* srcR = ADDRESS_OF(aInputR, pos);
    float* dstL = ADDRESS_OF(aOutputL, pos);
    float* dstR = ADDRESS_OF(aOutputR, pos);

    // Both channels are fully loaded before anything is written back.
    const float32x4_t left0 = vld1q_f32(srcL);
    const float32x4_t left1 = vld1q_f32(srcL + 4);
    const float32x4_t right0 = vld1q_f32(srcR);
    const float32x4_t right1 = vld1q_f32(srcR + 4);

    // Left receives itself plus scaled right; right is attenuated.
    const float32x4_t outL0 = vmlaq_f32(left0, right0, gainL);
    const float32x4_t outL1 = vmlaq_f32(left1, right1, gainL);
    const float32x4_t outR0 = vmulq_f32(right0, gainR);
    const float32x4_t outR1 = vmulq_f32(right1, gainR);

    vst1q_f32(dstL, outL0);
    vst1q_f32(dstL + 4, outL1);
    vst1q_f32(dstR, outR0);
    vst1q_f32(dstR + 4, outR1);
  }
}
|
225 } |