Fri, 16 Jan 2015 04:50:19 +0100
Replace accessor implementation with direct member state manipulation, by
request https://trac.torproject.org/projects/tor/ticket/9701#comment:32
1 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "AudioNodeEngineNEON.h"
7 #include <arm_neon.h>
// ASSERT_ALIGNED(ptr): assertion that `ptr` sits on a 16-byte boundary.
// The check is compiled out for now (see bug 921099), so the macro currently
// expands to nothing in every build. The commented-out `#ifdef DEBUG` below is
// kept as a reminder of the guard that was here before the check was disabled.
// The definition deliberately carries no trailing semicolon: call sites
// supply their own.
//#ifdef DEBUG
#if 0 // see bug 921099
#define ASSERT_ALIGNED(ptr)                                              \
  MOZ_ASSERT((((uintptr_t)(ptr) + 15) & ~0x0F) == (uintptr_t)(ptr),      \
             #ptr " has to be aligned 16-bytes aligned.")
#else
#define ASSERT_ALIGNED(ptr)
#endif

// ADDRESS_OF(array, index): address of element `index` of `array`, cast to
// float32_t* (the cast also drops constness when `array` points to const
// floats). Both arguments are parenthesized so that expressions such as
// `ptr + 1` or `i + 4` expand with the intended precedence.
#define ADDRESS_OF(array, index) ((float32_t*)&(array)[(index)])
20 namespace mozilla {
21 void AudioBufferAddWithScale_NEON(const float* aInput,
22 float aScale,
23 float* aOutput,
24 uint32_t aSize)
25 {
26 ASSERT_ALIGNED(aInput);
27 ASSERT_ALIGNED(aOutput);
29 float32x4_t vin0, vin1, vin2, vin3;
30 float32x4_t vout0, vout1, vout2, vout3;
31 float32x4_t vscale = vmovq_n_f32(aScale);
33 uint32_t dif = aSize % 16;
34 aSize -= dif;
35 unsigned i = 0;
36 for (; i < aSize; i+=16) {
37 vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
38 vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
39 vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
40 vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
42 vout0 = vld1q_f32(ADDRESS_OF(aOutput, i));
43 vout1 = vld1q_f32(ADDRESS_OF(aOutput, i+4));
44 vout2 = vld1q_f32(ADDRESS_OF(aOutput, i+8));
45 vout3 = vld1q_f32(ADDRESS_OF(aOutput, i+12));
47 vout0 = vmlaq_f32(vout0, vin0, vscale);
48 vout1 = vmlaq_f32(vout1, vin1, vscale);
49 vout2 = vmlaq_f32(vout2, vin2, vscale);
50 vout3 = vmlaq_f32(vout3, vin3, vscale);
52 vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
53 vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
54 vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
55 vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
56 }
58 for (unsigned j = 0; j < dif; ++i, ++j) {
59 aOutput[i] += aInput[i]*aScale;
60 }
61 }
62 void
63 AudioBlockCopyChannelWithScale_NEON(const float* aInput,
64 float aScale,
65 float* aOutput)
66 {
67 ASSERT_ALIGNED(aInput);
68 ASSERT_ALIGNED(aOutput);
70 float32x4_t vin0, vin1, vin2, vin3;
71 float32x4_t vout0, vout1, vout2, vout3;
72 float32x4_t vscale = vmovq_n_f32(aScale);
74 for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
75 vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
76 vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
77 vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
78 vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
80 vout0 = vmulq_f32(vin0, vscale);
81 vout1 = vmulq_f32(vin1, vscale);
82 vout2 = vmulq_f32(vin2, vscale);
83 vout3 = vmulq_f32(vin3, vscale);
85 vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
86 vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
87 vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
88 vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
89 }
90 }
92 void
93 AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE],
94 const float aScale[WEBAUDIO_BLOCK_SIZE],
95 float aOutput[WEBAUDIO_BLOCK_SIZE])
96 {
97 ASSERT_ALIGNED(aInput);
98 ASSERT_ALIGNED(aScale);
99 ASSERT_ALIGNED(aOutput);
101 float32x4_t vin0, vin1, vin2, vin3;
102 float32x4_t vout0, vout1, vout2, vout3;
103 float32x4_t vscale0, vscale1, vscale2, vscale3;
105 for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
106 vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
107 vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
108 vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
109 vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
111 vscale0 = vld1q_f32(ADDRESS_OF(aScale, i));
112 vscale1 = vld1q_f32(ADDRESS_OF(aScale, i+4));
113 vscale2 = vld1q_f32(ADDRESS_OF(aScale, i+8));
114 vscale3 = vld1q_f32(ADDRESS_OF(aScale, i+12));
116 vout0 = vmulq_f32(vin0, vscale0);
117 vout1 = vmulq_f32(vin1, vscale1);
118 vout2 = vmulq_f32(vin2, vscale2);
119 vout3 = vmulq_f32(vin3, vscale3);
121 vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
122 vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
123 vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
124 vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
125 }
126 }
128 void
129 AudioBufferInPlaceScale_NEON(float* aBlock,
130 float aScale,
131 uint32_t aSize)
132 {
133 ASSERT_ALIGNED(aBlock);
135 float32x4_t vin0, vin1, vin2, vin3;
136 float32x4_t vout0, vout1, vout2, vout3;
137 float32x4_t vscale = vmovq_n_f32(aScale);
139 uint32_t dif = aSize % 16;
140 uint32_t vectorSize = aSize - dif;
141 uint32_t i = 0;
142 for (; i < vectorSize; i+=16) {
143 vin0 = vld1q_f32(ADDRESS_OF(aBlock, i));
144 vin1 = vld1q_f32(ADDRESS_OF(aBlock, i+4));
145 vin2 = vld1q_f32(ADDRESS_OF(aBlock, i+8));
146 vin3 = vld1q_f32(ADDRESS_OF(aBlock, i+12));
148 vout0 = vmulq_f32(vin0, vscale);
149 vout1 = vmulq_f32(vin1, vscale);
150 vout2 = vmulq_f32(vin2, vscale);
151 vout3 = vmulq_f32(vin3, vscale);
153 vst1q_f32(ADDRESS_OF(aBlock, i), vout0);
154 vst1q_f32(ADDRESS_OF(aBlock, i+4), vout1);
155 vst1q_f32(ADDRESS_OF(aBlock, i+8), vout2);
156 vst1q_f32(ADDRESS_OF(aBlock, i+12), vout3);
157 }
159 for (unsigned j = 0; j < dif; ++i, ++j) {
160 aBlock[i] *= aScale;
161 }
162 }
164 void
165 AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
166 const float aInputR[WEBAUDIO_BLOCK_SIZE],
167 float aGainL, float aGainR, bool aIsOnTheLeft,
168 float aOutputL[WEBAUDIO_BLOCK_SIZE],
169 float aOutputR[WEBAUDIO_BLOCK_SIZE])
170 {
171 ASSERT_ALIGNED(aInputL);
172 ASSERT_ALIGNED(aInputR);
173 ASSERT_ALIGNED(aOutputL);
174 ASSERT_ALIGNED(aOutputR);
176 float32x4_t vinL0, vinL1;
177 float32x4_t vinR0, vinR1;
178 float32x4_t voutL0, voutL1;
179 float32x4_t voutR0, voutR1;
180 float32x4_t vscaleL = vmovq_n_f32(aGainL);
181 float32x4_t vscaleR = vmovq_n_f32(aGainR);
183 if (aIsOnTheLeft) {
184 for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
185 vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
186 vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
188 vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
189 vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
191 voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL);
192 voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL);
194 vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
195 vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
197 voutR0 = vmulq_f32(vinR0, vscaleR);
198 voutR1 = vmulq_f32(vinR1, vscaleR);
200 vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
201 vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
202 }
203 } else {
204 for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
205 vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
206 vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
208 vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
209 vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
211 voutL0 = vmulq_f32(vinL0, vscaleL);
212 voutL1 = vmulq_f32(vinL1, vscaleL);
214 vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
215 vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
217 voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR);
218 voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR);
220 vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
221 vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
222 }
223 }
224 }
225 }