Fri, 16 Jan 2015 04:50:19 +0100
Replace accessor implementation with direct member state manipulation, by
request https://trac.torproject.org/projects/tor/ticket/9701#comment:32
michael@0 | 1 | /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, |
michael@0 | 4 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | #include "AudioNodeEngineNEON.h" |
michael@0 | 7 | #include <arm_neon.h> |
michael@0 | 8 | |
// Alignment assertions are switched off unconditionally (the guard used to be
// `#ifdef DEBUG`); see bug 921099.  While disabled, ASSERT_ALIGNED expands to
// nothing, so `ASSERT_ALIGNED(p);` is just an empty statement.
#if 0 // see bug 921099
// Asserts that |ptr| lies on a 16-byte boundary.  No trailing semicolon here:
// the call site supplies it.
#define ASSERT_ALIGNED(ptr)                                                  \
  MOZ_ASSERT((((uintptr_t)(ptr) + 15) & ~0x0F) == (uintptr_t)(ptr),          \
             #ptr " has to be 16-byte aligned.")
#else
#define ASSERT_ALIGNED(ptr)
#endif

// Yields the address of array[index] as a non-const float32_t*, so the same
// macro can feed both the NEON load and store intrinsics.  Both arguments are
// parenthesized so that expressions such as `base + 1` expand correctly.
#define ADDRESS_OF(array, index) ((float32_t*)&(array)[(index)])
michael@0 | 19 | |
michael@0 | 20 | namespace mozilla { |
michael@0 | 21 | void AudioBufferAddWithScale_NEON(const float* aInput, |
michael@0 | 22 | float aScale, |
michael@0 | 23 | float* aOutput, |
michael@0 | 24 | uint32_t aSize) |
michael@0 | 25 | { |
michael@0 | 26 | ASSERT_ALIGNED(aInput); |
michael@0 | 27 | ASSERT_ALIGNED(aOutput); |
michael@0 | 28 | |
michael@0 | 29 | float32x4_t vin0, vin1, vin2, vin3; |
michael@0 | 30 | float32x4_t vout0, vout1, vout2, vout3; |
michael@0 | 31 | float32x4_t vscale = vmovq_n_f32(aScale); |
michael@0 | 32 | |
michael@0 | 33 | uint32_t dif = aSize % 16; |
michael@0 | 34 | aSize -= dif; |
michael@0 | 35 | unsigned i = 0; |
michael@0 | 36 | for (; i < aSize; i+=16) { |
michael@0 | 37 | vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); |
michael@0 | 38 | vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); |
michael@0 | 39 | vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); |
michael@0 | 40 | vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); |
michael@0 | 41 | |
michael@0 | 42 | vout0 = vld1q_f32(ADDRESS_OF(aOutput, i)); |
michael@0 | 43 | vout1 = vld1q_f32(ADDRESS_OF(aOutput, i+4)); |
michael@0 | 44 | vout2 = vld1q_f32(ADDRESS_OF(aOutput, i+8)); |
michael@0 | 45 | vout3 = vld1q_f32(ADDRESS_OF(aOutput, i+12)); |
michael@0 | 46 | |
michael@0 | 47 | vout0 = vmlaq_f32(vout0, vin0, vscale); |
michael@0 | 48 | vout1 = vmlaq_f32(vout1, vin1, vscale); |
michael@0 | 49 | vout2 = vmlaq_f32(vout2, vin2, vscale); |
michael@0 | 50 | vout3 = vmlaq_f32(vout3, vin3, vscale); |
michael@0 | 51 | |
michael@0 | 52 | vst1q_f32(ADDRESS_OF(aOutput, i), vout0); |
michael@0 | 53 | vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); |
michael@0 | 54 | vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); |
michael@0 | 55 | vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); |
michael@0 | 56 | } |
michael@0 | 57 | |
michael@0 | 58 | for (unsigned j = 0; j < dif; ++i, ++j) { |
michael@0 | 59 | aOutput[i] += aInput[i]*aScale; |
michael@0 | 60 | } |
michael@0 | 61 | } |
michael@0 | 62 | void |
michael@0 | 63 | AudioBlockCopyChannelWithScale_NEON(const float* aInput, |
michael@0 | 64 | float aScale, |
michael@0 | 65 | float* aOutput) |
michael@0 | 66 | { |
michael@0 | 67 | ASSERT_ALIGNED(aInput); |
michael@0 | 68 | ASSERT_ALIGNED(aOutput); |
michael@0 | 69 | |
michael@0 | 70 | float32x4_t vin0, vin1, vin2, vin3; |
michael@0 | 71 | float32x4_t vout0, vout1, vout2, vout3; |
michael@0 | 72 | float32x4_t vscale = vmovq_n_f32(aScale); |
michael@0 | 73 | |
michael@0 | 74 | for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) { |
michael@0 | 75 | vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); |
michael@0 | 76 | vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); |
michael@0 | 77 | vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); |
michael@0 | 78 | vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); |
michael@0 | 79 | |
michael@0 | 80 | vout0 = vmulq_f32(vin0, vscale); |
michael@0 | 81 | vout1 = vmulq_f32(vin1, vscale); |
michael@0 | 82 | vout2 = vmulq_f32(vin2, vscale); |
michael@0 | 83 | vout3 = vmulq_f32(vin3, vscale); |
michael@0 | 84 | |
michael@0 | 85 | vst1q_f32(ADDRESS_OF(aOutput, i), vout0); |
michael@0 | 86 | vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); |
michael@0 | 87 | vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); |
michael@0 | 88 | vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); |
michael@0 | 89 | } |
michael@0 | 90 | } |
michael@0 | 91 | |
michael@0 | 92 | void |
michael@0 | 93 | AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE], |
michael@0 | 94 | const float aScale[WEBAUDIO_BLOCK_SIZE], |
michael@0 | 95 | float aOutput[WEBAUDIO_BLOCK_SIZE]) |
michael@0 | 96 | { |
michael@0 | 97 | ASSERT_ALIGNED(aInput); |
michael@0 | 98 | ASSERT_ALIGNED(aScale); |
michael@0 | 99 | ASSERT_ALIGNED(aOutput); |
michael@0 | 100 | |
michael@0 | 101 | float32x4_t vin0, vin1, vin2, vin3; |
michael@0 | 102 | float32x4_t vout0, vout1, vout2, vout3; |
michael@0 | 103 | float32x4_t vscale0, vscale1, vscale2, vscale3; |
michael@0 | 104 | |
michael@0 | 105 | for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) { |
michael@0 | 106 | vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); |
michael@0 | 107 | vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); |
michael@0 | 108 | vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); |
michael@0 | 109 | vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); |
michael@0 | 110 | |
michael@0 | 111 | vscale0 = vld1q_f32(ADDRESS_OF(aScale, i)); |
michael@0 | 112 | vscale1 = vld1q_f32(ADDRESS_OF(aScale, i+4)); |
michael@0 | 113 | vscale2 = vld1q_f32(ADDRESS_OF(aScale, i+8)); |
michael@0 | 114 | vscale3 = vld1q_f32(ADDRESS_OF(aScale, i+12)); |
michael@0 | 115 | |
michael@0 | 116 | vout0 = vmulq_f32(vin0, vscale0); |
michael@0 | 117 | vout1 = vmulq_f32(vin1, vscale1); |
michael@0 | 118 | vout2 = vmulq_f32(vin2, vscale2); |
michael@0 | 119 | vout3 = vmulq_f32(vin3, vscale3); |
michael@0 | 120 | |
michael@0 | 121 | vst1q_f32(ADDRESS_OF(aOutput, i), vout0); |
michael@0 | 122 | vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); |
michael@0 | 123 | vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); |
michael@0 | 124 | vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); |
michael@0 | 125 | } |
michael@0 | 126 | } |
michael@0 | 127 | |
michael@0 | 128 | void |
michael@0 | 129 | AudioBufferInPlaceScale_NEON(float* aBlock, |
michael@0 | 130 | float aScale, |
michael@0 | 131 | uint32_t aSize) |
michael@0 | 132 | { |
michael@0 | 133 | ASSERT_ALIGNED(aBlock); |
michael@0 | 134 | |
michael@0 | 135 | float32x4_t vin0, vin1, vin2, vin3; |
michael@0 | 136 | float32x4_t vout0, vout1, vout2, vout3; |
michael@0 | 137 | float32x4_t vscale = vmovq_n_f32(aScale); |
michael@0 | 138 | |
michael@0 | 139 | uint32_t dif = aSize % 16; |
michael@0 | 140 | uint32_t vectorSize = aSize - dif; |
michael@0 | 141 | uint32_t i = 0; |
michael@0 | 142 | for (; i < vectorSize; i+=16) { |
michael@0 | 143 | vin0 = vld1q_f32(ADDRESS_OF(aBlock, i)); |
michael@0 | 144 | vin1 = vld1q_f32(ADDRESS_OF(aBlock, i+4)); |
michael@0 | 145 | vin2 = vld1q_f32(ADDRESS_OF(aBlock, i+8)); |
michael@0 | 146 | vin3 = vld1q_f32(ADDRESS_OF(aBlock, i+12)); |
michael@0 | 147 | |
michael@0 | 148 | vout0 = vmulq_f32(vin0, vscale); |
michael@0 | 149 | vout1 = vmulq_f32(vin1, vscale); |
michael@0 | 150 | vout2 = vmulq_f32(vin2, vscale); |
michael@0 | 151 | vout3 = vmulq_f32(vin3, vscale); |
michael@0 | 152 | |
michael@0 | 153 | vst1q_f32(ADDRESS_OF(aBlock, i), vout0); |
michael@0 | 154 | vst1q_f32(ADDRESS_OF(aBlock, i+4), vout1); |
michael@0 | 155 | vst1q_f32(ADDRESS_OF(aBlock, i+8), vout2); |
michael@0 | 156 | vst1q_f32(ADDRESS_OF(aBlock, i+12), vout3); |
michael@0 | 157 | } |
michael@0 | 158 | |
michael@0 | 159 | for (unsigned j = 0; j < dif; ++i, ++j) { |
michael@0 | 160 | aBlock[i] *= aScale; |
michael@0 | 161 | } |
michael@0 | 162 | } |
michael@0 | 163 | |
michael@0 | 164 | void |
michael@0 | 165 | AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE], |
michael@0 | 166 | const float aInputR[WEBAUDIO_BLOCK_SIZE], |
michael@0 | 167 | float aGainL, float aGainR, bool aIsOnTheLeft, |
michael@0 | 168 | float aOutputL[WEBAUDIO_BLOCK_SIZE], |
michael@0 | 169 | float aOutputR[WEBAUDIO_BLOCK_SIZE]) |
michael@0 | 170 | { |
michael@0 | 171 | ASSERT_ALIGNED(aInputL); |
michael@0 | 172 | ASSERT_ALIGNED(aInputR); |
michael@0 | 173 | ASSERT_ALIGNED(aOutputL); |
michael@0 | 174 | ASSERT_ALIGNED(aOutputR); |
michael@0 | 175 | |
michael@0 | 176 | float32x4_t vinL0, vinL1; |
michael@0 | 177 | float32x4_t vinR0, vinR1; |
michael@0 | 178 | float32x4_t voutL0, voutL1; |
michael@0 | 179 | float32x4_t voutR0, voutR1; |
michael@0 | 180 | float32x4_t vscaleL = vmovq_n_f32(aGainL); |
michael@0 | 181 | float32x4_t vscaleR = vmovq_n_f32(aGainR); |
michael@0 | 182 | |
michael@0 | 183 | if (aIsOnTheLeft) { |
michael@0 | 184 | for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) { |
michael@0 | 185 | vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i)); |
michael@0 | 186 | vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4)); |
michael@0 | 187 | |
michael@0 | 188 | vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i)); |
michael@0 | 189 | vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4)); |
michael@0 | 190 | |
michael@0 | 191 | voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL); |
michael@0 | 192 | voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL); |
michael@0 | 193 | |
michael@0 | 194 | vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0); |
michael@0 | 195 | vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1); |
michael@0 | 196 | |
michael@0 | 197 | voutR0 = vmulq_f32(vinR0, vscaleR); |
michael@0 | 198 | voutR1 = vmulq_f32(vinR1, vscaleR); |
michael@0 | 199 | |
michael@0 | 200 | vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0); |
michael@0 | 201 | vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1); |
michael@0 | 202 | } |
michael@0 | 203 | } else { |
michael@0 | 204 | for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) { |
michael@0 | 205 | vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i)); |
michael@0 | 206 | vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4)); |
michael@0 | 207 | |
michael@0 | 208 | vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i)); |
michael@0 | 209 | vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4)); |
michael@0 | 210 | |
michael@0 | 211 | voutL0 = vmulq_f32(vinL0, vscaleL); |
michael@0 | 212 | voutL1 = vmulq_f32(vinL1, vscaleL); |
michael@0 | 213 | |
michael@0 | 214 | vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0); |
michael@0 | 215 | vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1); |
michael@0 | 216 | |
michael@0 | 217 | voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR); |
michael@0 | 218 | voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR); |
michael@0 | 219 | |
michael@0 | 220 | vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0); |
michael@0 | 221 | vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1); |
michael@0 | 222 | } |
michael@0 | 223 | } |
michael@0 | 224 | } |
michael@0 | 225 | } |