1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/content/media/AudioNodeEngineNEON.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,225 @@ 1.4 +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* this source code form is subject to the terms of the mozilla public 1.6 + * license, v. 2.0. if a copy of the mpl was not distributed with this file, 1.7 + * You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "AudioNodeEngineNEON.h" 1.10 +#include <arm_neon.h> 1.11 + 1.12 +//#ifdef DEBUG 1.13 +#if 0 // see bug 921099 1.14 + #define ASSERT_ALIGNED(ptr) \ 1.15 + MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr, \ 1.16 + #ptr " has to be aligned 16-bytes aligned."); 1.17 +#else 1.18 + #define ASSERT_ALIGNED(ptr) 1.19 +#endif 1.20 + 1.21 +#define ADDRESS_OF(array, index) ((float32_t*)&array[index]) 1.22 + 1.23 +namespace mozilla { 1.24 +void AudioBufferAddWithScale_NEON(const float* aInput, 1.25 + float aScale, 1.26 + float* aOutput, 1.27 + uint32_t aSize) 1.28 +{ 1.29 + ASSERT_ALIGNED(aInput); 1.30 + ASSERT_ALIGNED(aOutput); 1.31 + 1.32 + float32x4_t vin0, vin1, vin2, vin3; 1.33 + float32x4_t vout0, vout1, vout2, vout3; 1.34 + float32x4_t vscale = vmovq_n_f32(aScale); 1.35 + 1.36 + uint32_t dif = aSize % 16; 1.37 + aSize -= dif; 1.38 + unsigned i = 0; 1.39 + for (; i < aSize; i+=16) { 1.40 + vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); 1.41 + vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); 1.42 + vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); 1.43 + vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); 1.44 + 1.45 + vout0 = vld1q_f32(ADDRESS_OF(aOutput, i)); 1.46 + vout1 = vld1q_f32(ADDRESS_OF(aOutput, i+4)); 1.47 + vout2 = vld1q_f32(ADDRESS_OF(aOutput, i+8)); 1.48 + vout3 = vld1q_f32(ADDRESS_OF(aOutput, i+12)); 1.49 + 1.50 + vout0 = vmlaq_f32(vout0, vin0, vscale); 1.51 + vout1 = vmlaq_f32(vout1, vin1, vscale); 1.52 + vout2 = vmlaq_f32(vout2, vin2, vscale); 1.53 + vout3 = vmlaq_f32(vout3, vin3, vscale); 1.54 + 1.55 + vst1q_f32(ADDRESS_OF(aOutput, i), vout0); 1.56 + vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); 1.57 + vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); 1.58 + vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); 1.59 + } 1.60 + 1.61 + for (unsigned j = 0; j < dif; ++i, ++j) { 1.62 + aOutput[i] += aInput[i]*aScale; 1.63 + } 1.64 +} 1.65 +void 1.66 +AudioBlockCopyChannelWithScale_NEON(const float* aInput, 1.67 + float aScale, 1.68 + float* aOutput) 1.69 +{ 1.70 + ASSERT_ALIGNED(aInput); 1.71 + ASSERT_ALIGNED(aOutput); 1.72 + 1.73 + float32x4_t vin0, vin1, vin2, vin3; 1.74 + float32x4_t vout0, vout1, vout2, vout3; 1.75 + float32x4_t vscale = vmovq_n_f32(aScale); 1.76 + 1.77 + for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) { 1.78 + vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); 1.79 + vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); 1.80 + vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); 1.81 + vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); 1.82 + 1.83 + vout0 = vmulq_f32(vin0, vscale); 1.84 + vout1 = vmulq_f32(vin1, vscale); 1.85 + vout2 = vmulq_f32(vin2, vscale); 1.86 + vout3 = vmulq_f32(vin3, vscale); 1.87 + 1.88 + vst1q_f32(ADDRESS_OF(aOutput, i), vout0); 1.89 + vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); 1.90 + vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); 1.91 + vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); 1.92 + } 1.93 +} 1.94 + 1.95 +void 1.96 +AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE], 1.97 + const float aScale[WEBAUDIO_BLOCK_SIZE], 1.98 + float aOutput[WEBAUDIO_BLOCK_SIZE]) 1.99 +{ 1.100 + ASSERT_ALIGNED(aInput); 1.101 + ASSERT_ALIGNED(aScale); 1.102 + ASSERT_ALIGNED(aOutput); 1.103 + 1.104 + float32x4_t vin0, vin1, vin2, vin3; 1.105 + float32x4_t vout0, vout1, vout2, vout3; 1.106 + float32x4_t vscale0, vscale1, vscale2, vscale3; 1.107 + 1.108 + for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) { 1.109 + vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); 1.110 + vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); 1.111 + vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); 1.112 + vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); 1.113 + 1.114 + vscale0 = vld1q_f32(ADDRESS_OF(aScale, i)); 1.115 + vscale1 = vld1q_f32(ADDRESS_OF(aScale, i+4)); 1.116 + vscale2 = vld1q_f32(ADDRESS_OF(aScale, i+8)); 1.117 + vscale3 = vld1q_f32(ADDRESS_OF(aScale, i+12)); 1.118 + 1.119 + vout0 = vmulq_f32(vin0, vscale0); 1.120 + vout1 = vmulq_f32(vin1, vscale1); 1.121 + vout2 = vmulq_f32(vin2, vscale2); 1.122 + vout3 = vmulq_f32(vin3, vscale3); 1.123 + 1.124 + vst1q_f32(ADDRESS_OF(aOutput, i), vout0); 1.125 + vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); 1.126 + vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); 1.127 + vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); 1.128 + } 1.129 +} 1.130 + 1.131 +void 1.132 +AudioBufferInPlaceScale_NEON(float* aBlock, 1.133 + float aScale, 1.134 + uint32_t aSize) 1.135 +{ 1.136 + ASSERT_ALIGNED(aBlock); 1.137 + 1.138 + float32x4_t vin0, vin1, vin2, vin3; 1.139 + float32x4_t vout0, vout1, vout2, vout3; 1.140 + float32x4_t vscale = vmovq_n_f32(aScale); 1.141 + 1.142 + uint32_t dif = aSize % 16; 1.143 + uint32_t vectorSize = aSize - dif; 1.144 + uint32_t i = 0; 1.145 + for (; i < vectorSize; i+=16) { 1.146 + vin0 = vld1q_f32(ADDRESS_OF(aBlock, i)); 1.147 + vin1 = vld1q_f32(ADDRESS_OF(aBlock, i+4)); 1.148 + vin2 = vld1q_f32(ADDRESS_OF(aBlock, i+8)); 1.149 + vin3 = vld1q_f32(ADDRESS_OF(aBlock, i+12)); 1.150 + 1.151 + vout0 = vmulq_f32(vin0, vscale); 1.152 + vout1 = vmulq_f32(vin1, vscale); 1.153 + vout2 = vmulq_f32(vin2, vscale); 1.154 + vout3 = vmulq_f32(vin3, vscale); 1.155 + 1.156 + vst1q_f32(ADDRESS_OF(aBlock, i), vout0); 1.157 + vst1q_f32(ADDRESS_OF(aBlock, i+4), vout1); 1.158 + vst1q_f32(ADDRESS_OF(aBlock, i+8), vout2); 1.159 + vst1q_f32(ADDRESS_OF(aBlock, i+12), vout3); 1.160 + } 1.161 + 1.162 + for (unsigned j = 0; j < dif; ++i, ++j) { 1.163 + aBlock[i] *= aScale; 1.164 + } 1.165 +} 1.166 + 1.167 +void 1.168 +AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE], 1.169 + const float aInputR[WEBAUDIO_BLOCK_SIZE], 1.170 + float aGainL, float aGainR, bool aIsOnTheLeft, 1.171 + float aOutputL[WEBAUDIO_BLOCK_SIZE], 1.172 + float aOutputR[WEBAUDIO_BLOCK_SIZE]) 1.173 +{ 1.174 + ASSERT_ALIGNED(aInputL); 1.175 + ASSERT_ALIGNED(aInputR); 1.176 + ASSERT_ALIGNED(aOutputL); 1.177 + ASSERT_ALIGNED(aOutputR); 1.178 + 1.179 + float32x4_t vinL0, vinL1; 1.180 + float32x4_t vinR0, vinR1; 1.181 + float32x4_t voutL0, voutL1; 1.182 + float32x4_t voutR0, voutR1; 1.183 + float32x4_t vscaleL = vmovq_n_f32(aGainL); 1.184 + float32x4_t vscaleR = vmovq_n_f32(aGainR); 1.185 + 1.186 + if (aIsOnTheLeft) { 1.187 + for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) { 1.188 + vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i)); 1.189 + vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4)); 1.190 + 1.191 + vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i)); 1.192 + vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4)); 1.193 + 1.194 + voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL); 1.195 + voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL); 1.196 + 1.197 + vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0); 1.198 + vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1); 1.199 + 1.200 + voutR0 = vmulq_f32(vinR0, vscaleR); 1.201 + voutR1 = vmulq_f32(vinR1, vscaleR); 1.202 + 1.203 + vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0); 1.204 + vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1); 1.205 + } 1.206 + } else { 1.207 + for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) { 1.208 + vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i)); 1.209 + vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4)); 1.210 + 1.211 + vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i)); 1.212 + vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4)); 1.213 + 1.214 + voutL0 = vmulq_f32(vinL0, vscaleL); 1.215 + voutL1 = vmulq_f32(vinL1, vscaleL); 1.216 + 1.217 + vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0); 1.218 + vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1); 1.219 + 1.220 + voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR); 1.221 + voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR); 1.222 + 1.223 + vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0); 1.224 + vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1); 1.225 + } 1.226 + } 1.227 +} 1.228 +}