michael@0: /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* this source code form is subject to the terms of the mozilla public michael@0: * license, v. 2.0. if a copy of the mpl was not distributed with this file, michael@0: * You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "AudioNodeEngineNEON.h" michael@0: #include michael@0: michael@0: //#ifdef DEBUG michael@0: #if 0 // see bug 921099 michael@0: #define ASSERT_ALIGNED(ptr) \ michael@0: MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr, \ michael@0: #ptr " has to be aligned 16-bytes aligned."); michael@0: #else michael@0: #define ASSERT_ALIGNED(ptr) michael@0: #endif michael@0: michael@0: #define ADDRESS_OF(array, index) ((float32_t*)&array[index]) michael@0: michael@0: namespace mozilla { michael@0: void AudioBufferAddWithScale_NEON(const float* aInput, michael@0: float aScale, michael@0: float* aOutput, michael@0: uint32_t aSize) michael@0: { michael@0: ASSERT_ALIGNED(aInput); michael@0: ASSERT_ALIGNED(aOutput); michael@0: michael@0: float32x4_t vin0, vin1, vin2, vin3; michael@0: float32x4_t vout0, vout1, vout2, vout3; michael@0: float32x4_t vscale = vmovq_n_f32(aScale); michael@0: michael@0: uint32_t dif = aSize % 16; michael@0: aSize -= dif; michael@0: unsigned i = 0; michael@0: for (; i < aSize; i+=16) { michael@0: vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); michael@0: vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); michael@0: vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); michael@0: vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); michael@0: michael@0: vout0 = vld1q_f32(ADDRESS_OF(aOutput, i)); michael@0: vout1 = vld1q_f32(ADDRESS_OF(aOutput, i+4)); michael@0: vout2 = vld1q_f32(ADDRESS_OF(aOutput, i+8)); michael@0: vout3 = vld1q_f32(ADDRESS_OF(aOutput, i+12)); michael@0: michael@0: vout0 = vmlaq_f32(vout0, vin0, vscale); michael@0: vout1 = vmlaq_f32(vout1, vin1, vscale); michael@0: vout2 = vmlaq_f32(vout2, vin2, vscale); michael@0: vout3 = vmlaq_f32(vout3, vin3, vscale); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutput, i), vout0); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); michael@0: } michael@0: michael@0: for (unsigned j = 0; j < dif; ++i, ++j) { michael@0: aOutput[i] += aInput[i]*aScale; michael@0: } michael@0: } michael@0: void michael@0: AudioBlockCopyChannelWithScale_NEON(const float* aInput, michael@0: float aScale, michael@0: float* aOutput) michael@0: { michael@0: ASSERT_ALIGNED(aInput); michael@0: ASSERT_ALIGNED(aOutput); michael@0: michael@0: float32x4_t vin0, vin1, vin2, vin3; michael@0: float32x4_t vout0, vout1, vout2, vout3; michael@0: float32x4_t vscale = vmovq_n_f32(aScale); michael@0: michael@0: for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) { michael@0: vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); michael@0: vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); michael@0: vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); michael@0: vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); michael@0: michael@0: vout0 = vmulq_f32(vin0, vscale); michael@0: vout1 = vmulq_f32(vin1, vscale); michael@0: vout2 = vmulq_f32(vin2, vscale); michael@0: vout3 = vmulq_f32(vin3, vscale); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutput, i), vout0); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE], michael@0: const float aScale[WEBAUDIO_BLOCK_SIZE], michael@0: float aOutput[WEBAUDIO_BLOCK_SIZE]) michael@0: { michael@0: ASSERT_ALIGNED(aInput); michael@0: ASSERT_ALIGNED(aScale); michael@0: ASSERT_ALIGNED(aOutput); michael@0: michael@0: float32x4_t vin0, vin1, vin2, vin3; michael@0: float32x4_t vout0, vout1, vout2, vout3; michael@0: float32x4_t vscale0, vscale1, vscale2, vscale3; michael@0: michael@0: for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) { michael@0: vin0 = vld1q_f32(ADDRESS_OF(aInput, i)); michael@0: vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4)); michael@0: vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8)); michael@0: vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12)); michael@0: michael@0: vscale0 = vld1q_f32(ADDRESS_OF(aScale, i)); michael@0: vscale1 = vld1q_f32(ADDRESS_OF(aScale, i+4)); michael@0: vscale2 = vld1q_f32(ADDRESS_OF(aScale, i+8)); michael@0: vscale3 = vld1q_f32(ADDRESS_OF(aScale, i+12)); michael@0: michael@0: vout0 = vmulq_f32(vin0, vscale0); michael@0: vout1 = vmulq_f32(vin1, vscale1); michael@0: vout2 = vmulq_f32(vin2, vscale2); michael@0: vout3 = vmulq_f32(vin3, vscale3); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutput, i), vout0); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2); michael@0: vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3); michael@0: } michael@0: } michael@0: michael@0: void michael@0: AudioBufferInPlaceScale_NEON(float* aBlock, michael@0: float aScale, michael@0: uint32_t aSize) michael@0: { michael@0: ASSERT_ALIGNED(aBlock); michael@0: michael@0: float32x4_t vin0, vin1, vin2, vin3; michael@0: float32x4_t vout0, vout1, vout2, vout3; michael@0: float32x4_t vscale = vmovq_n_f32(aScale); michael@0: michael@0: uint32_t dif = aSize % 16; michael@0: uint32_t vectorSize = aSize - dif; michael@0: uint32_t i = 0; michael@0: for (; i < vectorSize; i+=16) { michael@0: vin0 = vld1q_f32(ADDRESS_OF(aBlock, i)); michael@0: vin1 = vld1q_f32(ADDRESS_OF(aBlock, i+4)); michael@0: vin2 = vld1q_f32(ADDRESS_OF(aBlock, i+8)); michael@0: vin3 = vld1q_f32(ADDRESS_OF(aBlock, i+12)); michael@0: michael@0: vout0 = vmulq_f32(vin0, vscale); michael@0: vout1 = vmulq_f32(vin1, vscale); michael@0: vout2 = vmulq_f32(vin2, vscale); michael@0: vout3 = vmulq_f32(vin3, vscale); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aBlock, i), vout0); michael@0: vst1q_f32(ADDRESS_OF(aBlock, i+4), vout1); michael@0: vst1q_f32(ADDRESS_OF(aBlock, i+8), vout2); michael@0: vst1q_f32(ADDRESS_OF(aBlock, i+12), vout3); michael@0: } michael@0: michael@0: for (unsigned j = 0; j < dif; ++i, ++j) { michael@0: aBlock[i] *= aScale; michael@0: } michael@0: } michael@0: michael@0: void michael@0: AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE], michael@0: const float aInputR[WEBAUDIO_BLOCK_SIZE], michael@0: float aGainL, float aGainR, bool aIsOnTheLeft, michael@0: float aOutputL[WEBAUDIO_BLOCK_SIZE], michael@0: float aOutputR[WEBAUDIO_BLOCK_SIZE]) michael@0: { michael@0: ASSERT_ALIGNED(aInputL); michael@0: ASSERT_ALIGNED(aInputR); michael@0: ASSERT_ALIGNED(aOutputL); michael@0: ASSERT_ALIGNED(aOutputR); michael@0: michael@0: float32x4_t vinL0, vinL1; michael@0: float32x4_t vinR0, vinR1; michael@0: float32x4_t voutL0, voutL1; michael@0: float32x4_t voutR0, voutR1; michael@0: float32x4_t vscaleL = vmovq_n_f32(aGainL); michael@0: float32x4_t vscaleR = vmovq_n_f32(aGainR); michael@0: michael@0: if (aIsOnTheLeft) { michael@0: for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) { michael@0: vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i)); michael@0: vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4)); michael@0: michael@0: vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i)); michael@0: vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4)); michael@0: michael@0: voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL); michael@0: voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0); michael@0: vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1); michael@0: michael@0: voutR0 = vmulq_f32(vinR0, vscaleR); michael@0: voutR1 = vmulq_f32(vinR1, vscaleR); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0); michael@0: vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1); michael@0: } michael@0: } else { michael@0: for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) { michael@0: vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i)); michael@0: vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4)); michael@0: michael@0: vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i)); michael@0: vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4)); michael@0: michael@0: voutL0 = vmulq_f32(vinL0, vscaleL); michael@0: voutL1 = vmulq_f32(vinL1, vscaleL); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0); michael@0: vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1); michael@0: michael@0: voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR); michael@0: voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR); michael@0: michael@0: vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0); michael@0: vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1); michael@0: } michael@0: } michael@0: } michael@0: }