content/media/AudioNodeEngineNEON.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/content/media/AudioNodeEngineNEON.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,225 @@
     1.4 +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* this source code form is subject to the terms of the mozilla public
     1.6 + * license, v. 2.0. if a copy of the mpl was not distributed with this file,
     1.7 + * You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "AudioNodeEngineNEON.h"
    1.10 +#include <arm_neon.h>
    1.11 +
    1.12 +//#ifdef DEBUG
    1.13 +#if 0 // see bug 921099
    1.14 +  #define ASSERT_ALIGNED(ptr)                                                  \
    1.15 +            MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr,      \
    1.16 +                       #ptr " has to be aligned 16-bytes aligned.");
    1.17 +#else
    1.18 +  #define ASSERT_ALIGNED(ptr)
    1.19 +#endif
    1.20 +
    1.21 +#define ADDRESS_OF(array, index) ((float32_t*)&array[index])
    1.22 +
    1.23 +namespace mozilla {
    1.24 +void AudioBufferAddWithScale_NEON(const float* aInput,
    1.25 +                                  float aScale,
    1.26 +                                  float* aOutput,
    1.27 +                                  uint32_t aSize)
    1.28 +{
    1.29 +  ASSERT_ALIGNED(aInput);
    1.30 +  ASSERT_ALIGNED(aOutput);
    1.31 +
    1.32 +  float32x4_t vin0, vin1, vin2, vin3;
    1.33 +  float32x4_t vout0, vout1, vout2, vout3;
    1.34 +  float32x4_t vscale = vmovq_n_f32(aScale);
    1.35 +
    1.36 +  uint32_t dif = aSize % 16;
    1.37 +  aSize -= dif;
    1.38 +  unsigned i = 0;
    1.39 +  for (; i < aSize; i+=16) {
    1.40 +    vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
    1.41 +    vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
    1.42 +    vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
    1.43 +    vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
    1.44 +
    1.45 +    vout0 = vld1q_f32(ADDRESS_OF(aOutput, i));
    1.46 +    vout1 = vld1q_f32(ADDRESS_OF(aOutput, i+4));
    1.47 +    vout2 = vld1q_f32(ADDRESS_OF(aOutput, i+8));
    1.48 +    vout3 = vld1q_f32(ADDRESS_OF(aOutput, i+12));
    1.49 +
    1.50 +    vout0 = vmlaq_f32(vout0, vin0, vscale);
    1.51 +    vout1 = vmlaq_f32(vout1, vin1, vscale);
    1.52 +    vout2 = vmlaq_f32(vout2, vin2, vscale);
    1.53 +    vout3 = vmlaq_f32(vout3, vin3, vscale);
    1.54 +
    1.55 +    vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
    1.56 +    vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
    1.57 +    vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
    1.58 +    vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
    1.59 +  }
    1.60 +
    1.61 +  for (unsigned j = 0; j < dif; ++i, ++j) {
    1.62 +    aOutput[i] += aInput[i]*aScale;
    1.63 +  }
    1.64 +}
    1.65 +void
    1.66 +AudioBlockCopyChannelWithScale_NEON(const float* aInput,
    1.67 +                                    float aScale,
    1.68 +                                    float* aOutput)
    1.69 +{
    1.70 +  ASSERT_ALIGNED(aInput);
    1.71 +  ASSERT_ALIGNED(aOutput);
    1.72 +
    1.73 +  float32x4_t vin0, vin1, vin2, vin3;
    1.74 +  float32x4_t vout0, vout1, vout2, vout3;
    1.75 +  float32x4_t vscale = vmovq_n_f32(aScale);
    1.76 +
    1.77 +  for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
    1.78 +    vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
    1.79 +    vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
    1.80 +    vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
    1.81 +    vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
    1.82 +
    1.83 +    vout0 = vmulq_f32(vin0, vscale);
    1.84 +    vout1 = vmulq_f32(vin1, vscale);
    1.85 +    vout2 = vmulq_f32(vin2, vscale);
    1.86 +    vout3 = vmulq_f32(vin3, vscale);
    1.87 +
    1.88 +    vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
    1.89 +    vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
    1.90 +    vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
    1.91 +    vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
    1.92 +  }
    1.93 +}
    1.94 +
    1.95 +void
    1.96 +AudioBlockCopyChannelWithScale_NEON(const float aInput[WEBAUDIO_BLOCK_SIZE],
    1.97 +                                    const float aScale[WEBAUDIO_BLOCK_SIZE],
    1.98 +                                    float aOutput[WEBAUDIO_BLOCK_SIZE])
    1.99 +{
   1.100 +  ASSERT_ALIGNED(aInput);
   1.101 +  ASSERT_ALIGNED(aScale);
   1.102 +  ASSERT_ALIGNED(aOutput);
   1.103 +
   1.104 +  float32x4_t vin0, vin1, vin2, vin3;
   1.105 +  float32x4_t vout0, vout1, vout2, vout3;
   1.106 +  float32x4_t vscale0, vscale1, vscale2, vscale3;
   1.107 +
   1.108 +  for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=16) {
   1.109 +    vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
   1.110 +    vin1 = vld1q_f32(ADDRESS_OF(aInput, i+4));
   1.111 +    vin2 = vld1q_f32(ADDRESS_OF(aInput, i+8));
   1.112 +    vin3 = vld1q_f32(ADDRESS_OF(aInput, i+12));
   1.113 +
   1.114 +    vscale0 = vld1q_f32(ADDRESS_OF(aScale, i));
   1.115 +    vscale1 = vld1q_f32(ADDRESS_OF(aScale, i+4));
   1.116 +    vscale2 = vld1q_f32(ADDRESS_OF(aScale, i+8));
   1.117 +    vscale3 = vld1q_f32(ADDRESS_OF(aScale, i+12));
   1.118 +
   1.119 +    vout0 = vmulq_f32(vin0, vscale0);
   1.120 +    vout1 = vmulq_f32(vin1, vscale1);
   1.121 +    vout2 = vmulq_f32(vin2, vscale2);
   1.122 +    vout3 = vmulq_f32(vin3, vscale3);
   1.123 +
   1.124 +    vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
   1.125 +    vst1q_f32(ADDRESS_OF(aOutput, i+4), vout1);
   1.126 +    vst1q_f32(ADDRESS_OF(aOutput, i+8), vout2);
   1.127 +    vst1q_f32(ADDRESS_OF(aOutput, i+12), vout3);
   1.128 +  }
   1.129 +}
   1.130 +
   1.131 +void
   1.132 +AudioBufferInPlaceScale_NEON(float* aBlock,
   1.133 +                             float aScale,
   1.134 +                             uint32_t aSize)
   1.135 +{
   1.136 +  ASSERT_ALIGNED(aBlock);
   1.137 +
   1.138 +  float32x4_t vin0, vin1, vin2, vin3;
   1.139 +  float32x4_t vout0, vout1, vout2, vout3;
   1.140 +  float32x4_t vscale = vmovq_n_f32(aScale);
   1.141 +
   1.142 +  uint32_t dif = aSize % 16;
   1.143 +  uint32_t vectorSize = aSize - dif;
   1.144 +  uint32_t i = 0;
   1.145 +  for (; i < vectorSize; i+=16) {
   1.146 +    vin0 = vld1q_f32(ADDRESS_OF(aBlock, i));
   1.147 +    vin1 = vld1q_f32(ADDRESS_OF(aBlock, i+4));
   1.148 +    vin2 = vld1q_f32(ADDRESS_OF(aBlock, i+8));
   1.149 +    vin3 = vld1q_f32(ADDRESS_OF(aBlock, i+12));
   1.150 +
   1.151 +    vout0 = vmulq_f32(vin0, vscale);
   1.152 +    vout1 = vmulq_f32(vin1, vscale);
   1.153 +    vout2 = vmulq_f32(vin2, vscale);
   1.154 +    vout3 = vmulq_f32(vin3, vscale);
   1.155 +
   1.156 +    vst1q_f32(ADDRESS_OF(aBlock, i), vout0);
   1.157 +    vst1q_f32(ADDRESS_OF(aBlock, i+4), vout1);
   1.158 +    vst1q_f32(ADDRESS_OF(aBlock, i+8), vout2);
   1.159 +    vst1q_f32(ADDRESS_OF(aBlock, i+12), vout3);
   1.160 +  }
   1.161 +
   1.162 +  for (unsigned j = 0; j < dif; ++i, ++j) {
   1.163 +    aBlock[i] *= aScale;
   1.164 +  }
   1.165 +}
   1.166 +
   1.167 +void
   1.168 +AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
   1.169 +                                 const float aInputR[WEBAUDIO_BLOCK_SIZE],
   1.170 +                                 float aGainL, float aGainR, bool aIsOnTheLeft,
   1.171 +                                 float aOutputL[WEBAUDIO_BLOCK_SIZE],
   1.172 +                                 float aOutputR[WEBAUDIO_BLOCK_SIZE])
   1.173 +{
   1.174 +  ASSERT_ALIGNED(aInputL);
   1.175 +  ASSERT_ALIGNED(aInputR);
   1.176 +  ASSERT_ALIGNED(aOutputL);
   1.177 +  ASSERT_ALIGNED(aOutputR);
   1.178 +
   1.179 +  float32x4_t vinL0, vinL1;
   1.180 +  float32x4_t vinR0, vinR1;
   1.181 +  float32x4_t voutL0, voutL1;
   1.182 +  float32x4_t voutR0, voutR1;
   1.183 +  float32x4_t vscaleL = vmovq_n_f32(aGainL);
   1.184 +  float32x4_t vscaleR = vmovq_n_f32(aGainR);
   1.185 +
   1.186 +  if (aIsOnTheLeft) {
   1.187 +    for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
   1.188 +      vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
   1.189 +      vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
   1.190 +
   1.191 +      vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
   1.192 +      vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
   1.193 +
   1.194 +      voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL);
   1.195 +      voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL);
   1.196 +
   1.197 +      vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
   1.198 +      vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
   1.199 +
   1.200 +      voutR0 = vmulq_f32(vinR0, vscaleR);
   1.201 +      voutR1 = vmulq_f32(vinR1, vscaleR);
   1.202 +
   1.203 +      vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
   1.204 +      vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
   1.205 +    }
   1.206 +  } else {
   1.207 +    for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i+=8) {
   1.208 +      vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
   1.209 +      vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i+4));
   1.210 +
   1.211 +      vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
   1.212 +      vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i+4));
   1.213 +
   1.214 +      voutL0 = vmulq_f32(vinL0, vscaleL);
   1.215 +      voutL1 = vmulq_f32(vinL1, vscaleL);
   1.216 +
   1.217 +      vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
   1.218 +      vst1q_f32(ADDRESS_OF(aOutputL, i+4), voutL1);
   1.219 +
   1.220 +      voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR);
   1.221 +      voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR);
   1.222 +
   1.223 +      vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
   1.224 +      vst1q_f32(ADDRESS_OF(aOutputR, i+4), voutR1);
   1.225 +    }
   1.226 +  }
   1.227 +}
   1.228 +}

mercurial