Thu, 22 Jan 2015 13:21:57 +0100
Incorporate changes requested in the Mozilla review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
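
The header below overrides the Speex resampler's inner-product kernels with SSE versions that use unaligned loads (_mm_loadu_ps), so the input pointers need not be 16-byte aligned. As a quick standalone illustration of what those kernels compute, here is a small sketch (not part of the patch) that builds a dot product the same way and checks it against a plain scalar loop; the buffer size and test values are assumptions chosen for the example.

/* Illustrative sketch only: SSE dot product with unaligned loads,
 * mirroring inner_product_single below (8 floats per iteration,
 * then a horizontal add of the four partial sums). */
#include <stdio.h>
#include <xmmintrin.h>

static float sse_dot(const float *a, const float *b, unsigned int len)
{
   unsigned int i;
   float ret;
   __m128 sum = _mm_setzero_ps();
   /* len is assumed to be a multiple of 8, matching the kernel's stride. */
   for (i = 0; i < len; i += 8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i), _mm_loadu_ps(b + i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i + 4), _mm_loadu_ps(b + i + 4)));
   }
   /* Horizontal sum: fold lanes 2,3 onto 0,1, then lane 1 onto lane 0. */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);
   return ret;
}

int main(void)
{
   float a[24], b[24];
   float scalar = 0.f;
   unsigned int i;
   for (i = 0; i < 24; i++) { a[i] = (float)i * 0.25f; b[i] = 1.f - (float)i * 0.01f; }
   for (i = 0; i < 24; i++) scalar += a[i] * b[i];
   printf("sse=%f scalar=%f\n", sse_dot(a, b, 24), scalar);
   return 0;
}

Built with SSE enabled (for example, gcc -msse sketch.c), the two printed values should agree to within single-precision rounding.
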
/* Copyright (C) 2007-2008 Jean-Marc Valin
 * Copyright (C) 2008 Thorvald Natvig
 */
/**
   @file resample_sse.h
   @brief Resampler functions (SSE version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <xmmintrin.h>

#define OVERRIDE_INNER_PRODUCT_SINGLE
/* Single-precision inner product, 8 floats per iteration.  Unaligned loads
   (_mm_loadu_ps) are used, so a and b need not be 16-byte aligned. */
static inline float inner_product_single(const float *a, const float *b, unsigned int len)
{
   int i;
   float ret;
   if (1)
   {
      __m128 sum = _mm_setzero_ps();
      for (i=0;i<len;i+=8)
      {
         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
      }
      /* Horizontal sum of the four partial sums. */
      sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
      sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
      _mm_store_ss(&ret, sum);
   }
   else
   {
      /* Scalar reference path; compiled out by the constant condition above. */
      ret = 0;
      for (i=0;i<len;i++) ret += a[i] * b[i];
   }
   return ret;
}

#define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
/* Single-precision interpolated inner product: four phase-offset sums are
   accumulated in one vector, then weighted by the interpolation
   coefficients in frac. */
static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   int i;
   float ret;
   if (1)
   {
      __m128 sum = _mm_setzero_ps();
      __m128 f = _mm_loadu_ps(frac);
      for(i=0;i<len;i+=2)
      {
         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
         sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
      }
      /* Weight each of the four accumulators, then sum them horizontally. */
      sum = _mm_mul_ps(f, sum);
      sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
      sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
      _mm_store_ss(&ret, sum);
   }
   else
   {
      /* Scalar reference path; compiled out by the constant condition above. */
      float accum[4] = {0,0,0,0};
      for(i=0;i<len;i++)
      {
         const float curr_in=a[i];
         accum[0] += curr_in * b[i * oversample + 0];
         accum[1] += curr_in * b[i * oversample + 1];
         accum[2] += curr_in * b[i * oversample + 2];
         accum[3] += curr_in * b[i * oversample + 3];
      }
      ret = accum[0] * frac[0] + accum[1] * frac[1] + accum[2] * frac[2] + accum[3] * frac[3];
   }
   return ret;
}

#ifdef __SSE2__
#include <emmintrin.h>
#define OVERRIDE_INNER_PRODUCT_DOUBLE

/* Same inner product as above, but each group of four float products is
   widened to double before accumulation. */
static inline double inner_product_double(const float *a, const float *b, unsigned int len)
{
   int i;
   double ret;
   __m128d sum = _mm_setzero_pd();
   __m128 t;
   for (i=0;i<len;i+=8)
   {
      t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }
   /* Horizontal sum of the two double accumulators. */
   sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
   _mm_store_sd(&ret, sum);
   return ret;
}

#define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
/* Double-precision variant of the interpolated inner product: the four
   phase accumulators are split across two __m128d registers, weighted by
   frac (widened to double), and then summed. */
static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
   int i;
   double ret;
   __m128d sum;
   __m128d sum1 = _mm_setzero_pd();
   __m128d sum2 = _mm_setzero_pd();
   __m128 f = _mm_loadu_ps(frac);
   __m128d f1 = _mm_cvtps_pd(f);
   __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f));
   __m128 t;
   for(i=0;i<len;i+=2)
   {
      t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));

      t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample));
      sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
      sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
   }
   sum1 = _mm_mul_pd(f1, sum1);
   sum2 = _mm_mul_pd(f2, sum2);
   sum = _mm_add_pd(sum1, sum2);
   /* Horizontal sum of the two double accumulators. */
   sum = _mm_add_sd(sum, (__m128d) _mm_movehl_ps((__m128) sum, (__m128) sum));
   _mm_store_sd(&ret, sum);
   return ret;
}

#endif /* __SSE2__ */