michael@0: --- /home/paul/workspace/repositories/opus-tools/src/resample.c 2012-11-21 11:36:59.119430163 +0100 michael@0: +++ media/libspeex_resampler/src/resample.c 2013-08-09 19:24:39.060236120 +0200 michael@0: @@ -92,18 +92,28 @@ michael@0: michael@0: #define IMAX(a,b) ((a) > (b) ? (a) : (b)) michael@0: #define IMIN(a,b) ((a) < (b) ? (a) : (b)) michael@0: michael@0: #ifndef NULL michael@0: #define NULL 0 michael@0: #endif michael@0: michael@0: +#include "sse_detect.h" michael@0: + michael@0: +/* We compile SSE code on x86 all the time, but we only use it if we find at michael@0: + * runtime that the CPU supports it. */ michael@0: #if defined(FLOATING_POINT) && defined(__SSE__) michael@0: +#if defined(_MSC_VER) michael@0: +#define inline __inline michael@0: +#endif michael@0: # include "resample_sse.h" michael@0: +#ifdef _MSC_VER michael@0: +#undef inline michael@0: +#endif michael@0: #endif michael@0: michael@0: /* Numer of elements to allocate on the stack */ michael@0: #ifdef VAR_ARRAYS michael@0: #define FIXED_STACK_ALLOC 8192 michael@0: #else michael@0: #define FIXED_STACK_ALLOC 1024 michael@0: #endif michael@0: @@ -340,35 +350,39 @@ michael@0: const spx_uint32_t den_rate = st->den_rate; michael@0: spx_word32_t sum; michael@0: michael@0: while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) michael@0: { michael@0: const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; michael@0: const spx_word16_t *iptr = & in[last_sample]; michael@0: michael@0: -#ifndef OVERRIDE_INNER_PRODUCT_SINGLE michael@0: +#ifdef OVERRIDE_INNER_PRODUCT_SINGLE michael@0: + if (moz_has_sse()) { michael@0: + sum = inner_product_single(sinct, iptr, N); michael@0: + } else { michael@0: +#endif michael@0: int j; michael@0: sum = 0; michael@0: for(j=0;j= den_rate) michael@0: { michael@0: samp_frac_num -= den_rate; michael@0: @@ -397,29 +411,33 @@ michael@0: const spx_uint32_t den_rate = st->den_rate; michael@0: double sum; michael@0: michael@0: while 
(!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) michael@0: { michael@0: const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; michael@0: const spx_word16_t *iptr = & in[last_sample]; michael@0: michael@0: -#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE michael@0: - int j; michael@0: - double accum[4] = {0,0,0,0}; michael@0: - michael@0: - for(j=0;j= den_rate) michael@0: { michael@0: samp_frac_num -= den_rate; michael@0: @@ -453,35 +471,38 @@ michael@0: #ifdef FIXED_POINT michael@0: const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); michael@0: #else michael@0: const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; michael@0: #endif michael@0: spx_word16_t interp[4]; michael@0: michael@0: michael@0: -#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE michael@0: - int j; michael@0: - spx_word32_t accum[4] = {0,0,0,0}; michael@0: - michael@0: - for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); michael@0: - accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); michael@0: - accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); michael@0: - accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); michael@0: +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE michael@0: + if (moz_has_sse()) { michael@0: + cubic_coef(frac, interp); michael@0: + sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); michael@0: + } else { michael@0: +#endif michael@0: + int j; michael@0: + spx_word32_t accum[4] = {0,0,0,0}; michael@0: + michael@0: + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); michael@0: + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); michael@0: + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); michael@0: + accum[3] += 
MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); michael@0: + } michael@0: + cubic_coef(frac, interp); michael@0: + sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); michael@0: +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE michael@0: } michael@0: - michael@0: - cubic_coef(frac, interp); michael@0: - sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); michael@0: -#else michael@0: - cubic_coef(frac, interp); michael@0: - sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); michael@0: #endif michael@0: - michael@0: + michael@0: out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767); michael@0: last_sample += int_advance; michael@0: samp_frac_num += frac_advance; michael@0: if (samp_frac_num >= den_rate) michael@0: { michael@0: samp_frac_num -= den_rate; michael@0: last_sample++; michael@0: } michael@0: @@ -515,35 +536,38 @@ michael@0: #ifdef FIXED_POINT michael@0: const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); michael@0: #else michael@0: const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; michael@0: #endif michael@0: spx_word16_t interp[4]; michael@0: michael@0: michael@0: -#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE michael@0: +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE michael@0: + if (moz_has_sse2()) { michael@0: + cubic_coef(frac, interp); michael@0: + sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); michael@0: + } else { michael@0: +#endif michael@0: int j; michael@0: double accum[4] = {0,0,0,0}; michael@0: michael@0: 
for(j=0;j<N;j++) { michael@0: const double curr_in=iptr[j]; michael@0: accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]); michael@0: accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); michael@0: accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); michael@0: accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); michael@0: } michael@0: michael@0: cubic_coef(frac, interp); michael@0: sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); michael@0: -#else michael@0: - cubic_coef(frac, interp); michael@0: - sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); michael@0: +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE michael@0: + } michael@0: #endif michael@0: - michael@0: out[out_stride * out_sample++] = PSHR32(sum,15); michael@0: last_sample += int_advance; michael@0: samp_frac_num += frac_advance; michael@0: if (samp_frac_num >= den_rate) michael@0: { michael@0: samp_frac_num -= den_rate; michael@0: last_sample++; michael@0: }