diff -r 000000000000 -r 6474c204b198 media/libspeex_resampler/sse-detect-runtime.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/media/libspeex_resampler/sse-detect-runtime.patch Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,220 @@ +--- /home/paul/workspace/repositories/opus-tools/src/resample.c 2012-11-21 11:36:59.119430163 +0100 ++++ media/libspeex_resampler/src/resample.c 2013-08-09 19:24:39.060236120 +0200 +@@ -92,18 +92,28 @@ + + #define IMAX(a,b) ((a) > (b) ? (a) : (b)) + #define IMIN(a,b) ((a) < (b) ? (a) : (b)) + + #ifndef NULL + #define NULL 0 + #endif + ++#include "sse_detect.h" ++ ++/* We compile SSE code on x86 all the time, but we only use it if we find at ++ * runtime that the CPU supports it. */ + #if defined(FLOATING_POINT) && defined(__SSE__) ++#if defined(_MSC_VER) ++#define inline __inline ++#endif + # include "resample_sse.h" ++#ifdef _MSC_VER ++#undef inline ++#endif + #endif + + /* Numer of elements to allocate on the stack */ + #ifdef VAR_ARRAYS + #define FIXED_STACK_ALLOC 8192 + #else + #define FIXED_STACK_ALLOC 1024 + #endif +@@ -340,35 +350,39 @@ + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +-#ifndef OVERRIDE_INNER_PRODUCT_SINGLE ++#ifdef OVERRIDE_INNER_PRODUCT_SINGLE ++ if (moz_has_sse()) { ++ sum = inner_product_single(sinct, iptr, N); ++ } else { ++#endif + int j; + sum = 0; + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; +@@ -397,29 +411,33 @@ + const spx_uint32_t den_rate = st->den_rate; + double sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +-#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE +- int j; +- double accum[4] = {0,0,0,0}; +- +- for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; +@@ -453,35 +471,38 @@ + #ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + #else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + #endif + spx_word16_t interp[4]; + + +-#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE +- int j; +- spx_word32_t accum[4] = {0,0,0,0}; +- +- for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); +- accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); +- accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); +- accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++ if (moz_has_sse()) { ++ cubic_coef(frac, interp); ++ sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++ } else { ++#endif ++ int j; ++ spx_word32_t accum[4] = {0,0,0,0}; ++ ++ for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); ++ accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); ++ accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); ++ accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); ++ } ++ cubic_coef(frac, interp); ++ sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + } +- +- cubic_coef(frac, interp); +- sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1)); +-#else +- cubic_coef(frac, interp); +- sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); + #endif +- ++ + out[out_stride * out_sample++] = SATURATE32(PSHR32(sum, 14), 32767); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } +@@ -515,35 +536,38 @@ + #ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + #else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + #endif + spx_word16_t interp[4]; + + +-#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++ if (moz_has_sse2()) { ++ cubic_coef(frac, interp); ++ sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++ } else { ++#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +-#else +- cubic_coef(frac, interp); +- sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE ++ } + #endif +- + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + }