michael@0: /* michael@0: * Copyright (C) 2012 Intel Inc. All rights reserved. michael@0: * michael@0: * Redistribution and use in source and binary forms, with or without michael@0: * modification, are permitted provided that the following conditions michael@0: * are met: michael@0: * michael@0: * 1. Redistributions of source code must retain the above copyright michael@0: * notice, this list of conditions and the following disclaimer. michael@0: * 2. Redistributions in binary form must reproduce the above copyright michael@0: * notice, this list of conditions and the following disclaimer in the michael@0: * documentation and/or other materials provided with the distribution. michael@0: * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of michael@0: * its contributors may be used to endorse or promote products derived michael@0: * from this software without specific prior written permission. michael@0: * michael@0: * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY michael@0: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED michael@0: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE michael@0: * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY michael@0: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES michael@0: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; michael@0: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND michael@0: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF michael@0: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: */ michael@0: michael@0: #include "DirectConvolver.h" michael@0: #include "mozilla/PodOperations.h" michael@0: michael@0: using namespace mozilla; michael@0: michael@0: namespace WebCore { michael@0: michael@0: DirectConvolver::DirectConvolver(size_t inputBlockSize) michael@0: : m_inputBlockSize(inputBlockSize) michael@0: { michael@0: m_buffer.SetLength(inputBlockSize * 2); michael@0: PodZero(m_buffer.Elements(), inputBlockSize * 2); michael@0: } michael@0: michael@0: void DirectConvolver::process(const nsTArray* convolutionKernel, const float* sourceP, float* destP, size_t framesToProcess) michael@0: { michael@0: MOZ_ASSERT(framesToProcess == m_inputBlockSize); michael@0: if (framesToProcess != m_inputBlockSize) michael@0: return; michael@0: michael@0: // Only support kernelSize <= m_inputBlockSize michael@0: size_t kernelSize = convolutionKernel->Length(); michael@0: MOZ_ASSERT(kernelSize <= m_inputBlockSize); michael@0: if (kernelSize > m_inputBlockSize) michael@0: return; michael@0: michael@0: const float* kernelP = convolutionKernel->Elements(); michael@0: michael@0: // Sanity check michael@0: bool isCopyGood = kernelP && sourceP && destP && m_buffer.Elements(); michael@0: MOZ_ASSERT(isCopyGood); michael@0: if (!isCopyGood) michael@0: return; michael@0: michael@0: float* inputP = m_buffer.Elements() + m_inputBlockSize; michael@0: michael@0: // Copy samples to 2nd half of input buffer. michael@0: memcpy(inputP, sourceP, sizeof(float) * framesToProcess); michael@0: michael@0: // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES. michael@0: #define CONVOLVE_ONE_SAMPLE \ michael@0: sum += inputP[i - j] * kernelP[j]; \ michael@0: j++; michael@0: michael@0: size_t i = 0; michael@0: while (i < framesToProcess) { michael@0: size_t j = 0; michael@0: float sum = 0; michael@0: michael@0: // FIXME: SSE optimization may be applied here. michael@0: if (kernelSize == 32) { michael@0: CONVOLVE_ONE_SAMPLE // 1 michael@0: CONVOLVE_ONE_SAMPLE // 2 michael@0: CONVOLVE_ONE_SAMPLE // 3 michael@0: CONVOLVE_ONE_SAMPLE // 4 michael@0: CONVOLVE_ONE_SAMPLE // 5 michael@0: CONVOLVE_ONE_SAMPLE // 6 michael@0: CONVOLVE_ONE_SAMPLE // 7 michael@0: CONVOLVE_ONE_SAMPLE // 8 michael@0: CONVOLVE_ONE_SAMPLE // 9 michael@0: CONVOLVE_ONE_SAMPLE // 10 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 11 michael@0: CONVOLVE_ONE_SAMPLE // 12 michael@0: CONVOLVE_ONE_SAMPLE // 13 michael@0: CONVOLVE_ONE_SAMPLE // 14 michael@0: CONVOLVE_ONE_SAMPLE // 15 michael@0: CONVOLVE_ONE_SAMPLE // 16 michael@0: CONVOLVE_ONE_SAMPLE // 17 michael@0: CONVOLVE_ONE_SAMPLE // 18 michael@0: CONVOLVE_ONE_SAMPLE // 19 michael@0: CONVOLVE_ONE_SAMPLE // 20 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 21 michael@0: CONVOLVE_ONE_SAMPLE // 22 michael@0: CONVOLVE_ONE_SAMPLE // 23 michael@0: CONVOLVE_ONE_SAMPLE // 24 michael@0: CONVOLVE_ONE_SAMPLE // 25 michael@0: CONVOLVE_ONE_SAMPLE // 26 michael@0: CONVOLVE_ONE_SAMPLE // 27 michael@0: CONVOLVE_ONE_SAMPLE // 28 michael@0: CONVOLVE_ONE_SAMPLE // 29 michael@0: CONVOLVE_ONE_SAMPLE // 30 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 31 michael@0: CONVOLVE_ONE_SAMPLE // 32 michael@0: michael@0: } else if (kernelSize == 64) { michael@0: CONVOLVE_ONE_SAMPLE // 1 michael@0: CONVOLVE_ONE_SAMPLE // 2 michael@0: CONVOLVE_ONE_SAMPLE // 3 michael@0: CONVOLVE_ONE_SAMPLE // 4 michael@0: CONVOLVE_ONE_SAMPLE // 5 michael@0: CONVOLVE_ONE_SAMPLE // 6 michael@0: CONVOLVE_ONE_SAMPLE // 7 michael@0: CONVOLVE_ONE_SAMPLE // 8 michael@0: CONVOLVE_ONE_SAMPLE // 9 michael@0: CONVOLVE_ONE_SAMPLE // 10 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 11 michael@0: CONVOLVE_ONE_SAMPLE // 12 michael@0: CONVOLVE_ONE_SAMPLE // 13 michael@0: CONVOLVE_ONE_SAMPLE // 14 michael@0: CONVOLVE_ONE_SAMPLE // 15 michael@0: CONVOLVE_ONE_SAMPLE // 16 michael@0: CONVOLVE_ONE_SAMPLE // 17 michael@0: CONVOLVE_ONE_SAMPLE // 18 michael@0: CONVOLVE_ONE_SAMPLE // 19 michael@0: CONVOLVE_ONE_SAMPLE // 20 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 21 michael@0: CONVOLVE_ONE_SAMPLE // 22 michael@0: CONVOLVE_ONE_SAMPLE // 23 michael@0: CONVOLVE_ONE_SAMPLE // 24 michael@0: CONVOLVE_ONE_SAMPLE // 25 michael@0: CONVOLVE_ONE_SAMPLE // 26 michael@0: CONVOLVE_ONE_SAMPLE // 27 michael@0: CONVOLVE_ONE_SAMPLE // 28 michael@0: CONVOLVE_ONE_SAMPLE // 29 michael@0: CONVOLVE_ONE_SAMPLE // 30 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 31 michael@0: CONVOLVE_ONE_SAMPLE // 32 michael@0: CONVOLVE_ONE_SAMPLE // 33 michael@0: CONVOLVE_ONE_SAMPLE // 34 michael@0: CONVOLVE_ONE_SAMPLE // 35 michael@0: CONVOLVE_ONE_SAMPLE // 36 michael@0: CONVOLVE_ONE_SAMPLE // 37 michael@0: CONVOLVE_ONE_SAMPLE // 38 michael@0: CONVOLVE_ONE_SAMPLE // 39 michael@0: CONVOLVE_ONE_SAMPLE // 40 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 41 michael@0: CONVOLVE_ONE_SAMPLE // 42 michael@0: CONVOLVE_ONE_SAMPLE // 43 michael@0: CONVOLVE_ONE_SAMPLE // 44 michael@0: CONVOLVE_ONE_SAMPLE // 45 michael@0: CONVOLVE_ONE_SAMPLE // 46 michael@0: CONVOLVE_ONE_SAMPLE // 47 michael@0: CONVOLVE_ONE_SAMPLE // 48 michael@0: CONVOLVE_ONE_SAMPLE // 49 michael@0: CONVOLVE_ONE_SAMPLE // 50 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 51 michael@0: CONVOLVE_ONE_SAMPLE // 52 michael@0: CONVOLVE_ONE_SAMPLE // 53 michael@0: CONVOLVE_ONE_SAMPLE // 54 michael@0: CONVOLVE_ONE_SAMPLE // 55 michael@0: CONVOLVE_ONE_SAMPLE // 56 michael@0: CONVOLVE_ONE_SAMPLE // 57 michael@0: CONVOLVE_ONE_SAMPLE // 58 michael@0: CONVOLVE_ONE_SAMPLE // 59 michael@0: CONVOLVE_ONE_SAMPLE // 60 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 61 michael@0: CONVOLVE_ONE_SAMPLE // 62 michael@0: CONVOLVE_ONE_SAMPLE // 63 michael@0: CONVOLVE_ONE_SAMPLE // 64 michael@0: michael@0: } else if (kernelSize == 128) { michael@0: CONVOLVE_ONE_SAMPLE // 1 michael@0: CONVOLVE_ONE_SAMPLE // 2 michael@0: CONVOLVE_ONE_SAMPLE // 3 michael@0: CONVOLVE_ONE_SAMPLE // 4 michael@0: CONVOLVE_ONE_SAMPLE // 5 michael@0: CONVOLVE_ONE_SAMPLE // 6 michael@0: CONVOLVE_ONE_SAMPLE // 7 michael@0: CONVOLVE_ONE_SAMPLE // 8 michael@0: CONVOLVE_ONE_SAMPLE // 9 michael@0: CONVOLVE_ONE_SAMPLE // 10 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 11 michael@0: CONVOLVE_ONE_SAMPLE // 12 michael@0: CONVOLVE_ONE_SAMPLE // 13 michael@0: CONVOLVE_ONE_SAMPLE // 14 michael@0: CONVOLVE_ONE_SAMPLE // 15 michael@0: CONVOLVE_ONE_SAMPLE // 16 michael@0: CONVOLVE_ONE_SAMPLE // 17 michael@0: CONVOLVE_ONE_SAMPLE // 18 michael@0: CONVOLVE_ONE_SAMPLE // 19 michael@0: CONVOLVE_ONE_SAMPLE // 20 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 21 michael@0: CONVOLVE_ONE_SAMPLE // 22 michael@0: CONVOLVE_ONE_SAMPLE // 23 michael@0: CONVOLVE_ONE_SAMPLE // 24 michael@0: CONVOLVE_ONE_SAMPLE // 25 michael@0: CONVOLVE_ONE_SAMPLE // 26 michael@0: CONVOLVE_ONE_SAMPLE // 27 michael@0: CONVOLVE_ONE_SAMPLE // 28 michael@0: CONVOLVE_ONE_SAMPLE // 29 michael@0: CONVOLVE_ONE_SAMPLE // 30 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 31 michael@0: CONVOLVE_ONE_SAMPLE // 32 michael@0: CONVOLVE_ONE_SAMPLE // 33 michael@0: CONVOLVE_ONE_SAMPLE // 34 michael@0: CONVOLVE_ONE_SAMPLE // 35 michael@0: CONVOLVE_ONE_SAMPLE // 36 michael@0: CONVOLVE_ONE_SAMPLE // 37 michael@0: CONVOLVE_ONE_SAMPLE // 38 michael@0: CONVOLVE_ONE_SAMPLE // 39 michael@0: CONVOLVE_ONE_SAMPLE // 40 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 41 michael@0: CONVOLVE_ONE_SAMPLE // 42 michael@0: CONVOLVE_ONE_SAMPLE // 43 michael@0: CONVOLVE_ONE_SAMPLE // 44 michael@0: CONVOLVE_ONE_SAMPLE // 45 michael@0: CONVOLVE_ONE_SAMPLE // 46 michael@0: CONVOLVE_ONE_SAMPLE // 47 michael@0: CONVOLVE_ONE_SAMPLE // 48 michael@0: CONVOLVE_ONE_SAMPLE // 49 michael@0: CONVOLVE_ONE_SAMPLE // 50 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 51 michael@0: CONVOLVE_ONE_SAMPLE // 52 michael@0: CONVOLVE_ONE_SAMPLE // 53 michael@0: CONVOLVE_ONE_SAMPLE // 54 michael@0: CONVOLVE_ONE_SAMPLE // 55 michael@0: CONVOLVE_ONE_SAMPLE // 56 michael@0: CONVOLVE_ONE_SAMPLE // 57 michael@0: CONVOLVE_ONE_SAMPLE // 58 michael@0: CONVOLVE_ONE_SAMPLE // 59 michael@0: CONVOLVE_ONE_SAMPLE // 60 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 61 michael@0: CONVOLVE_ONE_SAMPLE // 62 michael@0: CONVOLVE_ONE_SAMPLE // 63 michael@0: CONVOLVE_ONE_SAMPLE // 64 michael@0: CONVOLVE_ONE_SAMPLE // 65 michael@0: CONVOLVE_ONE_SAMPLE // 66 michael@0: CONVOLVE_ONE_SAMPLE // 67 michael@0: CONVOLVE_ONE_SAMPLE // 68 michael@0: CONVOLVE_ONE_SAMPLE // 69 michael@0: CONVOLVE_ONE_SAMPLE // 70 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 71 michael@0: CONVOLVE_ONE_SAMPLE // 72 michael@0: CONVOLVE_ONE_SAMPLE // 73 michael@0: CONVOLVE_ONE_SAMPLE // 74 michael@0: CONVOLVE_ONE_SAMPLE // 75 michael@0: CONVOLVE_ONE_SAMPLE // 76 michael@0: CONVOLVE_ONE_SAMPLE // 77 michael@0: CONVOLVE_ONE_SAMPLE // 78 michael@0: CONVOLVE_ONE_SAMPLE // 79 michael@0: CONVOLVE_ONE_SAMPLE // 80 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 81 michael@0: CONVOLVE_ONE_SAMPLE // 82 michael@0: CONVOLVE_ONE_SAMPLE // 83 michael@0: CONVOLVE_ONE_SAMPLE // 84 michael@0: CONVOLVE_ONE_SAMPLE // 85 michael@0: CONVOLVE_ONE_SAMPLE // 86 michael@0: CONVOLVE_ONE_SAMPLE // 87 michael@0: CONVOLVE_ONE_SAMPLE // 88 michael@0: CONVOLVE_ONE_SAMPLE // 89 michael@0: CONVOLVE_ONE_SAMPLE // 90 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 91 michael@0: CONVOLVE_ONE_SAMPLE // 92 michael@0: CONVOLVE_ONE_SAMPLE // 93 michael@0: CONVOLVE_ONE_SAMPLE // 94 michael@0: CONVOLVE_ONE_SAMPLE // 95 michael@0: CONVOLVE_ONE_SAMPLE // 96 michael@0: CONVOLVE_ONE_SAMPLE // 97 michael@0: CONVOLVE_ONE_SAMPLE // 98 michael@0: CONVOLVE_ONE_SAMPLE // 99 michael@0: CONVOLVE_ONE_SAMPLE // 100 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 101 michael@0: CONVOLVE_ONE_SAMPLE // 102 michael@0: CONVOLVE_ONE_SAMPLE // 103 michael@0: CONVOLVE_ONE_SAMPLE // 104 michael@0: CONVOLVE_ONE_SAMPLE // 105 michael@0: CONVOLVE_ONE_SAMPLE // 106 michael@0: CONVOLVE_ONE_SAMPLE // 107 michael@0: CONVOLVE_ONE_SAMPLE // 108 michael@0: CONVOLVE_ONE_SAMPLE // 109 michael@0: CONVOLVE_ONE_SAMPLE // 110 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 111 michael@0: CONVOLVE_ONE_SAMPLE // 112 michael@0: CONVOLVE_ONE_SAMPLE // 113 michael@0: CONVOLVE_ONE_SAMPLE // 114 michael@0: CONVOLVE_ONE_SAMPLE // 115 michael@0: CONVOLVE_ONE_SAMPLE // 116 michael@0: CONVOLVE_ONE_SAMPLE // 117 michael@0: CONVOLVE_ONE_SAMPLE // 118 michael@0: CONVOLVE_ONE_SAMPLE // 119 michael@0: CONVOLVE_ONE_SAMPLE // 120 michael@0: michael@0: CONVOLVE_ONE_SAMPLE // 121 michael@0: CONVOLVE_ONE_SAMPLE // 122 michael@0: CONVOLVE_ONE_SAMPLE // 123 michael@0: CONVOLVE_ONE_SAMPLE // 124 michael@0: CONVOLVE_ONE_SAMPLE // 125 michael@0: CONVOLVE_ONE_SAMPLE // 126 michael@0: CONVOLVE_ONE_SAMPLE // 127 michael@0: CONVOLVE_ONE_SAMPLE // 128 michael@0: } else { michael@0: while (j < kernelSize) { michael@0: // Non-optimized using actual while loop. michael@0: CONVOLVE_ONE_SAMPLE michael@0: } michael@0: } michael@0: destP[i++] = sum; michael@0: } michael@0: michael@0: // Copy 2nd half of input buffer to 1st half. michael@0: memcpy(m_buffer.Elements(), inputP, sizeof(float) * framesToProcess); michael@0: } michael@0: michael@0: void DirectConvolver::reset() michael@0: { michael@0: PodZero(m_buffer.Elements(), m_buffer.Length()); michael@0: } michael@0: michael@0: } // namespace WebCore