media/libsoundtouch/src/TDStretch.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libsoundtouch/src/TDStretch.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,949 @@
     1.4 +////////////////////////////////////////////////////////////////////////////////
     1.5 +/// 
     1.6 +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
     1.7 +/// while maintaining the original pitch by using a time domain WSOLA-like 
     1.8 +/// method with several performance-increasing tweaks.
     1.9 +///
    1.10 +/// Note : MMX optimized functions reside in a separate, platform-specific 
    1.11 +/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
    1.12 +///
    1.13 +/// Author        : Copyright (c) Olli Parviainen
    1.14 +/// Author e-mail : oparviai 'at' iki.fi
    1.15 +/// SoundTouch WWW: http://www.surina.net/soundtouch
    1.16 +///
    1.17 +////////////////////////////////////////////////////////////////////////////////
    1.18 +//
    1.19 +// Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
    1.20 +// File revision : $Revision: 1.12 $
    1.21 +//
    1.22 +// $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $
    1.23 +//
    1.24 +////////////////////////////////////////////////////////////////////////////////
    1.25 +//
    1.26 +// License :
    1.27 +//
    1.28 +//  SoundTouch audio processing library
    1.29 +//  Copyright (c) Olli Parviainen
    1.30 +//
    1.31 +//  This library is free software; you can redistribute it and/or
    1.32 +//  modify it under the terms of the GNU Lesser General Public
    1.33 +//  License as published by the Free Software Foundation; either
    1.34 +//  version 2.1 of the License, or (at your option) any later version.
    1.35 +//
    1.36 +//  This library is distributed in the hope that it will be useful,
    1.37 +//  but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.38 +//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.39 +//  Lesser General Public License for more details.
    1.40 +//
    1.41 +//  You should have received a copy of the GNU Lesser General Public
    1.42 +//  License along with this library; if not, write to the Free Software
    1.43 +//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    1.44 +//
    1.45 +////////////////////////////////////////////////////////////////////////////////
    1.46 +
    1.47 +#include <string.h>
    1.48 +#include <limits.h>
    1.49 +#include <assert.h>
    1.50 +#include <math.h>
    1.51 +#include <float.h>
    1.52 +
    1.53 +#include "STTypes.h"
    1.54 +#include "cpu_detect.h"
    1.55 +#include "TDStretch.h"
    1.56 +
    1.57 +using namespace soundtouch;
    1.58 +
    1.59 +#define max(x, y) (((x) > (y)) ? (x) : (y))
    1.60 +
    1.61 +
    1.62 +/*****************************************************************************
    1.63 + *
    1.64 + * Constant definitions
    1.65 + *
    1.66 + *****************************************************************************/
    1.67 +
    1.68 +// Table for the hierarchical mixing position seeking algorithm
    1.69 +static const short _scanOffsets[5][24]={
    1.70 +    { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
    1.71 +      868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
    1.72 +    {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
    1.73 +        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
    1.74 +    { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
    1.75 +        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
    1.76 +    {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
    1.77 +        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
    1.78 +    { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,
    1.79 +      116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
    1.80 +
    1.81 +/*****************************************************************************
    1.82 + *
    1.83 + * Implementation of the class 'TDStretch'
    1.84 + *
    1.85 + *****************************************************************************/
    1.86 +
    1.87 +
    1.88 +TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
    1.89 +{
    1.90 +    bQuickSeek = false;
    1.91 +    channels = 2;
    1.92 +
    1.93 +    pMidBuffer = NULL;
    1.94 +    pMidBufferUnaligned = NULL;
    1.95 +    overlapLength = 0;
    1.96 +
    1.97 +    bAutoSeqSetting = true;
    1.98 +    bAutoSeekSetting = true;
    1.99 +
   1.100 +//    outDebt = 0;
   1.101 +    skipFract = 0;
   1.102 +
   1.103 +    tempo = 1.0f;
   1.104 +    setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
   1.105 +    setTempo(1.0f);
   1.106 +
   1.107 +    clear();
   1.108 +}
   1.109 +
   1.110 +
   1.111 +
   1.112 +TDStretch::~TDStretch()
   1.113 +{
   1.114 +    delete[] pMidBufferUnaligned;
   1.115 +}
   1.116 +
   1.117 +
   1.118 +
   1.119 +// Sets routine control parameters. These control are certain time constants
   1.120 +// defining how the sound is stretched to the desired duration.
   1.121 +//
   1.122 +// 'sampleRate' = sample rate of the sound
   1.123 +// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
   1.124 +// 'seekwindowMS' = seeking window length for scanning the best overlapping 
   1.125 +//      position (default = 28 ms)
   1.126 +// 'overlapMS' = overlapping length (default = 12 ms)
   1.127 +
   1.128 +void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 
   1.129 +                              int aSeekWindowMS, int aOverlapMS)
   1.130 +{
   1.131 +    // accept only positive parameter values - if zero or negative, use old values instead
   1.132 +    if (aSampleRate > 0)   this->sampleRate = aSampleRate;
   1.133 +    if (aOverlapMS > 0)    this->overlapMs = aOverlapMS;
   1.134 +
   1.135 +    if (aSequenceMS > 0)
   1.136 +    {
   1.137 +        this->sequenceMs = aSequenceMS;
   1.138 +        bAutoSeqSetting = false;
   1.139 +    } 
   1.140 +    else if (aSequenceMS == 0)
   1.141 +    {
   1.142 +        // if zero, use automatic setting
   1.143 +        bAutoSeqSetting = true;
   1.144 +    }
   1.145 +
   1.146 +    if (aSeekWindowMS > 0) 
   1.147 +    {
   1.148 +        this->seekWindowMs = aSeekWindowMS;
   1.149 +        bAutoSeekSetting = false;
   1.150 +    } 
   1.151 +    else if (aSeekWindowMS == 0) 
   1.152 +    {
   1.153 +        // if zero, use automatic setting
   1.154 +        bAutoSeekSetting = true;
   1.155 +    }
   1.156 +
   1.157 +    calcSeqParameters();
   1.158 +
   1.159 +    calculateOverlapLength(overlapMs);
   1.160 +
   1.161 +    // set tempo to recalculate 'sampleReq'
   1.162 +    setTempo(tempo);
   1.163 +}
   1.164 +
   1.165 +
   1.166 +
   1.167 +/// Get routine control parameters, see setParameters() function.
   1.168 +/// Any of the parameters to this function can be NULL, in such case corresponding parameter
   1.169 +/// value isn't returned.
   1.170 +void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
   1.171 +{
   1.172 +    if (pSampleRate)
   1.173 +    {
   1.174 +        *pSampleRate = sampleRate;
   1.175 +    }
   1.176 +
   1.177 +    if (pSequenceMs)
   1.178 +    {
   1.179 +        *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
   1.180 +    }
   1.181 +
   1.182 +    if (pSeekWindowMs)
   1.183 +    {
   1.184 +        *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
   1.185 +    }
   1.186 +
   1.187 +    if (pOverlapMs)
   1.188 +    {
   1.189 +        *pOverlapMs = overlapMs;
   1.190 +    }
   1.191 +}
   1.192 +
   1.193 +
   1.194 +// Overlaps samples in 'midBuffer' with the samples in 'pInput'
   1.195 +void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
   1.196 +{
   1.197 +    int i;
   1.198 +    SAMPLETYPE m1, m2;
   1.199 +
   1.200 +    m1 = (SAMPLETYPE)0;
   1.201 +    m2 = (SAMPLETYPE)overlapLength;
   1.202 +
   1.203 +    for (i = 0; i < overlapLength ; i ++) 
   1.204 +    {
   1.205 +        pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
   1.206 +        m1 += 1;
   1.207 +        m2 -= 1;
   1.208 +    }
   1.209 +}
   1.210 +
   1.211 +
   1.212 +
   1.213 +void TDStretch::clearMidBuffer()
   1.214 +{
   1.215 +    memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
   1.216 +}
   1.217 +
   1.218 +
   1.219 +void TDStretch::clearInput()
   1.220 +{
   1.221 +    inputBuffer.clear();
   1.222 +    clearMidBuffer();
   1.223 +}
   1.224 +
   1.225 +
   1.226 +// Clears the sample buffers
   1.227 +void TDStretch::clear()
   1.228 +{
   1.229 +    outputBuffer.clear();
   1.230 +    clearInput();
   1.231 +}
   1.232 +
   1.233 +
   1.234 +
   1.235 +// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
   1.236 +// to enable
   1.237 +void TDStretch::enableQuickSeek(bool enable)
   1.238 +{
   1.239 +    bQuickSeek = enable;
   1.240 +}
   1.241 +
   1.242 +
   1.243 +// Returns nonzero if the quick seeking algorithm is enabled.
   1.244 +bool TDStretch::isQuickSeekEnabled() const
   1.245 +{
   1.246 +    return bQuickSeek;
   1.247 +}
   1.248 +
   1.249 +
   1.250 +// Seeks for the optimal overlap-mixing position.
   1.251 +int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
   1.252 +{
   1.253 +    if (bQuickSeek) 
   1.254 +    {
   1.255 +        return seekBestOverlapPositionQuick(refPos);
   1.256 +    } 
   1.257 +    else 
   1.258 +    {
   1.259 +        return seekBestOverlapPositionFull(refPos);
   1.260 +    }
   1.261 +}
   1.262 +
   1.263 +
   1.264 +// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
   1.265 +// of 'ovlPos'.
   1.266 +inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
   1.267 +{
   1.268 +#ifndef USE_MULTICH_ALWAYS
   1.269 +    if (channels == 1)
   1.270 +    {
   1.271 +        // mono sound.
   1.272 +        overlapMono(pOutput, pInput + ovlPos);
   1.273 +    }
   1.274 +    else if (channels == 2)
   1.275 +    {
   1.276 +        // stereo sound
   1.277 +        overlapStereo(pOutput, pInput + 2 * ovlPos);
   1.278 +    } 
   1.279 +    else 
   1.280 +#endif // USE_MULTICH_ALWAYS
   1.281 +    {
   1.282 +        assert(channels > 0);
   1.283 +        overlapMulti(pOutput, pInput + channels * ovlPos);
   1.284 +    }
   1.285 +}
   1.286 +
   1.287 +
   1.288 +
   1.289 +// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
   1.290 +// routine
   1.291 +//
   1.292 +// The best position is determined as the position where the two overlapped
   1.293 +// sample sequences are 'most alike', in terms of the highest cross-correlation
   1.294 +// value over the overlapping period
   1.295 +int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 
   1.296 +{
   1.297 +    int bestOffs;
   1.298 +    double bestCorr, corr;
   1.299 +    double norm;
   1.300 +    int i;
   1.301 +
   1.302 +    bestCorr = FLT_MIN;
   1.303 +    bestOffs = 0;
   1.304 +
   1.305 +    // Scans for the best correlation value by testing each possible position
   1.306 +    // over the permitted range.
   1.307 +    bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
   1.308 +    for (i = 1; i < seekLength; i ++) 
   1.309 +    {
   1.310 +        // Calculates correlation value for the mixing position corresponding
   1.311 +        // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as
   1.312 +        // "calcCrossCorr", but saves time by reusing & updating previously stored 
   1.313 +        // "norm" value
   1.314 +        corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
   1.315 +
   1.316 +        // heuristic rule to slightly favour values close to mid of the range
   1.317 +        double tmp = (double)(2 * i - seekLength) / (double)seekLength;
   1.318 +        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
   1.319 +
   1.320 +        // Checks for the highest correlation value
   1.321 +        if (corr > bestCorr) 
   1.322 +        {
   1.323 +            bestCorr = corr;
   1.324 +            bestOffs = i;
   1.325 +        }
   1.326 +    }
   1.327 +    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
   1.328 +    clearCrossCorrState();
   1.329 +
   1.330 +    return bestOffs;
   1.331 +}
   1.332 +
   1.333 +
   1.334 +// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
   1.335 +// routine
   1.336 +//
   1.337 +// The best position is determined as the position where the two overlapped
   1.338 +// sample sequences are 'most alike', in terms of the highest cross-correlation
   1.339 +// value over the overlapping period
   1.340 +int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) 
   1.341 +{
   1.342 +    int j;
   1.343 +    int bestOffs;
   1.344 +    double bestCorr, corr;
   1.345 +    int scanCount, corrOffset, tempOffset;
   1.346 +
   1.347 +    bestCorr = FLT_MIN;
   1.348 +    bestOffs = _scanOffsets[0][0];
   1.349 +    corrOffset = 0;
   1.350 +    tempOffset = 0;
   1.351 +
   1.352 +    // Scans for the best correlation value using four-pass hierarchical search.
   1.353 +    //
   1.354 +    // The look-up table 'scans' has hierarchical position adjusting steps.
   1.355 +    // In first pass the routine searhes for the highest correlation with 
   1.356 +    // relatively coarse steps, then rescans the neighbourhood of the highest
   1.357 +    // correlation with better resolution and so on.
   1.358 +    for (scanCount = 0;scanCount < 4; scanCount ++) 
   1.359 +    {
   1.360 +        j = 0;
   1.361 +        while (_scanOffsets[scanCount][j]) 
   1.362 +        {
   1.363 +            double norm;
   1.364 +            tempOffset = corrOffset + _scanOffsets[scanCount][j];
   1.365 +            if (tempOffset >= seekLength) break;
   1.366 +
   1.367 +            // Calculates correlation value for the mixing position corresponding
   1.368 +            // to 'tempOffset'
   1.369 +            corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm);
   1.370 +            // heuristic rule to slightly favour values close to mid of the range
   1.371 +            double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
   1.372 +            corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
   1.373 +
   1.374 +            // Checks for the highest correlation value
   1.375 +            if (corr > bestCorr) 
   1.376 +            {
   1.377 +                bestCorr = corr;
   1.378 +                bestOffs = tempOffset;
   1.379 +            }
   1.380 +            j ++;
   1.381 +        }
   1.382 +        corrOffset = bestOffs;
   1.383 +    }
   1.384 +    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
   1.385 +    clearCrossCorrState();
   1.386 +
   1.387 +    return bestOffs;
   1.388 +}
   1.389 +
   1.390 +
   1.391 +
   1.392 +/// clear cross correlation routine state if necessary 
   1.393 +void TDStretch::clearCrossCorrState()
   1.394 +{
   1.395 +    // default implementation is empty.
   1.396 +}
   1.397 +
   1.398 +
   1.399 +/// Calculates processing sequence length according to tempo setting
   1.400 +void TDStretch::calcSeqParameters()
   1.401 +{
   1.402 +    // Adjust tempo param according to tempo, so that variating processing sequence length is used
   1.403 +    // at varius tempo settings, between the given low...top limits
   1.404 +    #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)
   1.405 +    #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)
   1.406 +
   1.407 +    // sequence-ms setting values at above low & top tempo
   1.408 +    #define AUTOSEQ_AT_MIN      125.0
   1.409 +    #define AUTOSEQ_AT_MAX      50.0
   1.410 +    #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
   1.411 +    #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
   1.412 +
   1.413 +    // seek-window-ms setting values at above low & top tempo
   1.414 +    #define AUTOSEEK_AT_MIN     25.0
   1.415 +    #define AUTOSEEK_AT_MAX     15.0
   1.416 +    #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
   1.417 +    #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
   1.418 +
   1.419 +    #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
   1.420 +
   1.421 +    double seq, seek;
   1.422 +    
   1.423 +    if (bAutoSeqSetting)
   1.424 +    {
   1.425 +        seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
   1.426 +        seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
   1.427 +        sequenceMs = (int)(seq + 0.5);
   1.428 +    }
   1.429 +
   1.430 +    if (bAutoSeekSetting)
   1.431 +    {
   1.432 +        seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
   1.433 +        seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
   1.434 +        seekWindowMs = (int)(seek + 0.5);
   1.435 +    }
   1.436 +
   1.437 +    // Update seek window lengths
   1.438 +    seekWindowLength = (sampleRate * sequenceMs) / 1000;
   1.439 +    if (seekWindowLength < 2 * overlapLength) 
   1.440 +    {
   1.441 +        seekWindowLength = 2 * overlapLength;
   1.442 +    }
   1.443 +    seekLength = (sampleRate * seekWindowMs) / 1000;
   1.444 +}
   1.445 +
   1.446 +
   1.447 +
   1.448 +// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower 
   1.449 +// tempo, larger faster tempo.
   1.450 +void TDStretch::setTempo(float newTempo)
   1.451 +{
   1.452 +    int intskip;
   1.453 +
   1.454 +    tempo = newTempo;
   1.455 +
   1.456 +    // Calculate new sequence duration
   1.457 +    calcSeqParameters();
   1.458 +
   1.459 +    // Calculate ideal skip length (according to tempo value) 
   1.460 +    nominalSkip = tempo * (seekWindowLength - overlapLength);
   1.461 +    intskip = (int)(nominalSkip + 0.5f);
   1.462 +
   1.463 +    // Calculate how many samples are needed in the 'inputBuffer' to 
   1.464 +    // process another batch of samples
   1.465 +    //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
   1.466 +    sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
   1.467 +}
   1.468 +
   1.469 +
   1.470 +
   1.471 +// Sets the number of channels, 1 = mono, 2 = stereo
   1.472 +void TDStretch::setChannels(int numChannels)
   1.473 +{
   1.474 +    assert(numChannels > 0);
   1.475 +    if (channels == numChannels) return;
   1.476 +//    assert(numChannels == 1 || numChannels == 2);
   1.477 +
   1.478 +    channels = numChannels;
   1.479 +    inputBuffer.setChannels(channels);
   1.480 +    outputBuffer.setChannels(channels);
   1.481 +
   1.482 +    // re-init overlap/buffer
   1.483 +    overlapLength=0;
   1.484 +    setParameters(sampleRate);
   1.485 +}
   1.486 +
   1.487 +
   1.488 +// nominal tempo, no need for processing, just pass the samples through
   1.489 +// to outputBuffer
   1.490 +/*
   1.491 +void TDStretch::processNominalTempo()
   1.492 +{
   1.493 +    assert(tempo == 1.0f);
   1.494 +
   1.495 +    if (bMidBufferDirty) 
   1.496 +    {
   1.497 +        // If there are samples in pMidBuffer waiting for overlapping,
   1.498 +        // do a single sliding overlapping with them in order to prevent a 
   1.499 +        // clicking distortion in the output sound
   1.500 +        if (inputBuffer.numSamples() < overlapLength) 
   1.501 +        {
   1.502 +            // wait until we've got overlapLength input samples
   1.503 +            return;
   1.504 +        }
   1.505 +        // Mix the samples in the beginning of 'inputBuffer' with the 
   1.506 +        // samples in 'midBuffer' using sliding overlapping 
   1.507 +        overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
   1.508 +        outputBuffer.putSamples(overlapLength);
   1.509 +        inputBuffer.receiveSamples(overlapLength);
   1.510 +        clearMidBuffer();
   1.511 +        // now we've caught the nominal sample flow and may switch to
   1.512 +        // bypass mode
   1.513 +    }
   1.514 +
   1.515 +    // Simply bypass samples from input to output
   1.516 +    outputBuffer.moveSamples(inputBuffer);
   1.517 +}
   1.518 +*/
   1.519 +
   1.520 +
   1.521 +// Processes as many processing frames of the samples 'inputBuffer', store
   1.522 +// the result into 'outputBuffer'
   1.523 +void TDStretch::processSamples()
   1.524 +{
   1.525 +    int ovlSkip, offset;
   1.526 +    int temp;
   1.527 +
   1.528 +    /* Removed this small optimization - can introduce a click to sound when tempo setting
   1.529 +       crosses the nominal value
   1.530 +    if (tempo == 1.0f) 
   1.531 +    {
   1.532 +        // tempo not changed from the original, so bypass the processing
   1.533 +        processNominalTempo();
   1.534 +        return;
   1.535 +    }
   1.536 +    */
   1.537 +
   1.538 +    // Process samples as long as there are enough samples in 'inputBuffer'
   1.539 +    // to form a processing frame.
   1.540 +    while ((int)inputBuffer.numSamples() >= sampleReq) 
   1.541 +    {
   1.542 +        // If tempo differs from the normal ('SCALE'), scan for the best overlapping
   1.543 +        // position
   1.544 +        offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
   1.545 +
   1.546 +        // Mix the samples in the 'inputBuffer' at position of 'offset' with the 
   1.547 +        // samples in 'midBuffer' using sliding overlapping
   1.548 +        // ... first partially overlap with the end of the previous sequence
   1.549 +        // (that's in 'midBuffer')
   1.550 +        overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
   1.551 +        outputBuffer.putSamples((uint)overlapLength);
   1.552 +
   1.553 +        // ... then copy sequence samples from 'inputBuffer' to output:
   1.554 +
   1.555 +        // length of sequence
   1.556 +        temp = (seekWindowLength - 2 * overlapLength);
   1.557 +
   1.558 +        // crosscheck that we don't have buffer overflow...
   1.559 +        if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
   1.560 +        {
   1.561 +            continue;    // just in case, shouldn't really happen
   1.562 +        }
   1.563 +
   1.564 +        outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
   1.565 +
   1.566 +        // Copies the end of the current sequence from 'inputBuffer' to 
   1.567 +        // 'midBuffer' for being mixed with the beginning of the next 
   1.568 +        // processing sequence and so on
   1.569 +        assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
   1.570 +        memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), 
   1.571 +            channels * sizeof(SAMPLETYPE) * overlapLength);
   1.572 +
   1.573 +        // Remove the processed samples from the input buffer. Update
   1.574 +        // the difference between integer & nominal skip step to 'skipFract'
   1.575 +        // in order to prevent the error from accumulating over time.
   1.576 +        skipFract += nominalSkip;   // real skip size
   1.577 +        ovlSkip = (int)skipFract;   // rounded to integer skip
   1.578 +        skipFract -= ovlSkip;       // maintain the fraction part, i.e. real vs. integer skip
   1.579 +        inputBuffer.receiveSamples((uint)ovlSkip);
   1.580 +    }
   1.581 +}
   1.582 +
   1.583 +
   1.584 +// Adds 'numsamples' pcs of samples from the 'samples' memory position into
   1.585 +// the input of the object.
   1.586 +void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
   1.587 +{
   1.588 +    // Add the samples into the input buffer
   1.589 +    inputBuffer.putSamples(samples, nSamples);
   1.590 +    // Process the samples in input buffer
   1.591 +    processSamples();
   1.592 +}
   1.593 +
   1.594 +
   1.595 +
   1.596 +/// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
   1.597 +void TDStretch::acceptNewOverlapLength(int newOverlapLength)
   1.598 +{
   1.599 +    int prevOvl;
   1.600 +
   1.601 +    assert(newOverlapLength >= 0);
   1.602 +    prevOvl = overlapLength;
   1.603 +    overlapLength = newOverlapLength;
   1.604 +
   1.605 +    if (overlapLength > prevOvl)
   1.606 +    {
   1.607 +        delete[] pMidBufferUnaligned;
   1.608 +
   1.609 +        pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
   1.610 +        // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
   1.611 +        pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
   1.612 +
   1.613 +        clearMidBuffer();
   1.614 +    }
   1.615 +}
   1.616 +
   1.617 +
   1.618 +// Operator 'new' is overloaded so that it automatically creates a suitable instance 
   1.619 +// depending on if we've a MMX/SSE/etc-capable CPU available or not.
   1.620 +void * TDStretch::operator new(size_t s)
   1.621 +{
   1.622 +    // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
   1.623 +    ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
   1.624 +    return newInstance();
   1.625 +}
   1.626 +
   1.627 +
   1.628 +TDStretch * TDStretch::newInstance()
   1.629 +{
   1.630 +#if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
   1.631 +    uint uExtensions;
   1.632 +
   1.633 +    uExtensions = detectCPUextensions();
   1.634 +#endif
   1.635 +
   1.636 +    // Check if MMX/SSE instruction set extensions supported by CPU
   1.637 +
   1.638 +#ifdef SOUNDTOUCH_ALLOW_MMX
   1.639 +    // MMX routines available only with integer sample types
   1.640 +    if (uExtensions & SUPPORT_MMX)
   1.641 +    {
   1.642 +        return ::new TDStretchMMX;
   1.643 +    }
   1.644 +    else
   1.645 +#endif // SOUNDTOUCH_ALLOW_MMX
   1.646 +
   1.647 +
   1.648 +#ifdef SOUNDTOUCH_ALLOW_SSE
   1.649 +    if (uExtensions & SUPPORT_SSE)
   1.650 +    {
   1.651 +        // SSE support
   1.652 +        return ::new TDStretchSSE;
   1.653 +    }
   1.654 +    else
   1.655 +#endif // SOUNDTOUCH_ALLOW_SSE
   1.656 +
   1.657 +    {
   1.658 +        // ISA optimizations not supported, use plain C version
   1.659 +        return ::new TDStretch;
   1.660 +    }
   1.661 +}
   1.662 +
   1.663 +
   1.664 +//////////////////////////////////////////////////////////////////////////////
   1.665 +//
   1.666 +// Integer arithmetics specific algorithm implementations.
   1.667 +//
   1.668 +//////////////////////////////////////////////////////////////////////////////
   1.669 +
   1.670 +#ifdef SOUNDTOUCH_INTEGER_SAMPLES
   1.671 +
   1.672 +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 
   1.673 +// version of the routine.
   1.674 +void TDStretch::overlapStereo(short *poutput, const short *input) const
   1.675 +{
   1.676 +    int i;
   1.677 +    short temp;
   1.678 +    int cnt2;
   1.679 +
   1.680 +    for (i = 0; i < overlapLength ; i ++) 
   1.681 +    {
   1.682 +        temp = (short)(overlapLength - i);
   1.683 +        cnt2 = 2 * i;
   1.684 +        poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;
   1.685 +        poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
   1.686 +    }
   1.687 +}
   1.688 +
   1.689 +
   1.690 +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
   1.691 +// version of the routine.
   1.692 +void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
   1.693 +{
   1.694 +    SAMPLETYPE m1=(SAMPLETYPE)0;
   1.695 +    SAMPLETYPE m2;
   1.696 +    int i=0;
   1.697 +
   1.698 +    for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
   1.699 +    {
   1.700 +        for (int c = 0; c < channels; c ++)
   1.701 +        {
   1.702 +            poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2)  / overlapLength;
   1.703 +            i++;
   1.704 +        }
   1.705 +
   1.706 +        m1++;
   1.707 +    }
   1.708 +}
   1.709 +
   1.710 +// Calculates the x having the closest 2^x value for the given value
   1.711 +static int _getClosest2Power(double value)
   1.712 +{
   1.713 +    return (int)(log(value) / log(2.0) + 0.5);
   1.714 +}
   1.715 +
   1.716 +
   1.717 +/// Calculates overlap period length in samples.
   1.718 +/// Integer version rounds overlap length to closest power of 2
   1.719 +/// for a divide scaling operation.
   1.720 +void TDStretch::calculateOverlapLength(int aoverlapMs)
   1.721 +{
   1.722 +    int newOvl;
   1.723 +
   1.724 +    assert(aoverlapMs >= 0);
   1.725 +
   1.726 +    // calculate overlap length so that it's power of 2 - thus it's easy to do
   1.727 +    // integer division by right-shifting. Term "-1" at end is to account for 
   1.728 +    // the extra most significatnt bit left unused in result by signed multiplication 
   1.729 +    overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
   1.730 +    if (overlapDividerBits > 9) overlapDividerBits = 9;
   1.731 +    if (overlapDividerBits < 3) overlapDividerBits = 3;
   1.732 +    newOvl = (int)pow(2.0, (int)overlapDividerBits + 1);    // +1 => account for -1 above
   1.733 +
   1.734 +    acceptNewOverlapLength(newOvl);
   1.735 +
   1.736 +    // calculate sloping divider so that crosscorrelation operation won't 
   1.737 +    // overflow 32-bit register. Max. sum of the crosscorrelation sum without 
   1.738 +    // divider would be 2^30*(N^3-N)/3, where N = overlap length
   1.739 +    slopingDivider = (newOvl * newOvl - 1) / 3;
   1.740 +}
   1.741 +
   1.742 +
   1.743 +double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const
   1.744 +{
   1.745 +    long corr;
   1.746 +    long lnorm;
   1.747 +    int i;
   1.748 +
   1.749 +    corr = lnorm = 0;
   1.750 +    // Same routine for stereo and mono. For stereo, unroll loop for better
   1.751 +    // efficiency and gives slightly better resolution against rounding. 
   1.752 +    // For mono it same routine, just  unrolls loop by factor of 4
   1.753 +    for (i = 0; i < channels * overlapLength; i += 4) 
   1.754 +    {
   1.755 +        corr += (mixingPos[i] * compare[i] + 
   1.756 +                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;  // notice: do intermediate division here to avoid integer overflow
   1.757 +        corr += (mixingPos[i + 2] * compare[i + 2] + 
   1.758 +                 mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
   1.759 +        lnorm += (mixingPos[i] * mixingPos[i] + 
   1.760 +                  mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
   1.761 +        lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + 
   1.762 +                  mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
   1.763 +    }
   1.764 +
   1.765 +    // Normalize result by dividing by sqrt(norm) - this step is easiest 
   1.766 +    // done using floating point operation
   1.767 +    norm = (double)lnorm;
   1.768 +    return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
   1.769 +}
   1.770 +
   1.771 +
   1.772 +/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
   1.773 +double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const
   1.774 +{
   1.775 +    long corr;
   1.776 +    long lnorm;
   1.777 +    int i;
   1.778 +
   1.779 +    // cancel first normalizer tap from previous round
   1.780 +    lnorm = 0;
   1.781 +    for (i = 1; i <= channels; i ++)
   1.782 +    {
   1.783 +        lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits;
   1.784 +    }
   1.785 +
   1.786 +    corr = 0;
   1.787 +    // Same routine for stereo and mono. For stereo, unroll loop for better
   1.788 +    // efficiency and gives slightly better resolution against rounding. 
   1.789 +    // For mono it same routine, just  unrolls loop by factor of 4
   1.790 +    for (i = 0; i < channels * overlapLength; i += 4) 
   1.791 +    {
   1.792 +        corr += (mixingPos[i] * compare[i] + 
   1.793 +                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;  // notice: do intermediate division here to avoid integer overflow
   1.794 +        corr += (mixingPos[i + 2] * compare[i + 2] + 
   1.795 +                 mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
   1.796 +    }
   1.797 +
   1.798 +    // update normalizer with last samples of this round
   1.799 +    for (int j = 0; j < channels; j ++)
   1.800 +    {
   1.801 +        i --;
   1.802 +        lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
   1.803 +    }
   1.804 +    norm += (double)lnorm;
   1.805 +
   1.806 +    // Normalize result by dividing by sqrt(norm) - this step is easiest 
   1.807 +    // done using floating point operation
   1.808 +    return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
   1.809 +}
   1.810 +
   1.811 +#endif // SOUNDTOUCH_INTEGER_SAMPLES
   1.812 +
   1.813 +//////////////////////////////////////////////////////////////////////////////
   1.814 +//
   1.815 +// Floating point arithmetics specific algorithm implementations.
   1.816 +//
   1.817 +
   1.818 +#ifdef SOUNDTOUCH_FLOAT_SAMPLES
   1.819 +
   1.820 +// Overlaps samples in 'midBuffer' with the samples in 'pInput'
   1.821 +void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
   1.822 +{
   1.823 +    int i;
   1.824 +    float fScale;
   1.825 +    float f1;
   1.826 +    float f2;
   1.827 +
   1.828 +    fScale = 1.0f / (float)overlapLength;
   1.829 +
   1.830 +    f1 = 0;
   1.831 +    f2 = 1.0f;
   1.832 +
   1.833 +    for (i = 0; i < 2 * (int)overlapLength ; i += 2) 
   1.834 +    {
   1.835 +        pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
   1.836 +        pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
   1.837 +
   1.838 +        f1 += fScale;
   1.839 +        f2 -= fScale;
   1.840 +    }
   1.841 +}
   1.842 +
   1.843 +
   1.844 +// Overlaps samples in 'midBuffer' with the samples in 'input'. 
   1.845 +void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
   1.846 +{
   1.847 +    int i;
   1.848 +    float fScale;
   1.849 +    float f1;
   1.850 +    float f2;
   1.851 +
   1.852 +    fScale = 1.0f / (float)overlapLength;
   1.853 +
   1.854 +    f1 = 0;
   1.855 +    f2 = 1.0f;
   1.856 +
   1.857 +    i=0;
   1.858 +    for (int i2 = 0; i2 < overlapLength; i2 ++)
   1.859 +    {
   1.860 +        // note: Could optimize this slightly by taking into account that always channels > 2
   1.861 +        for (int c = 0; c < channels; c ++)
   1.862 +        {
   1.863 +            pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
   1.864 +            i++;
   1.865 +        }
   1.866 +        f1 += fScale;
   1.867 +        f2 -= fScale;
   1.868 +    }
   1.869 +}
   1.870 +
   1.871 +
   1.872 +/// Calculates overlapInMsec period length in samples.
   1.873 +void TDStretch::calculateOverlapLength(int overlapInMsec)
   1.874 +{
   1.875 +    int newOvl;
   1.876 +
   1.877 +    assert(overlapInMsec >= 0);
   1.878 +    newOvl = (sampleRate * overlapInMsec) / 1000;
   1.879 +    if (newOvl < 16) newOvl = 16;
   1.880 +
   1.881 +    // must be divisible by 8
   1.882 +    newOvl -= newOvl % 8;
   1.883 +
   1.884 +    acceptNewOverlapLength(newOvl);
   1.885 +}
   1.886 +
   1.887 +
   1.888 +/// Calculate cross-correlation
   1.889 +double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const
   1.890 +{
   1.891 +    double corr;
   1.892 +    int i;
   1.893 +
   1.894 +    corr = norm = 0;
   1.895 +    // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
   1.896 +    // For mono it's same routine yet unrollsd by factor of 4.
   1.897 +    for (i = 0; i < channels * overlapLength; i += 4) 
   1.898 +    {
   1.899 +        corr += mixingPos[i] * compare[i] +
   1.900 +                mixingPos[i + 1] * compare[i + 1];
   1.901 +
   1.902 +        norm += mixingPos[i] * mixingPos[i] + 
   1.903 +                mixingPos[i + 1] * mixingPos[i + 1];
   1.904 +
   1.905 +        // unroll the loop for better CPU efficiency:
   1.906 +        corr += mixingPos[i + 2] * compare[i + 2] +
   1.907 +                mixingPos[i + 3] * compare[i + 3];
   1.908 +
   1.909 +        norm += mixingPos[i + 2] * mixingPos[i + 2] +
   1.910 +                mixingPos[i + 3] * mixingPos[i + 3];
   1.911 +    }
   1.912 +
   1.913 +    return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
   1.914 +}
   1.915 +
   1.916 +
   1.917 +/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
   1.918 +double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const
   1.919 +{
   1.920 +    double corr;
   1.921 +    int i;
   1.922 +
   1.923 +    corr = 0;
   1.924 +
   1.925 +    // cancel first normalizer tap from previous round
   1.926 +    for (i = 1; i <= channels; i ++)
   1.927 +    {
   1.928 +        norm -= mixingPos[-i] * mixingPos[-i];
   1.929 +    }
   1.930 +
   1.931 +    // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
   1.932 +    // For mono it's same routine yet unrollsd by factor of 4.
   1.933 +    for (i = 0; i < channels * overlapLength; i += 4) 
   1.934 +    {
   1.935 +        corr += mixingPos[i] * compare[i] +
   1.936 +                mixingPos[i + 1] * compare[i + 1] +
   1.937 +                mixingPos[i + 2] * compare[i + 2] +
   1.938 +                mixingPos[i + 3] * compare[i + 3];
   1.939 +    }
   1.940 +
   1.941 +    // update normalizer with last samples of this round
   1.942 +    for (int j = 0; j < channels; j ++)
   1.943 +    {
   1.944 +        i --;
   1.945 +        norm += mixingPos[i] * mixingPos[i];
   1.946 +    }
   1.947 +
   1.948 +    return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
   1.949 +}
   1.950 +
   1.951 +
   1.952 +#endif // SOUNDTOUCH_FLOAT_SAMPLES

mercurial