michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: /// 
michael@0: /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
michael@0: /// while maintaining the original pitch by using a time domain WSOLA-like 
michael@0: /// method with several performance-increasing tweaks.
michael@0: ///
michael@0: /// Note : MMX optimized functions reside in a separate, platform-specific 
michael@0: /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
michael@0: ///
michael@0: /// Author        : Copyright (c) Olli Parviainen
michael@0: /// Author e-mail : oparviai 'at' iki.fi
michael@0: /// SoundTouch WWW: http://www.surina.net/soundtouch
michael@0: ///
michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: //
michael@0: // Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
michael@0: // File revision : $Revision: 1.12 $
michael@0: //
michael@0: // $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $
michael@0: //
michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: //
michael@0: // License :
michael@0: //
michael@0: //  SoundTouch audio processing library
michael@0: //  Copyright (c) Olli Parviainen
michael@0: //
michael@0: //  This library is free software; you can redistribute it and/or
michael@0: //  modify it under the terms of the GNU Lesser General Public
michael@0: //  License as published by the Free Software Foundation; either
michael@0: //  version 2.1 of the License, or (at your option) any later version.
michael@0: //
michael@0: //  This library is distributed in the hope that it will be useful,
michael@0: //  but WITHOUT ANY WARRANTY; without even the implied warranty of
michael@0: //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
michael@0: //  Lesser General Public License for more details.
michael@0: //
michael@0: //  You should have received a copy of the GNU Lesser General Public
michael@0: //  License along with this library; if not, write to the Free Software
michael@0: //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
michael@0: //
michael@0: ////////////////////////////////////////////////////////////////////////////////
michael@0: 
michael@0: #include <string.h>
michael@0: #include <limits.h>
michael@0: #include <assert.h>
michael@0: #include <math.h>
michael@0: #include <float.h>
michael@0: 
michael@0: #include "STTypes.h"
michael@0: #include "cpu_detect.h"
michael@0: #include "TDStretch.h"
michael@0: 
michael@0: using namespace soundtouch;
michael@0: 
michael@0: #define max(x, y) (((x) > (y)) ? (x) : (y))
michael@0: 
michael@0: 
michael@0: /*****************************************************************************
michael@0:  *
michael@0:  * Constant definitions
michael@0:  *
michael@0:  *****************************************************************************/
michael@0: 
michael@0: // Table for the hierarchical mixing position seeking algorithm
michael@0: static const short _scanOffsets[5][24]={
michael@0:     { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
michael@0:       868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
michael@0:     {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
michael@0:         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
michael@0:     { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
michael@0:         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
michael@0:     {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
michael@0:         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
michael@0:     { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,
michael@0:       116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
michael@0: 
michael@0: /*****************************************************************************
michael@0:  *
michael@0:  * Implementation of the class 'TDStretch'
michael@0:  *
michael@0:  *****************************************************************************/
michael@0: 
michael@0: 
michael@0: TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
michael@0: {
michael@0:     bQuickSeek = false;
michael@0:     channels = 2;
michael@0: 
michael@0:     pMidBuffer = NULL;
michael@0:     pMidBufferUnaligned = NULL;
michael@0:     overlapLength = 0;
michael@0: 
michael@0:     bAutoSeqSetting = true;
michael@0:     bAutoSeekSetting = true;
michael@0: 
michael@0: //    outDebt = 0;
michael@0:     skipFract = 0;
michael@0: 
michael@0:     tempo = 1.0f;
michael@0:     setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
michael@0:     setTempo(1.0f);
michael@0: 
michael@0:     clear();
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: TDStretch::~TDStretch()
michael@0: {
michael@0:     delete[] pMidBufferUnaligned;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Sets routine control parameters. These control are certain time constants
michael@0: // defining how the sound is stretched to the desired duration.
michael@0: //
michael@0: // 'sampleRate' = sample rate of the sound
michael@0: // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
michael@0: // 'seekwindowMS' = seeking window length for scanning the best overlapping 
michael@0: //      position (default = 28 ms)
michael@0: // 'overlapMS' = overlapping length (default = 12 ms)
michael@0: 
michael@0: void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 
michael@0:                               int aSeekWindowMS, int aOverlapMS)
michael@0: {
michael@0:     // accept only positive parameter values - if zero or negative, use old values instead
michael@0:     if (aSampleRate > 0)   this->sampleRate = aSampleRate;
michael@0:     if (aOverlapMS > 0)    this->overlapMs = aOverlapMS;
michael@0: 
michael@0:     if (aSequenceMS > 0)
michael@0:     {
michael@0:         this->sequenceMs = aSequenceMS;
michael@0:         bAutoSeqSetting = false;
michael@0:     } 
michael@0:     else if (aSequenceMS == 0)
michael@0:     {
michael@0:         // if zero, use automatic setting
michael@0:         bAutoSeqSetting = true;
michael@0:     }
michael@0: 
michael@0:     if (aSeekWindowMS > 0) 
michael@0:     {
michael@0:         this->seekWindowMs = aSeekWindowMS;
michael@0:         bAutoSeekSetting = false;
michael@0:     } 
michael@0:     else if (aSeekWindowMS == 0) 
michael@0:     {
michael@0:         // if zero, use automatic setting
michael@0:         bAutoSeekSetting = true;
michael@0:     }
michael@0: 
michael@0:     calcSeqParameters();
michael@0: 
michael@0:     calculateOverlapLength(overlapMs);
michael@0: 
michael@0:     // set tempo to recalculate 'sampleReq'
michael@0:     setTempo(tempo);
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: /// Get routine control parameters, see setParameters() function.
michael@0: /// Any of the parameters to this function can be NULL, in such case corresponding parameter
michael@0: /// value isn't returned.
michael@0: void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
michael@0: {
michael@0:     if (pSampleRate)
michael@0:     {
michael@0:         *pSampleRate = sampleRate;
michael@0:     }
michael@0: 
michael@0:     if (pSequenceMs)
michael@0:     {
michael@0:         *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
michael@0:     }
michael@0: 
michael@0:     if (pSeekWindowMs)
michael@0:     {
michael@0:         *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
michael@0:     }
michael@0: 
michael@0:     if (pOverlapMs)
michael@0:     {
michael@0:         *pOverlapMs = overlapMs;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Overlaps samples in 'midBuffer' with the samples in 'pInput'
michael@0: void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
michael@0: {
michael@0:     int i;
michael@0:     SAMPLETYPE m1, m2;
michael@0: 
michael@0:     m1 = (SAMPLETYPE)0;
michael@0:     m2 = (SAMPLETYPE)overlapLength;
michael@0: 
michael@0:     for (i = 0; i < overlapLength ; i ++) 
michael@0:     {
michael@0:         pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
michael@0:         m1 += 1;
michael@0:         m2 -= 1;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: void TDStretch::clearMidBuffer()
michael@0: {
michael@0:     memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
michael@0: }
michael@0: 
michael@0: 
michael@0: void TDStretch::clearInput()
michael@0: {
michael@0:     inputBuffer.clear();
michael@0:     clearMidBuffer();
michael@0: }
michael@0: 
michael@0: 
michael@0: // Clears the sample buffers
michael@0: void TDStretch::clear()
michael@0: {
michael@0:     outputBuffer.clear();
michael@0:     clearInput();
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
michael@0: // to enable
michael@0: void TDStretch::enableQuickSeek(bool enable)
michael@0: {
michael@0:     bQuickSeek = enable;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Returns nonzero if the quick seeking algorithm is enabled.
michael@0: bool TDStretch::isQuickSeekEnabled() const
michael@0: {
michael@0:     return bQuickSeek;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Seeks for the optimal overlap-mixing position.
michael@0: int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
michael@0: {
michael@0:     if (bQuickSeek) 
michael@0:     {
michael@0:         return seekBestOverlapPositionQuick(refPos);
michael@0:     } 
michael@0:     else 
michael@0:     {
michael@0:         return seekBestOverlapPositionFull(refPos);
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
michael@0: // of 'ovlPos'.
michael@0: inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
michael@0: {
michael@0: #ifndef USE_MULTICH_ALWAYS
michael@0:     if (channels == 1)
michael@0:     {
michael@0:         // mono sound.
michael@0:         overlapMono(pOutput, pInput + ovlPos);
michael@0:     }
michael@0:     else if (channels == 2)
michael@0:     {
michael@0:         // stereo sound
michael@0:         overlapStereo(pOutput, pInput + 2 * ovlPos);
michael@0:     } 
michael@0:     else 
michael@0: #endif // USE_MULTICH_ALWAYS
michael@0:     {
michael@0:         assert(channels > 0);
michael@0:         overlapMulti(pOutput, pInput + channels * ovlPos);
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
michael@0: // routine
michael@0: //
michael@0: // The best position is determined as the position where the two overlapped
michael@0: // sample sequences are 'most alike', in terms of the highest cross-correlation
michael@0: // value over the overlapping period
michael@0: int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 
michael@0: {
michael@0:     int bestOffs;
michael@0:     double bestCorr, corr;
michael@0:     double norm;
michael@0:     int i;
michael@0: 
michael@0:     bestCorr = FLT_MIN;
michael@0:     bestOffs = 0;
michael@0: 
michael@0:     // Scans for the best correlation value by testing each possible position
michael@0:     // over the permitted range.
michael@0:     bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
michael@0:     for (i = 1; i < seekLength; i ++) 
michael@0:     {
michael@0:         // Calculates correlation value for the mixing position corresponding
michael@0:         // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as
michael@0:         // "calcCrossCorr", but saves time by reusing & updating previously stored 
michael@0:         // "norm" value
michael@0:         corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
michael@0: 
michael@0:         // heuristic rule to slightly favour values close to mid of the range
michael@0:         double tmp = (double)(2 * i - seekLength) / (double)seekLength;
michael@0:         corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
michael@0: 
michael@0:         // Checks for the highest correlation value
michael@0:         if (corr > bestCorr) 
michael@0:         {
michael@0:             bestCorr = corr;
michael@0:             bestOffs = i;
michael@0:         }
michael@0:     }
michael@0:     // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
michael@0:     clearCrossCorrState();
michael@0: 
michael@0:     return bestOffs;
michael@0: }
michael@0: 
michael@0: 
michael@0: // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
michael@0: // routine
michael@0: //
michael@0: // The best position is determined as the position where the two overlapped
michael@0: // sample sequences are 'most alike', in terms of the highest cross-correlation
michael@0: // value over the overlapping period
michael@0: int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) 
michael@0: {
michael@0:     int j;
michael@0:     int bestOffs;
michael@0:     double bestCorr, corr;
michael@0:     int scanCount, corrOffset, tempOffset;
michael@0: 
michael@0:     bestCorr = FLT_MIN;
michael@0:     bestOffs = _scanOffsets[0][0];
michael@0:     corrOffset = 0;
michael@0:     tempOffset = 0;
michael@0: 
michael@0:     // Scans for the best correlation value using four-pass hierarchical search.
michael@0:     //
michael@0:     // The look-up table 'scans' has hierarchical position adjusting steps.
michael@0:     // In first pass the routine searhes for the highest correlation with 
michael@0:     // relatively coarse steps, then rescans the neighbourhood of the highest
michael@0:     // correlation with better resolution and so on.
michael@0:     for (scanCount = 0;scanCount < 4; scanCount ++) 
michael@0:     {
michael@0:         j = 0;
michael@0:         while (_scanOffsets[scanCount][j]) 
michael@0:         {
michael@0:             double norm;
michael@0:             tempOffset = corrOffset + _scanOffsets[scanCount][j];
michael@0:             if (tempOffset >= seekLength) break;
michael@0: 
michael@0:             // Calculates correlation value for the mixing position corresponding
michael@0:             // to 'tempOffset'
michael@0:             corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm);
michael@0:             // heuristic rule to slightly favour values close to mid of the range
michael@0:             double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
michael@0:             corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
michael@0: 
michael@0:             // Checks for the highest correlation value
michael@0:             if (corr > bestCorr) 
michael@0:             {
michael@0:                 bestCorr = corr;
michael@0:                 bestOffs = tempOffset;
michael@0:             }
michael@0:             j ++;
michael@0:         }
michael@0:         corrOffset = bestOffs;
michael@0:     }
michael@0:     // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
michael@0:     clearCrossCorrState();
michael@0: 
michael@0:     return bestOffs;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: /// clear cross correlation routine state if necessary 
michael@0: void TDStretch::clearCrossCorrState()
michael@0: {
michael@0:     // default implementation is empty.
michael@0: }
michael@0: 
michael@0: 
michael@0: /// Calculates processing sequence length according to tempo setting
michael@0: void TDStretch::calcSeqParameters()
michael@0: {
michael@0:     // Adjust tempo param according to tempo, so that variating processing sequence length is used
michael@0:     // at varius tempo settings, between the given low...top limits
michael@0:     #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)
michael@0:     #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)
michael@0: 
michael@0:     // sequence-ms setting values at above low & top tempo
michael@0:     #define AUTOSEQ_AT_MIN      125.0
michael@0:     #define AUTOSEQ_AT_MAX      50.0
michael@0:     #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
michael@0:     #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
michael@0: 
michael@0:     // seek-window-ms setting values at above low & top tempo
michael@0:     #define AUTOSEEK_AT_MIN     25.0
michael@0:     #define AUTOSEEK_AT_MAX     15.0
michael@0:     #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
michael@0:     #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
michael@0: 
michael@0:     #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
michael@0: 
michael@0:     double seq, seek;
michael@0:     
michael@0:     if (bAutoSeqSetting)
michael@0:     {
michael@0:         seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
michael@0:         seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
michael@0:         sequenceMs = (int)(seq + 0.5);
michael@0:     }
michael@0: 
michael@0:     if (bAutoSeekSetting)
michael@0:     {
michael@0:         seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
michael@0:         seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
michael@0:         seekWindowMs = (int)(seek + 0.5);
michael@0:     }
michael@0: 
michael@0:     // Update seek window lengths
michael@0:     seekWindowLength = (sampleRate * sequenceMs) / 1000;
michael@0:     if (seekWindowLength < 2 * overlapLength) 
michael@0:     {
michael@0:         seekWindowLength = 2 * overlapLength;
michael@0:     }
michael@0:     seekLength = (sampleRate * seekWindowMs) / 1000;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower 
michael@0: // tempo, larger faster tempo.
michael@0: void TDStretch::setTempo(float newTempo)
michael@0: {
michael@0:     int intskip;
michael@0: 
michael@0:     tempo = newTempo;
michael@0: 
michael@0:     // Calculate new sequence duration
michael@0:     calcSeqParameters();
michael@0: 
michael@0:     // Calculate ideal skip length (according to tempo value) 
michael@0:     nominalSkip = tempo * (seekWindowLength - overlapLength);
michael@0:     intskip = (int)(nominalSkip + 0.5f);
michael@0: 
michael@0:     // Calculate how many samples are needed in the 'inputBuffer' to 
michael@0:     // process another batch of samples
michael@0:     //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
michael@0:     sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: // Sets the number of channels, 1 = mono, 2 = stereo
michael@0: void TDStretch::setChannels(int numChannels)
michael@0: {
michael@0:     assert(numChannels > 0);
michael@0:     if (channels == numChannels) return;
michael@0: //    assert(numChannels == 1 || numChannels == 2);
michael@0: 
michael@0:     channels = numChannels;
michael@0:     inputBuffer.setChannels(channels);
michael@0:     outputBuffer.setChannels(channels);
michael@0: 
michael@0:     // re-init overlap/buffer
michael@0:     overlapLength=0;
michael@0:     setParameters(sampleRate);
michael@0: }
michael@0: 
michael@0: 
michael@0: // nominal tempo, no need for processing, just pass the samples through
michael@0: // to outputBuffer
michael@0: /*
michael@0: void TDStretch::processNominalTempo()
michael@0: {
michael@0:     assert(tempo == 1.0f);
michael@0: 
michael@0:     if (bMidBufferDirty) 
michael@0:     {
michael@0:         // If there are samples in pMidBuffer waiting for overlapping,
michael@0:         // do a single sliding overlapping with them in order to prevent a 
michael@0:         // clicking distortion in the output sound
michael@0:         if (inputBuffer.numSamples() < overlapLength) 
michael@0:         {
michael@0:             // wait until we've got overlapLength input samples
michael@0:             return;
michael@0:         }
michael@0:         // Mix the samples in the beginning of 'inputBuffer' with the 
michael@0:         // samples in 'midBuffer' using sliding overlapping 
michael@0:         overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
michael@0:         outputBuffer.putSamples(overlapLength);
michael@0:         inputBuffer.receiveSamples(overlapLength);
michael@0:         clearMidBuffer();
michael@0:         // now we've caught the nominal sample flow and may switch to
michael@0:         // bypass mode
michael@0:     }
michael@0: 
michael@0:     // Simply bypass samples from input to output
michael@0:     outputBuffer.moveSamples(inputBuffer);
michael@0: }
michael@0: */
michael@0: 
michael@0: 
michael@0: // Processes as many processing frames of the samples 'inputBuffer', store
michael@0: // the result into 'outputBuffer'
michael@0: void TDStretch::processSamples()
michael@0: {
michael@0:     int ovlSkip, offset;
michael@0:     int temp;
michael@0: 
michael@0:     /* Removed this small optimization - can introduce a click to sound when tempo setting
michael@0:        crosses the nominal value
michael@0:     if (tempo == 1.0f) 
michael@0:     {
michael@0:         // tempo not changed from the original, so bypass the processing
michael@0:         processNominalTempo();
michael@0:         return;
michael@0:     }
michael@0:     */
michael@0: 
michael@0:     // Process samples as long as there are enough samples in 'inputBuffer'
michael@0:     // to form a processing frame.
michael@0:     while ((int)inputBuffer.numSamples() >= sampleReq) 
michael@0:     {
michael@0:         // If tempo differs from the normal ('SCALE'), scan for the best overlapping
michael@0:         // position
michael@0:         offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
michael@0: 
michael@0:         // Mix the samples in the 'inputBuffer' at position of 'offset' with the 
michael@0:         // samples in 'midBuffer' using sliding overlapping
michael@0:         // ... first partially overlap with the end of the previous sequence
michael@0:         // (that's in 'midBuffer')
michael@0:         overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
michael@0:         outputBuffer.putSamples((uint)overlapLength);
michael@0: 
michael@0:         // ... then copy sequence samples from 'inputBuffer' to output:
michael@0: 
michael@0:         // length of sequence
michael@0:         temp = (seekWindowLength - 2 * overlapLength);
michael@0: 
michael@0:         // crosscheck that we don't have buffer overflow...
michael@0:         if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
michael@0:         {
michael@0:             continue;    // just in case, shouldn't really happen
michael@0:         }
michael@0: 
michael@0:         outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
michael@0: 
michael@0:         // Copies the end of the current sequence from 'inputBuffer' to 
michael@0:         // 'midBuffer' for being mixed with the beginning of the next 
michael@0:         // processing sequence and so on
michael@0:         assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
michael@0:         memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), 
michael@0:             channels * sizeof(SAMPLETYPE) * overlapLength);
michael@0: 
michael@0:         // Remove the processed samples from the input buffer. Update
michael@0:         // the difference between integer & nominal skip step to 'skipFract'
michael@0:         // in order to prevent the error from accumulating over time.
michael@0:         skipFract += nominalSkip;   // real skip size
michael@0:         ovlSkip = (int)skipFract;   // rounded to integer skip
michael@0:         skipFract -= ovlSkip;       // maintain the fraction part, i.e. real vs. integer skip
michael@0:         inputBuffer.receiveSamples((uint)ovlSkip);
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Adds 'numsamples' pcs of samples from the 'samples' memory position into
michael@0: // the input of the object.
michael@0: void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
michael@0: {
michael@0:     // Add the samples into the input buffer
michael@0:     inputBuffer.putSamples(samples, nSamples);
michael@0:     // Process the samples in input buffer
michael@0:     processSamples();
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
michael@0: void TDStretch::acceptNewOverlapLength(int newOverlapLength)
michael@0: {
michael@0:     int prevOvl;
michael@0: 
michael@0:     assert(newOverlapLength >= 0);
michael@0:     prevOvl = overlapLength;
michael@0:     overlapLength = newOverlapLength;
michael@0: 
michael@0:     if (overlapLength > prevOvl)
michael@0:     {
michael@0:         delete[] pMidBufferUnaligned;
michael@0: 
michael@0:         pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
michael@0:         // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
michael@0:         pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
michael@0: 
michael@0:         clearMidBuffer();
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Operator 'new' is overloaded so that it automatically creates a suitable instance 
michael@0: // depending on if we've a MMX/SSE/etc-capable CPU available or not.
michael@0: void * TDStretch::operator new(size_t s)
michael@0: {
michael@0:     // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
michael@0:     ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
michael@0:     return newInstance();
michael@0: }
michael@0: 
michael@0: 
michael@0: TDStretch * TDStretch::newInstance()
michael@0: {
michael@0: #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
michael@0:     uint uExtensions;
michael@0: 
michael@0:     uExtensions = detectCPUextensions();
michael@0: #endif
michael@0: 
michael@0:     // Check if MMX/SSE instruction set extensions supported by CPU
michael@0: 
michael@0: #ifdef SOUNDTOUCH_ALLOW_MMX
michael@0:     // MMX routines available only with integer sample types
michael@0:     if (uExtensions & SUPPORT_MMX)
michael@0:     {
michael@0:         return ::new TDStretchMMX;
michael@0:     }
michael@0:     else
michael@0: #endif // SOUNDTOUCH_ALLOW_MMX
michael@0: 
michael@0: 
michael@0: #ifdef SOUNDTOUCH_ALLOW_SSE
michael@0:     if (uExtensions & SUPPORT_SSE)
michael@0:     {
michael@0:         // SSE support
michael@0:         return ::new TDStretchSSE;
michael@0:     }
michael@0:     else
michael@0: #endif // SOUNDTOUCH_ALLOW_SSE
michael@0: 
michael@0:     {
michael@0:         // ISA optimizations not supported, use plain C version
michael@0:         return ::new TDStretch;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: //////////////////////////////////////////////////////////////////////////////
michael@0: //
michael@0: // Integer arithmetics specific algorithm implementations.
michael@0: //
michael@0: //////////////////////////////////////////////////////////////////////////////
michael@0: 
michael@0: #ifdef SOUNDTOUCH_INTEGER_SAMPLES
michael@0: 
michael@0: // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 
michael@0: // version of the routine.
michael@0: void TDStretch::overlapStereo(short *poutput, const short *input) const
michael@0: {
michael@0:     int i;
michael@0:     short temp;
michael@0:     int cnt2;
michael@0: 
michael@0:     for (i = 0; i < overlapLength ; i ++) 
michael@0:     {
michael@0:         temp = (short)(overlapLength - i);
michael@0:         cnt2 = 2 * i;
michael@0:         poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;
michael@0:         poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
michael@0: // version of the routine.
michael@0: void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
michael@0: {
michael@0:     SAMPLETYPE m1=(SAMPLETYPE)0;
michael@0:     SAMPLETYPE m2;
michael@0:     int i=0;
michael@0: 
michael@0:     for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
michael@0:     {
michael@0:         for (int c = 0; c < channels; c ++)
michael@0:         {
michael@0:             poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2)  / overlapLength;
michael@0:             i++;
michael@0:         }
michael@0: 
michael@0:         m1++;
michael@0:     }
michael@0: }
michael@0: 
michael@0: // Calculates the x having the closest 2^x value for the given value
michael@0: static int _getClosest2Power(double value)
michael@0: {
michael@0:     return (int)(log(value) / log(2.0) + 0.5);
michael@0: }
michael@0: 
michael@0: 
michael@0: /// Calculates overlap period length in samples.
michael@0: /// Integer version rounds overlap length to closest power of 2
michael@0: /// for a divide scaling operation.
michael@0: void TDStretch::calculateOverlapLength(int aoverlapMs)
michael@0: {
michael@0:     int newOvl;
michael@0: 
michael@0:     assert(aoverlapMs >= 0);
michael@0: 
michael@0:     // calculate overlap length so that it's power of 2 - thus it's easy to do
michael@0:     // integer division by right-shifting. Term "-1" at end is to account for 
michael@0:     // the extra most significatnt bit left unused in result by signed multiplication 
michael@0:     overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
michael@0:     if (overlapDividerBits > 9) overlapDividerBits = 9;
michael@0:     if (overlapDividerBits < 3) overlapDividerBits = 3;
michael@0:     newOvl = (int)pow(2.0, (int)overlapDividerBits + 1);    // +1 => account for -1 above
michael@0: 
michael@0:     acceptNewOverlapLength(newOvl);
michael@0: 
michael@0:     // calculate sloping divider so that crosscorrelation operation won't 
michael@0:     // overflow 32-bit register. Max. sum of the crosscorrelation sum without 
michael@0:     // divider would be 2^30*(N^3-N)/3, where N = overlap length
michael@0:     slopingDivider = (newOvl * newOvl - 1) / 3;
michael@0: }
michael@0: 
michael@0: 
michael@0: double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const
michael@0: {
michael@0:     long corr;
michael@0:     long lnorm;
michael@0:     int i;
michael@0: 
michael@0:     corr = lnorm = 0;
michael@0:     // Same routine for stereo and mono. For stereo, unroll loop for better
michael@0:     // efficiency and gives slightly better resolution against rounding. 
michael@0:     // For mono it same routine, just  unrolls loop by factor of 4
michael@0:     for (i = 0; i < channels * overlapLength; i += 4) 
michael@0:     {
michael@0:         corr += (mixingPos[i] * compare[i] + 
michael@0:                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;  // notice: do intermediate division here to avoid integer overflow
michael@0:         corr += (mixingPos[i + 2] * compare[i + 2] + 
michael@0:                  mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
michael@0:         lnorm += (mixingPos[i] * mixingPos[i] + 
michael@0:                   mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
michael@0:         lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + 
michael@0:                   mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
michael@0:     }
michael@0: 
michael@0:     // Normalize result by dividing by sqrt(norm) - this step is easiest 
michael@0:     // done using floating point operation
michael@0:     norm = (double)lnorm;
michael@0:     return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
michael@0: }
michael@0: 
michael@0: 
michael@0: /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
michael@0: double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const
michael@0: {
michael@0:     long corr;
michael@0:     long lnorm;
michael@0:     int i;
michael@0: 
michael@0:     // cancel first normalizer tap from previous round
michael@0:     lnorm = 0;
michael@0:     for (i = 1; i <= channels; i ++)
michael@0:     {
michael@0:         lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits;
michael@0:     }
michael@0: 
michael@0:     corr = 0;
michael@0:     // Same routine for stereo and mono. For stereo, unroll loop for better
michael@0:     // efficiency and gives slightly better resolution against rounding. 
michael@0:     // For mono it same routine, just  unrolls loop by factor of 4
michael@0:     for (i = 0; i < channels * overlapLength; i += 4) 
michael@0:     {
michael@0:         corr += (mixingPos[i] * compare[i] + 
michael@0:                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;  // notice: do intermediate division here to avoid integer overflow
michael@0:         corr += (mixingPos[i + 2] * compare[i + 2] + 
michael@0:                  mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
michael@0:     }
michael@0: 
michael@0:     // update normalizer with last samples of this round
michael@0:     for (int j = 0; j < channels; j ++)
michael@0:     {
michael@0:         i --;
michael@0:         lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
michael@0:     }
michael@0:     norm += (double)lnorm;
michael@0: 
michael@0:     // Normalize result by dividing by sqrt(norm) - this step is easiest 
michael@0:     // done using floating point operation
michael@0:     return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
michael@0: }
michael@0: 
michael@0: #endif // SOUNDTOUCH_INTEGER_SAMPLES
michael@0: 
michael@0: //////////////////////////////////////////////////////////////////////////////
michael@0: //
michael@0: // Floating point arithmetics specific algorithm implementations.
michael@0: //
michael@0: 
michael@0: #ifdef SOUNDTOUCH_FLOAT_SAMPLES
michael@0: 
michael@0: // Overlaps samples in 'midBuffer' with the samples in 'pInput'
michael@0: void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
michael@0: {
michael@0:     int i;
michael@0:     float fScale;
michael@0:     float f1;
michael@0:     float f2;
michael@0: 
michael@0:     fScale = 1.0f / (float)overlapLength;
michael@0: 
michael@0:     f1 = 0;
michael@0:     f2 = 1.0f;
michael@0: 
michael@0:     for (i = 0; i < 2 * (int)overlapLength ; i += 2) 
michael@0:     {
michael@0:         pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
michael@0:         pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
michael@0: 
michael@0:         f1 += fScale;
michael@0:         f2 -= fScale;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: // Overlaps samples in 'midBuffer' with the samples in 'input'. 
michael@0: void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
michael@0: {
michael@0:     int i;
michael@0:     float fScale;
michael@0:     float f1;
michael@0:     float f2;
michael@0: 
michael@0:     fScale = 1.0f / (float)overlapLength;
michael@0: 
michael@0:     f1 = 0;
michael@0:     f2 = 1.0f;
michael@0: 
michael@0:     i=0;
michael@0:     for (int i2 = 0; i2 < overlapLength; i2 ++)
michael@0:     {
michael@0:         // note: Could optimize this slightly by taking into account that always channels > 2
michael@0:         for (int c = 0; c < channels; c ++)
michael@0:         {
michael@0:             pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
michael@0:             i++;
michael@0:         }
michael@0:         f1 += fScale;
michael@0:         f2 -= fScale;
michael@0:     }
michael@0: }
michael@0: 
michael@0: 
michael@0: /// Calculates overlapInMsec period length in samples.
michael@0: void TDStretch::calculateOverlapLength(int overlapInMsec)
michael@0: {
michael@0:     int newOvl;
michael@0: 
michael@0:     assert(overlapInMsec >= 0);
michael@0:     newOvl = (sampleRate * overlapInMsec) / 1000;
michael@0:     if (newOvl < 16) newOvl = 16;
michael@0: 
michael@0:     // must be divisible by 8
michael@0:     newOvl -= newOvl % 8;
michael@0: 
michael@0:     acceptNewOverlapLength(newOvl);
michael@0: }
michael@0: 
michael@0: 
michael@0: /// Calculate cross-correlation
michael@0: double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const
michael@0: {
michael@0:     double corr;
michael@0:     int i;
michael@0: 
michael@0:     corr = norm = 0;
michael@0:     // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
michael@0:     // For mono it's same routine yet unrollsd by factor of 4.
michael@0:     for (i = 0; i < channels * overlapLength; i += 4) 
michael@0:     {
michael@0:         corr += mixingPos[i] * compare[i] +
michael@0:                 mixingPos[i + 1] * compare[i + 1];
michael@0: 
michael@0:         norm += mixingPos[i] * mixingPos[i] + 
michael@0:                 mixingPos[i + 1] * mixingPos[i + 1];
michael@0: 
michael@0:         // unroll the loop for better CPU efficiency:
michael@0:         corr += mixingPos[i + 2] * compare[i + 2] +
michael@0:                 mixingPos[i + 3] * compare[i + 3];
michael@0: 
michael@0:         norm += mixingPos[i + 2] * mixingPos[i + 2] +
michael@0:                 mixingPos[i + 3] * mixingPos[i + 3];
michael@0:     }
michael@0: 
michael@0:     return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
michael@0: }
michael@0: 
michael@0: 
michael@0: /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
michael@0: double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const
michael@0: {
michael@0:     double corr;
michael@0:     int i;
michael@0: 
michael@0:     corr = 0;
michael@0: 
michael@0:     // cancel first normalizer tap from previous round
michael@0:     for (i = 1; i <= channels; i ++)
michael@0:     {
michael@0:         norm -= mixingPos[-i] * mixingPos[-i];
michael@0:     }
michael@0: 
michael@0:     // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
michael@0:     // For mono it's same routine yet unrollsd by factor of 4.
michael@0:     for (i = 0; i < channels * overlapLength; i += 4) 
michael@0:     {
michael@0:         corr += mixingPos[i] * compare[i] +
michael@0:                 mixingPos[i + 1] * compare[i + 1] +
michael@0:                 mixingPos[i + 2] * compare[i + 2] +
michael@0:                 mixingPos[i + 3] * compare[i + 3];
michael@0:     }
michael@0: 
michael@0:     // update normalizer with last samples of this round
michael@0:     for (int j = 0; j < channels; j ++)
michael@0:     {
michael@0:         i --;
michael@0:         norm += mixingPos[i] * mixingPos[i];
michael@0:     }
michael@0: 
michael@0:     return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
michael@0: }
michael@0: 
michael@0: 
michael@0: #endif // SOUNDTOUCH_FLOAT_SAMPLES