michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: /// michael@0: /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo michael@0: /// while maintaining the original pitch by using a time domain WSOLA-like michael@0: /// method with several performance-increasing tweaks. michael@0: /// michael@0: /// Note : MMX optimized functions reside in a separate, platform-specific michael@0: /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' michael@0: /// michael@0: /// Author : Copyright (c) Olli Parviainen michael@0: /// Author e-mail : oparviai 'at' iki.fi michael@0: /// SoundTouch WWW: http://www.surina.net/soundtouch michael@0: /// michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: // michael@0: // Last changed : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $ michael@0: // File revision : $Revision: 1.12 $ michael@0: // michael@0: // $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $ michael@0: // michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: // michael@0: // License : michael@0: // michael@0: // SoundTouch audio processing library michael@0: // Copyright (c) Olli Parviainen michael@0: // michael@0: // This library is free software; you can redistribute it and/or michael@0: // modify it under the terms of the GNU Lesser General Public michael@0: // License as published by the Free Software Foundation; either michael@0: // version 2.1 of the License, or (at your option) any later version. michael@0: // michael@0: // This library is distributed in the hope that it will be useful, michael@0: // but WITHOUT ANY WARRANTY; without even the implied warranty of michael@0: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU michael@0: // Lesser General Public License for more details. michael@0: // michael@0: // You should have received a copy of the GNU Lesser General Public michael@0: // License along with this library; if not, write to the Free Software michael@0: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA michael@0: // michael@0: //////////////////////////////////////////////////////////////////////////////// michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "STTypes.h" michael@0: #include "cpu_detect.h" michael@0: #include "TDStretch.h" michael@0: michael@0: using namespace soundtouch; michael@0: michael@0: #define max(x, y) (((x) > (y)) ? (x) : (y)) michael@0: michael@0: michael@0: /***************************************************************************** michael@0: * michael@0: * Constant definitions michael@0: * michael@0: *****************************************************************************/ michael@0: michael@0: // Table for the hierarchical mixing position seeking algorithm michael@0: static const short _scanOffsets[5][24]={ michael@0: { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, michael@0: 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, michael@0: {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, michael@0: { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, michael@0: { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, michael@0: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, michael@0: { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111, michael@0: 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}}; michael@0: michael@0: /***************************************************************************** michael@0: * michael@0: * Implementation of the class 'TDStretch' michael@0: * michael@0: *****************************************************************************/ michael@0: michael@0: michael@0: TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) michael@0: { michael@0: bQuickSeek = false; michael@0: channels = 2; michael@0: michael@0: pMidBuffer = NULL; michael@0: pMidBufferUnaligned = NULL; michael@0: overlapLength = 0; michael@0: michael@0: bAutoSeqSetting = true; michael@0: bAutoSeekSetting = true; michael@0: michael@0: // outDebt = 0; michael@0: skipFract = 0; michael@0: michael@0: tempo = 1.0f; michael@0: setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); michael@0: setTempo(1.0f); michael@0: michael@0: clear(); michael@0: } michael@0: michael@0: michael@0: michael@0: TDStretch::~TDStretch() michael@0: { michael@0: delete[] pMidBufferUnaligned; michael@0: } michael@0: michael@0: michael@0: michael@0: // Sets routine control parameters. These control are certain time constants michael@0: // defining how the sound is stretched to the desired duration. michael@0: // michael@0: // 'sampleRate' = sample rate of the sound michael@0: // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) michael@0: // 'seekwindowMS' = seeking window length for scanning the best overlapping michael@0: // position (default = 28 ms) michael@0: // 'overlapMS' = overlapping length (default = 12 ms) michael@0: michael@0: void TDStretch::setParameters(int aSampleRate, int aSequenceMS, michael@0: int aSeekWindowMS, int aOverlapMS) michael@0: { michael@0: // accept only positive parameter values - if zero or negative, use old values instead michael@0: if (aSampleRate > 0) this->sampleRate = aSampleRate; michael@0: if (aOverlapMS > 0) this->overlapMs = aOverlapMS; michael@0: michael@0: if (aSequenceMS > 0) michael@0: { michael@0: this->sequenceMs = aSequenceMS; michael@0: bAutoSeqSetting = false; michael@0: } michael@0: else if (aSequenceMS == 0) michael@0: { michael@0: // if zero, use automatic setting michael@0: bAutoSeqSetting = true; michael@0: } michael@0: michael@0: if (aSeekWindowMS > 0) michael@0: { michael@0: this->seekWindowMs = aSeekWindowMS; michael@0: bAutoSeekSetting = false; michael@0: } michael@0: else if (aSeekWindowMS == 0) michael@0: { michael@0: // if zero, use automatic setting michael@0: bAutoSeekSetting = true; michael@0: } michael@0: michael@0: calcSeqParameters(); michael@0: michael@0: calculateOverlapLength(overlapMs); michael@0: michael@0: // set tempo to recalculate 'sampleReq' michael@0: setTempo(tempo); michael@0: } michael@0: michael@0: michael@0: michael@0: /// Get routine control parameters, see setParameters() function. michael@0: /// Any of the parameters to this function can be NULL, in such case corresponding parameter michael@0: /// value isn't returned. michael@0: void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const michael@0: { michael@0: if (pSampleRate) michael@0: { michael@0: *pSampleRate = sampleRate; michael@0: } michael@0: michael@0: if (pSequenceMs) michael@0: { michael@0: *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs; michael@0: } michael@0: michael@0: if (pSeekWindowMs) michael@0: { michael@0: *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs; michael@0: } michael@0: michael@0: if (pOverlapMs) michael@0: { michael@0: *pOverlapMs = overlapMs; michael@0: } michael@0: } michael@0: michael@0: michael@0: // Overlaps samples in 'midBuffer' with the samples in 'pInput' michael@0: void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const michael@0: { michael@0: int i; michael@0: SAMPLETYPE m1, m2; michael@0: michael@0: m1 = (SAMPLETYPE)0; michael@0: m2 = (SAMPLETYPE)overlapLength; michael@0: michael@0: for (i = 0; i < overlapLength ; i ++) michael@0: { michael@0: pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength; michael@0: m1 += 1; michael@0: m2 -= 1; michael@0: } michael@0: } michael@0: michael@0: michael@0: michael@0: void TDStretch::clearMidBuffer() michael@0: { michael@0: memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength); michael@0: } michael@0: michael@0: michael@0: void TDStretch::clearInput() michael@0: { michael@0: inputBuffer.clear(); michael@0: clearMidBuffer(); michael@0: } michael@0: michael@0: michael@0: // Clears the sample buffers michael@0: void TDStretch::clear() michael@0: { michael@0: outputBuffer.clear(); michael@0: clearInput(); michael@0: } michael@0: michael@0: michael@0: michael@0: // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero michael@0: // to enable michael@0: void TDStretch::enableQuickSeek(bool enable) michael@0: { michael@0: bQuickSeek = enable; michael@0: } michael@0: michael@0: michael@0: // Returns nonzero if the quick seeking algorithm is enabled. michael@0: bool TDStretch::isQuickSeekEnabled() const michael@0: { michael@0: return bQuickSeek; michael@0: } michael@0: michael@0: michael@0: // Seeks for the optimal overlap-mixing position. michael@0: int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) michael@0: { michael@0: if (bQuickSeek) michael@0: { michael@0: return seekBestOverlapPositionQuick(refPos); michael@0: } michael@0: else michael@0: { michael@0: return seekBestOverlapPositionFull(refPos); michael@0: } michael@0: } michael@0: michael@0: michael@0: // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position michael@0: // of 'ovlPos'. michael@0: inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const michael@0: { michael@0: #ifndef USE_MULTICH_ALWAYS michael@0: if (channels == 1) michael@0: { michael@0: // mono sound. michael@0: overlapMono(pOutput, pInput + ovlPos); michael@0: } michael@0: else if (channels == 2) michael@0: { michael@0: // stereo sound michael@0: overlapStereo(pOutput, pInput + 2 * ovlPos); michael@0: } michael@0: else michael@0: #endif // USE_MULTICH_ALWAYS michael@0: { michael@0: assert(channels > 0); michael@0: overlapMulti(pOutput, pInput + channels * ovlPos); michael@0: } michael@0: } michael@0: michael@0: michael@0: michael@0: // Seeks for the optimal overlap-mixing position. The 'stereo' version of the michael@0: // routine michael@0: // michael@0: // The best position is determined as the position where the two overlapped michael@0: // sample sequences are 'most alike', in terms of the highest cross-correlation michael@0: // value over the overlapping period michael@0: int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) michael@0: { michael@0: int bestOffs; michael@0: double bestCorr, corr; michael@0: double norm; michael@0: int i; michael@0: michael@0: bestCorr = FLT_MIN; michael@0: bestOffs = 0; michael@0: michael@0: // Scans for the best correlation value by testing each possible position michael@0: // over the permitted range. michael@0: bestCorr = calcCrossCorr(refPos, pMidBuffer, norm); michael@0: for (i = 1; i < seekLength; i ++) michael@0: { michael@0: // Calculates correlation value for the mixing position corresponding michael@0: // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as michael@0: // "calcCrossCorr", but saves time by reusing & updating previously stored michael@0: // "norm" value michael@0: corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm); michael@0: michael@0: // heuristic rule to slightly favour values close to mid of the range michael@0: double tmp = (double)(2 * i - seekLength) / (double)seekLength; michael@0: corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); michael@0: michael@0: // Checks for the highest correlation value michael@0: if (corr > bestCorr) michael@0: { michael@0: bestCorr = corr; michael@0: bestOffs = i; michael@0: } michael@0: } michael@0: // clear cross correlation routine state if necessary (is so e.g. in MMX routines). michael@0: clearCrossCorrState(); michael@0: michael@0: return bestOffs; michael@0: } michael@0: michael@0: michael@0: // Seeks for the optimal overlap-mixing position. The 'stereo' version of the michael@0: // routine michael@0: // michael@0: // The best position is determined as the position where the two overlapped michael@0: // sample sequences are 'most alike', in terms of the highest cross-correlation michael@0: // value over the overlapping period michael@0: int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) michael@0: { michael@0: int j; michael@0: int bestOffs; michael@0: double bestCorr, corr; michael@0: int scanCount, corrOffset, tempOffset; michael@0: michael@0: bestCorr = FLT_MIN; michael@0: bestOffs = _scanOffsets[0][0]; michael@0: corrOffset = 0; michael@0: tempOffset = 0; michael@0: michael@0: // Scans for the best correlation value using four-pass hierarchical search. michael@0: // michael@0: // The look-up table 'scans' has hierarchical position adjusting steps. michael@0: // In first pass the routine searhes for the highest correlation with michael@0: // relatively coarse steps, then rescans the neighbourhood of the highest michael@0: // correlation with better resolution and so on. michael@0: for (scanCount = 0;scanCount < 4; scanCount ++) michael@0: { michael@0: j = 0; michael@0: while (_scanOffsets[scanCount][j]) michael@0: { michael@0: double norm; michael@0: tempOffset = corrOffset + _scanOffsets[scanCount][j]; michael@0: if (tempOffset >= seekLength) break; michael@0: michael@0: // Calculates correlation value for the mixing position corresponding michael@0: // to 'tempOffset' michael@0: corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm); michael@0: // heuristic rule to slightly favour values close to mid of the range michael@0: double tmp = (double)(2 * tempOffset - seekLength) / seekLength; michael@0: corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); michael@0: michael@0: // Checks for the highest correlation value michael@0: if (corr > bestCorr) michael@0: { michael@0: bestCorr = corr; michael@0: bestOffs = tempOffset; michael@0: } michael@0: j ++; michael@0: } michael@0: corrOffset = bestOffs; michael@0: } michael@0: // clear cross correlation routine state if necessary (is so e.g. in MMX routines). michael@0: clearCrossCorrState(); michael@0: michael@0: return bestOffs; michael@0: } michael@0: michael@0: michael@0: michael@0: /// clear cross correlation routine state if necessary michael@0: void TDStretch::clearCrossCorrState() michael@0: { michael@0: // default implementation is empty. michael@0: } michael@0: michael@0: michael@0: /// Calculates processing sequence length according to tempo setting michael@0: void TDStretch::calcSeqParameters() michael@0: { michael@0: // Adjust tempo param according to tempo, so that variating processing sequence length is used michael@0: // at varius tempo settings, between the given low...top limits michael@0: #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%) michael@0: #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%) michael@0: michael@0: // sequence-ms setting values at above low & top tempo michael@0: #define AUTOSEQ_AT_MIN 125.0 michael@0: #define AUTOSEQ_AT_MAX 50.0 michael@0: #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) michael@0: #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) michael@0: michael@0: // seek-window-ms setting values at above low & top tempo michael@0: #define AUTOSEEK_AT_MIN 25.0 michael@0: #define AUTOSEEK_AT_MAX 15.0 michael@0: #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) michael@0: #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW)) michael@0: michael@0: #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))) michael@0: michael@0: double seq, seek; michael@0: michael@0: if (bAutoSeqSetting) michael@0: { michael@0: seq = AUTOSEQ_C + AUTOSEQ_K * tempo; michael@0: seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN); michael@0: sequenceMs = (int)(seq + 0.5); michael@0: } michael@0: michael@0: if (bAutoSeekSetting) michael@0: { michael@0: seek = AUTOSEEK_C + AUTOSEEK_K * tempo; michael@0: seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN); michael@0: seekWindowMs = (int)(seek + 0.5); michael@0: } michael@0: michael@0: // Update seek window lengths michael@0: seekWindowLength = (sampleRate * sequenceMs) / 1000; michael@0: if (seekWindowLength < 2 * overlapLength) michael@0: { michael@0: seekWindowLength = 2 * overlapLength; michael@0: } michael@0: seekLength = (sampleRate * seekWindowMs) / 1000; michael@0: } michael@0: michael@0: michael@0: michael@0: // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower michael@0: // tempo, larger faster tempo. michael@0: void TDStretch::setTempo(float newTempo) michael@0: { michael@0: int intskip; michael@0: michael@0: tempo = newTempo; michael@0: michael@0: // Calculate new sequence duration michael@0: calcSeqParameters(); michael@0: michael@0: // Calculate ideal skip length (according to tempo value) michael@0: nominalSkip = tempo * (seekWindowLength - overlapLength); michael@0: intskip = (int)(nominalSkip + 0.5f); michael@0: michael@0: // Calculate how many samples are needed in the 'inputBuffer' to michael@0: // process another batch of samples michael@0: //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2; michael@0: sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength; michael@0: } michael@0: michael@0: michael@0: michael@0: // Sets the number of channels, 1 = mono, 2 = stereo michael@0: void TDStretch::setChannels(int numChannels) michael@0: { michael@0: assert(numChannels > 0); michael@0: if (channels == numChannels) return; michael@0: // assert(numChannels == 1 || numChannels == 2); michael@0: michael@0: channels = numChannels; michael@0: inputBuffer.setChannels(channels); michael@0: outputBuffer.setChannels(channels); michael@0: michael@0: // re-init overlap/buffer michael@0: overlapLength=0; michael@0: setParameters(sampleRate); michael@0: } michael@0: michael@0: michael@0: // nominal tempo, no need for processing, just pass the samples through michael@0: // to outputBuffer michael@0: /* michael@0: void TDStretch::processNominalTempo() michael@0: { michael@0: assert(tempo == 1.0f); michael@0: michael@0: if (bMidBufferDirty) michael@0: { michael@0: // If there are samples in pMidBuffer waiting for overlapping, michael@0: // do a single sliding overlapping with them in order to prevent a michael@0: // clicking distortion in the output sound michael@0: if (inputBuffer.numSamples() < overlapLength) michael@0: { michael@0: // wait until we've got overlapLength input samples michael@0: return; michael@0: } michael@0: // Mix the samples in the beginning of 'inputBuffer' with the michael@0: // samples in 'midBuffer' using sliding overlapping michael@0: overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); michael@0: outputBuffer.putSamples(overlapLength); michael@0: inputBuffer.receiveSamples(overlapLength); michael@0: clearMidBuffer(); michael@0: // now we've caught the nominal sample flow and may switch to michael@0: // bypass mode michael@0: } michael@0: michael@0: // Simply bypass samples from input to output michael@0: outputBuffer.moveSamples(inputBuffer); michael@0: } michael@0: */ michael@0: michael@0: michael@0: // Processes as many processing frames of the samples 'inputBuffer', store michael@0: // the result into 'outputBuffer' michael@0: void TDStretch::processSamples() michael@0: { michael@0: int ovlSkip, offset; michael@0: int temp; michael@0: michael@0: /* Removed this small optimization - can introduce a click to sound when tempo setting michael@0: crosses the nominal value michael@0: if (tempo == 1.0f) michael@0: { michael@0: // tempo not changed from the original, so bypass the processing michael@0: processNominalTempo(); michael@0: return; michael@0: } michael@0: */ michael@0: michael@0: // Process samples as long as there are enough samples in 'inputBuffer' michael@0: // to form a processing frame. michael@0: while ((int)inputBuffer.numSamples() >= sampleReq) michael@0: { michael@0: // If tempo differs from the normal ('SCALE'), scan for the best overlapping michael@0: // position michael@0: offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); michael@0: michael@0: // Mix the samples in the 'inputBuffer' at position of 'offset' with the michael@0: // samples in 'midBuffer' using sliding overlapping michael@0: // ... first partially overlap with the end of the previous sequence michael@0: // (that's in 'midBuffer') michael@0: overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); michael@0: outputBuffer.putSamples((uint)overlapLength); michael@0: michael@0: // ... then copy sequence samples from 'inputBuffer' to output: michael@0: michael@0: // length of sequence michael@0: temp = (seekWindowLength - 2 * overlapLength); michael@0: michael@0: // crosscheck that we don't have buffer overflow... michael@0: if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2)) michael@0: { michael@0: continue; // just in case, shouldn't really happen michael@0: } michael@0: michael@0: outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp); michael@0: michael@0: // Copies the end of the current sequence from 'inputBuffer' to michael@0: // 'midBuffer' for being mixed with the beginning of the next michael@0: // processing sequence and so on michael@0: assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples()); michael@0: memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), michael@0: channels * sizeof(SAMPLETYPE) * overlapLength); michael@0: michael@0: // Remove the processed samples from the input buffer. Update michael@0: // the difference between integer & nominal skip step to 'skipFract' michael@0: // in order to prevent the error from accumulating over time. michael@0: skipFract += nominalSkip; // real skip size michael@0: ovlSkip = (int)skipFract; // rounded to integer skip michael@0: skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip michael@0: inputBuffer.receiveSamples((uint)ovlSkip); michael@0: } michael@0: } michael@0: michael@0: michael@0: // Adds 'numsamples' pcs of samples from the 'samples' memory position into michael@0: // the input of the object. michael@0: void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) michael@0: { michael@0: // Add the samples into the input buffer michael@0: inputBuffer.putSamples(samples, nSamples); michael@0: // Process the samples in input buffer michael@0: processSamples(); michael@0: } michael@0: michael@0: michael@0: michael@0: /// Set new overlap length parameter & reallocate RefMidBuffer if necessary. michael@0: void TDStretch::acceptNewOverlapLength(int newOverlapLength) michael@0: { michael@0: int prevOvl; michael@0: michael@0: assert(newOverlapLength >= 0); michael@0: prevOvl = overlapLength; michael@0: overlapLength = newOverlapLength; michael@0: michael@0: if (overlapLength > prevOvl) michael@0: { michael@0: delete[] pMidBufferUnaligned; michael@0: michael@0: pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)]; michael@0: // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency michael@0: pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); michael@0: michael@0: clearMidBuffer(); michael@0: } michael@0: } michael@0: michael@0: michael@0: // Operator 'new' is overloaded so that it automatically creates a suitable instance michael@0: // depending on if we've a MMX/SSE/etc-capable CPU available or not. michael@0: void * TDStretch::operator new(size_t s) michael@0: { michael@0: // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! michael@0: ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); michael@0: return newInstance(); michael@0: } michael@0: michael@0: michael@0: TDStretch * TDStretch::newInstance() michael@0: { michael@0: #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE) michael@0: uint uExtensions; michael@0: michael@0: uExtensions = detectCPUextensions(); michael@0: #endif michael@0: michael@0: // Check if MMX/SSE instruction set extensions supported by CPU michael@0: michael@0: #ifdef SOUNDTOUCH_ALLOW_MMX michael@0: // MMX routines available only with integer sample types michael@0: if (uExtensions & SUPPORT_MMX) michael@0: { michael@0: return ::new TDStretchMMX; michael@0: } michael@0: else michael@0: #endif // SOUNDTOUCH_ALLOW_MMX michael@0: michael@0: michael@0: #ifdef SOUNDTOUCH_ALLOW_SSE michael@0: if (uExtensions & SUPPORT_SSE) michael@0: { michael@0: // SSE support michael@0: return ::new TDStretchSSE; michael@0: } michael@0: else michael@0: #endif // SOUNDTOUCH_ALLOW_SSE michael@0: michael@0: { michael@0: // ISA optimizations not supported, use plain C version michael@0: return ::new TDStretch; michael@0: } michael@0: } michael@0: michael@0: michael@0: ////////////////////////////////////////////////////////////////////////////// michael@0: // michael@0: // Integer arithmetics specific algorithm implementations. michael@0: // michael@0: ////////////////////////////////////////////////////////////////////////////// michael@0: michael@0: #ifdef SOUNDTOUCH_INTEGER_SAMPLES michael@0: michael@0: // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' michael@0: // version of the routine. michael@0: void TDStretch::overlapStereo(short *poutput, const short *input) const michael@0: { michael@0: int i; michael@0: short temp; michael@0: int cnt2; michael@0: michael@0: for (i = 0; i < overlapLength ; i ++) michael@0: { michael@0: temp = (short)(overlapLength - i); michael@0: cnt2 = 2 * i; michael@0: poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; michael@0: poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; michael@0: } michael@0: } michael@0: michael@0: michael@0: // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' michael@0: // version of the routine. michael@0: void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const michael@0: { michael@0: SAMPLETYPE m1=(SAMPLETYPE)0; michael@0: SAMPLETYPE m2; michael@0: int i=0; michael@0: michael@0: for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --) michael@0: { michael@0: for (int c = 0; c < channels; c ++) michael@0: { michael@0: poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; michael@0: i++; michael@0: } michael@0: michael@0: m1++; michael@0: } michael@0: } michael@0: michael@0: // Calculates the x having the closest 2^x value for the given value michael@0: static int _getClosest2Power(double value) michael@0: { michael@0: return (int)(log(value) / log(2.0) + 0.5); michael@0: } michael@0: michael@0: michael@0: /// Calculates overlap period length in samples. michael@0: /// Integer version rounds overlap length to closest power of 2 michael@0: /// for a divide scaling operation. michael@0: void TDStretch::calculateOverlapLength(int aoverlapMs) michael@0: { michael@0: int newOvl; michael@0: michael@0: assert(aoverlapMs >= 0); michael@0: michael@0: // calculate overlap length so that it's power of 2 - thus it's easy to do michael@0: // integer division by right-shifting. Term "-1" at end is to account for michael@0: // the extra most significatnt bit left unused in result by signed multiplication michael@0: overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; michael@0: if (overlapDividerBits > 9) overlapDividerBits = 9; michael@0: if (overlapDividerBits < 3) overlapDividerBits = 3; michael@0: newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above michael@0: michael@0: acceptNewOverlapLength(newOvl); michael@0: michael@0: // calculate sloping divider so that crosscorrelation operation won't michael@0: // overflow 32-bit register. Max. sum of the crosscorrelation sum without michael@0: // divider would be 2^30*(N^3-N)/3, where N = overlap length michael@0: slopingDivider = (newOvl * newOvl - 1) / 3; michael@0: } michael@0: michael@0: michael@0: double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const michael@0: { michael@0: long corr; michael@0: long lnorm; michael@0: int i; michael@0: michael@0: corr = lnorm = 0; michael@0: // Same routine for stereo and mono. For stereo, unroll loop for better michael@0: // efficiency and gives slightly better resolution against rounding. michael@0: // For mono it same routine, just unrolls loop by factor of 4 michael@0: for (i = 0; i < channels * overlapLength; i += 4) michael@0: { michael@0: corr += (mixingPos[i] * compare[i] + michael@0: mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow michael@0: corr += (mixingPos[i + 2] * compare[i + 2] + michael@0: mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; michael@0: lnorm += (mixingPos[i] * mixingPos[i] + michael@0: mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow michael@0: lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + michael@0: mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; michael@0: } michael@0: michael@0: // Normalize result by dividing by sqrt(norm) - this step is easiest michael@0: // done using floating point operation michael@0: norm = (double)lnorm; michael@0: return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); michael@0: } michael@0: michael@0: michael@0: /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value michael@0: double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const michael@0: { michael@0: long corr; michael@0: long lnorm; michael@0: int i; michael@0: michael@0: // cancel first normalizer tap from previous round michael@0: lnorm = 0; michael@0: for (i = 1; i <= channels; i ++) michael@0: { michael@0: lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits; michael@0: } michael@0: michael@0: corr = 0; michael@0: // Same routine for stereo and mono. For stereo, unroll loop for better michael@0: // efficiency and gives slightly better resolution against rounding. michael@0: // For mono it same routine, just unrolls loop by factor of 4 michael@0: for (i = 0; i < channels * overlapLength; i += 4) michael@0: { michael@0: corr += (mixingPos[i] * compare[i] + michael@0: mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow michael@0: corr += (mixingPos[i + 2] * compare[i + 2] + michael@0: mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; michael@0: } michael@0: michael@0: // update normalizer with last samples of this round michael@0: for (int j = 0; j < channels; j ++) michael@0: { michael@0: i --; michael@0: lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits; michael@0: } michael@0: norm += (double)lnorm; michael@0: michael@0: // Normalize result by dividing by sqrt(norm) - this step is easiest michael@0: // done using floating point operation michael@0: return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); michael@0: } michael@0: michael@0: #endif // SOUNDTOUCH_INTEGER_SAMPLES michael@0: michael@0: ////////////////////////////////////////////////////////////////////////////// michael@0: // michael@0: // Floating point arithmetics specific algorithm implementations. michael@0: // michael@0: michael@0: #ifdef SOUNDTOUCH_FLOAT_SAMPLES michael@0: michael@0: // Overlaps samples in 'midBuffer' with the samples in 'pInput' michael@0: void TDStretch::overlapStereo(float *pOutput, const float *pInput) const michael@0: { michael@0: int i; michael@0: float fScale; michael@0: float f1; michael@0: float f2; michael@0: michael@0: fScale = 1.0f / (float)overlapLength; michael@0: michael@0: f1 = 0; michael@0: f2 = 1.0f; michael@0: michael@0: for (i = 0; i < 2 * (int)overlapLength ; i += 2) michael@0: { michael@0: pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; michael@0: pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; michael@0: michael@0: f1 += fScale; michael@0: f2 -= fScale; michael@0: } michael@0: } michael@0: michael@0: michael@0: // Overlaps samples in 'midBuffer' with the samples in 'input'. michael@0: void TDStretch::overlapMulti(float *pOutput, const float *pInput) const michael@0: { michael@0: int i; michael@0: float fScale; michael@0: float f1; michael@0: float f2; michael@0: michael@0: fScale = 1.0f / (float)overlapLength; michael@0: michael@0: f1 = 0; michael@0: f2 = 1.0f; michael@0: michael@0: i=0; michael@0: for (int i2 = 0; i2 < overlapLength; i2 ++) michael@0: { michael@0: // note: Could optimize this slightly by taking into account that always channels > 2 michael@0: for (int c = 0; c < channels; c ++) michael@0: { michael@0: pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2; michael@0: i++; michael@0: } michael@0: f1 += fScale; michael@0: f2 -= fScale; michael@0: } michael@0: } michael@0: michael@0: michael@0: /// Calculates overlapInMsec period length in samples. michael@0: void TDStretch::calculateOverlapLength(int overlapInMsec) michael@0: { michael@0: int newOvl; michael@0: michael@0: assert(overlapInMsec >= 0); michael@0: newOvl = (sampleRate * overlapInMsec) / 1000; michael@0: if (newOvl < 16) newOvl = 16; michael@0: michael@0: // must be divisible by 8 michael@0: newOvl -= newOvl % 8; michael@0: michael@0: acceptNewOverlapLength(newOvl); michael@0: } michael@0: michael@0: michael@0: /// Calculate cross-correlation michael@0: double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const michael@0: { michael@0: double corr; michael@0: int i; michael@0: michael@0: corr = norm = 0; michael@0: // Same routine for stereo and mono. For Stereo, unroll by factor of 2. michael@0: // For mono it's same routine yet unrollsd by factor of 4. michael@0: for (i = 0; i < channels * overlapLength; i += 4) michael@0: { michael@0: corr += mixingPos[i] * compare[i] + michael@0: mixingPos[i + 1] * compare[i + 1]; michael@0: michael@0: norm += mixingPos[i] * mixingPos[i] + michael@0: mixingPos[i + 1] * mixingPos[i + 1]; michael@0: michael@0: // unroll the loop for better CPU efficiency: michael@0: corr += mixingPos[i + 2] * compare[i + 2] + michael@0: mixingPos[i + 3] * compare[i + 3]; michael@0: michael@0: norm += mixingPos[i + 2] * mixingPos[i + 2] + michael@0: mixingPos[i + 3] * mixingPos[i + 3]; michael@0: } michael@0: michael@0: return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); michael@0: } michael@0: michael@0: michael@0: /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value michael@0: double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const michael@0: { michael@0: double corr; michael@0: int i; michael@0: michael@0: corr = 0; michael@0: michael@0: // cancel first normalizer tap from previous round michael@0: for (i = 1; i <= channels; i ++) michael@0: { michael@0: norm -= mixingPos[-i] * mixingPos[-i]; michael@0: } michael@0: michael@0: // Same routine for stereo and mono. For Stereo, unroll by factor of 2. michael@0: // For mono it's same routine yet unrollsd by factor of 4. michael@0: for (i = 0; i < channels * overlapLength; i += 4) michael@0: { michael@0: corr += mixingPos[i] * compare[i] + michael@0: mixingPos[i + 1] * compare[i + 1] + michael@0: mixingPos[i + 2] * compare[i + 2] + michael@0: mixingPos[i + 3] * compare[i + 3]; michael@0: } michael@0: michael@0: // update normalizer with last samples of this round michael@0: for (int j = 0; j < channels; j ++) michael@0: { michael@0: i --; michael@0: norm += mixingPos[i] * mixingPos[i]; michael@0: } michael@0: michael@0: return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); michael@0: } michael@0: michael@0: michael@0: #endif // SOUNDTOUCH_FLOAT_SAMPLES