1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libsoundtouch/src/TDStretch.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,949 @@ 1.4 +//////////////////////////////////////////////////////////////////////////////// 1.5 +/// 1.6 +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 1.7 +/// while maintaining the original pitch by using a time domain WSOLA-like 1.8 +/// method with several performance-increasing tweaks. 1.9 +/// 1.10 +/// Note : MMX optimized functions reside in a separate, platform-specific 1.11 +/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' 1.12 +/// 1.13 +/// Author : Copyright (c) Olli Parviainen 1.14 +/// Author e-mail : oparviai 'at' iki.fi 1.15 +/// SoundTouch WWW: http://www.surina.net/soundtouch 1.16 +/// 1.17 +//////////////////////////////////////////////////////////////////////////////// 1.18 +// 1.19 +// Last changed : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $ 1.20 +// File revision : $Revision: 1.12 $ 1.21 +// 1.22 +// $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $ 1.23 +// 1.24 +//////////////////////////////////////////////////////////////////////////////// 1.25 +// 1.26 +// License : 1.27 +// 1.28 +// SoundTouch audio processing library 1.29 +// Copyright (c) Olli Parviainen 1.30 +// 1.31 +// This library is free software; you can redistribute it and/or 1.32 +// modify it under the terms of the GNU Lesser General Public 1.33 +// License as published by the Free Software Foundation; either 1.34 +// version 2.1 of the License, or (at your option) any later version. 1.35 +// 1.36 +// This library is distributed in the hope that it will be useful, 1.37 +// but WITHOUT ANY WARRANTY; without even the implied warranty of 1.38 +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.39 +// Lesser General Public License for more details. 1.40 +// 1.41 +// You should have received a copy of the GNU Lesser General Public 1.42 +// License along with this library; if not, write to the Free Software 1.43 +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 1.44 +// 1.45 +//////////////////////////////////////////////////////////////////////////////// 1.46 + 1.47 +#include <string.h> 1.48 +#include <limits.h> 1.49 +#include <assert.h> 1.50 +#include <math.h> 1.51 +#include <float.h> 1.52 + 1.53 +#include "STTypes.h" 1.54 +#include "cpu_detect.h" 1.55 +#include "TDStretch.h" 1.56 + 1.57 +using namespace soundtouch; 1.58 + 1.59 +#define max(x, y) (((x) > (y)) ? (x) : (y)) 1.60 + 1.61 + 1.62 +/***************************************************************************** 1.63 + * 1.64 + * Constant definitions 1.65 + * 1.66 + *****************************************************************************/ 1.67 + 1.68 +// Table for the hierarchical mixing position seeking algorithm 1.69 +static const short _scanOffsets[5][24]={ 1.70 + { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, 1.71 + 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, 1.72 + {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, 1.73 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 1.74 + { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, 1.75 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 1.76 + { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, 1.77 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 1.78 + { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111, 1.79 + 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}}; 1.80 + 1.81 +/***************************************************************************** 1.82 + * 1.83 + * Implementation of the class 'TDStretch' 1.84 + * 1.85 + *****************************************************************************/ 1.86 + 1.87 + 1.88 +TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) 1.89 +{ 1.90 + bQuickSeek = false; 1.91 + channels = 2; 1.92 + 1.93 + pMidBuffer = NULL; 1.94 + pMidBufferUnaligned = NULL; 1.95 + overlapLength = 0; 1.96 + 1.97 + bAutoSeqSetting = true; 1.98 + bAutoSeekSetting = true; 1.99 + 1.100 +// outDebt = 0; 1.101 + skipFract = 0; 1.102 + 1.103 + tempo = 1.0f; 1.104 + setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); 1.105 + setTempo(1.0f); 1.106 + 1.107 + clear(); 1.108 +} 1.109 + 1.110 + 1.111 + 1.112 +TDStretch::~TDStretch() 1.113 +{ 1.114 + delete[] pMidBufferUnaligned; 1.115 +} 1.116 + 1.117 + 1.118 + 1.119 +// Sets routine control parameters. These control are certain time constants 1.120 +// defining how the sound is stretched to the desired duration. 1.121 +// 1.122 +// 'sampleRate' = sample rate of the sound 1.123 +// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) 1.124 +// 'seekwindowMS' = seeking window length for scanning the best overlapping 1.125 +// position (default = 28 ms) 1.126 +// 'overlapMS' = overlapping length (default = 12 ms) 1.127 + 1.128 +void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 1.129 + int aSeekWindowMS, int aOverlapMS) 1.130 +{ 1.131 + // accept only positive parameter values - if zero or negative, use old values instead 1.132 + if (aSampleRate > 0) this->sampleRate = aSampleRate; 1.133 + if (aOverlapMS > 0) this->overlapMs = aOverlapMS; 1.134 + 1.135 + if (aSequenceMS > 0) 1.136 + { 1.137 + this->sequenceMs = aSequenceMS; 1.138 + bAutoSeqSetting = false; 1.139 + } 1.140 + else if (aSequenceMS == 0) 1.141 + { 1.142 + // if zero, use automatic setting 1.143 + bAutoSeqSetting = true; 1.144 + } 1.145 + 1.146 + if (aSeekWindowMS > 0) 1.147 + { 1.148 + this->seekWindowMs = aSeekWindowMS; 1.149 + bAutoSeekSetting = false; 1.150 + } 1.151 + else if (aSeekWindowMS == 0) 1.152 + { 1.153 + // if zero, use automatic setting 1.154 + bAutoSeekSetting = true; 1.155 + } 1.156 + 1.157 + calcSeqParameters(); 1.158 + 1.159 + calculateOverlapLength(overlapMs); 1.160 + 1.161 + // set tempo to recalculate 'sampleReq' 1.162 + setTempo(tempo); 1.163 +} 1.164 + 1.165 + 1.166 + 1.167 +/// Get routine control parameters, see setParameters() function. 1.168 +/// Any of the parameters to this function can be NULL, in such case corresponding parameter 1.169 +/// value isn't returned. 1.170 +void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const 1.171 +{ 1.172 + if (pSampleRate) 1.173 + { 1.174 + *pSampleRate = sampleRate; 1.175 + } 1.176 + 1.177 + if (pSequenceMs) 1.178 + { 1.179 + *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs; 1.180 + } 1.181 + 1.182 + if (pSeekWindowMs) 1.183 + { 1.184 + *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs; 1.185 + } 1.186 + 1.187 + if (pOverlapMs) 1.188 + { 1.189 + *pOverlapMs = overlapMs; 1.190 + } 1.191 +} 1.192 + 1.193 + 1.194 +// Overlaps samples in 'midBuffer' with the samples in 'pInput' 1.195 +void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const 1.196 +{ 1.197 + int i; 1.198 + SAMPLETYPE m1, m2; 1.199 + 1.200 + m1 = (SAMPLETYPE)0; 1.201 + m2 = (SAMPLETYPE)overlapLength; 1.202 + 1.203 + for (i = 0; i < overlapLength ; i ++) 1.204 + { 1.205 + pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength; 1.206 + m1 += 1; 1.207 + m2 -= 1; 1.208 + } 1.209 +} 1.210 + 1.211 + 1.212 + 1.213 +void TDStretch::clearMidBuffer() 1.214 +{ 1.215 + memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength); 1.216 +} 1.217 + 1.218 + 1.219 +void TDStretch::clearInput() 1.220 +{ 1.221 + inputBuffer.clear(); 1.222 + clearMidBuffer(); 1.223 +} 1.224 + 1.225 + 1.226 +// Clears the sample buffers 1.227 +void TDStretch::clear() 1.228 +{ 1.229 + outputBuffer.clear(); 1.230 + clearInput(); 1.231 +} 1.232 + 1.233 + 1.234 + 1.235 +// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero 1.236 +// to enable 1.237 +void TDStretch::enableQuickSeek(bool enable) 1.238 +{ 1.239 + bQuickSeek = enable; 1.240 +} 1.241 + 1.242 + 1.243 +// Returns nonzero if the quick seeking algorithm is enabled. 1.244 +bool TDStretch::isQuickSeekEnabled() const 1.245 +{ 1.246 + return bQuickSeek; 1.247 +} 1.248 + 1.249 + 1.250 +// Seeks for the optimal overlap-mixing position. 1.251 +int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) 1.252 +{ 1.253 + if (bQuickSeek) 1.254 + { 1.255 + return seekBestOverlapPositionQuick(refPos); 1.256 + } 1.257 + else 1.258 + { 1.259 + return seekBestOverlapPositionFull(refPos); 1.260 + } 1.261 +} 1.262 + 1.263 + 1.264 +// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position 1.265 +// of 'ovlPos'. 1.266 +inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const 1.267 +{ 1.268 +#ifndef USE_MULTICH_ALWAYS 1.269 + if (channels == 1) 1.270 + { 1.271 + // mono sound. 1.272 + overlapMono(pOutput, pInput + ovlPos); 1.273 + } 1.274 + else if (channels == 2) 1.275 + { 1.276 + // stereo sound 1.277 + overlapStereo(pOutput, pInput + 2 * ovlPos); 1.278 + } 1.279 + else 1.280 +#endif // USE_MULTICH_ALWAYS 1.281 + { 1.282 + assert(channels > 0); 1.283 + overlapMulti(pOutput, pInput + channels * ovlPos); 1.284 + } 1.285 +} 1.286 + 1.287 + 1.288 + 1.289 +// Seeks for the optimal overlap-mixing position. The 'stereo' version of the 1.290 +// routine 1.291 +// 1.292 +// The best position is determined as the position where the two overlapped 1.293 +// sample sequences are 'most alike', in terms of the highest cross-correlation 1.294 +// value over the overlapping period 1.295 +int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 1.296 +{ 1.297 + int bestOffs; 1.298 + double bestCorr, corr; 1.299 + double norm; 1.300 + int i; 1.301 + 1.302 + bestCorr = FLT_MIN; 1.303 + bestOffs = 0; 1.304 + 1.305 + // Scans for the best correlation value by testing each possible position 1.306 + // over the permitted range. 1.307 + bestCorr = calcCrossCorr(refPos, pMidBuffer, norm); 1.308 + for (i = 1; i < seekLength; i ++) 1.309 + { 1.310 + // Calculates correlation value for the mixing position corresponding 1.311 + // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as 1.312 + // "calcCrossCorr", but saves time by reusing & updating previously stored 1.313 + // "norm" value 1.314 + corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm); 1.315 + 1.316 + // heuristic rule to slightly favour values close to mid of the range 1.317 + double tmp = (double)(2 * i - seekLength) / (double)seekLength; 1.318 + corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); 1.319 + 1.320 + // Checks for the highest correlation value 1.321 + if (corr > bestCorr) 1.322 + { 1.323 + bestCorr = corr; 1.324 + bestOffs = i; 1.325 + } 1.326 + } 1.327 + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). 1.328 + clearCrossCorrState(); 1.329 + 1.330 + return bestOffs; 1.331 +} 1.332 + 1.333 + 1.334 +// Seeks for the optimal overlap-mixing position. The 'stereo' version of the 1.335 +// routine 1.336 +// 1.337 +// The best position is determined as the position where the two overlapped 1.338 +// sample sequences are 'most alike', in terms of the highest cross-correlation 1.339 +// value over the overlapping period 1.340 +int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) 1.341 +{ 1.342 + int j; 1.343 + int bestOffs; 1.344 + double bestCorr, corr; 1.345 + int scanCount, corrOffset, tempOffset; 1.346 + 1.347 + bestCorr = FLT_MIN; 1.348 + bestOffs = _scanOffsets[0][0]; 1.349 + corrOffset = 0; 1.350 + tempOffset = 0; 1.351 + 1.352 + // Scans for the best correlation value using four-pass hierarchical search. 1.353 + // 1.354 + // The look-up table 'scans' has hierarchical position adjusting steps. 1.355 + // In first pass the routine searhes for the highest correlation with 1.356 + // relatively coarse steps, then rescans the neighbourhood of the highest 1.357 + // correlation with better resolution and so on. 1.358 + for (scanCount = 0;scanCount < 4; scanCount ++) 1.359 + { 1.360 + j = 0; 1.361 + while (_scanOffsets[scanCount][j]) 1.362 + { 1.363 + double norm; 1.364 + tempOffset = corrOffset + _scanOffsets[scanCount][j]; 1.365 + if (tempOffset >= seekLength) break; 1.366 + 1.367 + // Calculates correlation value for the mixing position corresponding 1.368 + // to 'tempOffset' 1.369 + corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm); 1.370 + // heuristic rule to slightly favour values close to mid of the range 1.371 + double tmp = (double)(2 * tempOffset - seekLength) / seekLength; 1.372 + corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); 1.373 + 1.374 + // Checks for the highest correlation value 1.375 + if (corr > bestCorr) 1.376 + { 1.377 + bestCorr = corr; 1.378 + bestOffs = tempOffset; 1.379 + } 1.380 + j ++; 1.381 + } 1.382 + corrOffset = bestOffs; 1.383 + } 1.384 + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). 1.385 + clearCrossCorrState(); 1.386 + 1.387 + return bestOffs; 1.388 +} 1.389 + 1.390 + 1.391 + 1.392 +/// clear cross correlation routine state if necessary 1.393 +void TDStretch::clearCrossCorrState() 1.394 +{ 1.395 + // default implementation is empty. 1.396 +} 1.397 + 1.398 + 1.399 +/// Calculates processing sequence length according to tempo setting 1.400 +void TDStretch::calcSeqParameters() 1.401 +{ 1.402 + // Adjust tempo param according to tempo, so that variating processing sequence length is used 1.403 + // at varius tempo settings, between the given low...top limits 1.404 + #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%) 1.405 + #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%) 1.406 + 1.407 + // sequence-ms setting values at above low & top tempo 1.408 + #define AUTOSEQ_AT_MIN 125.0 1.409 + #define AUTOSEQ_AT_MAX 50.0 1.410 + #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) 1.411 + #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) 1.412 + 1.413 + // seek-window-ms setting values at above low & top tempo 1.414 + #define AUTOSEEK_AT_MIN 25.0 1.415 + #define AUTOSEEK_AT_MAX 15.0 1.416 + #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) 1.417 + #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW)) 1.418 + 1.419 + #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))) 1.420 + 1.421 + double seq, seek; 1.422 + 1.423 + if (bAutoSeqSetting) 1.424 + { 1.425 + seq = AUTOSEQ_C + AUTOSEQ_K * tempo; 1.426 + seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN); 1.427 + sequenceMs = (int)(seq + 0.5); 1.428 + } 1.429 + 1.430 + if (bAutoSeekSetting) 1.431 + { 1.432 + seek = AUTOSEEK_C + AUTOSEEK_K * tempo; 1.433 + seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN); 1.434 + seekWindowMs = (int)(seek + 0.5); 1.435 + } 1.436 + 1.437 + // Update seek window lengths 1.438 + seekWindowLength = (sampleRate * sequenceMs) / 1000; 1.439 + if (seekWindowLength < 2 * overlapLength) 1.440 + { 1.441 + seekWindowLength = 2 * overlapLength; 1.442 + } 1.443 + seekLength = (sampleRate * seekWindowMs) / 1000; 1.444 +} 1.445 + 1.446 + 1.447 + 1.448 +// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower 1.449 +// tempo, larger faster tempo. 1.450 +void TDStretch::setTempo(float newTempo) 1.451 +{ 1.452 + int intskip; 1.453 + 1.454 + tempo = newTempo; 1.455 + 1.456 + // Calculate new sequence duration 1.457 + calcSeqParameters(); 1.458 + 1.459 + // Calculate ideal skip length (according to tempo value) 1.460 + nominalSkip = tempo * (seekWindowLength - overlapLength); 1.461 + intskip = (int)(nominalSkip + 0.5f); 1.462 + 1.463 + // Calculate how many samples are needed in the 'inputBuffer' to 1.464 + // process another batch of samples 1.465 + //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2; 1.466 + sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength; 1.467 +} 1.468 + 1.469 + 1.470 + 1.471 +// Sets the number of channels, 1 = mono, 2 = stereo 1.472 +void TDStretch::setChannels(int numChannels) 1.473 +{ 1.474 + assert(numChannels > 0); 1.475 + if (channels == numChannels) return; 1.476 +// assert(numChannels == 1 || numChannels == 2); 1.477 + 1.478 + channels = numChannels; 1.479 + inputBuffer.setChannels(channels); 1.480 + outputBuffer.setChannels(channels); 1.481 + 1.482 + // re-init overlap/buffer 1.483 + overlapLength=0; 1.484 + setParameters(sampleRate); 1.485 +} 1.486 + 1.487 + 1.488 +// nominal tempo, no need for processing, just pass the samples through 1.489 +// to outputBuffer 1.490 +/* 1.491 +void TDStretch::processNominalTempo() 1.492 +{ 1.493 + assert(tempo == 1.0f); 1.494 + 1.495 + if (bMidBufferDirty) 1.496 + { 1.497 + // If there are samples in pMidBuffer waiting for overlapping, 1.498 + // do a single sliding overlapping with them in order to prevent a 1.499 + // clicking distortion in the output sound 1.500 + if (inputBuffer.numSamples() < overlapLength) 1.501 + { 1.502 + // wait until we've got overlapLength input samples 1.503 + return; 1.504 + } 1.505 + // Mix the samples in the beginning of 'inputBuffer' with the 1.506 + // samples in 'midBuffer' using sliding overlapping 1.507 + overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); 1.508 + outputBuffer.putSamples(overlapLength); 1.509 + inputBuffer.receiveSamples(overlapLength); 1.510 + clearMidBuffer(); 1.511 + // now we've caught the nominal sample flow and may switch to 1.512 + // bypass mode 1.513 + } 1.514 + 1.515 + // Simply bypass samples from input to output 1.516 + outputBuffer.moveSamples(inputBuffer); 1.517 +} 1.518 +*/ 1.519 + 1.520 + 1.521 +// Processes as many processing frames of the samples 'inputBuffer', store 1.522 +// the result into 'outputBuffer' 1.523 +void TDStretch::processSamples() 1.524 +{ 1.525 + int ovlSkip, offset; 1.526 + int temp; 1.527 + 1.528 + /* Removed this small optimization - can introduce a click to sound when tempo setting 1.529 + crosses the nominal value 1.530 + if (tempo == 1.0f) 1.531 + { 1.532 + // tempo not changed from the original, so bypass the processing 1.533 + processNominalTempo(); 1.534 + return; 1.535 + } 1.536 + */ 1.537 + 1.538 + // Process samples as long as there are enough samples in 'inputBuffer' 1.539 + // to form a processing frame. 1.540 + while ((int)inputBuffer.numSamples() >= sampleReq) 1.541 + { 1.542 + // If tempo differs from the normal ('SCALE'), scan for the best overlapping 1.543 + // position 1.544 + offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); 1.545 + 1.546 + // Mix the samples in the 'inputBuffer' at position of 'offset' with the 1.547 + // samples in 'midBuffer' using sliding overlapping 1.548 + // ... first partially overlap with the end of the previous sequence 1.549 + // (that's in 'midBuffer') 1.550 + overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); 1.551 + outputBuffer.putSamples((uint)overlapLength); 1.552 + 1.553 + // ... then copy sequence samples from 'inputBuffer' to output: 1.554 + 1.555 + // length of sequence 1.556 + temp = (seekWindowLength - 2 * overlapLength); 1.557 + 1.558 + // crosscheck that we don't have buffer overflow... 1.559 + if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2)) 1.560 + { 1.561 + continue; // just in case, shouldn't really happen 1.562 + } 1.563 + 1.564 + outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp); 1.565 + 1.566 + // Copies the end of the current sequence from 'inputBuffer' to 1.567 + // 'midBuffer' for being mixed with the beginning of the next 1.568 + // processing sequence and so on 1.569 + assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples()); 1.570 + memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), 1.571 + channels * sizeof(SAMPLETYPE) * overlapLength); 1.572 + 1.573 + // Remove the processed samples from the input buffer. Update 1.574 + // the difference between integer & nominal skip step to 'skipFract' 1.575 + // in order to prevent the error from accumulating over time. 1.576 + skipFract += nominalSkip; // real skip size 1.577 + ovlSkip = (int)skipFract; // rounded to integer skip 1.578 + skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip 1.579 + inputBuffer.receiveSamples((uint)ovlSkip); 1.580 + } 1.581 +} 1.582 + 1.583 + 1.584 +// Adds 'numsamples' pcs of samples from the 'samples' memory position into 1.585 +// the input of the object. 1.586 +void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) 1.587 +{ 1.588 + // Add the samples into the input buffer 1.589 + inputBuffer.putSamples(samples, nSamples); 1.590 + // Process the samples in input buffer 1.591 + processSamples(); 1.592 +} 1.593 + 1.594 + 1.595 + 1.596 +/// Set new overlap length parameter & reallocate RefMidBuffer if necessary. 1.597 +void TDStretch::acceptNewOverlapLength(int newOverlapLength) 1.598 +{ 1.599 + int prevOvl; 1.600 + 1.601 + assert(newOverlapLength >= 0); 1.602 + prevOvl = overlapLength; 1.603 + overlapLength = newOverlapLength; 1.604 + 1.605 + if (overlapLength > prevOvl) 1.606 + { 1.607 + delete[] pMidBufferUnaligned; 1.608 + 1.609 + pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)]; 1.610 + // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency 1.611 + pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); 1.612 + 1.613 + clearMidBuffer(); 1.614 + } 1.615 +} 1.616 + 1.617 + 1.618 +// Operator 'new' is overloaded so that it automatically creates a suitable instance 1.619 +// depending on if we've a MMX/SSE/etc-capable CPU available or not. 1.620 +void * TDStretch::operator new(size_t s) 1.621 +{ 1.622 + // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! 1.623 + ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); 1.624 + return newInstance(); 1.625 +} 1.626 + 1.627 + 1.628 +TDStretch * TDStretch::newInstance() 1.629 +{ 1.630 +#if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE) 1.631 + uint uExtensions; 1.632 + 1.633 + uExtensions = detectCPUextensions(); 1.634 +#endif 1.635 + 1.636 + // Check if MMX/SSE instruction set extensions supported by CPU 1.637 + 1.638 +#ifdef SOUNDTOUCH_ALLOW_MMX 1.639 + // MMX routines available only with integer sample types 1.640 + if (uExtensions & SUPPORT_MMX) 1.641 + { 1.642 + return ::new TDStretchMMX; 1.643 + } 1.644 + else 1.645 +#endif // SOUNDTOUCH_ALLOW_MMX 1.646 + 1.647 + 1.648 +#ifdef SOUNDTOUCH_ALLOW_SSE 1.649 + if (uExtensions & SUPPORT_SSE) 1.650 + { 1.651 + // SSE support 1.652 + return ::new TDStretchSSE; 1.653 + } 1.654 + else 1.655 +#endif // SOUNDTOUCH_ALLOW_SSE 1.656 + 1.657 + { 1.658 + // ISA optimizations not supported, use plain C version 1.659 + return ::new TDStretch; 1.660 + } 1.661 +} 1.662 + 1.663 + 1.664 +////////////////////////////////////////////////////////////////////////////// 1.665 +// 1.666 +// Integer arithmetics specific algorithm implementations. 1.667 +// 1.668 +////////////////////////////////////////////////////////////////////////////// 1.669 + 1.670 +#ifdef SOUNDTOUCH_INTEGER_SAMPLES 1.671 + 1.672 +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 1.673 +// version of the routine. 1.674 +void TDStretch::overlapStereo(short *poutput, const short *input) const 1.675 +{ 1.676 + int i; 1.677 + short temp; 1.678 + int cnt2; 1.679 + 1.680 + for (i = 0; i < overlapLength ; i ++) 1.681 + { 1.682 + temp = (short)(overlapLength - i); 1.683 + cnt2 = 2 * i; 1.684 + poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; 1.685 + poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; 1.686 + } 1.687 +} 1.688 + 1.689 + 1.690 +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' 1.691 +// version of the routine. 1.692 +void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const 1.693 +{ 1.694 + SAMPLETYPE m1=(SAMPLETYPE)0; 1.695 + SAMPLETYPE m2; 1.696 + int i=0; 1.697 + 1.698 + for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --) 1.699 + { 1.700 + for (int c = 0; c < channels; c ++) 1.701 + { 1.702 + poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; 1.703 + i++; 1.704 + } 1.705 + 1.706 + m1++; 1.707 + } 1.708 +} 1.709 + 1.710 +// Calculates the x having the closest 2^x value for the given value 1.711 +static int _getClosest2Power(double value) 1.712 +{ 1.713 + return (int)(log(value) / log(2.0) + 0.5); 1.714 +} 1.715 + 1.716 + 1.717 +/// Calculates overlap period length in samples. 1.718 +/// Integer version rounds overlap length to closest power of 2 1.719 +/// for a divide scaling operation. 1.720 +void TDStretch::calculateOverlapLength(int aoverlapMs) 1.721 +{ 1.722 + int newOvl; 1.723 + 1.724 + assert(aoverlapMs >= 0); 1.725 + 1.726 + // calculate overlap length so that it's power of 2 - thus it's easy to do 1.727 + // integer division by right-shifting. Term "-1" at end is to account for 1.728 + // the extra most significatnt bit left unused in result by signed multiplication 1.729 + overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; 1.730 + if (overlapDividerBits > 9) overlapDividerBits = 9; 1.731 + if (overlapDividerBits < 3) overlapDividerBits = 3; 1.732 + newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above 1.733 + 1.734 + acceptNewOverlapLength(newOvl); 1.735 + 1.736 + // calculate sloping divider so that crosscorrelation operation won't 1.737 + // overflow 32-bit register. Max. sum of the crosscorrelation sum without 1.738 + // divider would be 2^30*(N^3-N)/3, where N = overlap length 1.739 + slopingDivider = (newOvl * newOvl - 1) / 3; 1.740 +} 1.741 + 1.742 + 1.743 +double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const 1.744 +{ 1.745 + long corr; 1.746 + long lnorm; 1.747 + int i; 1.748 + 1.749 + corr = lnorm = 0; 1.750 + // Same routine for stereo and mono. For stereo, unroll loop for better 1.751 + // efficiency and gives slightly better resolution against rounding. 1.752 + // For mono it same routine, just unrolls loop by factor of 4 1.753 + for (i = 0; i < channels * overlapLength; i += 4) 1.754 + { 1.755 + corr += (mixingPos[i] * compare[i] + 1.756 + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow 1.757 + corr += (mixingPos[i + 2] * compare[i + 2] + 1.758 + mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; 1.759 + lnorm += (mixingPos[i] * mixingPos[i] + 1.760 + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow 1.761 + lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + 1.762 + mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; 1.763 + } 1.764 + 1.765 + // Normalize result by dividing by sqrt(norm) - this step is easiest 1.766 + // done using floating point operation 1.767 + norm = (double)lnorm; 1.768 + return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); 1.769 +} 1.770 + 1.771 + 1.772 +/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value 1.773 +double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const 1.774 +{ 1.775 + long corr; 1.776 + long lnorm; 1.777 + int i; 1.778 + 1.779 + // cancel first normalizer tap from previous round 1.780 + lnorm = 0; 1.781 + for (i = 1; i <= channels; i ++) 1.782 + { 1.783 + lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits; 1.784 + } 1.785 + 1.786 + corr = 0; 1.787 + // Same routine for stereo and mono. For stereo, unroll loop for better 1.788 + // efficiency and gives slightly better resolution against rounding. 1.789 + // For mono it same routine, just unrolls loop by factor of 4 1.790 + for (i = 0; i < channels * overlapLength; i += 4) 1.791 + { 1.792 + corr += (mixingPos[i] * compare[i] + 1.793 + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow 1.794 + corr += (mixingPos[i + 2] * compare[i + 2] + 1.795 + mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; 1.796 + } 1.797 + 1.798 + // update normalizer with last samples of this round 1.799 + for (int j = 0; j < channels; j ++) 1.800 + { 1.801 + i --; 1.802 + lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits; 1.803 + } 1.804 + norm += (double)lnorm; 1.805 + 1.806 + // Normalize result by dividing by sqrt(norm) - this step is easiest 1.807 + // done using floating point operation 1.808 + return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); 1.809 +} 1.810 + 1.811 +#endif // SOUNDTOUCH_INTEGER_SAMPLES 1.812 + 1.813 +////////////////////////////////////////////////////////////////////////////// 1.814 +// 1.815 +// Floating point arithmetics specific algorithm implementations. 1.816 +// 1.817 + 1.818 +#ifdef SOUNDTOUCH_FLOAT_SAMPLES 1.819 + 1.820 +// Overlaps samples in 'midBuffer' with the samples in 'pInput' 1.821 +void TDStretch::overlapStereo(float *pOutput, const float *pInput) const 1.822 +{ 1.823 + int i; 1.824 + float fScale; 1.825 + float f1; 1.826 + float f2; 1.827 + 1.828 + fScale = 1.0f / (float)overlapLength; 1.829 + 1.830 + f1 = 0; 1.831 + f2 = 1.0f; 1.832 + 1.833 + for (i = 0; i < 2 * (int)overlapLength ; i += 2) 1.834 + { 1.835 + pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; 1.836 + pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; 1.837 + 1.838 + f1 += fScale; 1.839 + f2 -= fScale; 1.840 + } 1.841 +} 1.842 + 1.843 + 1.844 +// Overlaps samples in 'midBuffer' with the samples in 'input'. 1.845 +void TDStretch::overlapMulti(float *pOutput, const float *pInput) const 1.846 +{ 1.847 + int i; 1.848 + float fScale; 1.849 + float f1; 1.850 + float f2; 1.851 + 1.852 + fScale = 1.0f / (float)overlapLength; 1.853 + 1.854 + f1 = 0; 1.855 + f2 = 1.0f; 1.856 + 1.857 + i=0; 1.858 + for (int i2 = 0; i2 < overlapLength; i2 ++) 1.859 + { 1.860 + // note: Could optimize this slightly by taking into account that always channels > 2 1.861 + for (int c = 0; c < channels; c ++) 1.862 + { 1.863 + pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2; 1.864 + i++; 1.865 + } 1.866 + f1 += fScale; 1.867 + f2 -= fScale; 1.868 + } 1.869 +} 1.870 + 1.871 + 1.872 +/// Calculates overlapInMsec period length in samples. 1.873 +void TDStretch::calculateOverlapLength(int overlapInMsec) 1.874 +{ 1.875 + int newOvl; 1.876 + 1.877 + assert(overlapInMsec >= 0); 1.878 + newOvl = (sampleRate * overlapInMsec) / 1000; 1.879 + if (newOvl < 16) newOvl = 16; 1.880 + 1.881 + // must be divisible by 8 1.882 + newOvl -= newOvl % 8; 1.883 + 1.884 + acceptNewOverlapLength(newOvl); 1.885 +} 1.886 + 1.887 + 1.888 +/// Calculate cross-correlation 1.889 +double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const 1.890 +{ 1.891 + double corr; 1.892 + int i; 1.893 + 1.894 + corr = norm = 0; 1.895 + // Same routine for stereo and mono. For Stereo, unroll by factor of 2. 1.896 + // For mono it's same routine yet unrollsd by factor of 4. 1.897 + for (i = 0; i < channels * overlapLength; i += 4) 1.898 + { 1.899 + corr += mixingPos[i] * compare[i] + 1.900 + mixingPos[i + 1] * compare[i + 1]; 1.901 + 1.902 + norm += mixingPos[i] * mixingPos[i] + 1.903 + mixingPos[i + 1] * mixingPos[i + 1]; 1.904 + 1.905 + // unroll the loop for better CPU efficiency: 1.906 + corr += mixingPos[i + 2] * compare[i + 2] + 1.907 + mixingPos[i + 3] * compare[i + 3]; 1.908 + 1.909 + norm += mixingPos[i + 2] * mixingPos[i + 2] + 1.910 + mixingPos[i + 3] * mixingPos[i + 3]; 1.911 + } 1.912 + 1.913 + return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); 1.914 +} 1.915 + 1.916 + 1.917 +/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value 1.918 +double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const 1.919 +{ 1.920 + double corr; 1.921 + int i; 1.922 + 1.923 + corr = 0; 1.924 + 1.925 + // cancel first normalizer tap from previous round 1.926 + for (i = 1; i <= channels; i ++) 1.927 + { 1.928 + norm -= mixingPos[-i] * mixingPos[-i]; 1.929 + } 1.930 + 1.931 + // Same routine for stereo and mono. For Stereo, unroll by factor of 2. 1.932 + // For mono it's same routine yet unrollsd by factor of 4. 1.933 + for (i = 0; i < channels * overlapLength; i += 4) 1.934 + { 1.935 + corr += mixingPos[i] * compare[i] + 1.936 + mixingPos[i + 1] * compare[i + 1] + 1.937 + mixingPos[i + 2] * compare[i + 2] + 1.938 + mixingPos[i + 3] * compare[i + 3]; 1.939 + } 1.940 + 1.941 + // update normalizer with last samples of this round 1.942 + for (int j = 0; j < channels; j ++) 1.943 + { 1.944 + i --; 1.945 + norm += mixingPos[i] * mixingPos[i]; 1.946 + } 1.947 + 1.948 + return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); 1.949 +} 1.950 + 1.951 + 1.952 +#endif // SOUNDTOUCH_FLOAT_SAMPLES