Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 ////////////////////////////////////////////////////////////////////////////////
2 ///
3 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
4 /// while maintaining the original pitch by using a time domain WSOLA-like
5 /// method with several performance-increasing tweaks.
6 ///
7 /// Note : MMX optimized functions reside in a separate, platform-specific
8 /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
9 ///
10 /// Author : Copyright (c) Olli Parviainen
11 /// Author e-mail : oparviai 'at' iki.fi
12 /// SoundTouch WWW: http://www.surina.net/soundtouch
13 ///
14 ////////////////////////////////////////////////////////////////////////////////
15 //
16 // Last changed : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
17 // File revision : $Revision: 1.12 $
18 //
19 // $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $
20 //
21 ////////////////////////////////////////////////////////////////////////////////
22 //
23 // License :
24 //
25 // SoundTouch audio processing library
26 // Copyright (c) Olli Parviainen
27 //
28 // This library is free software; you can redistribute it and/or
29 // modify it under the terms of the GNU Lesser General Public
30 // License as published by the Free Software Foundation; either
31 // version 2.1 of the License, or (at your option) any later version.
32 //
33 // This library is distributed in the hope that it will be useful,
34 // but WITHOUT ANY WARRANTY; without even the implied warranty of
35 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 // Lesser General Public License for more details.
37 //
38 // You should have received a copy of the GNU Lesser General Public
39 // License along with this library; if not, write to the Free Software
40 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 //
42 ////////////////////////////////////////////////////////////////////////////////
44 #include <string.h>
45 #include <limits.h>
46 #include <assert.h>
47 #include <math.h>
48 #include <float.h>
50 #include "STTypes.h"
51 #include "cpu_detect.h"
52 #include "TDStretch.h"
54 using namespace soundtouch;
56 #define max(x, y) (((x) > (y)) ? (x) : (y))
59 /*****************************************************************************
60 *
61 * Constant definitions
62 *
63 *****************************************************************************/
65 // Table for the hierarchical mixing position seeking algorithm
66 static const short _scanOffsets[5][24]={
67 { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
68 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
69 {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
71 { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
73 { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
74 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
75 { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111,
76 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}};
78 /*****************************************************************************
79 *
80 * Implementation of the class 'TDStretch'
81 *
82 *****************************************************************************/
85 TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
86 {
87 bQuickSeek = false;
88 channels = 2;
90 pMidBuffer = NULL;
91 pMidBufferUnaligned = NULL;
92 overlapLength = 0;
94 bAutoSeqSetting = true;
95 bAutoSeekSetting = true;
97 // outDebt = 0;
98 skipFract = 0;
100 tempo = 1.0f;
101 setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
102 setTempo(1.0f);
104 clear();
105 }
109 TDStretch::~TDStretch()
110 {
111 delete[] pMidBufferUnaligned;
112 }
116 // Sets routine control parameters. These control are certain time constants
117 // defining how the sound is stretched to the desired duration.
118 //
119 // 'sampleRate' = sample rate of the sound
120 // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
121 // 'seekwindowMS' = seeking window length for scanning the best overlapping
122 // position (default = 28 ms)
123 // 'overlapMS' = overlapping length (default = 12 ms)
125 void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
126 int aSeekWindowMS, int aOverlapMS)
127 {
128 // accept only positive parameter values - if zero or negative, use old values instead
129 if (aSampleRate > 0) this->sampleRate = aSampleRate;
130 if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
132 if (aSequenceMS > 0)
133 {
134 this->sequenceMs = aSequenceMS;
135 bAutoSeqSetting = false;
136 }
137 else if (aSequenceMS == 0)
138 {
139 // if zero, use automatic setting
140 bAutoSeqSetting = true;
141 }
143 if (aSeekWindowMS > 0)
144 {
145 this->seekWindowMs = aSeekWindowMS;
146 bAutoSeekSetting = false;
147 }
148 else if (aSeekWindowMS == 0)
149 {
150 // if zero, use automatic setting
151 bAutoSeekSetting = true;
152 }
154 calcSeqParameters();
156 calculateOverlapLength(overlapMs);
158 // set tempo to recalculate 'sampleReq'
159 setTempo(tempo);
160 }
164 /// Get routine control parameters, see setParameters() function.
165 /// Any of the parameters to this function can be NULL, in such case corresponding parameter
166 /// value isn't returned.
167 void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
168 {
169 if (pSampleRate)
170 {
171 *pSampleRate = sampleRate;
172 }
174 if (pSequenceMs)
175 {
176 *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
177 }
179 if (pSeekWindowMs)
180 {
181 *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
182 }
184 if (pOverlapMs)
185 {
186 *pOverlapMs = overlapMs;
187 }
188 }
191 // Overlaps samples in 'midBuffer' with the samples in 'pInput'
192 void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
193 {
194 int i;
195 SAMPLETYPE m1, m2;
197 m1 = (SAMPLETYPE)0;
198 m2 = (SAMPLETYPE)overlapLength;
200 for (i = 0; i < overlapLength ; i ++)
201 {
202 pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
203 m1 += 1;
204 m2 -= 1;
205 }
206 }
210 void TDStretch::clearMidBuffer()
211 {
212 memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
213 }
216 void TDStretch::clearInput()
217 {
218 inputBuffer.clear();
219 clearMidBuffer();
220 }
223 // Clears the sample buffers
224 void TDStretch::clear()
225 {
226 outputBuffer.clear();
227 clearInput();
228 }
232 // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
233 // to enable
234 void TDStretch::enableQuickSeek(bool enable)
235 {
236 bQuickSeek = enable;
237 }
240 // Returns nonzero if the quick seeking algorithm is enabled.
241 bool TDStretch::isQuickSeekEnabled() const
242 {
243 return bQuickSeek;
244 }
247 // Seeks for the optimal overlap-mixing position.
248 int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
249 {
250 if (bQuickSeek)
251 {
252 return seekBestOverlapPositionQuick(refPos);
253 }
254 else
255 {
256 return seekBestOverlapPositionFull(refPos);
257 }
258 }
261 // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
262 // of 'ovlPos'.
263 inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
264 {
265 #ifndef USE_MULTICH_ALWAYS
266 if (channels == 1)
267 {
268 // mono sound.
269 overlapMono(pOutput, pInput + ovlPos);
270 }
271 else if (channels == 2)
272 {
273 // stereo sound
274 overlapStereo(pOutput, pInput + 2 * ovlPos);
275 }
276 else
277 #endif // USE_MULTICH_ALWAYS
278 {
279 assert(channels > 0);
280 overlapMulti(pOutput, pInput + channels * ovlPos);
281 }
282 }
286 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
287 // routine
288 //
289 // The best position is determined as the position where the two overlapped
290 // sample sequences are 'most alike', in terms of the highest cross-correlation
291 // value over the overlapping period
292 int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
293 {
294 int bestOffs;
295 double bestCorr, corr;
296 double norm;
297 int i;
299 bestCorr = FLT_MIN;
300 bestOffs = 0;
302 // Scans for the best correlation value by testing each possible position
303 // over the permitted range.
304 bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
305 for (i = 1; i < seekLength; i ++)
306 {
307 // Calculates correlation value for the mixing position corresponding
308 // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as
309 // "calcCrossCorr", but saves time by reusing & updating previously stored
310 // "norm" value
311 corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
313 // heuristic rule to slightly favour values close to mid of the range
314 double tmp = (double)(2 * i - seekLength) / (double)seekLength;
315 corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
317 // Checks for the highest correlation value
318 if (corr > bestCorr)
319 {
320 bestCorr = corr;
321 bestOffs = i;
322 }
323 }
324 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
325 clearCrossCorrState();
327 return bestOffs;
328 }
331 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
332 // routine
333 //
334 // The best position is determined as the position where the two overlapped
335 // sample sequences are 'most alike', in terms of the highest cross-correlation
336 // value over the overlapping period
337 int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
338 {
339 int j;
340 int bestOffs;
341 double bestCorr, corr;
342 int scanCount, corrOffset, tempOffset;
344 bestCorr = FLT_MIN;
345 bestOffs = _scanOffsets[0][0];
346 corrOffset = 0;
347 tempOffset = 0;
349 // Scans for the best correlation value using four-pass hierarchical search.
350 //
351 // The look-up table 'scans' has hierarchical position adjusting steps.
352 // In first pass the routine searhes for the highest correlation with
353 // relatively coarse steps, then rescans the neighbourhood of the highest
354 // correlation with better resolution and so on.
355 for (scanCount = 0;scanCount < 4; scanCount ++)
356 {
357 j = 0;
358 while (_scanOffsets[scanCount][j])
359 {
360 double norm;
361 tempOffset = corrOffset + _scanOffsets[scanCount][j];
362 if (tempOffset >= seekLength) break;
364 // Calculates correlation value for the mixing position corresponding
365 // to 'tempOffset'
366 corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm);
367 // heuristic rule to slightly favour values close to mid of the range
368 double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
369 corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
371 // Checks for the highest correlation value
372 if (corr > bestCorr)
373 {
374 bestCorr = corr;
375 bestOffs = tempOffset;
376 }
377 j ++;
378 }
379 corrOffset = bestOffs;
380 }
381 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
382 clearCrossCorrState();
384 return bestOffs;
385 }
389 /// clear cross correlation routine state if necessary
390 void TDStretch::clearCrossCorrState()
391 {
392 // default implementation is empty.
393 }
396 /// Calculates processing sequence length according to tempo setting
397 void TDStretch::calcSeqParameters()
398 {
399 // Adjust tempo param according to tempo, so that variating processing sequence length is used
400 // at varius tempo settings, between the given low...top limits
401 #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
402 #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
404 // sequence-ms setting values at above low & top tempo
405 #define AUTOSEQ_AT_MIN 125.0
406 #define AUTOSEQ_AT_MAX 50.0
407 #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
408 #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
410 // seek-window-ms setting values at above low & top tempo
411 #define AUTOSEEK_AT_MIN 25.0
412 #define AUTOSEEK_AT_MAX 15.0
413 #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
414 #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
416 #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
418 double seq, seek;
420 if (bAutoSeqSetting)
421 {
422 seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
423 seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
424 sequenceMs = (int)(seq + 0.5);
425 }
427 if (bAutoSeekSetting)
428 {
429 seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
430 seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
431 seekWindowMs = (int)(seek + 0.5);
432 }
434 // Update seek window lengths
435 seekWindowLength = (sampleRate * sequenceMs) / 1000;
436 if (seekWindowLength < 2 * overlapLength)
437 {
438 seekWindowLength = 2 * overlapLength;
439 }
440 seekLength = (sampleRate * seekWindowMs) / 1000;
441 }
445 // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
446 // tempo, larger faster tempo.
447 void TDStretch::setTempo(float newTempo)
448 {
449 int intskip;
451 tempo = newTempo;
453 // Calculate new sequence duration
454 calcSeqParameters();
456 // Calculate ideal skip length (according to tempo value)
457 nominalSkip = tempo * (seekWindowLength - overlapLength);
458 intskip = (int)(nominalSkip + 0.5f);
460 // Calculate how many samples are needed in the 'inputBuffer' to
461 // process another batch of samples
462 //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
463 sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
464 }
468 // Sets the number of channels, 1 = mono, 2 = stereo
469 void TDStretch::setChannels(int numChannels)
470 {
471 assert(numChannels > 0);
472 if (channels == numChannels) return;
473 // assert(numChannels == 1 || numChannels == 2);
475 channels = numChannels;
476 inputBuffer.setChannels(channels);
477 outputBuffer.setChannels(channels);
479 // re-init overlap/buffer
480 overlapLength=0;
481 setParameters(sampleRate);
482 }
485 // nominal tempo, no need for processing, just pass the samples through
486 // to outputBuffer
487 /*
488 void TDStretch::processNominalTempo()
489 {
490 assert(tempo == 1.0f);
492 if (bMidBufferDirty)
493 {
494 // If there are samples in pMidBuffer waiting for overlapping,
495 // do a single sliding overlapping with them in order to prevent a
496 // clicking distortion in the output sound
497 if (inputBuffer.numSamples() < overlapLength)
498 {
499 // wait until we've got overlapLength input samples
500 return;
501 }
502 // Mix the samples in the beginning of 'inputBuffer' with the
503 // samples in 'midBuffer' using sliding overlapping
504 overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
505 outputBuffer.putSamples(overlapLength);
506 inputBuffer.receiveSamples(overlapLength);
507 clearMidBuffer();
508 // now we've caught the nominal sample flow and may switch to
509 // bypass mode
510 }
512 // Simply bypass samples from input to output
513 outputBuffer.moveSamples(inputBuffer);
514 }
515 */
518 // Processes as many processing frames of the samples 'inputBuffer', store
519 // the result into 'outputBuffer'
520 void TDStretch::processSamples()
521 {
522 int ovlSkip, offset;
523 int temp;
525 /* Removed this small optimization - can introduce a click to sound when tempo setting
526 crosses the nominal value
527 if (tempo == 1.0f)
528 {
529 // tempo not changed from the original, so bypass the processing
530 processNominalTempo();
531 return;
532 }
533 */
535 // Process samples as long as there are enough samples in 'inputBuffer'
536 // to form a processing frame.
537 while ((int)inputBuffer.numSamples() >= sampleReq)
538 {
539 // If tempo differs from the normal ('SCALE'), scan for the best overlapping
540 // position
541 offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
543 // Mix the samples in the 'inputBuffer' at position of 'offset' with the
544 // samples in 'midBuffer' using sliding overlapping
545 // ... first partially overlap with the end of the previous sequence
546 // (that's in 'midBuffer')
547 overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
548 outputBuffer.putSamples((uint)overlapLength);
550 // ... then copy sequence samples from 'inputBuffer' to output:
552 // length of sequence
553 temp = (seekWindowLength - 2 * overlapLength);
555 // crosscheck that we don't have buffer overflow...
556 if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
557 {
558 continue; // just in case, shouldn't really happen
559 }
561 outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
563 // Copies the end of the current sequence from 'inputBuffer' to
564 // 'midBuffer' for being mixed with the beginning of the next
565 // processing sequence and so on
566 assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
567 memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength),
568 channels * sizeof(SAMPLETYPE) * overlapLength);
570 // Remove the processed samples from the input buffer. Update
571 // the difference between integer & nominal skip step to 'skipFract'
572 // in order to prevent the error from accumulating over time.
573 skipFract += nominalSkip; // real skip size
574 ovlSkip = (int)skipFract; // rounded to integer skip
575 skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
576 inputBuffer.receiveSamples((uint)ovlSkip);
577 }
578 }
581 // Adds 'numsamples' pcs of samples from the 'samples' memory position into
582 // the input of the object.
583 void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
584 {
585 // Add the samples into the input buffer
586 inputBuffer.putSamples(samples, nSamples);
587 // Process the samples in input buffer
588 processSamples();
589 }
593 /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
594 void TDStretch::acceptNewOverlapLength(int newOverlapLength)
595 {
596 int prevOvl;
598 assert(newOverlapLength >= 0);
599 prevOvl = overlapLength;
600 overlapLength = newOverlapLength;
602 if (overlapLength > prevOvl)
603 {
604 delete[] pMidBufferUnaligned;
606 pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
607 // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
608 pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
610 clearMidBuffer();
611 }
612 }
615 // Operator 'new' is overloaded so that it automatically creates a suitable instance
616 // depending on if we've a MMX/SSE/etc-capable CPU available or not.
617 void * TDStretch::operator new(size_t s)
618 {
619 // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
620 ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
621 return newInstance();
622 }
625 TDStretch * TDStretch::newInstance()
626 {
627 #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
628 uint uExtensions;
630 uExtensions = detectCPUextensions();
631 #endif
633 // Check if MMX/SSE instruction set extensions supported by CPU
635 #ifdef SOUNDTOUCH_ALLOW_MMX
636 // MMX routines available only with integer sample types
637 if (uExtensions & SUPPORT_MMX)
638 {
639 return ::new TDStretchMMX;
640 }
641 else
642 #endif // SOUNDTOUCH_ALLOW_MMX
645 #ifdef SOUNDTOUCH_ALLOW_SSE
646 if (uExtensions & SUPPORT_SSE)
647 {
648 // SSE support
649 return ::new TDStretchSSE;
650 }
651 else
652 #endif // SOUNDTOUCH_ALLOW_SSE
654 {
655 // ISA optimizations not supported, use plain C version
656 return ::new TDStretch;
657 }
658 }
661 //////////////////////////////////////////////////////////////////////////////
662 //
663 // Integer arithmetics specific algorithm implementations.
664 //
665 //////////////////////////////////////////////////////////////////////////////
667 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
669 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
670 // version of the routine.
671 void TDStretch::overlapStereo(short *poutput, const short *input) const
672 {
673 int i;
674 short temp;
675 int cnt2;
677 for (i = 0; i < overlapLength ; i ++)
678 {
679 temp = (short)(overlapLength - i);
680 cnt2 = 2 * i;
681 poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
682 poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
683 }
684 }
687 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
688 // version of the routine.
689 void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
690 {
691 SAMPLETYPE m1=(SAMPLETYPE)0;
692 SAMPLETYPE m2;
693 int i=0;
695 for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
696 {
697 for (int c = 0; c < channels; c ++)
698 {
699 poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
700 i++;
701 }
703 m1++;
704 }
705 }
707 // Calculates the x having the closest 2^x value for the given value
708 static int _getClosest2Power(double value)
709 {
710 return (int)(log(value) / log(2.0) + 0.5);
711 }
714 /// Calculates overlap period length in samples.
715 /// Integer version rounds overlap length to closest power of 2
716 /// for a divide scaling operation.
717 void TDStretch::calculateOverlapLength(int aoverlapMs)
718 {
719 int newOvl;
721 assert(aoverlapMs >= 0);
723 // calculate overlap length so that it's power of 2 - thus it's easy to do
724 // integer division by right-shifting. Term "-1" at end is to account for
725 // the extra most significatnt bit left unused in result by signed multiplication
726 overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
727 if (overlapDividerBits > 9) overlapDividerBits = 9;
728 if (overlapDividerBits < 3) overlapDividerBits = 3;
729 newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above
731 acceptNewOverlapLength(newOvl);
733 // calculate sloping divider so that crosscorrelation operation won't
734 // overflow 32-bit register. Max. sum of the crosscorrelation sum without
735 // divider would be 2^30*(N^3-N)/3, where N = overlap length
736 slopingDivider = (newOvl * newOvl - 1) / 3;
737 }
740 double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const
741 {
742 long corr;
743 long lnorm;
744 int i;
746 corr = lnorm = 0;
747 // Same routine for stereo and mono. For stereo, unroll loop for better
748 // efficiency and gives slightly better resolution against rounding.
749 // For mono it same routine, just unrolls loop by factor of 4
750 for (i = 0; i < channels * overlapLength; i += 4)
751 {
752 corr += (mixingPos[i] * compare[i] +
753 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
754 corr += (mixingPos[i + 2] * compare[i + 2] +
755 mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
756 lnorm += (mixingPos[i] * mixingPos[i] +
757 mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
758 lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
759 mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
760 }
762 // Normalize result by dividing by sqrt(norm) - this step is easiest
763 // done using floating point operation
764 norm = (double)lnorm;
765 return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
766 }
769 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
770 double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const
771 {
772 long corr;
773 long lnorm;
774 int i;
776 // cancel first normalizer tap from previous round
777 lnorm = 0;
778 for (i = 1; i <= channels; i ++)
779 {
780 lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits;
781 }
783 corr = 0;
784 // Same routine for stereo and mono. For stereo, unroll loop for better
785 // efficiency and gives slightly better resolution against rounding.
786 // For mono it same routine, just unrolls loop by factor of 4
787 for (i = 0; i < channels * overlapLength; i += 4)
788 {
789 corr += (mixingPos[i] * compare[i] +
790 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
791 corr += (mixingPos[i + 2] * compare[i + 2] +
792 mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
793 }
795 // update normalizer with last samples of this round
796 for (int j = 0; j < channels; j ++)
797 {
798 i --;
799 lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
800 }
801 norm += (double)lnorm;
803 // Normalize result by dividing by sqrt(norm) - this step is easiest
804 // done using floating point operation
805 return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
806 }
808 #endif // SOUNDTOUCH_INTEGER_SAMPLES
810 //////////////////////////////////////////////////////////////////////////////
811 //
812 // Floating point arithmetics specific algorithm implementations.
813 //
815 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
817 // Overlaps samples in 'midBuffer' with the samples in 'pInput'
818 void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
819 {
820 int i;
821 float fScale;
822 float f1;
823 float f2;
825 fScale = 1.0f / (float)overlapLength;
827 f1 = 0;
828 f2 = 1.0f;
830 for (i = 0; i < 2 * (int)overlapLength ; i += 2)
831 {
832 pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
833 pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
835 f1 += fScale;
836 f2 -= fScale;
837 }
838 }
841 // Overlaps samples in 'midBuffer' with the samples in 'input'.
842 void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
843 {
844 int i;
845 float fScale;
846 float f1;
847 float f2;
849 fScale = 1.0f / (float)overlapLength;
851 f1 = 0;
852 f2 = 1.0f;
854 i=0;
855 for (int i2 = 0; i2 < overlapLength; i2 ++)
856 {
857 // note: Could optimize this slightly by taking into account that always channels > 2
858 for (int c = 0; c < channels; c ++)
859 {
860 pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
861 i++;
862 }
863 f1 += fScale;
864 f2 -= fScale;
865 }
866 }
869 /// Calculates overlapInMsec period length in samples.
870 void TDStretch::calculateOverlapLength(int overlapInMsec)
871 {
872 int newOvl;
874 assert(overlapInMsec >= 0);
875 newOvl = (sampleRate * overlapInMsec) / 1000;
876 if (newOvl < 16) newOvl = 16;
878 // must be divisible by 8
879 newOvl -= newOvl % 8;
881 acceptNewOverlapLength(newOvl);
882 }
885 /// Calculate cross-correlation
886 double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const
887 {
888 double corr;
889 int i;
891 corr = norm = 0;
892 // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
893 // For mono it's same routine yet unrollsd by factor of 4.
894 for (i = 0; i < channels * overlapLength; i += 4)
895 {
896 corr += mixingPos[i] * compare[i] +
897 mixingPos[i + 1] * compare[i + 1];
899 norm += mixingPos[i] * mixingPos[i] +
900 mixingPos[i + 1] * mixingPos[i + 1];
902 // unroll the loop for better CPU efficiency:
903 corr += mixingPos[i + 2] * compare[i + 2] +
904 mixingPos[i + 3] * compare[i + 3];
906 norm += mixingPos[i + 2] * mixingPos[i + 2] +
907 mixingPos[i + 3] * mixingPos[i + 3];
908 }
910 return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
911 }
914 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
915 double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const
916 {
917 double corr;
918 int i;
920 corr = 0;
922 // cancel first normalizer tap from previous round
923 for (i = 1; i <= channels; i ++)
924 {
925 norm -= mixingPos[-i] * mixingPos[-i];
926 }
928 // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
929 // For mono it's same routine yet unrollsd by factor of 4.
930 for (i = 0; i < channels * overlapLength; i += 4)
931 {
932 corr += mixingPos[i] * compare[i] +
933 mixingPos[i + 1] * compare[i + 1] +
934 mixingPos[i + 2] * compare[i + 2] +
935 mixingPos[i + 3] * compare[i + 3];
936 }
938 // update normalizer with last samples of this round
939 for (int j = 0; j < channels; j ++)
940 {
941 i --;
942 norm += mixingPos[i] * mixingPos[i];
943 }
945 return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
946 }
949 #endif // SOUNDTOUCH_FLOAT_SAMPLES