|
1 //////////////////////////////////////////////////////////////////////////////// |
|
2 /// |
|
3 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo |
|
4 /// while maintaining the original pitch by using a time domain WSOLA-like |
|
5 /// method with several performance-increasing tweaks. |
|
6 /// |
|
7 /// Note : MMX optimized functions reside in a separate, platform-specific |
|
8 /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' |
|
9 /// |
|
10 /// Author : Copyright (c) Olli Parviainen |
|
11 /// Author e-mail : oparviai 'at' iki.fi |
|
12 /// SoundTouch WWW: http://www.surina.net/soundtouch |
|
13 /// |
|
14 //////////////////////////////////////////////////////////////////////////////// |
|
15 // |
|
16 // Last changed : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $ |
|
17 // File revision : $Revision: 1.12 $ |
|
18 // |
|
19 // $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $ |
|
20 // |
|
21 //////////////////////////////////////////////////////////////////////////////// |
|
22 // |
|
23 // License : |
|
24 // |
|
25 // SoundTouch audio processing library |
|
26 // Copyright (c) Olli Parviainen |
|
27 // |
|
28 // This library is free software; you can redistribute it and/or |
|
29 // modify it under the terms of the GNU Lesser General Public |
|
30 // License as published by the Free Software Foundation; either |
|
31 // version 2.1 of the License, or (at your option) any later version. |
|
32 // |
|
33 // This library is distributed in the hope that it will be useful, |
|
34 // but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
35 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
36 // Lesser General Public License for more details. |
|
37 // |
|
38 // You should have received a copy of the GNU Lesser General Public |
|
39 // License along with this library; if not, write to the Free Software |
|
40 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
41 // |
|
42 //////////////////////////////////////////////////////////////////////////////// |
|
43 |
|
44 #include <string.h> |
|
45 #include <limits.h> |
|
46 #include <assert.h> |
|
47 #include <math.h> |
|
48 #include <float.h> |
|
49 |
|
50 #include "STTypes.h" |
|
51 #include "cpu_detect.h" |
|
52 #include "TDStretch.h" |
|
53 |
|
54 using namespace soundtouch; |
|
55 |
|
56 #define max(x, y) (((x) > (y)) ? (x) : (y)) |
|
57 |
|
58 |
|
59 /***************************************************************************** |
|
60 * |
|
61 * Constant definitions |
|
62 * |
|
63 *****************************************************************************/ |
|
64 |
|
// Offset table for the hierarchical (quick) mixing-position search.
// Each row is a zero-terminated list of offsets for one search pass; the
// quick seek routine walks rows 0..3 and stops at the first zero entry of a
// row. Trailing entries that are not listed are zero-initialised by the
// aggregate initialisation rules.
static const short _scanOffsets[5][24] = {
    // row 0: coarse positions, 62 samples apart
    {  124,  186,  248,  310,  372,  434,  496,  558,
       620,  682,  744,  806,  868,  930,  992, 1054,
      1116, 1178, 1240, 1302, 1364, 1426, 1488,    0 },
    // row 1: refinement in steps of 25 around the best coarse position
    { -100,  -75,  -50,  -25,   25,   50,   75,  100 },
    // row 2: refinement in steps of 5
    {  -20,  -15,  -10,   -5,    5,   10,   15,   20 },
    // row 3: refinement in steps of 1
    {   -4,   -3,   -2,   -1,    1,    2,    3,    4 },
    // row 4: not read by the four-pass search visible in this file
    {  121,  114,   97,  114,   98,  105,  108,   32,
       104,   99,  117,  111,  116,  100,  110,  117,
       111,  115 }
};
|
77 |
|
78 /***************************************************************************** |
|
79 * |
|
80 * Implementation of the class 'TDStretch' |
|
81 * |
|
82 *****************************************************************************/ |
|
83 |
|
84 |
|
85 TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) |
|
86 { |
|
87 bQuickSeek = false; |
|
88 channels = 2; |
|
89 |
|
90 pMidBuffer = NULL; |
|
91 pMidBufferUnaligned = NULL; |
|
92 overlapLength = 0; |
|
93 |
|
94 bAutoSeqSetting = true; |
|
95 bAutoSeekSetting = true; |
|
96 |
|
97 // outDebt = 0; |
|
98 skipFract = 0; |
|
99 |
|
100 tempo = 1.0f; |
|
101 setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); |
|
102 setTempo(1.0f); |
|
103 |
|
104 clear(); |
|
105 } |
|
106 |
|
107 |
|
108 |
|
109 TDStretch::~TDStretch() |
|
110 { |
|
111 delete[] pMidBufferUnaligned; |
|
112 } |
|
113 |
|
114 |
|
115 |
|
116 // Sets routine control parameters. These control are certain time constants |
|
117 // defining how the sound is stretched to the desired duration. |
|
118 // |
|
119 // 'sampleRate' = sample rate of the sound |
|
120 // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) |
|
121 // 'seekwindowMS' = seeking window length for scanning the best overlapping |
|
122 // position (default = 28 ms) |
|
123 // 'overlapMS' = overlapping length (default = 12 ms) |
|
124 |
|
125 void TDStretch::setParameters(int aSampleRate, int aSequenceMS, |
|
126 int aSeekWindowMS, int aOverlapMS) |
|
127 { |
|
128 // accept only positive parameter values - if zero or negative, use old values instead |
|
129 if (aSampleRate > 0) this->sampleRate = aSampleRate; |
|
130 if (aOverlapMS > 0) this->overlapMs = aOverlapMS; |
|
131 |
|
132 if (aSequenceMS > 0) |
|
133 { |
|
134 this->sequenceMs = aSequenceMS; |
|
135 bAutoSeqSetting = false; |
|
136 } |
|
137 else if (aSequenceMS == 0) |
|
138 { |
|
139 // if zero, use automatic setting |
|
140 bAutoSeqSetting = true; |
|
141 } |
|
142 |
|
143 if (aSeekWindowMS > 0) |
|
144 { |
|
145 this->seekWindowMs = aSeekWindowMS; |
|
146 bAutoSeekSetting = false; |
|
147 } |
|
148 else if (aSeekWindowMS == 0) |
|
149 { |
|
150 // if zero, use automatic setting |
|
151 bAutoSeekSetting = true; |
|
152 } |
|
153 |
|
154 calcSeqParameters(); |
|
155 |
|
156 calculateOverlapLength(overlapMs); |
|
157 |
|
158 // set tempo to recalculate 'sampleReq' |
|
159 setTempo(tempo); |
|
160 } |
|
161 |
|
162 |
|
163 |
|
164 /// Get routine control parameters, see setParameters() function. |
|
165 /// Any of the parameters to this function can be NULL, in such case corresponding parameter |
|
166 /// value isn't returned. |
|
167 void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const |
|
168 { |
|
169 if (pSampleRate) |
|
170 { |
|
171 *pSampleRate = sampleRate; |
|
172 } |
|
173 |
|
174 if (pSequenceMs) |
|
175 { |
|
176 *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs; |
|
177 } |
|
178 |
|
179 if (pSeekWindowMs) |
|
180 { |
|
181 *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs; |
|
182 } |
|
183 |
|
184 if (pOverlapMs) |
|
185 { |
|
186 *pOverlapMs = overlapMs; |
|
187 } |
|
188 } |
|
189 |
|
190 |
|
191 // Overlaps samples in 'midBuffer' with the samples in 'pInput' |
|
192 void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const |
|
193 { |
|
194 int i; |
|
195 SAMPLETYPE m1, m2; |
|
196 |
|
197 m1 = (SAMPLETYPE)0; |
|
198 m2 = (SAMPLETYPE)overlapLength; |
|
199 |
|
200 for (i = 0; i < overlapLength ; i ++) |
|
201 { |
|
202 pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength; |
|
203 m1 += 1; |
|
204 m2 -= 1; |
|
205 } |
|
206 } |
|
207 |
|
208 |
|
209 |
|
210 void TDStretch::clearMidBuffer() |
|
211 { |
|
212 memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength); |
|
213 } |
|
214 |
|
215 |
|
216 void TDStretch::clearInput() |
|
217 { |
|
218 inputBuffer.clear(); |
|
219 clearMidBuffer(); |
|
220 } |
|
221 |
|
222 |
|
223 // Clears the sample buffers |
|
224 void TDStretch::clear() |
|
225 { |
|
226 outputBuffer.clear(); |
|
227 clearInput(); |
|
228 } |
|
229 |
|
230 |
|
231 |
|
232 // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero |
|
233 // to enable |
|
234 void TDStretch::enableQuickSeek(bool enable) |
|
235 { |
|
236 bQuickSeek = enable; |
|
237 } |
|
238 |
|
239 |
|
240 // Returns nonzero if the quick seeking algorithm is enabled. |
|
241 bool TDStretch::isQuickSeekEnabled() const |
|
242 { |
|
243 return bQuickSeek; |
|
244 } |
|
245 |
|
246 |
|
247 // Seeks for the optimal overlap-mixing position. |
|
248 int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) |
|
249 { |
|
250 if (bQuickSeek) |
|
251 { |
|
252 return seekBestOverlapPositionQuick(refPos); |
|
253 } |
|
254 else |
|
255 { |
|
256 return seekBestOverlapPositionFull(refPos); |
|
257 } |
|
258 } |
|
259 |
|
260 |
|
261 // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position |
|
262 // of 'ovlPos'. |
|
263 inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const |
|
264 { |
|
265 #ifndef USE_MULTICH_ALWAYS |
|
266 if (channels == 1) |
|
267 { |
|
268 // mono sound. |
|
269 overlapMono(pOutput, pInput + ovlPos); |
|
270 } |
|
271 else if (channels == 2) |
|
272 { |
|
273 // stereo sound |
|
274 overlapStereo(pOutput, pInput + 2 * ovlPos); |
|
275 } |
|
276 else |
|
277 #endif // USE_MULTICH_ALWAYS |
|
278 { |
|
279 assert(channels > 0); |
|
280 overlapMulti(pOutput, pInput + channels * ovlPos); |
|
281 } |
|
282 } |
|
283 |
|
284 |
|
285 |
|
286 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the |
|
287 // routine |
|
288 // |
|
289 // The best position is determined as the position where the two overlapped |
|
290 // sample sequences are 'most alike', in terms of the highest cross-correlation |
|
291 // value over the overlapping period |
|
292 int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) |
|
293 { |
|
294 int bestOffs; |
|
295 double bestCorr, corr; |
|
296 double norm; |
|
297 int i; |
|
298 |
|
299 bestCorr = FLT_MIN; |
|
300 bestOffs = 0; |
|
301 |
|
302 // Scans for the best correlation value by testing each possible position |
|
303 // over the permitted range. |
|
304 bestCorr = calcCrossCorr(refPos, pMidBuffer, norm); |
|
305 for (i = 1; i < seekLength; i ++) |
|
306 { |
|
307 // Calculates correlation value for the mixing position corresponding |
|
308 // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as |
|
309 // "calcCrossCorr", but saves time by reusing & updating previously stored |
|
310 // "norm" value |
|
311 corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm); |
|
312 |
|
313 // heuristic rule to slightly favour values close to mid of the range |
|
314 double tmp = (double)(2 * i - seekLength) / (double)seekLength; |
|
315 corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); |
|
316 |
|
317 // Checks for the highest correlation value |
|
318 if (corr > bestCorr) |
|
319 { |
|
320 bestCorr = corr; |
|
321 bestOffs = i; |
|
322 } |
|
323 } |
|
324 // clear cross correlation routine state if necessary (is so e.g. in MMX routines). |
|
325 clearCrossCorrState(); |
|
326 |
|
327 return bestOffs; |
|
328 } |
|
329 |
|
330 |
|
331 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the |
|
332 // routine |
|
333 // |
|
334 // The best position is determined as the position where the two overlapped |
|
335 // sample sequences are 'most alike', in terms of the highest cross-correlation |
|
336 // value over the overlapping period |
|
337 int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) |
|
338 { |
|
339 int j; |
|
340 int bestOffs; |
|
341 double bestCorr, corr; |
|
342 int scanCount, corrOffset, tempOffset; |
|
343 |
|
344 bestCorr = FLT_MIN; |
|
345 bestOffs = _scanOffsets[0][0]; |
|
346 corrOffset = 0; |
|
347 tempOffset = 0; |
|
348 |
|
349 // Scans for the best correlation value using four-pass hierarchical search. |
|
350 // |
|
351 // The look-up table 'scans' has hierarchical position adjusting steps. |
|
352 // In first pass the routine searhes for the highest correlation with |
|
353 // relatively coarse steps, then rescans the neighbourhood of the highest |
|
354 // correlation with better resolution and so on. |
|
355 for (scanCount = 0;scanCount < 4; scanCount ++) |
|
356 { |
|
357 j = 0; |
|
358 while (_scanOffsets[scanCount][j]) |
|
359 { |
|
360 double norm; |
|
361 tempOffset = corrOffset + _scanOffsets[scanCount][j]; |
|
362 if (tempOffset >= seekLength) break; |
|
363 |
|
364 // Calculates correlation value for the mixing position corresponding |
|
365 // to 'tempOffset' |
|
366 corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm); |
|
367 // heuristic rule to slightly favour values close to mid of the range |
|
368 double tmp = (double)(2 * tempOffset - seekLength) / seekLength; |
|
369 corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); |
|
370 |
|
371 // Checks for the highest correlation value |
|
372 if (corr > bestCorr) |
|
373 { |
|
374 bestCorr = corr; |
|
375 bestOffs = tempOffset; |
|
376 } |
|
377 j ++; |
|
378 } |
|
379 corrOffset = bestOffs; |
|
380 } |
|
381 // clear cross correlation routine state if necessary (is so e.g. in MMX routines). |
|
382 clearCrossCorrState(); |
|
383 |
|
384 return bestOffs; |
|
385 } |
|
386 |
|
387 |
|
388 |
|
389 /// clear cross correlation routine state if necessary |
|
390 void TDStretch::clearCrossCorrState() |
|
391 { |
|
392 // default implementation is empty. |
|
393 } |
|
394 |
|
395 |
|
396 /// Calculates processing sequence length according to tempo setting |
|
397 void TDStretch::calcSeqParameters() |
|
398 { |
|
399 // Adjust tempo param according to tempo, so that variating processing sequence length is used |
|
400 // at varius tempo settings, between the given low...top limits |
|
401 #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%) |
|
402 #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%) |
|
403 |
|
404 // sequence-ms setting values at above low & top tempo |
|
405 #define AUTOSEQ_AT_MIN 125.0 |
|
406 #define AUTOSEQ_AT_MAX 50.0 |
|
407 #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) |
|
408 #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) |
|
409 |
|
410 // seek-window-ms setting values at above low & top tempo |
|
411 #define AUTOSEEK_AT_MIN 25.0 |
|
412 #define AUTOSEEK_AT_MAX 15.0 |
|
413 #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) |
|
414 #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW)) |
|
415 |
|
416 #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))) |
|
417 |
|
418 double seq, seek; |
|
419 |
|
420 if (bAutoSeqSetting) |
|
421 { |
|
422 seq = AUTOSEQ_C + AUTOSEQ_K * tempo; |
|
423 seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN); |
|
424 sequenceMs = (int)(seq + 0.5); |
|
425 } |
|
426 |
|
427 if (bAutoSeekSetting) |
|
428 { |
|
429 seek = AUTOSEEK_C + AUTOSEEK_K * tempo; |
|
430 seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN); |
|
431 seekWindowMs = (int)(seek + 0.5); |
|
432 } |
|
433 |
|
434 // Update seek window lengths |
|
435 seekWindowLength = (sampleRate * sequenceMs) / 1000; |
|
436 if (seekWindowLength < 2 * overlapLength) |
|
437 { |
|
438 seekWindowLength = 2 * overlapLength; |
|
439 } |
|
440 seekLength = (sampleRate * seekWindowMs) / 1000; |
|
441 } |
|
442 |
|
443 |
|
444 |
|
445 // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower |
|
446 // tempo, larger faster tempo. |
|
447 void TDStretch::setTempo(float newTempo) |
|
448 { |
|
449 int intskip; |
|
450 |
|
451 tempo = newTempo; |
|
452 |
|
453 // Calculate new sequence duration |
|
454 calcSeqParameters(); |
|
455 |
|
456 // Calculate ideal skip length (according to tempo value) |
|
457 nominalSkip = tempo * (seekWindowLength - overlapLength); |
|
458 intskip = (int)(nominalSkip + 0.5f); |
|
459 |
|
460 // Calculate how many samples are needed in the 'inputBuffer' to |
|
461 // process another batch of samples |
|
462 //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2; |
|
463 sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength; |
|
464 } |
|
465 |
|
466 |
|
467 |
|
468 // Sets the number of channels, 1 = mono, 2 = stereo |
|
469 void TDStretch::setChannels(int numChannels) |
|
470 { |
|
471 assert(numChannels > 0); |
|
472 if (channels == numChannels) return; |
|
473 // assert(numChannels == 1 || numChannels == 2); |
|
474 |
|
475 channels = numChannels; |
|
476 inputBuffer.setChannels(channels); |
|
477 outputBuffer.setChannels(channels); |
|
478 |
|
479 // re-init overlap/buffer |
|
480 overlapLength=0; |
|
481 setParameters(sampleRate); |
|
482 } |
|
483 |
|
484 |
|
485 // nominal tempo, no need for processing, just pass the samples through |
|
486 // to outputBuffer |
|
487 /* |
|
488 void TDStretch::processNominalTempo() |
|
489 { |
|
490 assert(tempo == 1.0f); |
|
491 |
|
492 if (bMidBufferDirty) |
|
493 { |
|
494 // If there are samples in pMidBuffer waiting for overlapping, |
|
495 // do a single sliding overlapping with them in order to prevent a |
|
496 // clicking distortion in the output sound |
|
497 if (inputBuffer.numSamples() < overlapLength) |
|
498 { |
|
499 // wait until we've got overlapLength input samples |
|
500 return; |
|
501 } |
|
502 // Mix the samples in the beginning of 'inputBuffer' with the |
|
503 // samples in 'midBuffer' using sliding overlapping |
|
504 overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); |
|
505 outputBuffer.putSamples(overlapLength); |
|
506 inputBuffer.receiveSamples(overlapLength); |
|
507 clearMidBuffer(); |
|
508 // now we've caught the nominal sample flow and may switch to |
|
509 // bypass mode |
|
510 } |
|
511 |
|
512 // Simply bypass samples from input to output |
|
513 outputBuffer.moveSamples(inputBuffer); |
|
514 } |
|
515 */ |
|
516 |
|
517 |
|
518 // Processes as many processing frames of the samples 'inputBuffer', store |
|
519 // the result into 'outputBuffer' |
|
520 void TDStretch::processSamples() |
|
521 { |
|
522 int ovlSkip, offset; |
|
523 int temp; |
|
524 |
|
525 /* Removed this small optimization - can introduce a click to sound when tempo setting |
|
526 crosses the nominal value |
|
527 if (tempo == 1.0f) |
|
528 { |
|
529 // tempo not changed from the original, so bypass the processing |
|
530 processNominalTempo(); |
|
531 return; |
|
532 } |
|
533 */ |
|
534 |
|
535 // Process samples as long as there are enough samples in 'inputBuffer' |
|
536 // to form a processing frame. |
|
537 while ((int)inputBuffer.numSamples() >= sampleReq) |
|
538 { |
|
539 // If tempo differs from the normal ('SCALE'), scan for the best overlapping |
|
540 // position |
|
541 offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); |
|
542 |
|
543 // Mix the samples in the 'inputBuffer' at position of 'offset' with the |
|
544 // samples in 'midBuffer' using sliding overlapping |
|
545 // ... first partially overlap with the end of the previous sequence |
|
546 // (that's in 'midBuffer') |
|
547 overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); |
|
548 outputBuffer.putSamples((uint)overlapLength); |
|
549 |
|
550 // ... then copy sequence samples from 'inputBuffer' to output: |
|
551 |
|
552 // length of sequence |
|
553 temp = (seekWindowLength - 2 * overlapLength); |
|
554 |
|
555 // crosscheck that we don't have buffer overflow... |
|
556 if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2)) |
|
557 { |
|
558 continue; // just in case, shouldn't really happen |
|
559 } |
|
560 |
|
561 outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp); |
|
562 |
|
563 // Copies the end of the current sequence from 'inputBuffer' to |
|
564 // 'midBuffer' for being mixed with the beginning of the next |
|
565 // processing sequence and so on |
|
566 assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples()); |
|
567 memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), |
|
568 channels * sizeof(SAMPLETYPE) * overlapLength); |
|
569 |
|
570 // Remove the processed samples from the input buffer. Update |
|
571 // the difference between integer & nominal skip step to 'skipFract' |
|
572 // in order to prevent the error from accumulating over time. |
|
573 skipFract += nominalSkip; // real skip size |
|
574 ovlSkip = (int)skipFract; // rounded to integer skip |
|
575 skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip |
|
576 inputBuffer.receiveSamples((uint)ovlSkip); |
|
577 } |
|
578 } |
|
579 |
|
580 |
|
581 // Adds 'numsamples' pcs of samples from the 'samples' memory position into |
|
582 // the input of the object. |
|
583 void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) |
|
584 { |
|
585 // Add the samples into the input buffer |
|
586 inputBuffer.putSamples(samples, nSamples); |
|
587 // Process the samples in input buffer |
|
588 processSamples(); |
|
589 } |
|
590 |
|
591 |
|
592 |
|
593 /// Set new overlap length parameter & reallocate RefMidBuffer if necessary. |
|
594 void TDStretch::acceptNewOverlapLength(int newOverlapLength) |
|
595 { |
|
596 int prevOvl; |
|
597 |
|
598 assert(newOverlapLength >= 0); |
|
599 prevOvl = overlapLength; |
|
600 overlapLength = newOverlapLength; |
|
601 |
|
602 if (overlapLength > prevOvl) |
|
603 { |
|
604 delete[] pMidBufferUnaligned; |
|
605 |
|
606 pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)]; |
|
607 // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency |
|
608 pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); |
|
609 |
|
610 clearMidBuffer(); |
|
611 } |
|
612 } |
|
613 |
|
614 |
|
615 // Operator 'new' is overloaded so that it automatically creates a suitable instance |
|
616 // depending on if we've a MMX/SSE/etc-capable CPU available or not. |
|
617 void * TDStretch::operator new(size_t s) |
|
618 { |
|
619 // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! |
|
620 ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); |
|
621 return newInstance(); |
|
622 } |
|
623 |
|
624 |
|
625 TDStretch * TDStretch::newInstance() |
|
626 { |
|
627 #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE) |
|
628 uint uExtensions; |
|
629 |
|
630 uExtensions = detectCPUextensions(); |
|
631 #endif |
|
632 |
|
633 // Check if MMX/SSE instruction set extensions supported by CPU |
|
634 |
|
635 #ifdef SOUNDTOUCH_ALLOW_MMX |
|
636 // MMX routines available only with integer sample types |
|
637 if (uExtensions & SUPPORT_MMX) |
|
638 { |
|
639 return ::new TDStretchMMX; |
|
640 } |
|
641 else |
|
642 #endif // SOUNDTOUCH_ALLOW_MMX |
|
643 |
|
644 |
|
645 #ifdef SOUNDTOUCH_ALLOW_SSE |
|
646 if (uExtensions & SUPPORT_SSE) |
|
647 { |
|
648 // SSE support |
|
649 return ::new TDStretchSSE; |
|
650 } |
|
651 else |
|
652 #endif // SOUNDTOUCH_ALLOW_SSE |
|
653 |
|
654 { |
|
655 // ISA optimizations not supported, use plain C version |
|
656 return ::new TDStretch; |
|
657 } |
|
658 } |
|
659 |
|
660 |
|
661 ////////////////////////////////////////////////////////////////////////////// |
|
662 // |
|
663 // Integer arithmetics specific algorithm implementations. |
|
664 // |
|
665 ////////////////////////////////////////////////////////////////////////////// |
|
666 |
|
667 #ifdef SOUNDTOUCH_INTEGER_SAMPLES |
|
668 |
|
669 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' |
|
670 // version of the routine. |
|
671 void TDStretch::overlapStereo(short *poutput, const short *input) const |
|
672 { |
|
673 int i; |
|
674 short temp; |
|
675 int cnt2; |
|
676 |
|
677 for (i = 0; i < overlapLength ; i ++) |
|
678 { |
|
679 temp = (short)(overlapLength - i); |
|
680 cnt2 = 2 * i; |
|
681 poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; |
|
682 poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; |
|
683 } |
|
684 } |
|
685 |
|
686 |
|
687 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' |
|
688 // version of the routine. |
|
689 void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const |
|
690 { |
|
691 SAMPLETYPE m1=(SAMPLETYPE)0; |
|
692 SAMPLETYPE m2; |
|
693 int i=0; |
|
694 |
|
695 for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --) |
|
696 { |
|
697 for (int c = 0; c < channels; c ++) |
|
698 { |
|
699 poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; |
|
700 i++; |
|
701 } |
|
702 |
|
703 m1++; |
|
704 } |
|
705 } |
|
706 |
|
// Calculates the integer x for which 2^x is closest (on the logarithmic
// scale) to the given value, i.e. log2(value) rounded to the nearest integer.
//
// 'value' = value to approximate; expected to be positive
// Returns the exponent x. For a non-positive (or NaN) value the logarithm is
// undefined or infinite and converting it to 'int' would be undefined
// behaviour, so 0 is returned in that case.
static int _getClosest2Power(double value)
{
    if (!(value > 0.0))
    {
        return 0;
    }

    // floor(x + 0.5) rounds to nearest also when the logarithm is negative,
    // where a plain truncating cast would round towards zero instead
    return (int)floor(log(value) / log(2.0) + 0.5);
}
|
712 |
|
713 |
|
714 /// Calculates overlap period length in samples. |
|
715 /// Integer version rounds overlap length to closest power of 2 |
|
716 /// for a divide scaling operation. |
|
717 void TDStretch::calculateOverlapLength(int aoverlapMs) |
|
718 { |
|
719 int newOvl; |
|
720 |
|
721 assert(aoverlapMs >= 0); |
|
722 |
|
723 // calculate overlap length so that it's power of 2 - thus it's easy to do |
|
724 // integer division by right-shifting. Term "-1" at end is to account for |
|
725 // the extra most significatnt bit left unused in result by signed multiplication |
|
726 overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; |
|
727 if (overlapDividerBits > 9) overlapDividerBits = 9; |
|
728 if (overlapDividerBits < 3) overlapDividerBits = 3; |
|
729 newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above |
|
730 |
|
731 acceptNewOverlapLength(newOvl); |
|
732 |
|
733 // calculate sloping divider so that crosscorrelation operation won't |
|
734 // overflow 32-bit register. Max. sum of the crosscorrelation sum without |
|
735 // divider would be 2^30*(N^3-N)/3, where N = overlap length |
|
736 slopingDivider = (newOvl * newOvl - 1) / 3; |
|
737 } |
|
738 |
|
739 |
|
740 double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const |
|
741 { |
|
742 long corr; |
|
743 long lnorm; |
|
744 int i; |
|
745 |
|
746 corr = lnorm = 0; |
|
747 // Same routine for stereo and mono. For stereo, unroll loop for better |
|
748 // efficiency and gives slightly better resolution against rounding. |
|
749 // For mono it same routine, just unrolls loop by factor of 4 |
|
750 for (i = 0; i < channels * overlapLength; i += 4) |
|
751 { |
|
752 corr += (mixingPos[i] * compare[i] + |
|
753 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow |
|
754 corr += (mixingPos[i + 2] * compare[i + 2] + |
|
755 mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; |
|
756 lnorm += (mixingPos[i] * mixingPos[i] + |
|
757 mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow |
|
758 lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + |
|
759 mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; |
|
760 } |
|
761 |
|
762 // Normalize result by dividing by sqrt(norm) - this step is easiest |
|
763 // done using floating point operation |
|
764 norm = (double)lnorm; |
|
765 return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); |
|
766 } |
|
767 |
|
768 |
|
769 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value |
|
770 double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const |
|
771 { |
|
772 long corr; |
|
773 long lnorm; |
|
774 int i; |
|
775 |
|
776 // cancel first normalizer tap from previous round |
|
777 lnorm = 0; |
|
778 for (i = 1; i <= channels; i ++) |
|
779 { |
|
780 lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits; |
|
781 } |
|
782 |
|
783 corr = 0; |
|
784 // Same routine for stereo and mono. For stereo, unroll loop for better |
|
785 // efficiency and gives slightly better resolution against rounding. |
|
786 // For mono it same routine, just unrolls loop by factor of 4 |
|
787 for (i = 0; i < channels * overlapLength; i += 4) |
|
788 { |
|
789 corr += (mixingPos[i] * compare[i] + |
|
790 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow |
|
791 corr += (mixingPos[i + 2] * compare[i + 2] + |
|
792 mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; |
|
793 } |
|
794 |
|
795 // update normalizer with last samples of this round |
|
796 for (int j = 0; j < channels; j ++) |
|
797 { |
|
798 i --; |
|
799 lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits; |
|
800 } |
|
801 norm += (double)lnorm; |
|
802 |
|
803 // Normalize result by dividing by sqrt(norm) - this step is easiest |
|
804 // done using floating point operation |
|
805 return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); |
|
806 } |
|
807 |
|
808 #endif // SOUNDTOUCH_INTEGER_SAMPLES |
|
809 |
|
810 ////////////////////////////////////////////////////////////////////////////// |
|
811 // |
|
812 // Floating point arithmetics specific algorithm implementations. |
|
813 // |
|
814 |
|
815 #ifdef SOUNDTOUCH_FLOAT_SAMPLES |
|
816 |
|
817 // Overlaps samples in 'midBuffer' with the samples in 'pInput' |
|
818 void TDStretch::overlapStereo(float *pOutput, const float *pInput) const |
|
819 { |
|
820 int i; |
|
821 float fScale; |
|
822 float f1; |
|
823 float f2; |
|
824 |
|
825 fScale = 1.0f / (float)overlapLength; |
|
826 |
|
827 f1 = 0; |
|
828 f2 = 1.0f; |
|
829 |
|
830 for (i = 0; i < 2 * (int)overlapLength ; i += 2) |
|
831 { |
|
832 pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; |
|
833 pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; |
|
834 |
|
835 f1 += fScale; |
|
836 f2 -= fScale; |
|
837 } |
|
838 } |
|
839 |
|
840 |
|
841 // Overlaps samples in 'midBuffer' with the samples in 'input'. |
|
842 void TDStretch::overlapMulti(float *pOutput, const float *pInput) const |
|
843 { |
|
844 int i; |
|
845 float fScale; |
|
846 float f1; |
|
847 float f2; |
|
848 |
|
849 fScale = 1.0f / (float)overlapLength; |
|
850 |
|
851 f1 = 0; |
|
852 f2 = 1.0f; |
|
853 |
|
854 i=0; |
|
855 for (int i2 = 0; i2 < overlapLength; i2 ++) |
|
856 { |
|
857 // note: Could optimize this slightly by taking into account that always channels > 2 |
|
858 for (int c = 0; c < channels; c ++) |
|
859 { |
|
860 pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2; |
|
861 i++; |
|
862 } |
|
863 f1 += fScale; |
|
864 f2 -= fScale; |
|
865 } |
|
866 } |
|
867 |
|
868 |
|
869 /// Calculates overlapInMsec period length in samples. |
|
870 void TDStretch::calculateOverlapLength(int overlapInMsec) |
|
871 { |
|
872 int newOvl; |
|
873 |
|
874 assert(overlapInMsec >= 0); |
|
875 newOvl = (sampleRate * overlapInMsec) / 1000; |
|
876 if (newOvl < 16) newOvl = 16; |
|
877 |
|
878 // must be divisible by 8 |
|
879 newOvl -= newOvl % 8; |
|
880 |
|
881 acceptNewOverlapLength(newOvl); |
|
882 } |
|
883 |
|
884 |
|
885 /// Calculate cross-correlation |
|
886 double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const |
|
887 { |
|
888 double corr; |
|
889 int i; |
|
890 |
|
891 corr = norm = 0; |
|
892 // Same routine for stereo and mono. For Stereo, unroll by factor of 2. |
|
893 // For mono it's same routine yet unrollsd by factor of 4. |
|
894 for (i = 0; i < channels * overlapLength; i += 4) |
|
895 { |
|
896 corr += mixingPos[i] * compare[i] + |
|
897 mixingPos[i + 1] * compare[i + 1]; |
|
898 |
|
899 norm += mixingPos[i] * mixingPos[i] + |
|
900 mixingPos[i + 1] * mixingPos[i + 1]; |
|
901 |
|
902 // unroll the loop for better CPU efficiency: |
|
903 corr += mixingPos[i + 2] * compare[i + 2] + |
|
904 mixingPos[i + 3] * compare[i + 3]; |
|
905 |
|
906 norm += mixingPos[i + 2] * mixingPos[i + 2] + |
|
907 mixingPos[i + 3] * mixingPos[i + 3]; |
|
908 } |
|
909 |
|
910 return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); |
|
911 } |
|
912 |
|
913 |
|
914 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value |
|
915 double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const |
|
916 { |
|
917 double corr; |
|
918 int i; |
|
919 |
|
920 corr = 0; |
|
921 |
|
922 // cancel first normalizer tap from previous round |
|
923 for (i = 1; i <= channels; i ++) |
|
924 { |
|
925 norm -= mixingPos[-i] * mixingPos[-i]; |
|
926 } |
|
927 |
|
928 // Same routine for stereo and mono. For Stereo, unroll by factor of 2. |
|
929 // For mono it's same routine yet unrollsd by factor of 4. |
|
930 for (i = 0; i < channels * overlapLength; i += 4) |
|
931 { |
|
932 corr += mixingPos[i] * compare[i] + |
|
933 mixingPos[i + 1] * compare[i + 1] + |
|
934 mixingPos[i + 2] * compare[i + 2] + |
|
935 mixingPos[i + 3] * compare[i + 3]; |
|
936 } |
|
937 |
|
938 // update normalizer with last samples of this round |
|
939 for (int j = 0; j < channels; j ++) |
|
940 { |
|
941 i --; |
|
942 norm += mixingPos[i] * mixingPos[i]; |
|
943 } |
|
944 |
|
945 return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); |
|
946 } |
|
947 |
|
948 |
|
949 #endif // SOUNDTOUCH_FLOAT_SAMPLES |