media/libsoundtouch/src/TDStretch.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ////////////////////////////////////////////////////////////////////////////////
     2 /// 
     3 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
     4 /// while maintaining the original pitch by using a time domain WSOLA-like 
     5 /// method with several performance-increasing tweaks.
     6 ///
     7 /// Note : MMX optimized functions reside in a separate, platform-specific 
     8 /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
     9 ///
    10 /// Author        : Copyright (c) Olli Parviainen
    11 /// Author e-mail : oparviai 'at' iki.fi
    12 /// SoundTouch WWW: http://www.surina.net/soundtouch
    13 ///
    14 ////////////////////////////////////////////////////////////////////////////////
    15 //
    16 // Last changed  : $Date: 2014-04-06 10:57:21 -0500 (Sun, 06 Apr 2014) $
    17 // File revision : $Revision: 1.12 $
    18 //
    19 // $Id: TDStretch.cpp 195 2014-04-06 15:57:21Z oparviai $
    20 //
    21 ////////////////////////////////////////////////////////////////////////////////
    22 //
    23 // License :
    24 //
    25 //  SoundTouch audio processing library
    26 //  Copyright (c) Olli Parviainen
    27 //
    28 //  This library is free software; you can redistribute it and/or
    29 //  modify it under the terms of the GNU Lesser General Public
    30 //  License as published by the Free Software Foundation; either
    31 //  version 2.1 of the License, or (at your option) any later version.
    32 //
    33 //  This library is distributed in the hope that it will be useful,
    34 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
    35 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    36 //  Lesser General Public License for more details.
    37 //
    38 //  You should have received a copy of the GNU Lesser General Public
    39 //  License along with this library; if not, write to the Free Software
    40 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    41 //
    42 ////////////////////////////////////////////////////////////////////////////////
    44 #include <string.h>
    45 #include <limits.h>
    46 #include <assert.h>
    47 #include <math.h>
    48 #include <float.h>
    50 #include "STTypes.h"
    51 #include "cpu_detect.h"
    52 #include "TDStretch.h"
    54 using namespace soundtouch;
    56 #define max(x, y) (((x) > (y)) ? (x) : (y))
    59 /*****************************************************************************
    60  *
    61  * Constant definitions
    62  *
    63  *****************************************************************************/
    65 // Table for the hierarchical mixing position seeking algorithm
    66 static const short _scanOffsets[5][24]={
    67     { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
    68       868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
    69     {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
    70         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
    71     { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
    72         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
    73     {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
    74         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
    75     { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,
    76       116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
    78 /*****************************************************************************
    79  *
    80  * Implementation of the class 'TDStretch'
    81  *
    82  *****************************************************************************/
    85 TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
    86 {
    87     bQuickSeek = false;
    88     channels = 2;
    90     pMidBuffer = NULL;
    91     pMidBufferUnaligned = NULL;
    92     overlapLength = 0;
    94     bAutoSeqSetting = true;
    95     bAutoSeekSetting = true;
    97 //    outDebt = 0;
    98     skipFract = 0;
   100     tempo = 1.0f;
   101     setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
   102     setTempo(1.0f);
   104     clear();
   105 }
   109 TDStretch::~TDStretch()
   110 {
   111     delete[] pMidBufferUnaligned;
   112 }
   116 // Sets routine control parameters. These control are certain time constants
   117 // defining how the sound is stretched to the desired duration.
   118 //
   119 // 'sampleRate' = sample rate of the sound
   120 // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
   121 // 'seekwindowMS' = seeking window length for scanning the best overlapping 
   122 //      position (default = 28 ms)
   123 // 'overlapMS' = overlapping length (default = 12 ms)
   125 void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 
   126                               int aSeekWindowMS, int aOverlapMS)
   127 {
   128     // accept only positive parameter values - if zero or negative, use old values instead
   129     if (aSampleRate > 0)   this->sampleRate = aSampleRate;
   130     if (aOverlapMS > 0)    this->overlapMs = aOverlapMS;
   132     if (aSequenceMS > 0)
   133     {
   134         this->sequenceMs = aSequenceMS;
   135         bAutoSeqSetting = false;
   136     } 
   137     else if (aSequenceMS == 0)
   138     {
   139         // if zero, use automatic setting
   140         bAutoSeqSetting = true;
   141     }
   143     if (aSeekWindowMS > 0) 
   144     {
   145         this->seekWindowMs = aSeekWindowMS;
   146         bAutoSeekSetting = false;
   147     } 
   148     else if (aSeekWindowMS == 0) 
   149     {
   150         // if zero, use automatic setting
   151         bAutoSeekSetting = true;
   152     }
   154     calcSeqParameters();
   156     calculateOverlapLength(overlapMs);
   158     // set tempo to recalculate 'sampleReq'
   159     setTempo(tempo);
   160 }
   164 /// Get routine control parameters, see setParameters() function.
   165 /// Any of the parameters to this function can be NULL, in such case corresponding parameter
   166 /// value isn't returned.
   167 void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
   168 {
   169     if (pSampleRate)
   170     {
   171         *pSampleRate = sampleRate;
   172     }
   174     if (pSequenceMs)
   175     {
   176         *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
   177     }
   179     if (pSeekWindowMs)
   180     {
   181         *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
   182     }
   184     if (pOverlapMs)
   185     {
   186         *pOverlapMs = overlapMs;
   187     }
   188 }
   191 // Overlaps samples in 'midBuffer' with the samples in 'pInput'
   192 void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
   193 {
   194     int i;
   195     SAMPLETYPE m1, m2;
   197     m1 = (SAMPLETYPE)0;
   198     m2 = (SAMPLETYPE)overlapLength;
   200     for (i = 0; i < overlapLength ; i ++) 
   201     {
   202         pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
   203         m1 += 1;
   204         m2 -= 1;
   205     }
   206 }
   210 void TDStretch::clearMidBuffer()
   211 {
   212     memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
   213 }
   216 void TDStretch::clearInput()
   217 {
   218     inputBuffer.clear();
   219     clearMidBuffer();
   220 }
   223 // Clears the sample buffers
   224 void TDStretch::clear()
   225 {
   226     outputBuffer.clear();
   227     clearInput();
   228 }
   232 // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
   233 // to enable
   234 void TDStretch::enableQuickSeek(bool enable)
   235 {
   236     bQuickSeek = enable;
   237 }
   240 // Returns nonzero if the quick seeking algorithm is enabled.
   241 bool TDStretch::isQuickSeekEnabled() const
   242 {
   243     return bQuickSeek;
   244 }
   247 // Seeks for the optimal overlap-mixing position.
   248 int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
   249 {
   250     if (bQuickSeek) 
   251     {
   252         return seekBestOverlapPositionQuick(refPos);
   253     } 
   254     else 
   255     {
   256         return seekBestOverlapPositionFull(refPos);
   257     }
   258 }
   261 // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
   262 // of 'ovlPos'.
   263 inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
   264 {
   265 #ifndef USE_MULTICH_ALWAYS
   266     if (channels == 1)
   267     {
   268         // mono sound.
   269         overlapMono(pOutput, pInput + ovlPos);
   270     }
   271     else if (channels == 2)
   272     {
   273         // stereo sound
   274         overlapStereo(pOutput, pInput + 2 * ovlPos);
   275     } 
   276     else 
   277 #endif // USE_MULTICH_ALWAYS
   278     {
   279         assert(channels > 0);
   280         overlapMulti(pOutput, pInput + channels * ovlPos);
   281     }
   282 }
   286 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
   287 // routine
   288 //
   289 // The best position is determined as the position where the two overlapped
   290 // sample sequences are 'most alike', in terms of the highest cross-correlation
   291 // value over the overlapping period
   292 int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 
   293 {
   294     int bestOffs;
   295     double bestCorr, corr;
   296     double norm;
   297     int i;
   299     bestCorr = FLT_MIN;
   300     bestOffs = 0;
   302     // Scans for the best correlation value by testing each possible position
   303     // over the permitted range.
   304     bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
   305     for (i = 1; i < seekLength; i ++) 
   306     {
   307         // Calculates correlation value for the mixing position corresponding
   308         // to 'i'. Now call "calcCrossCorrAccumulate" that is otherwise same as
   309         // "calcCrossCorr", but saves time by reusing & updating previously stored 
   310         // "norm" value
   311         corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
   313         // heuristic rule to slightly favour values close to mid of the range
   314         double tmp = (double)(2 * i - seekLength) / (double)seekLength;
   315         corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
   317         // Checks for the highest correlation value
   318         if (corr > bestCorr) 
   319         {
   320             bestCorr = corr;
   321             bestOffs = i;
   322         }
   323     }
   324     // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
   325     clearCrossCorrState();
   327     return bestOffs;
   328 }
   331 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
   332 // routine
   333 //
   334 // The best position is determined as the position where the two overlapped
   335 // sample sequences are 'most alike', in terms of the highest cross-correlation
   336 // value over the overlapping period
   337 int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) 
   338 {
   339     int j;
   340     int bestOffs;
   341     double bestCorr, corr;
   342     int scanCount, corrOffset, tempOffset;
   344     bestCorr = FLT_MIN;
   345     bestOffs = _scanOffsets[0][0];
   346     corrOffset = 0;
   347     tempOffset = 0;
   349     // Scans for the best correlation value using four-pass hierarchical search.
   350     //
   351     // The look-up table 'scans' has hierarchical position adjusting steps.
   352     // In first pass the routine searhes for the highest correlation with 
   353     // relatively coarse steps, then rescans the neighbourhood of the highest
   354     // correlation with better resolution and so on.
   355     for (scanCount = 0;scanCount < 4; scanCount ++) 
   356     {
   357         j = 0;
   358         while (_scanOffsets[scanCount][j]) 
   359         {
   360             double norm;
   361             tempOffset = corrOffset + _scanOffsets[scanCount][j];
   362             if (tempOffset >= seekLength) break;
   364             // Calculates correlation value for the mixing position corresponding
   365             // to 'tempOffset'
   366             corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer, norm);
   367             // heuristic rule to slightly favour values close to mid of the range
   368             double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
   369             corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
   371             // Checks for the highest correlation value
   372             if (corr > bestCorr) 
   373             {
   374                 bestCorr = corr;
   375                 bestOffs = tempOffset;
   376             }
   377             j ++;
   378         }
   379         corrOffset = bestOffs;
   380     }
   381     // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
   382     clearCrossCorrState();
   384     return bestOffs;
   385 }
   389 /// clear cross correlation routine state if necessary 
   390 void TDStretch::clearCrossCorrState()
   391 {
   392     // default implementation is empty.
   393 }
   396 /// Calculates processing sequence length according to tempo setting
   397 void TDStretch::calcSeqParameters()
   398 {
   399     // Adjust tempo param according to tempo, so that variating processing sequence length is used
   400     // at varius tempo settings, between the given low...top limits
   401     #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)
   402     #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)
   404     // sequence-ms setting values at above low & top tempo
   405     #define AUTOSEQ_AT_MIN      125.0
   406     #define AUTOSEQ_AT_MAX      50.0
   407     #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
   408     #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
   410     // seek-window-ms setting values at above low & top tempo
   411     #define AUTOSEEK_AT_MIN     25.0
   412     #define AUTOSEEK_AT_MAX     15.0
   413     #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
   414     #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
   416     #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
   418     double seq, seek;
   420     if (bAutoSeqSetting)
   421     {
   422         seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
   423         seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
   424         sequenceMs = (int)(seq + 0.5);
   425     }
   427     if (bAutoSeekSetting)
   428     {
   429         seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
   430         seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
   431         seekWindowMs = (int)(seek + 0.5);
   432     }
   434     // Update seek window lengths
   435     seekWindowLength = (sampleRate * sequenceMs) / 1000;
   436     if (seekWindowLength < 2 * overlapLength) 
   437     {
   438         seekWindowLength = 2 * overlapLength;
   439     }
   440     seekLength = (sampleRate * seekWindowMs) / 1000;
   441 }
   445 // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower 
   446 // tempo, larger faster tempo.
   447 void TDStretch::setTempo(float newTempo)
   448 {
   449     int intskip;
   451     tempo = newTempo;
   453     // Calculate new sequence duration
   454     calcSeqParameters();
   456     // Calculate ideal skip length (according to tempo value) 
   457     nominalSkip = tempo * (seekWindowLength - overlapLength);
   458     intskip = (int)(nominalSkip + 0.5f);
   460     // Calculate how many samples are needed in the 'inputBuffer' to 
   461     // process another batch of samples
   462     //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
   463     sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
   464 }
   468 // Sets the number of channels, 1 = mono, 2 = stereo
   469 void TDStretch::setChannels(int numChannels)
   470 {
   471     assert(numChannels > 0);
   472     if (channels == numChannels) return;
   473 //    assert(numChannels == 1 || numChannels == 2);
   475     channels = numChannels;
   476     inputBuffer.setChannels(channels);
   477     outputBuffer.setChannels(channels);
   479     // re-init overlap/buffer
   480     overlapLength=0;
   481     setParameters(sampleRate);
   482 }
   485 // nominal tempo, no need for processing, just pass the samples through
   486 // to outputBuffer
   487 /*
   488 void TDStretch::processNominalTempo()
   489 {
   490     assert(tempo == 1.0f);
   492     if (bMidBufferDirty) 
   493     {
   494         // If there are samples in pMidBuffer waiting for overlapping,
   495         // do a single sliding overlapping with them in order to prevent a 
   496         // clicking distortion in the output sound
   497         if (inputBuffer.numSamples() < overlapLength) 
   498         {
   499             // wait until we've got overlapLength input samples
   500             return;
   501         }
   502         // Mix the samples in the beginning of 'inputBuffer' with the 
   503         // samples in 'midBuffer' using sliding overlapping 
   504         overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
   505         outputBuffer.putSamples(overlapLength);
   506         inputBuffer.receiveSamples(overlapLength);
   507         clearMidBuffer();
   508         // now we've caught the nominal sample flow and may switch to
   509         // bypass mode
   510     }
   512     // Simply bypass samples from input to output
   513     outputBuffer.moveSamples(inputBuffer);
   514 }
   515 */
   518 // Processes as many processing frames of the samples 'inputBuffer', store
   519 // the result into 'outputBuffer'
   520 void TDStretch::processSamples()
   521 {
   522     int ovlSkip, offset;
   523     int temp;
   525     /* Removed this small optimization - can introduce a click to sound when tempo setting
   526        crosses the nominal value
   527     if (tempo == 1.0f) 
   528     {
   529         // tempo not changed from the original, so bypass the processing
   530         processNominalTempo();
   531         return;
   532     }
   533     */
   535     // Process samples as long as there are enough samples in 'inputBuffer'
   536     // to form a processing frame.
   537     while ((int)inputBuffer.numSamples() >= sampleReq) 
   538     {
   539         // If tempo differs from the normal ('SCALE'), scan for the best overlapping
   540         // position
   541         offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
   543         // Mix the samples in the 'inputBuffer' at position of 'offset' with the 
   544         // samples in 'midBuffer' using sliding overlapping
   545         // ... first partially overlap with the end of the previous sequence
   546         // (that's in 'midBuffer')
   547         overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
   548         outputBuffer.putSamples((uint)overlapLength);
   550         // ... then copy sequence samples from 'inputBuffer' to output:
   552         // length of sequence
   553         temp = (seekWindowLength - 2 * overlapLength);
   555         // crosscheck that we don't have buffer overflow...
   556         if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
   557         {
   558             continue;    // just in case, shouldn't really happen
   559         }
   561         outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
   563         // Copies the end of the current sequence from 'inputBuffer' to 
   564         // 'midBuffer' for being mixed with the beginning of the next 
   565         // processing sequence and so on
   566         assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
   567         memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), 
   568             channels * sizeof(SAMPLETYPE) * overlapLength);
   570         // Remove the processed samples from the input buffer. Update
   571         // the difference between integer & nominal skip step to 'skipFract'
   572         // in order to prevent the error from accumulating over time.
   573         skipFract += nominalSkip;   // real skip size
   574         ovlSkip = (int)skipFract;   // rounded to integer skip
   575         skipFract -= ovlSkip;       // maintain the fraction part, i.e. real vs. integer skip
   576         inputBuffer.receiveSamples((uint)ovlSkip);
   577     }
   578 }
   581 // Adds 'numsamples' pcs of samples from the 'samples' memory position into
   582 // the input of the object.
   583 void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
   584 {
   585     // Add the samples into the input buffer
   586     inputBuffer.putSamples(samples, nSamples);
   587     // Process the samples in input buffer
   588     processSamples();
   589 }
   593 /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
   594 void TDStretch::acceptNewOverlapLength(int newOverlapLength)
   595 {
   596     int prevOvl;
   598     assert(newOverlapLength >= 0);
   599     prevOvl = overlapLength;
   600     overlapLength = newOverlapLength;
   602     if (overlapLength > prevOvl)
   603     {
   604         delete[] pMidBufferUnaligned;
   606         pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
   607         // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
   608         pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
   610         clearMidBuffer();
   611     }
   612 }
   615 // Operator 'new' is overloaded so that it automatically creates a suitable instance 
   616 // depending on if we've a MMX/SSE/etc-capable CPU available or not.
   617 void * TDStretch::operator new(size_t s)
   618 {
   619     // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
   620     ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
   621     return newInstance();
   622 }
   625 TDStretch * TDStretch::newInstance()
   626 {
   627 #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
   628     uint uExtensions;
   630     uExtensions = detectCPUextensions();
   631 #endif
   633     // Check if MMX/SSE instruction set extensions supported by CPU
   635 #ifdef SOUNDTOUCH_ALLOW_MMX
   636     // MMX routines available only with integer sample types
   637     if (uExtensions & SUPPORT_MMX)
   638     {
   639         return ::new TDStretchMMX;
   640     }
   641     else
   642 #endif // SOUNDTOUCH_ALLOW_MMX
   645 #ifdef SOUNDTOUCH_ALLOW_SSE
   646     if (uExtensions & SUPPORT_SSE)
   647     {
   648         // SSE support
   649         return ::new TDStretchSSE;
   650     }
   651     else
   652 #endif // SOUNDTOUCH_ALLOW_SSE
   654     {
   655         // ISA optimizations not supported, use plain C version
   656         return ::new TDStretch;
   657     }
   658 }
   661 //////////////////////////////////////////////////////////////////////////////
   662 //
   663 // Integer arithmetics specific algorithm implementations.
   664 //
   665 //////////////////////////////////////////////////////////////////////////////
   667 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
   669 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 
   670 // version of the routine.
   671 void TDStretch::overlapStereo(short *poutput, const short *input) const
   672 {
   673     int i;
   674     short temp;
   675     int cnt2;
   677     for (i = 0; i < overlapLength ; i ++) 
   678     {
   679         temp = (short)(overlapLength - i);
   680         cnt2 = 2 * i;
   681         poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp )  / overlapLength;
   682         poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
   683     }
   684 }
   687 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
   688 // version of the routine.
   689 void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
   690 {
   691     SAMPLETYPE m1=(SAMPLETYPE)0;
   692     SAMPLETYPE m2;
   693     int i=0;
   695     for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
   696     {
   697         for (int c = 0; c < channels; c ++)
   698         {
   699             poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2)  / overlapLength;
   700             i++;
   701         }
   703         m1++;
   704     }
   705 }
   707 // Calculates the x having the closest 2^x value for the given value
   708 static int _getClosest2Power(double value)
   709 {
   710     return (int)(log(value) / log(2.0) + 0.5);
   711 }
   714 /// Calculates overlap period length in samples.
   715 /// Integer version rounds overlap length to closest power of 2
   716 /// for a divide scaling operation.
   717 void TDStretch::calculateOverlapLength(int aoverlapMs)
   718 {
   719     int newOvl;
   721     assert(aoverlapMs >= 0);
   723     // calculate overlap length so that it's power of 2 - thus it's easy to do
   724     // integer division by right-shifting. Term "-1" at end is to account for 
   725     // the extra most significatnt bit left unused in result by signed multiplication 
   726     overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
   727     if (overlapDividerBits > 9) overlapDividerBits = 9;
   728     if (overlapDividerBits < 3) overlapDividerBits = 3;
   729     newOvl = (int)pow(2.0, (int)overlapDividerBits + 1);    // +1 => account for -1 above
   731     acceptNewOverlapLength(newOvl);
   733     // calculate sloping divider so that crosscorrelation operation won't 
   734     // overflow 32-bit register. Max. sum of the crosscorrelation sum without 
   735     // divider would be 2^30*(N^3-N)/3, where N = overlap length
   736     slopingDivider = (newOvl * newOvl - 1) / 3;
   737 }
   740 double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) const
   741 {
   742     long corr;
   743     long lnorm;
   744     int i;
   746     corr = lnorm = 0;
   747     // Same routine for stereo and mono. For stereo, unroll loop for better
   748     // efficiency and gives slightly better resolution against rounding. 
   749     // For mono it same routine, just  unrolls loop by factor of 4
   750     for (i = 0; i < channels * overlapLength; i += 4) 
   751     {
   752         corr += (mixingPos[i] * compare[i] + 
   753                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;  // notice: do intermediate division here to avoid integer overflow
   754         corr += (mixingPos[i + 2] * compare[i + 2] + 
   755                  mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
   756         lnorm += (mixingPos[i] * mixingPos[i] + 
   757                   mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBits; // notice: do intermediate division here to avoid integer overflow
   758         lnorm += (mixingPos[i + 2] * mixingPos[i + 2] + 
   759                   mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits;
   760     }
   762     // Normalize result by dividing by sqrt(norm) - this step is easiest 
   763     // done using floating point operation
   764     norm = (double)lnorm;
   765     return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
   766 }
   769 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
   770 double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) const
   771 {
   772     long corr;
   773     long lnorm;
   774     int i;
   776     // cancel first normalizer tap from previous round
   777     lnorm = 0;
   778     for (i = 1; i <= channels; i ++)
   779     {
   780         lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBits;
   781     }
   783     corr = 0;
   784     // Same routine for stereo and mono. For stereo, unroll loop for better
   785     // efficiency and gives slightly better resolution against rounding. 
   786     // For mono it same routine, just  unrolls loop by factor of 4
   787     for (i = 0; i < channels * overlapLength; i += 4) 
   788     {
   789         corr += (mixingPos[i] * compare[i] + 
   790                  mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;  // notice: do intermediate division here to avoid integer overflow
   791         corr += (mixingPos[i + 2] * compare[i + 2] + 
   792                  mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits;
   793     }
   795     // update normalizer with last samples of this round
   796     for (int j = 0; j < channels; j ++)
   797     {
   798         i --;
   799         lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBits;
   800     }
   801     norm += (double)lnorm;
   803     // Normalize result by dividing by sqrt(norm) - this step is easiest 
   804     // done using floating point operation
   805     return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
   806 }
   808 #endif // SOUNDTOUCH_INTEGER_SAMPLES
   810 //////////////////////////////////////////////////////////////////////////////
   811 //
   812 // Floating point arithmetics specific algorithm implementations.
   813 //
   815 #ifdef SOUNDTOUCH_FLOAT_SAMPLES
   817 // Overlaps samples in 'midBuffer' with the samples in 'pInput'
   818 void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
   819 {
   820     int i;
   821     float fScale;
   822     float f1;
   823     float f2;
   825     fScale = 1.0f / (float)overlapLength;
   827     f1 = 0;
   828     f2 = 1.0f;
   830     for (i = 0; i < 2 * (int)overlapLength ; i += 2) 
   831     {
   832         pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
   833         pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
   835         f1 += fScale;
   836         f2 -= fScale;
   837     }
   838 }
   841 // Overlaps samples in 'midBuffer' with the samples in 'input'. 
   842 void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
   843 {
   844     int i;
   845     float fScale;
   846     float f1;
   847     float f2;
   849     fScale = 1.0f / (float)overlapLength;
   851     f1 = 0;
   852     f2 = 1.0f;
   854     i=0;
   855     for (int i2 = 0; i2 < overlapLength; i2 ++)
   856     {
   857         // note: Could optimize this slightly by taking into account that always channels > 2
   858         for (int c = 0; c < channels; c ++)
   859         {
   860             pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
   861             i++;
   862         }
   863         f1 += fScale;
   864         f2 -= fScale;
   865     }
   866 }
   869 /// Calculates overlapInMsec period length in samples.
   870 void TDStretch::calculateOverlapLength(int overlapInMsec)
   871 {
   872     int newOvl;
   874     assert(overlapInMsec >= 0);
   875     newOvl = (sampleRate * overlapInMsec) / 1000;
   876     if (newOvl < 16) newOvl = 16;
   878     // must be divisible by 8
   879     newOvl -= newOvl % 8;
   881     acceptNewOverlapLength(newOvl);
   882 }
   885 /// Calculate cross-correlation
   886 double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &norm) const
   887 {
   888     double corr;
   889     int i;
   891     corr = norm = 0;
   892     // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
   893     // For mono it's same routine yet unrollsd by factor of 4.
   894     for (i = 0; i < channels * overlapLength; i += 4) 
   895     {
   896         corr += mixingPos[i] * compare[i] +
   897                 mixingPos[i + 1] * compare[i + 1];
   899         norm += mixingPos[i] * mixingPos[i] + 
   900                 mixingPos[i + 1] * mixingPos[i + 1];
   902         // unroll the loop for better CPU efficiency:
   903         corr += mixingPos[i + 2] * compare[i + 2] +
   904                 mixingPos[i + 3] * compare[i + 3];
   906         norm += mixingPos[i + 2] * mixingPos[i + 2] +
   907                 mixingPos[i + 3] * mixingPos[i + 3];
   908     }
   910     return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
   911 }
   914 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
   915 double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) const
   916 {
   917     double corr;
   918     int i;
   920     corr = 0;
   922     // cancel first normalizer tap from previous round
   923     for (i = 1; i <= channels; i ++)
   924     {
   925         norm -= mixingPos[-i] * mixingPos[-i];
   926     }
   928     // Same routine for stereo and mono. For Stereo, unroll by factor of 2.
   929     // For mono it's same routine yet unrollsd by factor of 4.
   930     for (i = 0; i < channels * overlapLength; i += 4) 
   931     {
   932         corr += mixingPos[i] * compare[i] +
   933                 mixingPos[i + 1] * compare[i + 1] +
   934                 mixingPos[i + 2] * compare[i + 2] +
   935                 mixingPos[i + 3] * compare[i + 3];
   936     }
   938     // update normalizer with last samples of this round
   939     for (int j = 0; j < channels; j ++)
   940     {
   941         i --;
   942         norm += mixingPos[i] * mixingPos[i];
   943     }
   945     return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
   946 }
   949 #endif // SOUNDTOUCH_FLOAT_SAMPLES

mercurial