a8d0c51c69
* DSP: Implement Pipe 2 Pipe 2 is a DSP pipe that is used to initialize both the DSP hardware (the application signals to the DSP to initialize) and the application (the DSP provides the memory location of structures in the shared memory region). * AudioCore: Implement codecs (DecodeADPCM, DecodePCM8, DecodePCM16) * DSP Pipes: Implement as FIFO * AudioCore: File structure * AudioCore: More structure * AudioCore: Buffer management * DSP/Source: Reorganise Source's AdvanceFrame. * Audio Output * lolidk * huh? * interp * More interp stuff * oops * Zero State * Don't mix Source frame if it's not enabled * DSP: Forgot to zero a buffer, adjusted thread synchronisation, adjusted format spec for buffers * asdf * Get it to compile and tweak stretching a bit. * revert stretch test * deleted accidental partial catch submodule commit * new audio stretching algorithm * update .gitmodule * fix OS X build * remove getopt from rubberband * #include <stddef> to audio_core.h * typo * -framework Accelerate * OptionTransientsSmooth -> OptionTransientsCrisp * tweak stretch tempo smoothing coefficient. also switch back to smooth. * tweak mroe * remove printf * sola * #include <cmath> * VERY QUICK MERGE TO GET IT WORKING DOESN'T ACTIVATE AUDIO FILTERS * Reminder to self * fix comparison * common/thread: Correct code style * Thread: Make Barrier reusable * fix threading synchonisation code * add profiling code * print error to console when audio clips * fix metallic sound * reduce logspam
371 lines
11 KiB
C++
371 lines
11 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
///
|
|
/// Beats-per-minute (BPM) detection routine.
|
|
///
|
|
/// The beat detection algorithm works as follows:
|
|
/// - Use function 'inputSamples' to input chunks of samples to the class for
|
|
/// analysis. It's a good idea to enter a large sound file or stream in smallish
|
|
/// chunks of around a few kilosamples in order not to use up too much RAM.
|
|
/// - Inputted sound data is decimated to approx 1000 Hz to reduce calculation burden,
|
|
/// which is basically ok as low (bass) frequencies mostly determine the beat rate.
|
|
/// Simple averaging is used for anti-alias filtering because the resulting signal
|
|
/// quality isn't of that high importance.
|
|
/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
|
|
/// taking absolute value that's smoothed by sliding average. Signal levels that
|
|
/// are below a couple of times the general RMS amplitude level are cut away to
|
|
/// leave only notable peaks there.
|
|
/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
|
|
/// autocorrelation function of the enveloped signal.
|
|
/// - After whole sound data file has been analyzed as above, the bpm level is
|
|
/// detected by function 'getBpm' that finds the highest peak of the autocorrelation
|
|
/// function, calculates its precise location and converts this reading to bpm's.
|
|
///
|
|
/// Author : Copyright (c) Olli Parviainen
|
|
/// Author e-mail : oparviai 'at' iki.fi
|
|
/// SoundTouch WWW: http://www.surina.net/soundtouch
|
|
///
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Last changed : $Date: 2015-02-21 23:24:29 +0200 (Sat, 21 Feb 2015) $
|
|
// File revision : $Revision: 4 $
|
|
//
|
|
// $Id: BPMDetect.cpp 202 2015-02-21 21:24:29Z oparviai $
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// License :
|
|
//
|
|
// SoundTouch audio processing library
|
|
// Copyright (c) Olli Parviainen
|
|
//
|
|
// This library is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
// License as published by the Free Software Foundation; either
|
|
// version 2.1 of the License, or (at your option) any later version.
|
|
//
|
|
// This library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
// Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public
|
|
// License along with this library; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <math.h>
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include "FIFOSampleBuffer.h"
|
|
#include "PeakFinder.h"
|
|
#include "BPMDetect.h"
|
|
|
|
using namespace soundtouch;
|
|
|
|
// Upper bound on the number of input sample frames consumed per loop iteration in inputSamples().
#define INPUT_BLOCK_SAMPLES 2048
|
|
// Capacity, in samples, of the temporary array in inputSamples() that receives the output of decimate().
#define DECIMATED_BLOCK_SAMPLES 256
|
|
|
|
/// decay constant for calculating RMS volume sliding average approximation
|
|
/// (time constant is about 10 sec)
|
|
// calcEnvelope() multiplies RMSVolumeAccu by this factor once per processed sample.
const float avgdecay = 0.99986f;
|
|
|
|
/// Normalization coefficient for calculating RMS sliding average approximation.
|
|
// calcEnvelope() multiplies RMSVolumeAccu by this before taking the square root;
// the constructor divides by it when seeding RMSVolumeAccu.
const float avgnorm = (1 - avgdecay);
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Enable following define to create bpm analysis file:
|
|
|
|
// #define _CREATE_BPM_DEBUG_FILE
|
|
|
|
#ifdef _CREATE_BPM_DEBUG_FILE
|
|
|
|
#define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt"
|
|
|
|
static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff)
|
|
{
|
|
FILE *fptr = fopen(DEBUGFILE_NAME, "wt");
|
|
int i;
|
|
|
|
if (fptr)
|
|
{
|
|
printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n");
|
|
for (i = minpos; i < maxpos; i ++)
|
|
{
|
|
fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
|
|
}
|
|
fclose(fptr);
|
|
}
|
|
}
|
|
#else
|
|
#define _SaveDebugData(a,b,c,d)
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
/// Sets up a detector for input that has 'numChannels' channels and a sample
/// rate of 'aSampleRate'.
///
/// Resets the decimation and envelope state, seeds the RMS volume accumulator,
/// derives the decimation factor and the autocorrelation window limits, and
/// allocates the zero-filled autocorrelation array together with the mono
/// sample buffer.
BPMDetect::BPMDetect(int numChannels, int aSampleRate)
{
    sampleRate = aSampleRate;
    channels = numChannels;

    // decimation and envelope state start from scratch
    decimateSum = 0;
    decimateCount = 0;
    envelopeAccu = 0;

    // Seed the RMS volume accumulator with an RMS level of 1500 (out of 32768),
    // a safe initial guess for song data; processing adapts it to the real level.
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
    // integer samples
    RMSVolumeAccu = (1500 * 1500) / avgnorm;
#else
    // float samples, scaled to range [-1..+1[
    RMSVolumeAccu = (0.045f * 0.045f) / avgnorm;
#endif

    // pick the decimation factor that brings the rate down to approx. 1000 Hz
    decimateBy = sampleRate / 1000;
    assert(decimateBy > 0);
    assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);

    // Autocorrelation lags of interest: the lag matching MAX_BPM opens the
    // window and the lag matching MIN_BPM closes it.
    const int samplesPerMinute = 60 * sampleRate;
    windowLen = samplesPerMinute / (decimateBy * MIN_BPM);
    windowStart = samplesPerMinute / (decimateBy * MAX_BPM);
    assert(windowLen > windowStart);

    // autocorrelation accumulator, value-initialized so every entry starts at zero
    xcorr = new float[windowLen]();

    // processing buffer; all processing is done in mono
    buffer = new FIFOSampleBuffer();
    buffer->setChannels(1);
    buffer->clear();
}
|
|
|
|
|
|
|
|
/// Frees the autocorrelation array and the sample buffer that the constructor
/// allocated.
BPMDetect::~BPMDetect()
{
    delete[] xcorr;     // array form, matching the 'new float[]' in the constructor
    delete buffer;
}
|
|
|
|
|
|
|
|
/// convert to mono, low-pass filter & decimate to about 500 Hz.
|
|
/// return number of outputted samples.
|
|
///
|
|
/// Decimation is used to remove the unnecessary frequencies and thus to reduce
|
|
/// the amount of data needed to be processed as calculating autocorrelation
|
|
/// function is a very-very heavy operation.
|
|
///
|
|
/// Anti-alias filtering is done simply by averaging the samples. This is really a
|
|
/// poor-man's anti-alias filtering, but it's not so critical in this kind of application
|
|
/// (it'd also be difficult to design a high-quality filter with steep cut-off at very
|
|
/// narrow band)
|
|
int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
|
|
{
|
|
int count, outcount;
|
|
LONG_SAMPLETYPE out;
|
|
|
|
assert(channels > 0);
|
|
assert(decimateBy > 0);
|
|
outcount = 0;
|
|
for (count = 0; count < numsamples; count ++)
|
|
{
|
|
int j;
|
|
|
|
// convert to mono and accumulate
|
|
for (j = 0; j < channels; j ++)
|
|
{
|
|
decimateSum += src[j];
|
|
}
|
|
src += j;
|
|
|
|
decimateCount ++;
|
|
if (decimateCount >= decimateBy)
|
|
{
|
|
// Store every Nth sample only
|
|
out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
|
|
decimateSum = 0;
|
|
decimateCount = 0;
|
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
|
// check ranges for sure (shouldn't actually be necessary)
|
|
if (out > 32767)
|
|
{
|
|
out = 32767;
|
|
}
|
|
else if (out < -32768)
|
|
{
|
|
out = -32768;
|
|
}
|
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
|
dest[outcount] = (SAMPLETYPE)out;
|
|
outcount ++;
|
|
}
|
|
}
|
|
return outcount;
|
|
}
|
|
|
|
|
|
|
|
// Calculates autocorrelation function of the sample history buffer
|
|
void BPMDetect::updateXCorr(int process_samples)
|
|
{
|
|
int offs;
|
|
SAMPLETYPE *pBuffer;
|
|
|
|
assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
|
|
|
|
pBuffer = buffer->ptrBegin();
|
|
#pragma omp parallel for
|
|
for (offs = windowStart; offs < windowLen; offs ++)
|
|
{
|
|
LONG_SAMPLETYPE sum;
|
|
int i;
|
|
|
|
sum = 0;
|
|
for (i = 0; i < process_samples; i ++)
|
|
{
|
|
sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
|
|
}
|
|
// xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable coefficients
|
|
// if it's desired that the system adapts automatically to
|
|
// various bpms, e.g. in processing continouos music stream.
|
|
// The 'xcorr_decay' should be a value that's smaller than but
|
|
// close to one, and should also depend on 'process_samples' value.
|
|
|
|
xcorr[offs] += (float)sum;
|
|
}
|
|
}
|
|
|
|
|
|
// Calculates envelope of the sample data
|
|
void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples)
|
|
{
|
|
const static double decay = 0.7f; // decay constant for smoothing the envelope
|
|
const static double norm = (1 - decay);
|
|
|
|
int i;
|
|
LONG_SAMPLETYPE out;
|
|
double val;
|
|
|
|
for (i = 0; i < numsamples; i ++)
|
|
{
|
|
// calc average RMS volume
|
|
RMSVolumeAccu *= avgdecay;
|
|
val = (float)fabs((float)samples[i]);
|
|
RMSVolumeAccu += val * val;
|
|
|
|
// cut amplitudes that are below cutoff ~2 times RMS volume
|
|
// (we're interested in peak values, not the silent moments)
|
|
if (val < 0.5 * sqrt(RMSVolumeAccu * avgnorm))
|
|
{
|
|
val = 0;
|
|
}
|
|
|
|
// smooth amplitude envelope
|
|
envelopeAccu *= decay;
|
|
envelopeAccu += val;
|
|
out = (LONG_SAMPLETYPE)(envelopeAccu * norm);
|
|
|
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
|
// cut peaks (shouldn't be necessary though)
|
|
if (out > 32767) out = 32767;
|
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
|
samples[i] = (SAMPLETYPE)out;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
|
|
{
|
|
SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
|
|
|
|
// iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
|
|
while (numSamples > 0)
|
|
{
|
|
int block;
|
|
int decSamples;
|
|
|
|
block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples;
|
|
|
|
// decimate. note that converts to mono at the same time
|
|
decSamples = decimate(decimated, samples, block);
|
|
samples += block * channels;
|
|
numSamples -= block;
|
|
|
|
// envelope new samples and add them to buffer
|
|
calcEnvelope(decimated, decSamples);
|
|
buffer->putSamples(decimated, decSamples);
|
|
}
|
|
|
|
// when the buffer has enought samples for processing...
|
|
if ((int)buffer->numSamples() > windowLen)
|
|
{
|
|
int processLength;
|
|
|
|
// how many samples are processed
|
|
processLength = (int)buffer->numSamples() - windowLen;
|
|
|
|
// ... calculate autocorrelations for oldest samples...
|
|
updateXCorr(processLength);
|
|
// ... and remove them from the buffer
|
|
buffer->receiveSamples(processLength);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void BPMDetect::removeBias()
|
|
{
|
|
int i;
|
|
float minval = 1e12f; // arbitrary large number
|
|
|
|
for (i = windowStart; i < windowLen; i ++)
|
|
{
|
|
if (xcorr[i] < minval)
|
|
{
|
|
minval = xcorr[i];
|
|
}
|
|
}
|
|
|
|
for (i = windowStart; i < windowLen; i ++)
|
|
{
|
|
xcorr[i] -= minval;
|
|
}
|
|
}
|
|
|
|
|
|
float BPMDetect::getBpm()
|
|
{
|
|
double peakPos;
|
|
double coeff;
|
|
PeakFinder peakFinder;
|
|
|
|
coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
|
|
|
|
// save bpm debug analysis data if debug data enabled
|
|
_SaveDebugData(xcorr, windowStart, windowLen, coeff);
|
|
|
|
// remove bias from xcorr data
|
|
removeBias();
|
|
|
|
// find peak position
|
|
peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen);
|
|
|
|
assert(decimateBy != 0);
|
|
if (peakPos < 1e-9) return 0.0; // detection failed.
|
|
|
|
// calculate BPM
|
|
return (float) (coeff / peakPos);
|
|
}
|