aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/puredata/simple-test.pd11
-rw-r--r--examples/puredata/xtract~.c13
-rw-r--r--src/Makefile.am1
-rw-r--r--src/descriptors.c16
-rwxr-xr-xsrc/dywapitchtrack/dywapitchtrack.c448
-rwxr-xr-xsrc/dywapitchtrack/dywapitchtrack.h110
-rw-r--r--src/init.c6
-rw-r--r--src/libxtract.c1
-rw-r--r--src/scalar.c23
-rw-r--r--src/xtract_globals_private.h3
-rw-r--r--xtract/libxtract.h4
-rw-r--r--xtract/xtract_scalar.h15
12 files changed, 636 insertions, 15 deletions
diff --git a/examples/puredata/simple-test.pd b/examples/puredata/simple-test.pd
index f9bd79c..dce728c 100644
--- a/examples/puredata/simple-test.pd
+++ b/examples/puredata/simple-test.pd
@@ -1,11 +1,8 @@
#N canvas 642 248 670 289 10;
#X obj 192 63 osc~ 440;
#X floatatom 194 187 10 0 0 0 - - -;
-#X obj 194 150 xtract~ spectral_mean;
#X obj 193 94 *~ 0.3;
-#X text 337 121 Optional second argument gives blocksize;
-#X obj 193 121 xtract~ spectrum 64;
-#X connect 0 0 3 0;
-#X connect 2 0 1 0;
-#X connect 3 0 5 0;
-#X connect 5 0 2 0;
+#X obj 193 137 xtract~ f0;
+#X connect 0 0 2 0;
+#X connect 2 0 3 0;
+#X connect 3 0 1 0;
diff --git a/examples/puredata/xtract~.c b/examples/puredata/xtract~.c
index 8ec1c15..a7f7325 100644
--- a/examples/puredata/xtract~.c
+++ b/examples/puredata/xtract~.c
@@ -72,7 +72,7 @@ static t_int *xtract_perform(t_int *w) {
t_int rv = 0;
double result = 0.0;
- for(n = 0; n < N; ++n) {
+ for(t_int n = 0; n < N; ++n) {
x->data[n] = (double)in[n];
}
@@ -148,7 +148,7 @@ static void *xtract_new(t_symbol *me, t_int argc, t_atom *argv) {
t_int n, N, M, f, F,
n_args,
type;
- t_float *argv_max;
+ double *argv_max;
t_symbol *arg1;
xtract_function_descriptor_t *fd;
char *p_name,
@@ -259,7 +259,7 @@ static void *xtract_new(t_symbol *me, t_int argc, t_atom *argv) {
if(x->is_subframe)
N = M;
- post("xtract~: window size: %d", N);
+ post("xtract~: assumed window size: %d", N);
/* do init if needed */
if(x->feature == XTRACT_MFCC){
@@ -271,9 +271,9 @@ static void *xtract_new(t_symbol *me, t_int argc, t_atom *argv) {
post("xtract~: mfcc: filters = %d",
((xtract_mel_filter *)x->argv)->n_filters);
mf->filters =
- (t_float **)getbytes(mf->n_filters * sizeof(t_float *));
+ (double **)getbytes(mf->n_filters * sizeof(double *));
for(n = 0; n < mf->n_filters; n++)
- mf->filters[n] = (float *)getbytes(N * sizeof(float));
+ mf->filters[n] = (double *)getbytes(N * sizeof(double));
xtract_init_mfcc(N, NYQUIST, XTRACT_EQUAL_GAIN, 80.0f,
18000.0f, mf->n_filters, mf->filters);
@@ -288,6 +288,9 @@ static void *xtract_new(t_symbol *me, t_int argc, t_atom *argv) {
x->argv = x->window;
x->done_init = 1;
}
+ else if(x->feature == XTRACT_WAVELET_F0){
+ xtract_init_wavelet_f0_state();
+ }
/* Initialise fft_plan if required */
if(x->feature == XTRACT_AUTOCORRELATION_FFT ||
diff --git a/src/Makefile.am b/src/Makefile.am
index 93b9eac..71f81ca 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -16,6 +16,7 @@ SOURCES = libxtract.c \
window.c \
fini.c \
helper.c \
+ dywapitchtrack/dywapitchtrack.c \
$(OOURA)
lib_LTLIBRARIES = libxtract.la
diff --git a/src/descriptors.c b/src/descriptors.c
index e43d9d5..044f3d4 100644
--- a/src/descriptors.c
+++ b/src/descriptors.c
@@ -87,6 +87,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
break;
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
*argv_min = XTRACT_SR_LOWER_LIMIT;
*argv_max = XTRACT_SR_UPPER_LIMIT;
*argv_def = XTRACT_SR_DEFAULT;
@@ -228,6 +229,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
case XTRACT_LOWEST_VALUE:
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
*argv_donor = XTRACT_ANY;
break;
case XTRACT_MFCC:
@@ -346,6 +348,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
break;
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
case XTRACT_SPECTRUM:
case XTRACT_AUTOCORRELATION:
case XTRACT_AUTOCORRELATION_FFT:
@@ -413,6 +416,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
case XTRACT_LNORM:
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
case XTRACT_MFCC:
case XTRACT_AUTOCORRELATION:
case XTRACT_AUTOCORRELATION_FFT:
@@ -608,6 +612,14 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
"Extract the fundamental frequency of an audio signal (failsafe)");
strcpy(author, "Jamie Bullock");
break;
+ case XTRACT_WAVELET_F0:
+ strcpy(name, "wavelet_f0");
+ strcpy(p_name, "Fundamental Frequency (wavelet method)");
+ strcpy(desc, "Extract the fundamental frequency of a signal (wavelet method)");
+ strcpy(p_desc,
+ "Extract the fundamental frequency of an audio signal (wavelet method)");
+ strcpy(author, "Antoine Schmitt");
+ break;
case XTRACT_TONALITY:
strcpy(name, "tonality");
strcpy(p_name, "Tonality");
@@ -980,6 +992,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
case XTRACT_LOWEST_VALUE:
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
case XTRACT_FLATNESS_DB:
case XTRACT_TONALITY:
*argc = 1;
@@ -1103,6 +1116,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
case XTRACT_HPS:
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
case XTRACT_FLUX:
case XTRACT_LNORM:
case XTRACT_NONZERO_COUNT:
@@ -1180,6 +1194,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
case XTRACT_HPS:
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
case XTRACT_NONZERO_COUNT:
case XTRACT_AUTOCORRELATION:
case XTRACT_AMDF:
@@ -1248,6 +1263,7 @@ xtract_function_descriptor_t *xtract_make_descriptors(void)
case XTRACT_SPREAD:
case XTRACT_F0:
case XTRACT_FAILSAFE_F0:
+ case XTRACT_WAVELET_F0:
case XTRACT_HPS:
case XTRACT_ROLLOFF:
*result_unit = XTRACT_HERTZ;
diff --git a/src/dywapitchtrack/dywapitchtrack.c b/src/dywapitchtrack/dywapitchtrack.c
new file mode 100755
index 0000000..b66000c
--- /dev/null
+++ b/src/dywapitchtrack/dywapitchtrack.c
@@ -0,0 +1,448 @@
+/* dywapitchtrack.c
+
+ Dynamic Wavelet Algorithm Pitch Tracking library
+ Released under the MIT open source licence
+
+ Copyright (c) 2010 Antoine Schmitt
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+#include "dywapitchtrack.h"
+#include <math.h>
+#include <stdlib.h>
+#include <string.h> // for memset
+
+
+//**********************
+// Utils
+//**********************
+
+#ifndef max
+#define max(x, y) ((x) > (y)) ? (x) : (y)
+#endif
+#ifndef min
+#define min(x, y) ((x) < (y)) ? (x) : (y)
+#endif
+
+// returns 1 if power of 2
+int _power2p(int value) {
+ if (value == 0) return 1;
+ if (value == 2) return 1;
+ if (value & 0x1) return 0;
+ return (_power2p(value >> 1));
+}
+
+// count number of bits
+int _bitcount(int value) {
+ if (value == 0) return 0;
+ if (value == 1) return 1;
+ if (value == 2) return 2;
+ return _bitcount(value >> 1) + 1;
+}
+
+// closest power of 2 above or equal
+int _ceil_power2(int value) {
+ if (_power2p(value)) return value;
+
+ if (value == 1) return 2;
+ int j, i = _bitcount(value);
+ int res = 1;
+ for (j = 0; j < i; j++) res <<= 1;
+ return res;
+}
+
+// closest power of 2 below or equal
+int _floor_power2(int value) {
+ if (_power2p(value)) return value;
+ return _ceil_power2(value)/2;
+}
+
+// abs value
+int _iabs(int x) {
+ if (x >= 0) return x;
+ return -x;
+}
+
+// 2 power
+int _2power(int i) {
+ int res = 1, j;
+ for (j = 0; j < i; j++) res <<= 1;
+ return res;
+}
+
+//******************************
+// the Wavelet algorithm itself
+//******************************
+
+int dywapitch_neededsamplecount(int minFreq) {
+ int nbSam = 3*44100/minFreq; // 1017. for 130 Hz
+ nbSam = _ceil_power2(nbSam); // 1024
+ return nbSam;
+}
+
+typedef struct _minmax {
+ int index;
+ struct _minmax *next;
+} minmax;
+
+double _dywapitch_computeWaveletPitch(double * samples, int startsample, int samplecount) {
+ double pitchF = 0.0;
+
+ int i, j;
+ double si, si1;
+
+ // must be a power of 2
+ samplecount = _floor_power2(samplecount);
+
+ double *sam = (double *)malloc(sizeof(double)*samplecount);
+ memcpy(sam, samples + startsample, sizeof(double)*samplecount);
+ int curSamNb = samplecount;
+
+ int *distances = (int *)malloc(sizeof(int)*samplecount);
+ int *mins = (int *)malloc(sizeof(int)*samplecount);
+ int *maxs = (int *)malloc(sizeof(int)*samplecount);
+ int nbMins, nbMaxs;
+
+ // algorithm parameters
+ int maxFLWTlevels = 6;
+ double maxF = 3000.;
+ int differenceLevelsN = 3;
+ double maximaThresholdRatio = 0.75;
+
+ double ampltitudeThreshold;
+ double theDC = 0.0;
+
+ { // compute ampltitudeThreshold and theDC
+ //first compute the DC and maxAMplitude
+ double maxValue = 0.0;
+ double minValue = 0.0;
+ for (i = 0; i < samplecount;i++) {
+ si = sam[i];
+ theDC = theDC + si;
+ if (si > maxValue) maxValue = si;
+ if (si < minValue) minValue = si;
+ }
+ theDC = theDC/samplecount;
+ maxValue = maxValue - theDC;
+ minValue = minValue - theDC;
+ double amplitudeMax = (maxValue > -minValue ? maxValue : -minValue);
+
+ ampltitudeThreshold = amplitudeMax*maximaThresholdRatio;
+ //asLog("dywapitch theDC=%f ampltitudeThreshold=%f\n", theDC, ampltitudeThreshold);
+
+ }
+
+ // levels, start without downsampling..
+ int curLevel = 0;
+ double curModeDistance = -1.;
+ int delta;
+
+ while(1) {
+
+ // delta
+ delta = 44100./(_2power(curLevel)*maxF);
+ //("dywapitch doing level=%ld delta=%ld\n", curLevel, delta);
+
+ if (curSamNb < 2) goto cleanup;
+
+ // compute the first maximums and minumums after zero-crossing
+ // store if greater than the min threshold
+ // and if at a greater distance than delta
+ double dv, previousDV = -1000;
+ nbMins = nbMaxs = 0;
+ int lastMinIndex = -1000000;
+ int lastmaxIndex = -1000000;
+ int findMax = 0;
+ int findMin = 0;
+ for (i = 2; i < curSamNb; i++) {
+ si = sam[i] - theDC;
+ si1 = sam[i-1] - theDC;
+
+ if (si1 <= 0 && si > 0) findMax = 1;
+ if (si1 >= 0 && si < 0) findMin = 1;
+
+ // min or max ?
+ dv = si - si1;
+
+ if (previousDV > -1000) {
+
+ if (findMin && previousDV < 0 && dv >= 0) {
+ // minimum
+ if (fabs(si) >= ampltitudeThreshold) {
+ if (i > lastMinIndex + delta) {
+ mins[nbMins++] = i;
+ lastMinIndex = i;
+ findMin = 0;
+ //if DEBUGG then put "min ok"&&si
+ //
+ } else {
+ //if DEBUGG then put "min too close to previous"&&(i - lastMinIndex)
+ //
+ }
+ } else {
+ // if DEBUGG then put "min "&abs(si)&" < thresh = "&ampltitudeThreshold
+ //--
+ }
+ }
+
+ if (findMax && previousDV > 0 && dv <= 0) {
+ // maximum
+ if (fabs(si) >= ampltitudeThreshold) {
+ if (i > lastmaxIndex + delta) {
+ maxs[nbMaxs++] = i;
+ lastmaxIndex = i;
+ findMax = 0;
+ } else {
+ //if DEBUGG then put "max too close to previous"&&(i - lastmaxIndex)
+ //--
+ }
+ } else {
+ //if DEBUGG then put "max "&abs(si)&" < thresh = "&ampltitudeThreshold
+ //--
+ }
+ }
+ }
+
+ previousDV = dv;
+ }
+
+ if (nbMins == 0 && nbMaxs == 0) {
+ // no best distance !
+ //asLog("dywapitch no mins nor maxs, exiting\n");
+
+ // if DEBUGG then put "no mins nor maxs, exiting"
+ goto cleanup;
+ }
+ //if DEBUGG then put count(maxs)&&"maxs &"&&count(mins)&&"mins"
+
+ // maxs = [5, 20, 100,...]
+ // compute distances
+ int d;
+ memset(distances, 0, samplecount*sizeof(int));
+ for (i = 0 ; i < nbMins ; i++) {
+ for (j = 1; j < differenceLevelsN; j++) {
+ if (i+j < nbMins) {
+ d = _iabs(mins[i] - mins[i+j]);
+ //asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d);
+ distances[d] = distances[d] + 1;
+ }
+ }
+ }
+ for (i = 0 ; i < nbMaxs ; i++) {
+ for (j = 1; j < differenceLevelsN; j++) {
+ if (i+j < nbMaxs) {
+ d = _iabs(maxs[i] - maxs[i+j]);
+ //asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d);
+ distances[d] = distances[d] + 1;
+ }
+ }
+ }
+
+ // find best summed distance
+ int bestDistance = -1;
+ int bestValue = -1;
+ for (i = 0; i< curSamNb; i++) {
+ int summed = 0;
+ for (j = -delta ; j <= delta ; j++) {
+ if (i+j >=0 && i+j < curSamNb)
+ summed += distances[i+j];
+ }
+ //asLog("dywapitch i=%ld summed=%ld bestDistance=%ld\n", i, summed, bestDistance);
+ if (summed == bestValue) {
+ if (i == 2*bestDistance)
+ bestDistance = i;
+
+ } else if (summed > bestValue) {
+ bestValue = summed;
+ bestDistance = i;
+ }
+ }
+ //asLog("dywapitch bestDistance=%ld\n", bestDistance);
+
+ // averaging
+ double distAvg = 0.0;
+ double nbDists = 0;
+ for (j = -delta ; j <= delta ; j++) {
+ if (bestDistance+j >=0 && bestDistance+j < samplecount) {
+ int nbDist = distances[bestDistance+j];
+ if (nbDist > 0) {
+ nbDists += nbDist;
+ distAvg += (bestDistance+j)*nbDist;
+ }
+ }
+ }
+ // this is our mode distance !
+ distAvg /= nbDists;
+ //asLog("dywapitch distAvg=%f\n", distAvg);
+
+ // continue the levels ?
+ if (curModeDistance > -1.) {
+ double similarity = fabs(distAvg*2 - curModeDistance);
+ if (similarity <= 2*delta) {
+ //if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"ok"
+ //asLog("dywapitch similarity=%f OK !\n", similarity);
+ // two consecutive similar mode distances : ok !
+ pitchF = 44100./(_2power(curLevel-1)*curModeDistance);
+ goto cleanup;
+ }
+ //if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"not"
+ }
+
+ // not similar, continue next level
+ curModeDistance = distAvg;
+
+ curLevel = curLevel + 1;
+ if (curLevel >= maxFLWTlevels) {
+ // put "max levels reached, exiting"
+ //asLog("dywapitch max levels reached, exiting\n");
+ goto cleanup;
+ }
+
+ // downsample
+ if (curSamNb < 2) {
+ //asLog("dywapitch not enough samples, exiting\n");
+ goto cleanup;
+ }
+ for (i = 0; i < curSamNb/2; i++) {
+ sam[i] = (sam[2*i] + sam[2*i + 1])/2.;
+ }
+ curSamNb /= 2;
+ }
+
+ ///
+cleanup:
+ free(distances);
+ free(mins);
+ free(maxs);
+ free(sam);
+
+ return pitchF;
+}
+
+// ***********************************
+// the dynamic postprocess
+// ***********************************
+
+/***
+It states:
+ - a pitch cannot change much all of a sudden (20%) (impossible humanly,
+ so if such a situation happens, consider that it is a mistake and drop it.
+ - a pitch cannot double or be divided by 2 all of a sudden : it is an
+ algorithm side-effect : divide it or double it by 2.
+ - a lonely voiced pitch cannot happen, nor can a sudden drop in the middle
+ of a voiced segment. Smooth the plot.
+***/
+
+double _dywapitch_dynamicprocess(dywapitchtracker *pitchtracker, double pitch) {
+
+ // equivalence
+ if (pitch == 0.0) pitch = -1.0;
+
+ //
+ double estimatedPitch = -1;
+ double acceptedError = 0.2f;
+ int maxConfidence = 5;
+
+ if (pitch != -1) {
+ // I have a pitch here
+
+ if (pitchtracker->_prevPitch == -1) {
+ // no previous
+ estimatedPitch = pitch;
+ pitchtracker->_prevPitch = pitch;
+ pitchtracker->_pitchConfidence = 1;
+
+ } else if (abs(pitchtracker->_prevPitch - pitch)/pitch < acceptedError) {
+ // similar : remember and increment pitch
+ pitchtracker->_prevPitch = pitch;
+ estimatedPitch = pitch;
+ pitchtracker->_pitchConfidence = min(maxConfidence, pitchtracker->_pitchConfidence + 1); // maximum 3
+
+ } else if ((pitchtracker->_pitchConfidence >= maxConfidence-2) && abs(pitchtracker->_prevPitch - 2.*pitch)/(2.*pitch) < acceptedError) {
+ // close to half the last pitch, which is trusted
+ estimatedPitch = 2.*pitch;
+ pitchtracker->_prevPitch = estimatedPitch;
+
+ } else if ((pitchtracker->_pitchConfidence >= maxConfidence-2) && abs(pitchtracker->_prevPitch - 0.5*pitch)/(0.5*pitch) < acceptedError) {
+ // close to twice the last pitch, which is trusted
+ estimatedPitch = 0.5*pitch;
+ pitchtracker->_prevPitch = estimatedPitch;
+
+ } else {
+ // nothing like this : very different value
+ if (pitchtracker->_pitchConfidence >= 1) {
+ // previous trusted : keep previous
+ estimatedPitch = pitchtracker->_prevPitch;
+ pitchtracker->_pitchConfidence = max(0, pitchtracker->_pitchConfidence - 1);
+ } else {
+ // previous not trusted : take current
+ estimatedPitch = pitch;
+ pitchtracker->_prevPitch = pitch;
+ pitchtracker->_pitchConfidence = 1;
+ }
+ }
+
+ } else {
+ // no pitch now
+ if (pitchtracker->_prevPitch != -1) {
+ // was pitch before
+ if (pitchtracker->_pitchConfidence >= 1) {
+ // continue previous
+ estimatedPitch = pitchtracker->_prevPitch;
+ pitchtracker->_pitchConfidence = max(0, pitchtracker->_pitchConfidence - 1);
+ } else {
+ pitchtracker->_prevPitch = -1;
+ estimatedPitch = -1.;
+ pitchtracker->_pitchConfidence = 0;
+ }
+ }
+ }
+
+ // put "_pitchConfidence="&pitchtracker->_pitchConfidence
+ if (pitchtracker->_pitchConfidence >= 1) {
+ // ok
+ pitch = estimatedPitch;
+ } else {
+ pitch = -1;
+ }
+
+ // equivalence
+ if (pitch == -1) pitch = 0.0;
+
+ return pitch;
+}
+
+
+// ************************************
+// the API main entry points
+// ************************************
+
+void dywapitch_inittracking(dywapitchtracker *pitchtracker) {
+ pitchtracker->_prevPitch = -1.;
+ pitchtracker->_pitchConfidence = -1;
+}
+
+double dywapitch_computepitch(dywapitchtracker *pitchtracker, double * samples, int startsample, int samplecount) {
+ double raw_pitch = _dywapitch_computeWaveletPitch(samples, startsample, samplecount);
+ return _dywapitch_dynamicprocess(pitchtracker, raw_pitch);
+}
+
+
+
diff --git a/src/dywapitchtrack/dywapitchtrack.h b/src/dywapitchtrack/dywapitchtrack.h
new file mode 100755
index 0000000..0c12f80
--- /dev/null
+++ b/src/dywapitchtrack/dywapitchtrack.h
@@ -0,0 +1,110 @@
+/* dywapitchtrack.h
+
+ Dynamic Wavelet Algorithm Pitch Tracking library
+ Released under the MIT open source licence
+
+ Copyright (c) 2010 Antoine Schmitt
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+
+/* Documentation
+
+ The dywapitchtrack library computes the pitch of an audio stream in real time.
+
+ The pitch is the main frequency of the waveform (the 'note' being played or sung).
+ It is expressed as a float in Hz.
+
+ Unlike the human ear, pitch detection is difficult to achieve for computers. Many
+ algorithms have been designed and experimented, but there is no 'best' algorithm.
+ They all depend on the context and the tradeoffs acceptable in terms of speed and
+ latency. The context includes the quality and 'cleanness' of the audio : obviously
+ polyphonic sounds (multiple instruments playing different notes at the same time)
+ are extremely difficult to track, percussive or noisy audio has no pitch, most
+ real-life audio have some noisy moments, some instruments have a lot of harmonics,
+ etc...
+
+ The dywapitchtrack is based on a custom-tailored algorithm which is of very high quality:
+ both very accurate (precision < 0.05 semitones), very low latency (< 23 ms) and
+ very low error rate. It has been thoroughly tested on human voice.
+
+ It can best be described as a dynamic wavelet algorithm (dywa):
+
+ The heart of the algorithm is a very powerful wavelet algorithm, described in a paper
+ by Eric Larson and Ross Maddox : "Real-Time Time-Domain Pitch Tracking Using Wavelets"
+ http://online.physics.uiuc.edu/courses/phys498pom/NSF_REU_Reports/2005_reu/Real-Time_Time-Domain_Pitch_Tracking_Using_Wavelets.pdf
+
+ This central algorithm has been improved by adding dynamic tracking, to reduce the
+ common problems of frequency-halving and voiced/unvoiced errors. This dynamic tracking
+ explains the need for a tracking structure (dywapitchtracker). The dynamic tracking assumes
+ that the main function dywapitch_computepitch is called repeatedly, as it follows the pitch
+ over time and makes assumptions about human voice capabilities and reallife conditions
+ (as documented inside the code).
+
+ Note : The algorithm currently assumes a 44100Hz audio sampling rate. If you use a different
+ samplerate, you can just multiply the resulting pitch by the ratio between your samplerate and 44100.
+*/
+
+/* Usage
+
+ // Allocate your audio buffers and start the audio stream.
+ // Allocate a 'dywapitchtracker' structure.
+ // Start the pitch tracking by calling 'dywapitch_inittracking'.
+ dywapitchtracker pitchtracker;
+ dywapitch_inittracking(&pitchtracker);
+
+ // For each available audio buffer, call 'dywapitch_computepitch'
+ double thepitch = dywapitch_computepitch(&pitchtracker, samples, start, count);
+
+*/
+
+#ifndef dywapitchtrack__H
+#define dywapitchtrack__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// structure to hold tracking data
+typedef struct _dywapitchtracker {
+ double _prevPitch;
+ int _pitchConfidence;
+} dywapitchtracker;
+
+// returns the number of samples needed to compute pitch for fequencies equal and above the given minFreq (in Hz)
+// useful to allocate large enough audio buffer
+// ex : for frequencies above 130Hz, you need 1024 samples (assuming a 44100 Hz samplerate)
+int dywapitch_neededsamplecount(int minFreq);
+
+// call before computing any pitch, passing an allocated dywapitchtracker structure
+void dywapitch_inittracking(dywapitchtracker *pitchtracker);
+
+// computes the pitch. Pass the inited dywapitchtracker structure
+// samples : a pointer to the sample buffer
+// startsample : the index of teh first sample to use in teh sample buffer
+// samplecount : the number of samples to use to compte the pitch
+// return 0.0 if no pitch was found (sound too low, noise, etc..)
+double dywapitch_computepitch(dywapitchtracker *pitchtracker, double * samples, int startsample, int samplecount);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
+
diff --git a/src/init.c b/src/init.c
index b660836..0dbf9b0 100644
--- a/src/init.c
+++ b/src/init.c
@@ -35,6 +35,7 @@
#include "../xtract/libxtract.h"
#include "xtract_window_private.h"
+#include "xtract_scalar_private.h"
#define DEFINE_GLOBALS
#include "xtract_globals_private.h"
@@ -361,6 +362,11 @@ int xtract_init_mfcc(int N, double nyquist, int style, double freq_min, double f
}
+int xtract_init_wavelet_f0_state(void)
+{
+ dywapitch_inittracking(&wavelet_f0_state);
+}
+
double *xtract_init_window(const int N, const int type)
{
double *window;
diff --git a/src/libxtract.c b/src/libxtract.c
index 535ee2e..99162fb 100644
--- a/src/libxtract.c
+++ b/src/libxtract.c
@@ -68,6 +68,7 @@ int(*xtract[])(const double *, const int, const void *, double *) =
xtract_hps,
xtract_f0,
xtract_failsafe_f0,
+ xtract_wavelet_f0,
/* xtract_delta.h */
xtract_lnorm,
xtract_flux,
diff --git a/src/scalar.c b/src/scalar.c
index 4675c4c..d35f12b 100644
--- a/src/scalar.c
+++ b/src/scalar.c
@@ -28,9 +28,12 @@
#include <stdio.h>
#include <math.h>
-#include "../xtract/libxtract.h"
-#include "../xtract/xtract_helper.h"
+#include "dywapitchtrack/dywapitchtrack.h"
+
+#include "xtract/libxtract.h"
+#include "xtract/xtract_helper.h"
#include "xtract_macros_private.h"
+#include "xtract_globals_private.h"
int xtract_mean(const double *data, const int N, const void *argv, double *result)
{
@@ -953,7 +956,6 @@ int xtract_failsafe_f0(const double *data, const int N, const void *argv, double
if(return_code == XTRACT_NO_RESULT)
{
-
sr = *(double *)argv;
if(sr == 0)
sr = 44100.0;
@@ -975,3 +977,18 @@ int xtract_failsafe_f0(const double *data, const int N, const void *argv, double
}
+int xtract_wavelet_f0(const double *data, const int N, const void *argv, double *result)
+{
+ double sr = *(double *)argv;
+
+ *result = dywapitch_computepitch(&wavelet_f0_state, data, 0, N);
+
+ if (*result == 0.0)
+ {
+ return XTRACT_NO_RESULT;
+ }
+
+ return XTRACT_SUCCESS;
+}
+
+
diff --git a/src/xtract_globals_private.h b/src/xtract_globals_private.h
index e643ab0..2b0f3d4 100644
--- a/src/xtract_globals_private.h
+++ b/src/xtract_globals_private.h
@@ -27,6 +27,7 @@
#define XTRACT_GLOBALS_PRIVATE_H
#include "fft.h"
+#include "dywapitchtrack/dywapitchtrack.h"
#ifdef DEFINE_GLOBALS
#define GLOBAL
@@ -46,5 +47,7 @@ GLOBAL xtract_vdsp_data vdsp_data_spectrum;
GLOBAL xtract_vdsp_data vdsp_data_autocorrelation_fft;
#endif
+GLOBAL dywapitchtracker wavelet_f0_state;
+
#endif /* Header guard */
diff --git a/xtract/libxtract.h b/xtract/libxtract.h
index 8082a47..d07ba97 100644
--- a/xtract/libxtract.h
+++ b/xtract/libxtract.h
@@ -116,6 +116,7 @@ enum xtract_features_ {
XTRACT_HPS,
XTRACT_F0,
XTRACT_FAILSAFE_F0,
+ XTRACT_WAVELET_F0,
XTRACT_LNORM,
XTRACT_FLUX,
XTRACT_ATTACK_TIME,
@@ -361,6 +362,9 @@ extern int(*xtract[XTRACT_FEATURES])(const double *data, const int N, const void
#endif
+/** \brief A function to initialise wavelet f0 detector state */
+int xtract_init_wavelet_f0_state(void);
+
/** \brief A structure to store a set of n_filters Mel filters */
typedef struct xtract_mel_filter_ {
int n_filters;
diff --git a/xtract/xtract_scalar.h b/xtract/xtract_scalar.h
index 3853e66..7d5d752 100644
--- a/xtract/xtract_scalar.h
+++ b/xtract/xtract_scalar.h
@@ -416,6 +416,21 @@ int xtract_f0(const double *data, const int N, const void *argv, double *result)
*/
int xtract_failsafe_f0(const double *data, const int N, const void *argv, double *result);
+/** \brief Extract the fundamental frequency of an input vector using wavelet-based method
+ *
+ * \param *data: a pointer to the first element in an array of doubles representing an audio vector
+ * \param N: the number of elements to be considered
+ * \param *argv: a pointer to a double representing the audio sample rate
+ * \param *result: the pitch of N values from the array pointed to by *data
+ *
+ * This function uses the time-domain wavelet-based method described in Larson and Maddox (2005) and
+ * implemented in the dywapitchtrack library
+ *
+ * xtract_init_wavelet_f0_state() must be called exactly once prior to calling xtract_wavelet_f0()
+ *
+ */
+int xtract_wavelet_f0(const double *data, const int N, const void *argv, double *result);
+
/** \brief Extract the number of non-zero elements in an input vector
*
* \param *data: a pointer to the first element in an array of doubles