diff options
author | Jamie Bullock <jamie@jamiebullock.com> | 2013-05-31 22:44:03 +0100 |
---|---|---|
committer | Jamie Bullock <jamie@jamiebullock.com> | 2013-05-31 22:50:18 +0100 |
commit | 7b4c50a8d9087a57a6af27ca0ae3eb30d35aae43 (patch) | |
tree | 50afdbdeca49923cff79c3ac634baaf159c88143 /src/dywapitchtrack/dywapitchtrack.c | |
parent | 962c5fe017b588f59b21585c847ef6b67898cbf7 (diff) | |
download | LibXtract-7b4c50a8d9087a57a6af27ca0ae3eb30d35aae43.tar.gz LibXtract-7b4c50a8d9087a57a6af27ca0ae3eb30d35aae43.tar.bz2 LibXtract-7b4c50a8d9087a57a6af27ca0ae3eb30d35aae43.zip |
Add wavelet-based pitch tracker
Diffstat (limited to 'src/dywapitchtrack/dywapitchtrack.c')
-rwxr-xr-x | src/dywapitchtrack/dywapitchtrack.c | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/src/dywapitchtrack/dywapitchtrack.c b/src/dywapitchtrack/dywapitchtrack.c new file mode 100755 index 0000000..b66000c --- /dev/null +++ b/src/dywapitchtrack/dywapitchtrack.c @@ -0,0 +1,448 @@ +/* dywapitchtrack.c + + Dynamic Wavelet Algorithm Pitch Tracking library + Released under the MIT open source licence + + Copyright (c) 2010 Antoine Schmitt + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include "dywapitchtrack.h" +#include <math.h> +#include <stdlib.h> +#include <string.h> // for memset + + +//********************** +// Utils +//********************** + +#ifndef max +#define max(x, y) ((x) > (y)) ? (x) : (y) +#endif +#ifndef min +#define min(x, y) ((x) < (y)) ? (x) : (y) +#endif + +// returns 1 if power of 2 +int _power2p(int value) { + if (value == 0) return 1; + if (value == 2) return 1; + if (value & 0x1) return 0; + return (_power2p(value >> 1)); +} + +// count number of bits +int _bitcount(int value) { + if (value == 0) return 0; + if (value == 1) return 1; + if (value == 2) return 2; + return _bitcount(value >> 1) + 1; +} + +// closest power of 2 above or equal +int _ceil_power2(int value) { + if (_power2p(value)) return value; + + if (value == 1) return 2; + int j, i = _bitcount(value); + int res = 1; + for (j = 0; j < i; j++) res <<= 1; + return res; +} + +// closest power of 2 below or equal +int _floor_power2(int value) { + if (_power2p(value)) return value; + return _ceil_power2(value)/2; +} + +// abs value +int _iabs(int x) { + if (x >= 0) return x; + return -x; +} + +// 2 power +int _2power(int i) { + int res = 1, j; + for (j = 0; j < i; j++) res <<= 1; + return res; +} + +//****************************** +// the Wavelet algorithm itself +//****************************** + +int dywapitch_neededsamplecount(int minFreq) { + int nbSam = 3*44100/minFreq; // 1017. for 130 Hz + nbSam = _ceil_power2(nbSam); // 1024 + return nbSam; +} + +typedef struct _minmax { + int index; + struct _minmax *next; +} minmax; + +double _dywapitch_computeWaveletPitch(double * samples, int startsample, int samplecount) { + double pitchF = 0.0; + + int i, j; + double si, si1; + + // must be a power of 2 + samplecount = _floor_power2(samplecount); + + double *sam = (double *)malloc(sizeof(double)*samplecount); + memcpy(sam, samples + startsample, sizeof(double)*samplecount); + int curSamNb = samplecount; + + int *distances = (int *)malloc(sizeof(int)*samplecount); + int *mins = (int *)malloc(sizeof(int)*samplecount); + int *maxs = (int *)malloc(sizeof(int)*samplecount); + int nbMins, nbMaxs; + + // algorithm parameters + int maxFLWTlevels = 6; + double maxF = 3000.; + int differenceLevelsN = 3; + double maximaThresholdRatio = 0.75; + + double ampltitudeThreshold; + double theDC = 0.0; + + { // compute ampltitudeThreshold and theDC + //first compute the DC and maxAMplitude + double maxValue = 0.0; + double minValue = 0.0; + for (i = 0; i < samplecount;i++) { + si = sam[i]; + theDC = theDC + si; + if (si > maxValue) maxValue = si; + if (si < minValue) minValue = si; + } + theDC = theDC/samplecount; + maxValue = maxValue - theDC; + minValue = minValue - theDC; + double amplitudeMax = (maxValue > -minValue ? maxValue : -minValue); + + ampltitudeThreshold = amplitudeMax*maximaThresholdRatio; + //asLog("dywapitch theDC=%f ampltitudeThreshold=%f\n", theDC, ampltitudeThreshold); + + } + + // levels, start without downsampling.. + int curLevel = 0; + double curModeDistance = -1.; + int delta; + + while(1) { + + // delta + delta = 44100./(_2power(curLevel)*maxF); + //("dywapitch doing level=%ld delta=%ld\n", curLevel, delta); + + if (curSamNb < 2) goto cleanup; + + // compute the first maximums and minumums after zero-crossing + // store if greater than the min threshold + // and if at a greater distance than delta + double dv, previousDV = -1000; + nbMins = nbMaxs = 0; + int lastMinIndex = -1000000; + int lastmaxIndex = -1000000; + int findMax = 0; + int findMin = 0; + for (i = 2; i < curSamNb; i++) { + si = sam[i] - theDC; + si1 = sam[i-1] - theDC; + + if (si1 <= 0 && si > 0) findMax = 1; + if (si1 >= 0 && si < 0) findMin = 1; + + // min or max ? + dv = si - si1; + + if (previousDV > -1000) { + + if (findMin && previousDV < 0 && dv >= 0) { + // minimum + if (fabs(si) >= ampltitudeThreshold) { + if (i > lastMinIndex + delta) { + mins[nbMins++] = i; + lastMinIndex = i; + findMin = 0; + //if DEBUGG then put "min ok"&&si + // + } else { + //if DEBUGG then put "min too close to previous"&&(i - lastMinIndex) + // + } + } else { + // if DEBUGG then put "min "&abs(si)&" < thresh = "&ltitudeThreshold + //-- + } + } + + if (findMax && previousDV > 0 && dv <= 0) { + // maximum + if (fabs(si) >= ampltitudeThreshold) { + if (i > lastmaxIndex + delta) { + maxs[nbMaxs++] = i; + lastmaxIndex = i; + findMax = 0; + } else { + //if DEBUGG then put "max too close to previous"&&(i - lastmaxIndex) + //-- + } + } else { + //if DEBUGG then put "max "&abs(si)&" < thresh = "&ltitudeThreshold + //-- + } + } + } + + previousDV = dv; + } + + if (nbMins == 0 && nbMaxs == 0) { + // no best distance ! + //asLog("dywapitch no mins nor maxs, exiting\n"); + + // if DEBUGG then put "no mins nor maxs, exiting" + goto cleanup; + } + //if DEBUGG then put count(maxs)&&"maxs &"&&count(mins)&&"mins" + + // maxs = [5, 20, 100,...] + // compute distances + int d; + memset(distances, 0, samplecount*sizeof(int)); + for (i = 0 ; i < nbMins ; i++) { + for (j = 1; j < differenceLevelsN; j++) { + if (i+j < nbMins) { + d = _iabs(mins[i] - mins[i+j]); + //asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d); + distances[d] = distances[d] + 1; + } + } + } + for (i = 0 ; i < nbMaxs ; i++) { + for (j = 1; j < differenceLevelsN; j++) { + if (i+j < nbMaxs) { + d = _iabs(maxs[i] - maxs[i+j]); + //asLog("dywapitch i=%ld j=%ld d=%ld\n", i, j, d); + distances[d] = distances[d] + 1; + } + } + } + + // find best summed distance + int bestDistance = -1; + int bestValue = -1; + for (i = 0; i< curSamNb; i++) { + int summed = 0; + for (j = -delta ; j <= delta ; j++) { + if (i+j >=0 && i+j < curSamNb) + summed += distances[i+j]; + } + //asLog("dywapitch i=%ld summed=%ld bestDistance=%ld\n", i, summed, bestDistance); + if (summed == bestValue) { + if (i == 2*bestDistance) + bestDistance = i; + + } else if (summed > bestValue) { + bestValue = summed; + bestDistance = i; + } + } + //asLog("dywapitch bestDistance=%ld\n", bestDistance); + + // averaging + double distAvg = 0.0; + double nbDists = 0; + for (j = -delta ; j <= delta ; j++) { + if (bestDistance+j >=0 && bestDistance+j < samplecount) { + int nbDist = distances[bestDistance+j]; + if (nbDist > 0) { + nbDists += nbDist; + distAvg += (bestDistance+j)*nbDist; + } + } + } + // this is our mode distance ! + distAvg /= nbDists; + //asLog("dywapitch distAvg=%f\n", distAvg); + + // continue the levels ? + if (curModeDistance > -1.) { + double similarity = fabs(distAvg*2 - curModeDistance); + if (similarity <= 2*delta) { + //if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"ok" + //asLog("dywapitch similarity=%f OK !\n", similarity); + // two consecutive similar mode distances : ok ! + pitchF = 44100./(_2power(curLevel-1)*curModeDistance); + goto cleanup; + } + //if DEBUGG then put "similarity="&similarity&&"delta="&delta&&"not" + } + + // not similar, continue next level + curModeDistance = distAvg; + + curLevel = curLevel + 1; + if (curLevel >= maxFLWTlevels) { + // put "max levels reached, exiting" + //asLog("dywapitch max levels reached, exiting\n"); + goto cleanup; + } + + // downsample + if (curSamNb < 2) { + //asLog("dywapitch not enough samples, exiting\n"); + goto cleanup; + } + for (i = 0; i < curSamNb/2; i++) { + sam[i] = (sam[2*i] + sam[2*i + 1])/2.; + } + curSamNb /= 2; + } + + /// +cleanup: + free(distances); + free(mins); + free(maxs); + free(sam); + + return pitchF; +} + +// *********************************** +// the dynamic postprocess +// *********************************** + +/*** +It states: + - a pitch cannot change much all of a sudden (20%) (impossible humanly, + so if such a situation happens, consider that it is a mistake and drop it. + - a pitch cannot double or be divided by 2 all of a sudden : it is an + algorithm side-effect : divide it or double it by 2. + - a lonely voiced pitch cannot happen, nor can a sudden drop in the middle + of a voiced segment. Smooth the plot. +***/ + +double _dywapitch_dynamicprocess(dywapitchtracker *pitchtracker, double pitch) { + + // equivalence + if (pitch == 0.0) pitch = -1.0; + + // + double estimatedPitch = -1; + double acceptedError = 0.2f; + int maxConfidence = 5; + + if (pitch != -1) { + // I have a pitch here + + if (pitchtracker->_prevPitch == -1) { + // no previous + estimatedPitch = pitch; + pitchtracker->_prevPitch = pitch; + pitchtracker->_pitchConfidence = 1; + + } else if (abs(pitchtracker->_prevPitch - pitch)/pitch < acceptedError) { + // similar : remember and increment pitch + pitchtracker->_prevPitch = pitch; + estimatedPitch = pitch; + pitchtracker->_pitchConfidence = min(maxConfidence, pitchtracker->_pitchConfidence + 1); // maximum 3 + + } else if ((pitchtracker->_pitchConfidence >= maxConfidence-2) && abs(pitchtracker->_prevPitch - 2.*pitch)/(2.*pitch) < acceptedError) { + // close to half the last pitch, which is trusted + estimatedPitch = 2.*pitch; + pitchtracker->_prevPitch = estimatedPitch; + + } else if ((pitchtracker->_pitchConfidence >= maxConfidence-2) && abs(pitchtracker->_prevPitch - 0.5*pitch)/(0.5*pitch) < acceptedError) { + // close to twice the last pitch, which is trusted + estimatedPitch = 0.5*pitch; + pitchtracker->_prevPitch = estimatedPitch; + + } else { + // nothing like this : very different value + if (pitchtracker->_pitchConfidence >= 1) { + // previous trusted : keep previous + estimatedPitch = pitchtracker->_prevPitch; + pitchtracker->_pitchConfidence = max(0, pitchtracker->_pitchConfidence - 1); + } else { + // previous not trusted : take current + estimatedPitch = pitch; + pitchtracker->_prevPitch = pitch; + pitchtracker->_pitchConfidence = 1; + } + } + + } else { + // no pitch now + if (pitchtracker->_prevPitch != -1) { + // was pitch before + if (pitchtracker->_pitchConfidence >= 1) { + // continue previous + estimatedPitch = pitchtracker->_prevPitch; + pitchtracker->_pitchConfidence = max(0, pitchtracker->_pitchConfidence - 1); + } else { + pitchtracker->_prevPitch = -1; + estimatedPitch = -1.; + pitchtracker->_pitchConfidence = 0; + } + } + } + + // put "_pitchConfidence="&pitchtracker->_pitchConfidence + if (pitchtracker->_pitchConfidence >= 1) { + // ok + pitch = estimatedPitch; + } else { + pitch = -1; + } + + // equivalence + if (pitch == -1) pitch = 0.0; + + return pitch; +} + + +// ************************************ +// the API main entry points +// ************************************ + +void dywapitch_inittracking(dywapitchtracker *pitchtracker) { + pitchtracker->_prevPitch = -1.; + pitchtracker->_pitchConfidence = -1; +} + +double dywapitch_computepitch(dywapitchtracker *pitchtracker, double * samples, int startsample, int samplecount) { + double raw_pitch = _dywapitch_computeWaveletPitch(samples, startsample, samplecount); + return _dywapitch_dynamicprocess(pitchtracker, raw_pitch); +} + + + |