diff options
Diffstat (limited to 'src/loris/F0Estimate.C')
-rw-r--r-- | src/loris/F0Estimate.C | 697 |
1 files changed, 697 insertions, 0 deletions
diff --git a/src/loris/F0Estimate.C b/src/loris/F0Estimate.C new file mode 100644 index 0000000..d03c136 --- /dev/null +++ b/src/loris/F0Estimate.C @@ -0,0 +1,697 @@ +/* + * This is the Loris C++ Class Library, implementing analysis, + * manipulation, and synthesis of digitized sounds using the Reassigned + * Bandwidth-Enhanced Additive Sound Model. + * + * Loris is Copyright (c) 1999-2010 by Kelly Fitz and Lippold Haken + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY, without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * F0Estimate.C + * + * Implementation of an iterative alrogithm for computing an + * estimate of fundamental frequency from a sequence of sinusoidal + * frequencies and amplitudes using a likelihood estimator + * adapted from Quatieri's Speech Signal Processing text. The + * algorithm here takes advantage of the fact that spectral peaks + * have already been identified and extracted in the analysis/modeling + * process. + * + * Kelly Fitz, 28 March 2006 + * loris@cerlsoundgroup.org + * + * http://www.cerlsoundgroup.org/Loris/ + * + */ + +#if HAVE_CONFIG_H + #include "config.h" +#endif + +#include "F0Estimate.h" + +#include "LorisExceptions.h" // for Assert + +#include "Notifier.h" + +#include <algorithm> +#include <cmath> +#include <numeric> + +#include <vector> +using std::vector; + +#if defined(HAVE_M_PI) && (HAVE_M_PI) + const double Pi = M_PI; +#else + const double Pi = 3.14159265358979324; +#endif + +// #if defined(HAVE_ISFINITE) && (HAVE_ISFINITE) +// using std::isfinite; +// +// isfinite is not, after all, part of the standard, +// it is an extension. If it is not provided, the following +// checks for NaN and finite-ness. +// +// Use this instead. +// This code is taken from +// http://www.johndcook.com/IEEE_exceptions_in_cpp.html + +#include <float.h> +inline bool IsFiniteNumber( double x ) +{ + // DBL_MAX is defined in float.h. + // Comparisons with NaN always fail. + + return (x <= DBL_MAX && x >= -DBL_MAX); +} + + + + +// begin namespace +namespace Loris { + +// --------------------------------------------------------------------------- +// forward declarations for helpers, defined below +// Q is the likelihood function, Qprime is its derivative w.r.t. frequency + +static double +secant_method( const vector<double> & amps, + const vector<double> & freqs, + double f1, double f2, + double precision ); + +static void +compute_candidate_freqs( const vector< double > & peak_freqs, + double fmin, double fmax, + vector< double > & eval_freqs ); +static void +evaluate_Q( const vector<double> & amps, + const vector<double> & freqs, + const vector<double> & eval_freqs, + vector<double> & Q ); + +static double +evaluate_Q( const vector<double> & amps, + const vector<double> & freqs, + double eval_freq, + double norm ); + +static double +evaluate_Qprime( const vector<double> & amps, + const vector<double> & freqs, + double eval_freq ); + +static void +evaluate_Q( const vector<double> & amps, + const vector<double> & freqs, + const vector<double> & eval_freqs, + vector<double> & Q, + double norm ); + +// --------------------------------------------------------------------------- + + +// --------------------------------------------------------------------------- +// F0Estimate constructor +// --------------------------------------------------------------------------- +// Construct from parameters of the iterative F0 estimation +// algorithm. Find candidate F0 estimates as integer divisors +// of the peak frequencies, pick the highest frequency of the +// most likely candidates, and refine that estiamte using the +// secant method. +// +// Store the frequency and the normalized value of the +// likelihood function at that frequency (1.0 indicates that +// all the peaks are perfect harmonics of the estimated +// frequency). +// +// See the F0Estimate.h for a description of the algorithm, also +// outlined inline below. + +F0Estimate::F0Estimate( const vector<double> & amps, + const vector<double> & freqs, + double fmin, double fmax, + double resolution ) : + m_frequency( 0 ), + m_confidence( 0 ) +{ + if ( fmin > fmax ) + { + std::swap( fmin, fmax ); + } + // never consider DC (0 Hz) to be a valid fundamental + fmin = std::max( 1., fmin ); + + // ------------------------------------------------------------------------- + // 1) Identify candidate F0s as the integer divisors of the sinusoidal + // frequencies provided, within the specified range (this algorithm + // relies on the reasonable assumption that for any frequency recognized + // as a likely F0, at least one of the sinusoidal frequencies must + // represent a harmonic, the likelihood function makes this same + + // First collect candidate frequencies: all integer + // divisors of the peak frequencies that are between + // fmin and fmax. + vector< double > eval_freqs; + compute_candidate_freqs( freqs, fmin, fmax, eval_freqs ); + + if ( ! eval_freqs.empty() ) + { + // Compute a normalization factor equal to the total + // energy represented by all the peaks passed in + // amps and freqs, so that the value of the likelihood + // function does not depend on the overall signal + // amplitude, but instead depends only on the quality + // of the estimate, or the confidence in the result, + // and the quality of the final estimate can be evaluated + // by the value of the likelihood function. + double normalization = + 1.0 / std::inner_product( amps.begin(), amps.end(), amps.begin(), 0.0 ); + + // Evaluate the likelihood function at the candidate frequencies. + vector < double > Q( eval_freqs.size() ); + evaluate_Q( amps, freqs, eval_freqs, Q, normalization ); + + // ------------------------------------------------------------------------- + // 2) Select the highest frequency candidate that nearly maximizes the + // likelihood function (because all subharmonics of the true F0 will + // be equal in likelihood to the true F0, but no higher frequency can + // be as likely). + + // Find the highest frequency corresponding to a high value of Q + // (the most likely candidate). + + vector<double>::size_type idx = + std::max_element( Q.begin(), Q.end() ) - Q.begin(); + + double bestFreq = eval_freqs[ idx ]; + double bestQ = Q[ idx ]; + + // ------------------------------------------------------------------------- + // 2a) Check the likelihood of integer multiples of the best candidate, + // choose the highest multiple (within the specified range) that + // as likely as the best candidate frequency to be the new best + // candidate. + + // Check integer multiples of the best candidate frequency, + // so that we can be certain that the peak doesn't + // correspond to a subharmonic of the true most-likely + // F0 in the range [fmin,fmax]. + // + // While the next octave up is in range, and its likelihood + // is within 5% of the previously found peak, accept the + // octave as the better candidate (when we reach the true + // best candidate frequency, the next multiple should be + // much less likely). + + double nextF = 2 * bestFreq; + double nextQ = evaluate_Q( amps, freqs, nextF, normalization ); + + while ( fmax > nextF && ( 0.95 * bestQ ) < nextQ ) + { + // update best candidate: + bestFreq = nextF; + bestQ = nextQ; + + // consider the next multiple + nextF += bestFreq; + nextQ = evaluate_Q( amps, freqs, nextF, normalization ); + } + +// notifier << "peak is : " << bestFreq +// << " Hz, Q: " << bestQ << endl; + + // ------------------------------------------------------------------------- + // 3) Refine the best candidate using the secant method for refining the + // root of the derivative of the likelihood function in the neighborhood + // of the best candidate (because a peak in the likelihood function is + // a root of the derivative of that function). + + + // Refine this estimate using the secant method. + // + // Check the derivative function: if the slope (derivative) + // is positive, then assume that bestFreq is just below the + // root, and choose a second frequency just greater than + // bestFreq. Otherwise, assume that bestFreq is just above + // the root of the derivative function, and choose a second + // frequency just below bestFreq. + + double altFreq = bestFreq - resolution; + if ( 0 < evaluate_Qprime( amps, freqs, bestFreq ) ) + { + altFreq = bestFreq + resolution; + } + + // Now invoke the secant method to attempt to refine + // the root estimate: + m_frequency = secant_method( amps, freqs, + bestFreq, altFreq, + resolution ); + + +// notifier << "best candidate is : " << bestFreq +// << " Hz, Q: " << bestQ << endl; +// notifier << "secant method found : " << m_frequency +// << " Hz, Q: " +// << evaluate_Q( amps, freqs, m_frequency, normalization ) << endl; +// + + // What if the secant method flies off to some other root? + // Check that the root is still between fmin and fmax. + if ( m_frequency < fmin || m_frequency > fmax ) + { + // If refining fails, just use the best candidate estimate. + m_frequency = bestFreq; + } + + + // + // Could also use the bisection method, or the false position method, which + // always converge. All that is required is that two points on the + // function (the derivative of the likelihood function, in this case) + // having opposite signs are used to begin the search. So we need + // to first find a nearby freq at which the derivative of Q evaluates + // with the opposite sign as bestFreq. + // + + + // Compute the value of the likelihood function at this frequency. + m_confidence = evaluate_Q( amps, freqs, m_frequency, normalization ); + + // If the secant method makes things worse, then just go with the + // the most likely candidate. + // + // This is a sanity measure, should never happen. + if ( bestQ > m_confidence ) + { + m_confidence = bestQ; + m_frequency = bestFreq; + } + +// notifier << "refined to: " << m_frequency +// << " Hz, Q: " << m_confidence << endl; + + } + +} + + +// --------------------------------------------------------------------------- +// --- local helpers --- +// --------------------------------------------------------------------------- + + +// Collect candidate frequencies in eval_freqs. +// Candidates are all integer divisors +// between fmin and fmax of any frequency in the +// vector of peak frequencies provided. + +static void +compute_candidate_freqs( const vector< double > & peak_freqs, + double fmin, double fmax, + vector< double > & eval_freqs ) +{ + Assert( fmax > fmin ); + + eval_freqs.clear(); + + for ( vector< double >::const_iterator pk = peak_freqs.begin(); + pk != peak_freqs.end(); + ++pk ) + { + // check all integer divisors of *pk + double div = 1; + double f = *pk; + + // reject all the ones greater than fmax + while( f > fmax ) + { + ++div; + f = *pk / div; + } + + // keep the the ones that are between fmin + // and fmax + while( f >= fmin ) + { + eval_freqs.push_back( f ); + ++div; + f = *pk / div; + } + } + + // sort the candidats + sort( eval_freqs.begin(), eval_freqs.end() ); +} + + +// --------------------------------------------------------------------------- +// --- likelihood function evaluation --- +// --------------------------------------------------------------------------- + + +// Qterm is a functor to help compute terms +// in the likelihood function sum. + +struct Qterm +{ + double f0; + Qterm( double f ) : f0(f) {} + + double operator()( double amp, double freq ) const + { + double arg = 2*Pi*freq/f0; + return amp*amp*std::cos(arg); + } +}; + +// evaluate_Q +// +// Evaluate the likelihood function at the specified +// frequency. + +static double +evaluate_Q( const vector<double> & amps, + const vector<double> & freqs, + double eval_freq, + double norm ) +{ + double prod = + std::inner_product( amps.begin(), amps.end(), + freqs.begin(), + 0., + std::plus< double >(), + Qterm( eval_freq ) ); + + return prod * norm; +} + +// evaluate_Q +// +// Evaluate the normalized likelihood function at a range of +// frequencies, return the results in the vector Q. + +static void +evaluate_Q( const vector<double> & amps, + const vector<double> & freqs, + const vector<double> & eval_freqs, + vector<double> & Q ) +{ + Assert( eval_freqs.size() == Q.size() ); + Assert( amps.size() == freqs.size() ); + + // Compute a normalization factor equal to the total + // energy represented by all the peaks passed in + // amps and freqs, so that the value of the likelihood + // function does not depend on the overall signal + // amplitude, but instead depends only on the quality + // of the estimate, or the confidence in the result, + // and the quality of the final estimate can be evaluated + // by the value of the likelihood function. + double etotal = std::inner_product( amps.begin(), amps.end(), amps.begin(), 0.0 ); + double norm = 1.0 / etotal; + + evaluate_Q( amps, freqs, eval_freqs, Q, norm ); +} + + + +// evaluate_Q +// +// Evaluate the normalized likelihood function at a range of +// frequencies, using the normalization factor provided, and +// return the results in the vector Q. + +static void +evaluate_Q( const vector<double> & amps, + const vector<double> & freqs, + const vector<double> & eval_freqs, + vector<double> & Q, + double norm ) +{ + Assert( eval_freqs.size() == Q.size() ); + Assert( amps.size() == freqs.size() ); + + // iterate over the frequencies at which to + // evaluate the likelihood function: + vector<double>::const_iterator freq_it = eval_freqs.begin(); + vector<double>::iterator Q_it = Q.begin(); + while ( freq_it != eval_freqs.end() ) + { + double f = *freq_it; + + double result = evaluate_Q( amps, freqs, f, norm ); + + *Q_it++ = result; + ++freq_it; + } +} + +// --------------------------------------------------------------------------- +// --- likelihood function derivative evaluation --- +// --------------------------------------------------------------------------- + + +// Qprimeterm is a functor to help compute terms +// in the likelihood function derivative sum, used +// in the secant method of root refinement. + +struct Qprimeterm +{ + double f0; + Qprimeterm( double f ) : f0(f) {} + + double operator()( double amp, double freq ) const + { + double arg = 2*Pi*freq/f0; + return amp*amp*std::sin(arg)*arg/f0; + } +}; + + +// evaluate_Qprime +// +// Evaluate the derivative of the likelihood function (w.r.t. frequency) +// at the specified frequency. + +static double +evaluate_Qprime( const vector<double> & amps, + const vector<double> & freqs, + double eval_freq ) +{ + double prod = + std::inner_product( amps.begin(), amps.end(), + freqs.begin(), + 0., + std::plus< double >(), + Qprimeterm( eval_freq ) ); + + return prod; +} + +// --------------------------------------------------------------------------- +// --- secant method of refining a root/peak estimate --- +// --------------------------------------------------------------------------- + +// secant_method +// +// Find roots of the derivative of the likelihood +// function using the secant method, return the +// value of x (frequency) at which the roots is found. + +static double +secant_method( const vector<double> & amps, + const vector<double> & freqs, + double f1, double f2, double precision ) +{ + double xn = f1; + double xnm1 = f2; + double fxnm1 = evaluate_Qprime( amps, freqs, xnm1 ); + + const unsigned int MaxIters = 20; + + unsigned int iters = 0; + double deltax = 0.0; + + // Iterate until delta is small, or blows up, + // or we have iterated too many times. + do + { + double fxn = evaluate_Qprime( amps, freqs, xn ); + + deltax = fxn * (xn - xnm1)/(fxn - fxnm1); + + xnm1 = xn; + xn = xn - deltax; + + fxnm1 = fxn; + + } while( // fabs( deltax ) > precision && + IsFiniteNumber( deltax ) && + ++iters < MaxIters ); + + + // Check whether delta blew up. If it did, revert to the + // previous value of x. + + if ( ! IsFiniteNumber( deltax ) ) + { + xn = xnm1; + } + + return xn; +} + +#if 0 +// --------------------------------------------------------------------------- +// --- local helpers - dumb old way --- +// --------------------------------------------------------------------------- + + +static void +compute_eval_freqs( double fmin, double fmax, + vector<double> & eval_freqs ); + +static vector<double>::const_iterator +choose_peak( const vector<double> & Q ); + +// --------------------------------------------------------------------------- +// F0Estimate constructor -- iterative method +// --------------------------------------------------------------------------- +// Iteratively compute the value of the likelihood function +// at a range of frequencies around the peak likelihood. +// Store the maximum value when the range of likelihood +// values computed is less than the specified resolution. +// Store the frequency and the normalized value of the +// likelihood function at that frequency (1.0 indicates that +// all the peaks are perfect harmonics of the estimated +// frequency). + +void +F0Estimate::construct_iterative_method( const vector<double> & amps, + const vector<double> & freqs, + double fmin, double fmax, + double resolution ) +{ + + // when the frequency range is small, few samples are + // needed, but initially make sure to sample at least + // every 20 Hz. + // Scratch that, 20 Hz isn't fine enough, could miss a + // peak that way, try 2 Hz. There might be some room to + // adjust this parameter to trade off speed for robustness. + int Nsamps = std::max( 8, (int)std::ceil((fmax-fmin)*0.5) ); + vector<double> eval_freqs, Q; + double peak_freq = fmin, peak_Q = 0; + + // invariant: + // the likelihood function for the estimate of the fundamental + // frequency is maximized at some frequency between + // fmin and fmax (stop when that range is smaller + // than the resolution) + do + { + // determine the frequencies at which to evaluate + // the likelihood function + eval_freqs.resize( Nsamps ); + compute_eval_freqs( fmin, fmax, eval_freqs ); + + // evaluate the likelihood function at those + // frequencies: + Q.resize( Nsamps ); + evaluate_Q( amps, freqs, eval_freqs, Q ); + + // find the highest frequency at which the likelihood + // function peaks: + vector<double>::const_iterator peak = choose_peak( Q ); + int peak_idx = peak - Q.begin(); + peak_Q = *peak; + peak_freq = eval_freqs[ peak_idx ]; + + // update search range: + fmin = eval_freqs[ std::max(peak_idx - 1, 0) ]; + fmax = eval_freqs[ std::min(peak_idx + 1, Nsamps - 1) ]; + Nsamps = std::max( 8, (int)std::ceil((fmax-fmin)*0.05) ); + + } while ( (fmax - fmin) > resolution ); + + m_frequency = peak_freq; + m_confidence = peak_Q; +} + +// compute_eval_freqs +// +// Fill the frequency vector with a sampling +// of the range [fmin,fmax]. +// +// (used by dumb old iterative method) +// +static void +compute_eval_freqs( double fmin, double fmax, + vector<double> & eval_freqs ) +{ + Assert( fmax > fmin ); + + double delta = (fmax-fmin)/(eval_freqs.size()-1); + double f = fmin; + vector<double>::iterator it = eval_freqs.begin(); + while( it != eval_freqs.end() ) + { + *it++ = f; + f += delta; + } + eval_freqs.back() = fmax; +} + +// choose_peak +// +// Return the position of last peak that +// in the vector Q. +// +static vector<double>::const_iterator +choose_peak( const vector<double> & Q ) +{ + Assert( !Q.empty() ); + + double Qmax = *std::max_element( Q.begin(), Q.end() ); + vector<double>::const_iterator it = (Q.end()) - 1; + double tmp = *it; + + // this threshold determines how strong the + // highest-frequency peak in the likelihood + // function needs to be relative to the overall + // peak. For strongly periodic signals, this can + // be quite near to 1, but for things that are + // somewhat non-harmonic, setting it too high + // gives octave errors. Cannot tell whether errors + // will be introduced by having it too low. + const double threshold = 0.85 * Qmax; + while( (it != Q.begin()) && ((*it < threshold) || (*it < *(it-1))) ) + { + --it; + tmp = *it; + } + + return it; +} + +#endif + +} // end of namespace Loris |