summaryrefslogtreecommitdiff
path: root/src/loris/F0Estimate.C
diff options
context:
space:
mode:
authorJohn Glover <glover.john@gmail.com>2011-07-08 18:06:21 +0100
committerJohn Glover <glover.john@gmail.com>2011-07-08 18:06:21 +0100
commitd6073e01c933c77f1e2bc3c3fe1126d617003549 (patch)
tree695d23677c5b84bf3a0f88fbd4959b4f7cbc0e90 /src/loris/F0Estimate.C
parent641688b252da468eb374674a0dbaae1bbac70b2b (diff)
downloadsimpl-d6073e01c933c77f1e2bc3c3fe1126d617003549.tar.gz
simpl-d6073e01c933c77f1e2bc3c3fe1126d617003549.tar.bz2
simpl-d6073e01c933c77f1e2bc3c3fe1126d617003549.zip
Start adding Loris files
Diffstat (limited to 'src/loris/F0Estimate.C')
-rw-r--r--src/loris/F0Estimate.C697
1 files changed, 697 insertions, 0 deletions
diff --git a/src/loris/F0Estimate.C b/src/loris/F0Estimate.C
new file mode 100644
index 0000000..d03c136
--- /dev/null
+++ b/src/loris/F0Estimate.C
@@ -0,0 +1,697 @@
+/*
+ * This is the Loris C++ Class Library, implementing analysis,
+ * manipulation, and synthesis of digitized sounds using the Reassigned
+ * Bandwidth-Enhanced Additive Sound Model.
+ *
+ * Loris is Copyright (c) 1999-2010 by Kelly Fitz and Lippold Haken
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY, without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * F0Estimate.C
+ *
+ * Implementation of an iterative alrogithm for computing an
+ * estimate of fundamental frequency from a sequence of sinusoidal
+ * frequencies and amplitudes using a likelihood estimator
+ * adapted from Quatieri's Speech Signal Processing text. The
+ * algorithm here takes advantage of the fact that spectral peaks
+ * have already been identified and extracted in the analysis/modeling
+ * process.
+ *
+ * Kelly Fitz, 28 March 2006
+ * loris@cerlsoundgroup.org
+ *
+ * http://www.cerlsoundgroup.org/Loris/
+ *
+ */
+
+#if HAVE_CONFIG_H
+ #include "config.h"
+#endif
+
+#include "F0Estimate.h"
+
+#include "LorisExceptions.h" // for Assert
+
+#include "Notifier.h"
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include <vector>
+using std::vector;
+
+#if defined(HAVE_M_PI) && (HAVE_M_PI)
+ const double Pi = M_PI;
+#else
+ const double Pi = 3.14159265358979324;
+#endif
+
+// #if defined(HAVE_ISFINITE) && (HAVE_ISFINITE)
+// using std::isfinite;
+//
+// isfinite is not, after all, part of the standard,
+// it is an extension. If it is not provided, the following
+// checks for NaN and finite-ness.
+//
+// Use this instead.
+// This code is taken from
+// http://www.johndcook.com/IEEE_exceptions_in_cpp.html
+
+#include <float.h>
+inline bool IsFiniteNumber( double x )
+{
+ // DBL_MAX is defined in float.h.
+ // Comparisons with NaN always fail.
+
+ return (x <= DBL_MAX && x >= -DBL_MAX);
+}
+
+
+
+
+// begin namespace
+namespace Loris {
+
+// ---------------------------------------------------------------------------
+// forward declarations for helpers, defined below
+// Q is the likelihood function, Qprime is its derivative w.r.t. frequency
+
+static double
+secant_method( const vector<double> & amps,
+ const vector<double> & freqs,
+ double f1, double f2,
+ double precision );
+
+static void
+compute_candidate_freqs( const vector< double > & peak_freqs,
+ double fmin, double fmax,
+ vector< double > & eval_freqs );
+static void
+evaluate_Q( const vector<double> & amps,
+ const vector<double> & freqs,
+ const vector<double> & eval_freqs,
+ vector<double> & Q );
+
+static double
+evaluate_Q( const vector<double> & amps,
+ const vector<double> & freqs,
+ double eval_freq,
+ double norm );
+
+static double
+evaluate_Qprime( const vector<double> & amps,
+ const vector<double> & freqs,
+ double eval_freq );
+
+static void
+evaluate_Q( const vector<double> & amps,
+ const vector<double> & freqs,
+ const vector<double> & eval_freqs,
+ vector<double> & Q,
+ double norm );
+
+// ---------------------------------------------------------------------------
+
+
+// ---------------------------------------------------------------------------
+// F0Estimate constructor
+// ---------------------------------------------------------------------------
+// Construct from parameters of the iterative F0 estimation
+// algorithm. Find candidate F0 estimates as integer divisors
+// of the peak frequencies, pick the highest frequency of the
+// most likely candidates, and refine that estiamte using the
+// secant method.
+//
+// Store the frequency and the normalized value of the
+// likelihood function at that frequency (1.0 indicates that
+// all the peaks are perfect harmonics of the estimated
+// frequency).
+//
+// See the F0Estimate.h for a description of the algorithm, also
+// outlined inline below.
+
+F0Estimate::F0Estimate( const vector<double> & amps,
+ const vector<double> & freqs,
+ double fmin, double fmax,
+ double resolution ) :
+ m_frequency( 0 ),
+ m_confidence( 0 )
+{
+ if ( fmin > fmax )
+ {
+ std::swap( fmin, fmax );
+ }
+ // never consider DC (0 Hz) to be a valid fundamental
+ fmin = std::max( 1., fmin );
+
+ // -------------------------------------------------------------------------
+ // 1) Identify candidate F0s as the integer divisors of the sinusoidal
+ // frequencies provided, within the specified range (this algorithm
+ // relies on the reasonable assumption that for any frequency recognized
+ // as a likely F0, at least one of the sinusoidal frequencies must
+ // represent a harmonic, the likelihood function makes this same
+
+ // First collect candidate frequencies: all integer
+ // divisors of the peak frequencies that are between
+ // fmin and fmax.
+ vector< double > eval_freqs;
+ compute_candidate_freqs( freqs, fmin, fmax, eval_freqs );
+
+ if ( ! eval_freqs.empty() )
+ {
+ // Compute a normalization factor equal to the total
+ // energy represented by all the peaks passed in
+ // amps and freqs, so that the value of the likelihood
+ // function does not depend on the overall signal
+ // amplitude, but instead depends only on the quality
+ // of the estimate, or the confidence in the result,
+ // and the quality of the final estimate can be evaluated
+ // by the value of the likelihood function.
+ double normalization =
+ 1.0 / std::inner_product( amps.begin(), amps.end(), amps.begin(), 0.0 );
+
+ // Evaluate the likelihood function at the candidate frequencies.
+ vector < double > Q( eval_freqs.size() );
+ evaluate_Q( amps, freqs, eval_freqs, Q, normalization );
+
+ // -------------------------------------------------------------------------
+ // 2) Select the highest frequency candidate that nearly maximizes the
+ // likelihood function (because all subharmonics of the true F0 will
+ // be equal in likelihood to the true F0, but no higher frequency can
+ // be as likely).
+
+ // Find the highest frequency corresponding to a high value of Q
+ // (the most likely candidate).
+
+ vector<double>::size_type idx =
+ std::max_element( Q.begin(), Q.end() ) - Q.begin();
+
+ double bestFreq = eval_freqs[ idx ];
+ double bestQ = Q[ idx ];
+
+ // -------------------------------------------------------------------------
+ // 2a) Check the likelihood of integer multiples of the best candidate,
+ // choose the highest multiple (within the specified range) that
+ // as likely as the best candidate frequency to be the new best
+ // candidate.
+
+ // Check integer multiples of the best candidate frequency,
+ // so that we can be certain that the peak doesn't
+ // correspond to a subharmonic of the true most-likely
+ // F0 in the range [fmin,fmax].
+ //
+ // While the next octave up is in range, and its likelihood
+ // is within 5% of the previously found peak, accept the
+ // octave as the better candidate (when we reach the true
+ // best candidate frequency, the next multiple should be
+ // much less likely).
+
+ double nextF = 2 * bestFreq;
+ double nextQ = evaluate_Q( amps, freqs, nextF, normalization );
+
+ while ( fmax > nextF && ( 0.95 * bestQ ) < nextQ )
+ {
+ // update best candidate:
+ bestFreq = nextF;
+ bestQ = nextQ;
+
+ // consider the next multiple
+ nextF += bestFreq;
+ nextQ = evaluate_Q( amps, freqs, nextF, normalization );
+ }
+
+// notifier << "peak is : " << bestFreq
+// << " Hz, Q: " << bestQ << endl;
+
+ // -------------------------------------------------------------------------
+ // 3) Refine the best candidate using the secant method for refining the
+ // root of the derivative of the likelihood function in the neighborhood
+ // of the best candidate (because a peak in the likelihood function is
+ // a root of the derivative of that function).
+
+
+ // Refine this estimate using the secant method.
+ //
+ // Check the derivative function: if the slope (derivative)
+ // is positive, then assume that bestFreq is just below the
+ // root, and choose a second frequency just greater than
+ // bestFreq. Otherwise, assume that bestFreq is just above
+ // the root of the derivative function, and choose a second
+ // frequency just below bestFreq.
+
+ double altFreq = bestFreq - resolution;
+ if ( 0 < evaluate_Qprime( amps, freqs, bestFreq ) )
+ {
+ altFreq = bestFreq + resolution;
+ }
+
+ // Now invoke the secant method to attempt to refine
+ // the root estimate:
+ m_frequency = secant_method( amps, freqs,
+ bestFreq, altFreq,
+ resolution );
+
+
+// notifier << "best candidate is : " << bestFreq
+// << " Hz, Q: " << bestQ << endl;
+// notifier << "secant method found : " << m_frequency
+// << " Hz, Q: "
+// << evaluate_Q( amps, freqs, m_frequency, normalization ) << endl;
+//
+
+ // What if the secant method flies off to some other root?
+ // Check that the root is still between fmin and fmax.
+ if ( m_frequency < fmin || m_frequency > fmax )
+ {
+ // If refining fails, just use the best candidate estimate.
+ m_frequency = bestFreq;
+ }
+
+
+ //
+ // Could also use the bisection method, or the false position method, which
+ // always converge. All that is required is that two points on the
+ // function (the derivative of the likelihood function, in this case)
+ // having opposite signs are used to begin the search. So we need
+ // to first find a nearby freq at which the derivative of Q evaluates
+ // with the opposite sign as bestFreq.
+ //
+
+
+ // Compute the value of the likelihood function at this frequency.
+ m_confidence = evaluate_Q( amps, freqs, m_frequency, normalization );
+
+ // If the secant method makes things worse, then just go with the
+ // the most likely candidate.
+ //
+ // This is a sanity measure, should never happen.
+ if ( bestQ > m_confidence )
+ {
+ m_confidence = bestQ;
+ m_frequency = bestFreq;
+ }
+
+// notifier << "refined to: " << m_frequency
+// << " Hz, Q: " << m_confidence << endl;
+
+ }
+
+}
+
+
+// ---------------------------------------------------------------------------
+// --- local helpers ---
+// ---------------------------------------------------------------------------
+
+
+// Collect candidate frequencies in eval_freqs.
+// Candidates are all integer divisors
+// between fmin and fmax of any frequency in the
+// vector of peak frequencies provided.
+
+static void
+compute_candidate_freqs( const vector< double > & peak_freqs,
+ double fmin, double fmax,
+ vector< double > & eval_freqs )
+{
+ Assert( fmax > fmin );
+
+ eval_freqs.clear();
+
+ for ( vector< double >::const_iterator pk = peak_freqs.begin();
+ pk != peak_freqs.end();
+ ++pk )
+ {
+ // check all integer divisors of *pk
+ double div = 1;
+ double f = *pk;
+
+ // reject all the ones greater than fmax
+ while( f > fmax )
+ {
+ ++div;
+ f = *pk / div;
+ }
+
+ // keep the the ones that are between fmin
+ // and fmax
+ while( f >= fmin )
+ {
+ eval_freqs.push_back( f );
+ ++div;
+ f = *pk / div;
+ }
+ }
+
+ // sort the candidats
+ sort( eval_freqs.begin(), eval_freqs.end() );
+}
+
+
+// ---------------------------------------------------------------------------
+// --- likelihood function evaluation ---
+// ---------------------------------------------------------------------------
+
+
+// Qterm is a functor to help compute terms
+// in the likelihood function sum.
+
+struct Qterm
+{
+ double f0;
+ Qterm( double f ) : f0(f) {}
+
+ double operator()( double amp, double freq ) const
+ {
+ double arg = 2*Pi*freq/f0;
+ return amp*amp*std::cos(arg);
+ }
+};
+
+// evaluate_Q
+//
+// Evaluate the likelihood function at the specified
+// frequency.
+
+static double
+evaluate_Q( const vector<double> & amps,
+ const vector<double> & freqs,
+ double eval_freq,
+ double norm )
+{
+ double prod =
+ std::inner_product( amps.begin(), amps.end(),
+ freqs.begin(),
+ 0.,
+ std::plus< double >(),
+ Qterm( eval_freq ) );
+
+ return prod * norm;
+}
+
+// evaluate_Q
+//
+// Evaluate the normalized likelihood function at a range of
+// frequencies, return the results in the vector Q.
+
+static void
+evaluate_Q( const vector<double> & amps,
+ const vector<double> & freqs,
+ const vector<double> & eval_freqs,
+ vector<double> & Q )
+{
+ Assert( eval_freqs.size() == Q.size() );
+ Assert( amps.size() == freqs.size() );
+
+ // Compute a normalization factor equal to the total
+ // energy represented by all the peaks passed in
+ // amps and freqs, so that the value of the likelihood
+ // function does not depend on the overall signal
+ // amplitude, but instead depends only on the quality
+ // of the estimate, or the confidence in the result,
+ // and the quality of the final estimate can be evaluated
+ // by the value of the likelihood function.
+ double etotal = std::inner_product( amps.begin(), amps.end(), amps.begin(), 0.0 );
+ double norm = 1.0 / etotal;
+
+ evaluate_Q( amps, freqs, eval_freqs, Q, norm );
+}
+
+
+
+// evaluate_Q
+//
+// Evaluate the normalized likelihood function at a range of
+// frequencies, using the normalization factor provided, and
+// return the results in the vector Q.
+
+static void
+evaluate_Q( const vector<double> & amps,
+ const vector<double> & freqs,
+ const vector<double> & eval_freqs,
+ vector<double> & Q,
+ double norm )
+{
+ Assert( eval_freqs.size() == Q.size() );
+ Assert( amps.size() == freqs.size() );
+
+ // iterate over the frequencies at which to
+ // evaluate the likelihood function:
+ vector<double>::const_iterator freq_it = eval_freqs.begin();
+ vector<double>::iterator Q_it = Q.begin();
+ while ( freq_it != eval_freqs.end() )
+ {
+ double f = *freq_it;
+
+ double result = evaluate_Q( amps, freqs, f, norm );
+
+ *Q_it++ = result;
+ ++freq_it;
+ }
+}
+
+// ---------------------------------------------------------------------------
+// --- likelihood function derivative evaluation ---
+// ---------------------------------------------------------------------------
+
+
+// Qprimeterm is a functor to help compute terms
+// in the likelihood function derivative sum, used
+// in the secant method of root refinement.
+
+struct Qprimeterm
+{
+ double f0;
+ Qprimeterm( double f ) : f0(f) {}
+
+ double operator()( double amp, double freq ) const
+ {
+ double arg = 2*Pi*freq/f0;
+ return amp*amp*std::sin(arg)*arg/f0;
+ }
+};
+
+
+// evaluate_Qprime
+//
+// Evaluate the derivative of the likelihood function (w.r.t. frequency)
+// at the specified frequency.
+
+static double
+evaluate_Qprime( const vector<double> & amps,
+ const vector<double> & freqs,
+ double eval_freq )
+{
+ double prod =
+ std::inner_product( amps.begin(), amps.end(),
+ freqs.begin(),
+ 0.,
+ std::plus< double >(),
+ Qprimeterm( eval_freq ) );
+
+ return prod;
+}
+
+// ---------------------------------------------------------------------------
+// --- secant method of refining a root/peak estimate ---
+// ---------------------------------------------------------------------------
+
+// secant_method
+//
+// Find roots of the derivative of the likelihood
+// function using the secant method, return the
+// value of x (frequency) at which the roots is found.
+
+static double
+secant_method( const vector<double> & amps,
+ const vector<double> & freqs,
+ double f1, double f2, double precision )
+{
+ double xn = f1;
+ double xnm1 = f2;
+ double fxnm1 = evaluate_Qprime( amps, freqs, xnm1 );
+
+ const unsigned int MaxIters = 20;
+
+ unsigned int iters = 0;
+ double deltax = 0.0;
+
+ // Iterate until delta is small, or blows up,
+ // or we have iterated too many times.
+ do
+ {
+ double fxn = evaluate_Qprime( amps, freqs, xn );
+
+ deltax = fxn * (xn - xnm1)/(fxn - fxnm1);
+
+ xnm1 = xn;
+ xn = xn - deltax;
+
+ fxnm1 = fxn;
+
+ } while( // fabs( deltax ) > precision &&
+ IsFiniteNumber( deltax ) &&
+ ++iters < MaxIters );
+
+
+ // Check whether delta blew up. If it did, revert to the
+ // previous value of x.
+
+ if ( ! IsFiniteNumber( deltax ) )
+ {
+ xn = xnm1;
+ }
+
+ return xn;
+}
+
+#if 0
+// ---------------------------------------------------------------------------
+// --- local helpers - dumb old way ---
+// ---------------------------------------------------------------------------
+
+
+static void
+compute_eval_freqs( double fmin, double fmax,
+ vector<double> & eval_freqs );
+
+static vector<double>::const_iterator
+choose_peak( const vector<double> & Q );
+
+// ---------------------------------------------------------------------------
+// F0Estimate constructor -- iterative method
+// ---------------------------------------------------------------------------
+// Iteratively compute the value of the likelihood function
+// at a range of frequencies around the peak likelihood.
+// Store the maximum value when the range of likelihood
+// values computed is less than the specified resolution.
+// Store the frequency and the normalized value of the
+// likelihood function at that frequency (1.0 indicates that
+// all the peaks are perfect harmonics of the estimated
+// frequency).
+
+void
+F0Estimate::construct_iterative_method( const vector<double> & amps,
+ const vector<double> & freqs,
+ double fmin, double fmax,
+ double resolution )
+{
+
+ // when the frequency range is small, few samples are
+ // needed, but initially make sure to sample at least
+ // every 20 Hz.
+ // Scratch that, 20 Hz isn't fine enough, could miss a
+ // peak that way, try 2 Hz. There might be some room to
+ // adjust this parameter to trade off speed for robustness.
+ int Nsamps = std::max( 8, (int)std::ceil((fmax-fmin)*0.5) );
+ vector<double> eval_freqs, Q;
+ double peak_freq = fmin, peak_Q = 0;
+
+ // invariant:
+ // the likelihood function for the estimate of the fundamental
+ // frequency is maximized at some frequency between
+ // fmin and fmax (stop when that range is smaller
+ // than the resolution)
+ do
+ {
+ // determine the frequencies at which to evaluate
+ // the likelihood function
+ eval_freqs.resize( Nsamps );
+ compute_eval_freqs( fmin, fmax, eval_freqs );
+
+ // evaluate the likelihood function at those
+ // frequencies:
+ Q.resize( Nsamps );
+ evaluate_Q( amps, freqs, eval_freqs, Q );
+
+ // find the highest frequency at which the likelihood
+ // function peaks:
+ vector<double>::const_iterator peak = choose_peak( Q );
+ int peak_idx = peak - Q.begin();
+ peak_Q = *peak;
+ peak_freq = eval_freqs[ peak_idx ];
+
+ // update search range:
+ fmin = eval_freqs[ std::max(peak_idx - 1, 0) ];
+ fmax = eval_freqs[ std::min(peak_idx + 1, Nsamps - 1) ];
+ Nsamps = std::max( 8, (int)std::ceil((fmax-fmin)*0.05) );
+
+ } while ( (fmax - fmin) > resolution );
+
+ m_frequency = peak_freq;
+ m_confidence = peak_Q;
+}
+
+// compute_eval_freqs
+//
+// Fill the frequency vector with a sampling
+// of the range [fmin,fmax].
+//
+// (used by dumb old iterative method)
+//
+static void
+compute_eval_freqs( double fmin, double fmax,
+ vector<double> & eval_freqs )
+{
+ Assert( fmax > fmin );
+
+ double delta = (fmax-fmin)/(eval_freqs.size()-1);
+ double f = fmin;
+ vector<double>::iterator it = eval_freqs.begin();
+ while( it != eval_freqs.end() )
+ {
+ *it++ = f;
+ f += delta;
+ }
+ eval_freqs.back() = fmax;
+}
+
+// choose_peak
+//
+// Return the position of last peak that
+// in the vector Q.
+//
+static vector<double>::const_iterator
+choose_peak( const vector<double> & Q )
+{
+ Assert( !Q.empty() );
+
+ double Qmax = *std::max_element( Q.begin(), Q.end() );
+ vector<double>::const_iterator it = (Q.end()) - 1;
+ double tmp = *it;
+
+ // this threshold determines how strong the
+ // highest-frequency peak in the likelihood
+ // function needs to be relative to the overall
+ // peak. For strongly periodic signals, this can
+ // be quite near to 1, but for things that are
+ // somewhat non-harmonic, setting it too high
+ // gives octave errors. Cannot tell whether errors
+ // will be introduced by having it too low.
+ const double threshold = 0.85 * Qmax;
+ while( (it != Q.begin()) && ((*it < threshold) || (*it < *(it-1))) )
+ {
+ --it;
+ tmp = *it;
+ }
+
+ return it;
+}
+
+#endif
+
+} // end of namespace Loris