summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Glover <glover.john@gmail.com>2010-10-29 12:31:23 +0100
committerJohn Glover <glover.john@gmail.com>2010-10-29 12:31:23 +0100
commit3fd1ab08d784eacafbea9e931267f1aac2b2b050 (patch)
tree897837a3d0477f9d4cc1575770877e832f96df60
parent0e5d0c72ac003c9461d12a656caac7b8ad2d5222 (diff)
downloadsimpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.tar.gz
simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.tar.bz2
simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.zip
Added a fundamental frequency estimation implementation (Two-way mismatch)
-rw-r--r--mq.py92
-rw-r--r--readme.txt7
2 files changed, 86 insertions, 13 deletions
diff --git a/mq.py b/mq.py
index 7ef7b36..42a4932 100644
--- a/mq.py
+++ b/mq.py
@@ -16,6 +16,70 @@
import simpl
import numpy as np
+import operator as op
+
+def best_match(f, candidates):
+ best_diff = 44100.0
+ best_freq = 0.0
+ pos = 0
+ for i, c in enumerate(candidates):
+ if abs(f - c) < best_diff:
+ best_diff = abs(f - c)
+ best_freq = c
+ pos = i
+ return pos
+
+def TWM(peaks, f_min=0.0, f_max=3000.0, f_step=20.0):
+ # TWM parameters
+ p = 0.5
+ q = 1.4
+ r = 0.5
+ rho = 0.33
+ N = 8
+ Err = {}
+
+ max_amp = max([x.amplitude for x in peaks])
+
+ # remove all peaks with amplitude of less than 10% of max
+ # note: this is not in the paper, found that it improved accuracy however
+ peaks = [x for x in peaks if x.amplitude >= (max_amp * 0.1)]
+
+ # get the max frequency of the remaining peaks
+ max_freq = max([x.frequency for x in peaks])
+ if max_freq < f_max:
+ f_max = max_freq
+
+ f_current = f_min
+ while f_current < f_max:
+ Err_pm = 0.0
+ Err_mp = 0.0
+ harmonics = np.arange(f_current, f_max, f_current)
+ if len(harmonics) > N:
+ harmonics = harmonics[0:N]
+
+ # calculate mismatch between predicted and actual peaks
+ for h in harmonics:
+ k = best_match(f_current, [x.frequency for x in peaks])
+ f = peaks[k].frequency
+ a = peaks[k].amplitude
+ Err_pm += abs(h-f) * (h**-p) + (a / max_amp) * ((q * (abs(h-f)) * (h**-p) -r))
+
+ # calculate the mismatch between actual and predicted peaks
+ for x in peaks:
+ k = best_match(x.frequency, harmonics)
+ f = harmonics[k]
+ xf = x.frequency
+ a = x.amplitude # TODO: is this value for 'a' correct?
+ Err_mp += abs(xf-f) * (xf**-p) + (a / max_amp) * ((q * (abs(xf-f)) * (xf**-p) -r))
+
+ # calculate the total error for f_current as a fundamental frequency
+ Err[f_current] = (Err_pm / len(harmonics)) + (rho * Err_mp / len(peaks))
+ f_current += f_step
+
+ # return the value with the minimum total error
+ print min(Err.iteritems(), key=op.itemgetter(1))[0]
+ return min(Err.iteritems(), key=op.itemgetter(1))[0]
+
class MQPeakDetection(simpl.PeakDetection):
"""Peak detection, based on the McAulay and Quatieri (MQ) algorithm.
@@ -32,10 +96,13 @@ class MQPeakDetection(simpl.PeakDetection):
self._window = simpl.zeros(self._window_size)
self._create_analysis_window()
self._fundamental = float(self._sampling_rate) / self._window_size
+ self._static_frame_size = False
+ self._current_peaks = []
def set_window_size(self, window_size):
self._window_size = window_size
self._fundamental = float(self._sampling_rate) / window_size
+ self._create_analysis_window()
def _create_analysis_window(self):
"Creates the analysis window, a normalised hamming window"
@@ -44,11 +111,19 @@ class MQPeakDetection(simpl.PeakDetection):
for i in range(self._window_size):
s += self._window[i]
self._window /= s
+
+ def get_next_frame_size(self):
+ if not len(self._current_peaks):
+ return self._frame_size
+
+ TWM(self._current_peaks, f_min=self._fundamental, f_step=self._fundamental)
+
+ return self._frame_size
def find_peaks_in_frame(self, frame):
"""Selects the highest peaks from the given spectral frame, up to a maximum of
self._max_peaks peaks."""
- current_peaks = []
+ self._current_peaks = []
# fft of frame
f = np.fft.rfft(frame * self._window)
spectrum = abs(f)
@@ -64,17 +139,17 @@ class MQPeakDetection(simpl.PeakDetection):
p.amplitude = current_mag
p.frequency = (bin - 1) * self._fundamental
p.phase = np.angle(f[bin-1])
- current_peaks.append(p)
+ self._current_peaks.append(p)
prev_mag = current_mag
current_mag = next_mag
# sort peaks, largest amplitude first, and up to a max of self.num_peaks peaks
- current_peaks.sort(cmp=simpl.compare_peak_amps)
- current_peaks.reverse()
- if len(current_peaks) > self._max_peaks:
- current_peaks = current_peaks[0:self._max_peaks]
+ self._current_peaks.sort(cmp=simpl.compare_peak_amps)
+ self._current_peaks.reverse()
+ if len(self._current_peaks) > self._max_peaks:
+ self._current_peaks = self._current_peaks[0:self._max_peaks]
# put back into ascending frequency order
- current_peaks.sort(cmp=simpl.compare_peak_freqs)
- return current_peaks
+ self._current_peaks.sort(cmp=simpl.compare_peak_freqs)
+ return self._current_peaks
class MQPartialTracking(simpl.PartialTracking):
@@ -238,7 +313,6 @@ class MQSynthesis(simpl.Synthesis):
for i in range(self.frame_size):
inst_amp += amp_inc
inst_phase = prev_phase + (prev_freq * i) + (alpha * (i**2)) + (beta * (i**3))
- #print inst_phase
self._current_frame[i] += (2.0 * inst_amp) * np.cos(inst_phase)
return self._current_frame
diff --git a/readme.txt b/readme.txt
index 864749a..f684064 100644
--- a/readme.txt
+++ b/readme.txt
@@ -25,8 +25,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Introduction
------------
-Simpl is an open source library for sinusoidal modelling written in the Python
-programming language and making use of Scientific Python (SciPy). The aim of this
+Simpl is an open source library for sinusoidal modelling written in C/C++ and Python,
+and making use of Scientific Python (SciPy). The aim of this
project is to tie together many of the existing sinusoidal modelling implementations
into a single unified system with a consistent API, as well as providing implementations
of some recently published sinusoidal modelling algorithms, many of which have yet
@@ -79,7 +79,7 @@ See the main project page at http://mtg.upf.edu/static/libsms for more informati
The MQ algorithm is based on the following paper:
R. McAulay, T. Quatieri, "Speech Analysis/Synthesis Based on a Sinusoidal Representation",
-IEEE Transaction on Acoustics, Speech and Signal Processing, vol. 34, no. 4, pp. 744Ð754, 1986.
+IEEE Transaction on Acoustics, Speech and Signal Processing, vol. 34, no. 4, pp. 744-754, 1986.
To Do
@@ -114,7 +114,6 @@ sms:
- include stochastic residual synthesis in SMSResidual
mq:
-- fix peak detection module
- window size should change depending on detected pitch. No pitch detection algorithm described in the
original paper, so can use the SMS algorithm for now.