Added a fundamental frequency estimation implementation (Two-way mismatch)

author: John Glover <glover.john@gmail.com> 2010-10-29 12:31:23 +0100
committer: John Glover <glover.john@gmail.com> 2010-10-29 12:31:23 +0100
commit: 3fd1ab08d784eacafbea9e931267f1aac2b2b050 (patch)
tree: 897837a3d0477f9d4cc1575770877e832f96df60
parent: 0e5d0c72ac003c9461d12a656caac7b8ad2d5222 (diff)
download: simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.tar.gz
simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.tar.bz2
simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.zip
2 files changed, 86 insertions, 13 deletions
diff --git a/mq.py b/mq.py
index 7ef7b36..42a4932 100644
--- a/mq.py
+++ b/mq.py
@@ -16,6 +16,70 @@
 
 import simpl
 import numpy as np
+import operator as op
+
+def best_match(f, candidates):
+    best_diff = 44100.0
+    best_freq = 0.0
+    pos = 0
+    for i, c in enumerate(candidates):
+        if abs(f - c) < best_diff:
+            best_diff = abs(f - c)
+            best_freq = c
+            pos = i
+    return pos
+
+def TWM(peaks, f_min=0.0, f_max=3000.0, f_step=20.0):
+    # TWM parameters
+    p = 0.5
+    q = 1.4
+    r = 0.5
+    rho = 0.33
+    N = 8
+    Err = {}
+
+    max_amp = max([x.amplitude for x in peaks])
+
+    # remove all peaks with amplitude of less than 10% of max
+    # note: this is not in the paper, found that it improved accuracy however
+    peaks = [x for x in peaks if x.amplitude >= (max_amp * 0.1)]
+
+    # get the max frequency of the remaining peaks
+    max_freq = max([x.frequency for x in peaks])
+    if max_freq < f_max:
+        f_max = max_freq
+
+    f_current = f_min
+    while f_current < f_max:
+        Err_pm = 0.0
+        Err_mp = 0.0
+        harmonics = np.arange(f_current, f_max, f_current)
+        if len(harmonics) > N:
+            harmonics = harmonics[0:N]
+
+        # calculate mismatch between predicted and actual peaks
+        for h in harmonics:
+            k = best_match(f_current, [x.frequency for x in peaks])
+            f = peaks[k].frequency
+            a = peaks[k].amplitude
+            Err_pm += abs(h-f) * (h**-p) + (a / max_amp) * ((q * (abs(h-f)) * (h**-p) -r))
+
+        # calculate the mismatch between actual and predicted peaks
+        for x in peaks:
+            k = best_match(x.frequency, harmonics)
+            f = harmonics[k]
+            xf = x.frequency
+            a = x.amplitude # TODO: is this value for 'a' correct?
+            Err_mp += abs(xf-f) * (xf**-p) + (a / max_amp) * ((q * (abs(xf-f)) * (xf**-p) -r))
+
+        # calculate the total error for f_current as a fundamental frequency
+        Err[f_current] = (Err_pm / len(harmonics)) + (rho * Err_mp / len(peaks))
+        f_current += f_step
+    
+    # return the value with the minimum total error
+    print min(Err.iteritems(), key=op.itemgetter(1))[0]
+    return min(Err.iteritems(), key=op.itemgetter(1))[0]
+
 
 class MQPeakDetection(simpl.PeakDetection):
     """Peak detection, based on the McAulay and Quatieri (MQ) algorithm.
@@ -32,10 +96,13 @@ class MQPeakDetection(simpl.PeakDetection):
         self._window = simpl.zeros(self._window_size)
         self._create_analysis_window()
         self._fundamental = float(self._sampling_rate) / self._window_size
+        self._static_frame_size = False
+        self._current_peaks = []
         
     def set_window_size(self, window_size):
         self._window_size = window_size
         self._fundamental = float(self._sampling_rate) / window_size
+        self._create_analysis_window()
         
     def _create_analysis_window(self):
         "Creates the analysis window, a normalised hamming window"
@@ -44,11 +111,19 @@ class MQPeakDetection(simpl.PeakDetection):
         for i in range(self._window_size):
             s += self._window[i]
         self._window /= s
+
+    def get_next_frame_size(self):
+        if not len(self._current_peaks):
+            return self._frame_size
+
+        TWM(self._current_peaks, f_min=self._fundamental, f_step=self._fundamental)
+
+        return self._frame_size
         
     def find_peaks_in_frame(self, frame):
         """Selects the highest peaks from the given spectral frame, up to a maximum of 
         self._max_peaks peaks."""
-        current_peaks = []        
+        self._current_peaks = []        
         # fft of frame
         f = np.fft.rfft(frame * self._window)
         spectrum = abs(f)
@@ -64,17 +139,17 @@ class MQPeakDetection(simpl.PeakDetection):
                 p.amplitude = current_mag
                 p.frequency = (bin - 1) * self._fundamental
                 p.phase = np.angle(f[bin-1])
-                current_peaks.append(p)
+                self._current_peaks.append(p)
             prev_mag = current_mag
             current_mag = next_mag
         # sort peaks, largest amplitude first, and up to a max of self.num_peaks peaks        
-        current_peaks.sort(cmp=simpl.compare_peak_amps)   
-        current_peaks.reverse()
-        if len(current_peaks) > self._max_peaks:
-            current_peaks = current_peaks[0:self._max_peaks]
+        self._current_peaks.sort(cmp=simpl.compare_peak_amps)   
+        self._current_peaks.reverse()
+        if len(self._current_peaks) > self._max_peaks:
+            self._current_peaks = self._current_peaks[0:self._max_peaks]
         # put back into ascending frequency order
-        current_peaks.sort(cmp=simpl.compare_peak_freqs)
-        return current_peaks
+        self._current_peaks.sort(cmp=simpl.compare_peak_freqs)
+        return self._current_peaks
 
 
 class MQPartialTracking(simpl.PartialTracking):
@@ -238,7 +313,6 @@ class MQSynthesis(simpl.Synthesis):
             for i in range(self.frame_size):
                 inst_amp += amp_inc
                 inst_phase = prev_phase + (prev_freq * i) + (alpha * (i**2)) + (beta * (i**3))
-                #print inst_phase
                 self._current_frame[i] += (2.0 * inst_amp) * np.cos(inst_phase)
 
         return self._current_frame
diff --git a/readme.txt b/readme.txt
index 864749a..f684064 100644
--- a/readme.txt
+++ b/readme.txt
@@ -25,8 +25,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Introduction
 ------------
 
-Simpl is an open source library for sinusoidal modelling written in the Python
-programming language and making use of Scientific Python (SciPy). The aim of this
+Simpl is an open source library for sinusoidal modelling written in C/C++ and Python,
+and making use of Scientific Python (SciPy). The aim of this
 project is to tie together many of the existing sinusoidal modelling implementations
 into a single unified system with a consistent API, as well as providing implementations
 of some recently published sinusoidal modelling algorithms, many of which have yet
@@ -79,7 +79,7 @@ See the main project page at http://mtg.upf.edu/static/libsms for more informati
 
 The MQ algorithm is based on the following paper:
 R. McAulay, T. Quatieri, "Speech Analysis/Synthesis Based on a Sinusoidal Representation", 
-IEEE Transaction on Acoustics, Speech and Signal Processing, vol. 34, no. 4, pp. 744Ð754, 1986.
+IEEE Transaction on Acoustics, Speech and Signal Processing, vol. 34, no. 4, pp. 744-754, 1986.
 
 
 To Do
@@ -114,7 +114,6 @@ sms:
 - include stochastic residual synthesis in SMSResidual
 
 mq:
-- fix peak detection module
 - window size should change depending on detected pitch. No pitch detection algorithm described in the
   original paper, so can use the SMS algorithm for now.
author	John Glover <glover.john@gmail.com>	2010-10-29 12:31:23 +0100
committer	John Glover <glover.john@gmail.com>	2010-10-29 12:31:23 +0100
commit	3fd1ab08d784eacafbea9e931267f1aac2b2b050 (patch)
tree	897837a3d0477f9d4cc1575770877e832f96df60
parent	0e5d0c72ac003c9461d12a656caac7b8ad2d5222 (diff)
download	simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.tar.gz simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.tar.bz2 simpl-3fd1ab08d784eacafbea9e931267f1aac2b2b050.zip