summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Glover <j@johnglover.net> 2012-06-30 19:04:02 +0100
committer John Glover <j@johnglover.net> 2012-06-30 19:04:02 +0100
commit b2e6565c4f94134948cb2d38c27b8c062191ce4a (patch)
tree 9e8f849a149723389c033d5005fe082e659687f6
parent 1ef9783c3faed6a18e8cc2fba34f48b50bedd08a (diff)
downloadsimpl-b2e6565c4f94134948cb2d38c27b8c062191ce4a.tar.gz
simpl-b2e6565c4f94134948cb2d38c27b8c062191ce4a.tar.bz2
simpl-b2e6565c4f94134948cb2d38c27b8c062191ce4a.zip
[sms] Add C++ implementation of SMSPeakDetection.
-rw-r--r--setup.py69
-rw-r--r--simpl/peak_detection.pxd5
-rw-r--r--simpl/peak_detection.pyx7
-rw-r--r--src/simpl/peak_detection.cpp120
-rw-r--r--src/simpl/peak_detection.h50
-rw-r--r--src/sms/sms.h4
-rw-r--r--tests/test_peak_detection.py106
7 files changed, 306 insertions, 55 deletions
diff --git a/setup.py b/setup.py
index 4e6695d..459b329 100644
--- a/setup.py
+++ b/setup.py
@@ -8,11 +8,10 @@ many of which have yet to be released in software. Simpl is primarily intended
as a tool for other researchers in the field, allowing them to easily combine,
compare and contrast many of the published analysis/synthesis algorithms.
"""
+import os
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
-import os
-from glob import glob
# -----------------------------------------------------------------------------
# Global
@@ -35,16 +34,11 @@ except ImportError:
macros = []
link_args = []
swig_opts = ['-c++']
-include_dirs = [numpy_include, '/usr/local/include']
-
-simpl_sources = glob('src/simpl/*.cpp')
-simpl_include_dirs = ['src/simpl']
-simpl_include_dirs.extend(include_dirs)
+include_dirs = ['simpl', 'src/simpl', 'src/sms', 'src/sndobj',
+ 'src/sndobj/rfftw', numpy_include, '/usr/local/include']
+libs = ['m', 'fftw3', 'gsl', 'gslcblas']
+sources = []
-base = Extension("simpl.base",
- sources=["simpl/base.pyx", "src/simpl/base.cpp"],
- include_dirs=["simpl"] + simpl_include_dirs,
- language="c++")
# -----------------------------------------------------------------------------
# SndObj Library
@@ -80,6 +74,8 @@ fftw_sources = """
sndobj_sources = map(lambda x: 'src/sndobj/' + x, sndobj_sources)
sndobj_sources.extend(map(lambda x: 'src/sndobj/rfftw/' + x, fftw_sources))
+# sources.extend(sndobj_sources)
+
sndobj_sources.append("simpl/sndobj.i")
sndobj_macros = [('PYTHON_WRAP', None)]
@@ -99,18 +95,18 @@ sndobj = Extension("simpl/_simplsndobj",
# SMS
# -----------------------------------------------------------------------------
-# sms_sources = """
-# OOURA.c cepstrum.c peakContinuation.c soundIO.c tables.c
-# fileIO.c peakDetection.c spectralApprox.c transforms.c
-# filters.c residual.c spectrum.c windows.c SFMT.c fixTracks.c
-# sineSynth.c stocAnalysis.c harmDetection.c sms.c synthesis.c
-# analysis.c modify.c
-# """.split()
+sms_sources = """
+ OOURA.c cepstrum.c peakContinuation.c soundIO.c tables.c
+ fileIO.c peakDetection.c spectralApprox.c transforms.c
+ filters.c residual.c spectrum.c windows.c SFMT.c fixTracks.c
+ sineSynth.c stocAnalysis.c harmDetection.c sms.c synthesis.c
+ analysis.c modify.c
+ """.split()
+
+sms_sources = map(lambda x: 'src/sms/' + x, sms_sources)
+sources.extend(sms_sources)
-# sms_sources = map(lambda x: 'src/sms/' + x, sms_sources)
-# sms_sources.append("simpl/sms.i")
-# sms_include_dirs = ['src/sms']
-# sms_include_dirs.extend(include_dirs)
+sms_sources.append("simpl/sms.i")
# sms = Extension("simpl/_simplsms",
# sources=sms_sources,
@@ -118,20 +114,27 @@ sndobj = Extension("simpl/_simplsndobj",
# libraries=['m', 'fftw3', 'gsl', 'gslcblas'],
# extra_compile_args=['-DMERSENNE_TWISTER'])
-# sms = Extension("simpl.sms",
-# sources=["simpl/sms.pyx", "simpl/base.pyx",
-# "src/simpl/simplsms.cpp", "src/simpl/base.cpp"],
-# include_dirs=["simpl"] + simpl_include_dirs,
-# language="c++")
+# -----------------------------------------------------------------------------
+# Base
+# -----------------------------------------------------------------------------
+base = Extension(
+ "simpl.base",
+ sources=["simpl/base.pyx", "src/simpl/base.cpp"],
+ include_dirs=include_dirs,
+ language="c++"
+)
# -----------------------------------------------------------------------------
# Peak Detection
# -----------------------------------------------------------------------------
peak_detection = Extension(
"simpl.peak_detection",
- sources=["simpl/peak_detection.pyx", "src/simpl/peak_detection.cpp",
- "src/simpl/base.cpp"],
- include_dirs=["simpl"] + simpl_include_dirs,
+ sources=sources + ["simpl/peak_detection.pyx",
+ "src/simpl/peak_detection.cpp",
+ "src/simpl/base.cpp"],
+ include_dirs=include_dirs,
+ libraries=libs,
+ extra_compile_args=['-DMERSENNE_TWISTER'],
language="c++"
)
@@ -142,7 +145,7 @@ partial_tracking = Extension(
"simpl.partial_tracking",
sources=["simpl/partial_tracking.pyx", "src/simpl/partial_tracking.cpp",
"src/simpl/base.cpp"],
- include_dirs=["simpl"] + simpl_include_dirs,
+ include_dirs=include_dirs,
language="c++"
)
@@ -154,7 +157,7 @@ synthesis = Extension(
"simpl.synthesis",
sources=["simpl/synthesis.pyx", "src/simpl/synthesis.cpp",
"src/simpl/base.cpp"],
- include_dirs=["simpl"] + simpl_include_dirs,
+ include_dirs=include_dirs,
language="c++"
)
@@ -166,7 +169,7 @@ residual = Extension(
"simpl.residual",
sources=["simpl/residual.pyx", "src/simpl/residual.cpp",
"src/simpl/base.cpp"],
- include_dirs=["simpl"] + simpl_include_dirs,
+ include_dirs=include_dirs,
language="c++"
)
diff --git a/simpl/peak_detection.pxd b/simpl/peak_detection.pxd
index 7ff9d09..867a89e 100644
--- a/simpl/peak_detection.pxd
+++ b/simpl/peak_detection.pxd
@@ -32,8 +32,13 @@ cdef extern from "../src/simpl/peak_detection.h" namespace "simpl":
void min_peak_separation(double new_min_peak_separation)
int num_frames()
c_Frame* frame(int frame_number)
+ void frames(vector[c_Frame*] new_frames)
vector[c_Peak*] find_peaks_in_frame(c_Frame* frame)
vector[c_Frame*] find_peaks(int audio_size, double* audio)
cdef cppclass c_SMSPeakDetection "simpl::SMSPeakDetection"(c_PeakDetection):
c_SMSPeakDetection()
+ void hop_size(int new_hop_size)
+ void max_peaks(int new_max_peaks)
+ vector[c_Peak*] find_peaks_in_frame(c_Frame* frame)
+ vector[c_Frame*] find_peaks(int audio_size, double* audio)
diff --git a/simpl/peak_detection.pyx b/simpl/peak_detection.pyx
index a2fc8f5..12f47b2 100644
--- a/simpl/peak_detection.pyx
+++ b/simpl/peak_detection.pyx
@@ -63,8 +63,11 @@ cdef class PeakDetection:
property frames:
def __get__(self):
return [self.frame(i) for i in range(self.thisptr.num_frames())]
- def __set__(self, f):
- raise Exception("NotImplemented")
+        def __set__(self, new_frames):
+            # Collect the underlying c_Frame pointer of every Frame in
+            # new_frames into a C++ vector and hand it to the C++ object.
+            #
+            # The checked cast (<Frame?>) raises TypeError for any item that
+            # is not a Frame. An unchecked cast would read thisptr from
+            # whatever object happened to be in the sequence.
+            #
+            # NOTE(review): only raw pointers are passed on - confirm that
+            # the Python Frame objects outlive their use on the C++ side.
+            cdef vector[c_Frame*] c_frames
+            for f in new_frames:
+                c_frames.push_back((<Frame?>f).thisptr)
+            self.thisptr.frames(c_frames)
def find_peaks_in_frame(self, Frame frame not None):
peaks = []
diff --git a/src/simpl/peak_detection.cpp b/src/simpl/peak_detection.cpp
index 1d44b6f..7a31846 100644
--- a/src/simpl/peak_detection.cpp
+++ b/src/simpl/peak_detection.cpp
@@ -113,6 +113,10 @@ Frames PeakDetection::frames() {
return _frames;
}
+// Replace the stored frame list with new_frames.
+// NOTE(review): the previous contents of _frames are overwritten here
+// without being released - confirm who owns the old and the new Frame
+// pointers.
+void PeakDetection::frames(Frames new_frames) {
+ _frames = new_frames;
+}
+
// Find and return all spectral peaks in a given frame of audio
Peaks PeakDetection::find_peaks_in_frame(Frame* frame) {
Peaks peaks;
@@ -153,4 +157,120 @@ Frames PeakDetection::find_peaks(int audio_size, sample* audio) {
// SMSPeakDetection
// ---------------------------------------------------------------------------
SMSPeakDetection::SMSPeakDetection() {
+    // Start libsms and load its default analysis parameters.
+    sms_init();
+    sms_initAnalParams(&_analysis_params);
+
+    SMSAnalysisParams& params = _analysis_params;
+
+    // Sampling setup, taken from the values already held by this object.
+    params.iSamplingRate = _sampling_rate;
+    params.iFrameRate = _sampling_rate / _hop_size;
+
+    // Spectral analysis configuration.
+    params.iFormat = SMS_FORMAT_HP;
+    params.iWindowType = SMS_WIN_HAMMING;
+    params.fHighestFreq = 20000;
+    params.preEmphasis = 0;
+
+    // Peak, track and guide counts all follow _max_peaks.
+    params.maxPeaks = _max_peaks;
+    params.nTracks = _max_peaks;
+    params.nGuides = _max_peaks;
+
+    // Frame delay and track clean-up settings.
+    params.iMaxDelayFrames = 4;
+    params.analDelay = 0;
+    params.minGoodFrames = 1;
+    params.iCleanTracks = 0;
+
+    // Initialise the analysis and the spectral peak storage with the
+    // parameters chosen above.
+    sms_initAnalysis(&_analysis_params);
+    sms_initSpectralPeaks(&_peaks, _max_peaks);
+
+    // By default, SMS will change the size of the frames being read
+    // depending on the detected fundamental frequency (if any) of the
+    // input sound. To prevent this behaviour (useful when comparing
+    // different analysis algorithms), set _static_frame_size to true.
+    _static_frame_size = false;
+}
+
+// Release the libsms resources used by this object: first the analysis
+// parameters, then the spectral peak storage, and finally libsms itself.
+SMSPeakDetection::~SMSPeakDetection() {
+ sms_freeAnalysis(&_analysis_params);
+ sms_freeSpectralPeaks(&_peaks);
+ sms_free();
+}
+
+// Return the size of the next frame to read, as stored in the
+// sizeNextRead field of the SMS analysis parameters.
+int SMSPeakDetection::next_frame_size() {
+ return _analysis_params.sizeNextRead;
+}
+
+// Set the hop size and re-initialise the SMS analysis with the matching
+// frame rate (sampling rate / hop size).
+//
+// new_hop_size: the new hop size. A value of zero or less is ignored and
+// the current settings are kept, because the hop size is used as a
+// divisor below.
+void SMSPeakDetection::hop_size(int new_hop_size) {
+    if(new_hop_size <= 0) {
+        return;
+    }
+
+    _hop_size = new_hop_size;
+
+    // Free the current analysis, update the frame rate, then initialise
+    // the analysis again with the updated parameters.
+    sms_freeAnalysis(&_analysis_params);
+    _analysis_params.iFrameRate = _sampling_rate / _hop_size;
+    sms_initAnalysis(&_analysis_params);
+}
+
+// Set the maximum number of peaks and re-initialise the SMS analysis and
+// the spectral peak storage with the new limit.
+void SMSPeakDetection::max_peaks(int new_max_peaks) {
+    // Requests above SMS_MAX_NPEAKS are clamped to SMS_MAX_NPEAKS.
+    _max_peaks = (new_max_peaks > SMS_MAX_NPEAKS) ? SMS_MAX_NPEAKS
+                                                  : new_max_peaks;
+
+    // Free the existing state before the counts are changed.
+    sms_freeAnalysis(&_analysis_params);
+    sms_freeSpectralPeaks(&_peaks);
+
+    // Peak, guide and track counts all follow the new limit.
+    _analysis_params.maxPeaks = _max_peaks;
+    _analysis_params.nGuides = _max_peaks;
+    _analysis_params.nTracks = _max_peaks;
+
+    sms_initAnalysis(&_analysis_params);
+    sms_initSpectralPeaks(&_peaks, _max_peaks);
+}
+
+// Run the SMS peak finder on one frame of audio and return the result as
+// a list of newly allocated Peak objects.
+Peaks SMSPeakDetection::find_peaks_in_frame(Frame* frame) {
+    // The return value is used as the number of valid entries in
+    // _peaks.pSpectralPeaks.
+    const int found = sms_findPeaks(frame->size(), frame->audio(),
+                                    &_analysis_params, &_peaks);
+
+    Peaks result;
+    for(int n = 0; n < found; ++n) {
+        const SMS_Peak& sms_peak = _peaks.pSpectralPeaks[n];
+
+        // Copy magnitude, frequency and phase into a simpl Peak.
+        Peak* peak = new Peak();
+        peak->amplitude = sms_peak.fMag;
+        peak->frequency = sms_peak.fFreq;
+        peak->phase = sms_peak.fPhase;
+        result.push_back(peak);
+    }
+    return result;
+}
+
+// Find and return all spectral peaks in a given audio signal.
+// The signal is split into consecutive frames, every frame is analysed
+// with find_peaks_in_frame, and the resulting frames are appended to
+// _frames, which is returned.
+//
+// audio_size: number of samples in audio
+// audio: the samples to analyse
+//
+// Unless _static_frame_size is set, the size of each frame is taken from
+// next_frame_size() just before the frame is created.
+//
+// TODO: This hops by frame size rather than hop size in order to
+// make sure the results are the same as with libsms. Make sure
+// we have the same number of frames as the other algorithms.
+Frames SMSPeakDetection::find_peaks(int audio_size, sample* audio) {
+    clear();
+
+    _analysis_params.iSizeSound = audio_size;
+
+    // Signed on purpose: the loop bound below can be negative when the
+    // signal is shorter than one hop. With an unsigned position that
+    // bound would be converted to a huge unsigned value and the loop
+    // would run far past the end of the audio.
+    int pos = 0;
+
+    // account for SMS analysis delay
+    // need an extra (max_frame_delay - 1) frames
+    //
+    // NOTE(review): this is a bitwise AND, so it only keeps the low bits
+    // of _hop_size selected by (iMaxDelayFrames - 1); with
+    // iMaxDelayFrames == 4 that is 0 for every hop size that is a
+    // multiple of 4. The comment above suggests a multiplication was
+    // intended. It is left unchanged here because a larger bound would
+    // make the loop index past the end of audio unless the input is
+    // zero-padded first.
+    int delay = (_analysis_params.iMaxDelayFrames - 1) & _hop_size;
+
+    const int end = (audio_size - _hop_size) + delay;
+
+    while(pos < end) {
+        // get the next frame size
+        if(!_static_frame_size) {
+            _frame_size = next_frame_size();
+        }
+
+        // A frame size of zero or less would never advance pos, so stop
+        // instead of looping (and allocating frames) forever.
+        if(_frame_size <= 0) {
+            break;
+        }
+
+        // get the next frame
+        Frame* f = new Frame(_frame_size);
+        f->audio(&audio[pos]);
+
+        // find peaks
+        Peaks peaks = find_peaks_in_frame(f);
+        f->add_peaks(&peaks);
+
+        _frames.push_back(f);
+        pos += _frame_size;
+    }
+
+    return _frames;
}
diff --git a/src/simpl/peak_detection.h b/src/simpl/peak_detection.h
index 0045762..577e0b5 100644
--- a/src/simpl/peak_detection.h
+++ b/src/simpl/peak_detection.h
@@ -3,8 +3,13 @@
#include "base.h"
+extern "C" {
+ #include "sms.h"
+}
+
using namespace std;
+
namespace simpl
{
@@ -16,7 +21,7 @@ namespace simpl
// ---------------------------------------------------------------------------
class PeakDetection {
- private:
+ protected:
int _sampling_rate;
int _frame_size;
bool _static_frame_size;
@@ -32,26 +37,27 @@ class PeakDetection {
virtual ~PeakDetection();
void clear();
- int sampling_rate();
- void sampling_rate(int new_sampling_rate);
- int frame_size();
- void frame_size(int new_frame_size);
- bool static_frame_size();
- void static_frame_size(bool new_static_frame_size);
+ virtual int sampling_rate();
+ virtual void sampling_rate(int new_sampling_rate);
+ virtual int frame_size();
+ virtual void frame_size(int new_frame_size);
+ virtual bool static_frame_size();
+ virtual void static_frame_size(bool new_static_frame_size);
virtual int next_frame_size();
- int hop_size();
- void hop_size(int new_hop_size);
- int max_peaks();
- void max_peaks(int new_max_peaks);
- std::string window_type();
- void window_type(std::string new_window_type);
- int window_size();
- void window_size(int new_window_size);
- sample min_peak_separation();
- void min_peak_separation(sample new_min_peak_separation);
+ virtual int hop_size();
+ virtual void hop_size(int new_hop_size);
+ virtual int max_peaks();
+ virtual void max_peaks(int new_max_peaks);
+ virtual std::string window_type();
+ virtual void window_type(std::string new_window_type);
+ virtual int window_size();
+ virtual void window_size(int new_window_size);
+ virtual sample min_peak_separation();
+ virtual void min_peak_separation(sample new_min_peak_separation);
int num_frames();
Frame* frame(int frame_number);
Frames frames();
+ void frames(Frames new_frames);
// Find and return all spectral peaks in a given frame of audio
virtual Peaks find_peaks_in_frame(Frame* frame);
@@ -67,8 +73,18 @@ class PeakDetection {
// SMSPeakDetection
// ---------------------------------------------------------------------------
class SMSPeakDetection : public PeakDetection {
+ // PeakDetection subclass that keeps its own libsms state (types from
+ // sms.h).
+ private:
+ // libsms analysis parameters owned by this object
+ SMSAnalysisParams _analysis_params;
+ // libsms spectral peak storage owned by this object
+ SMSSpectralPeaks _peaks;
+
public:
SMSPeakDetection();
+ ~SMSPeakDetection();
+ // The members below re-declare PeakDetection members of the same name.
+ int next_frame_size();
+ void hop_size(int new_hop_size);
+ void max_peaks(int new_max_peaks);
+ // Find and return all spectral peaks in a given frame of audio
+ Peaks find_peaks_in_frame(Frame* frame);
+ // Find and return the frames (with peaks) for a whole audio signal
+ Frames find_peaks(int audio_size, sample* audio);
};
diff --git a/src/sms/sms.h b/src/sms/sms.h
index 0613ab3..4e6e5c1 100644
--- a/src/sms/sms.h
+++ b/src/sms/sms.h
@@ -132,7 +132,7 @@ typedef struct
} SMS_Peak;
/* a collection of spectral peaks */
-typedef struct
+typedef struct SMSSpectralPeaks
{
SMS_Peak *pSpectralPeaks;
int nPeaks;
@@ -228,7 +228,7 @@ typedef struct
* and peak detection/continuation process can be re-computed with more accuracy.
*
*/
-typedef struct
+typedef struct SMSAnalysisParams
{
int iDebugMode; /*!< debug codes enumerated by SMS_DBG \see SMS_DBG */
int iFormat; /*!< analysis format code defined by SMS_Format \see SMS_Format */
diff --git a/tests/test_peak_detection.py b/tests/test_peak_detection.py
index b24b701..6d623b6 100644
--- a/tests/test_peak_detection.py
+++ b/tests/test_peak_detection.py
@@ -1,12 +1,20 @@
import os
import numpy as np
from nose.tools import assert_almost_equals
+import pysms
import simpl
import simpl.peak_detection as peak_detection
+PeakDetection = peak_detection.PeakDetection
+SMSPeakDetection = peak_detection.SMSPeakDetection
+
float_precision = 5
frame_size = 512
hop_size = 512
+max_peaks = 10
+max_partials = 10
+num_frames = 30
+num_samples = num_frames * hop_size
audio_path = os.path.join(
os.path.dirname(__file__), 'audio/flute.wav'
)
@@ -18,8 +26,104 @@ class TestPeakDetection(object):
cls.audio = simpl.read_wav(audio_path)[0]
def test_peak_detection(self):
- pd = peak_detection.PeakDetection()
+ pd = PeakDetection()
pd.find_peaks(self.audio)
assert len(pd.frames) == len(self.audio) / hop_size
assert len(pd.frames[0].peaks) == 0
+
+
+class TestSMSPeakDetection(object):
+    """Compare SMSPeakDetection against a reference run through pysms."""
+
+    def _pysms_analysis_params(self, sampling_rate):
+        """Return pysms analysis parameters set up for these tests."""
+        params = pysms.SMS_AnalParams()
+        pysms.sms_initAnalParams(params)
+
+        # sampling and spectral analysis settings
+        params.iSamplingRate = sampling_rate
+        params.iFrameRate = sampling_rate / hop_size
+        params.iWindowType = pysms.SMS_WIN_HAMMING
+        params.iFormat = pysms.SMS_FORMAT_HP
+        params.fDefaultFundamental = 100
+        params.fHighestFreq = 20000
+        params.preEmphasis = 0
+        params.iStochasticType = pysms.SMS_STOC_NONE
+
+        # peak, track and guide counts all use max_peaks
+        params.peakParams.iMaxPeaks = max_peaks
+        params.nTracks = max_peaks
+        params.nGuides = max_peaks
+
+        # delay and track clean-up settings
+        params.iMaxDelayFrames = 4
+        params.analDelay = 0
+        params.minGoodFrames = 1
+        params.iCleanTracks = 0
+        return params
+
+    def test_size_next_read(self):
+        """
+        test_size_next_read
+        Check that SMSPeakDetection reports the same next frame sizes
+        as a pysms analysis of the same audio.
+        """
+        audio, sampling_rate = simpl.read_wav(audio_path)
+
+        # --- reference run with pysms ---------------------------------
+        pysms.sms_init()
+        snd_header = pysms.SMS_SndHeader()
+
+        # Open the input file so that snd_header gets filled in
+        if pysms.sms_openSF(audio_path, snd_header):
+            raise NameError(
+                "error opening sound file: " + pysms.sms_errorString()
+            )
+
+        analysis_params = self._pysms_analysis_params(sampling_rate)
+        analysis_params.iMaxDelayFrames = num_frames + 1
+        if pysms.sms_initAnalysis(analysis_params, snd_header) != 0:
+            raise Exception("Error allocating memory for analysis_params")
+        analysis_params.nFrames = num_frames
+        sms_header = pysms.SMS_Header()
+        pysms.sms_fillHeader(sms_header, analysis_params, "pysms")
+
+        expected_sizes = []
+        offset = 0
+        read_size = 0
+
+        for _ in range(num_frames):
+            # move past the samples consumed by the previous frame
+            offset += read_size
+            read_size = analysis_params.sizeNextRead
+            expected_sizes.append(read_size)
+
+            # libsms wants float32 samples; zero-pad a short final chunk
+            chunk = np.array(audio[offset:offset + read_size],
+                             dtype=np.float32)
+            if len(chunk) < read_size:
+                padding = np.zeros(read_size - len(chunk), dtype=np.float32)
+                chunk = np.hstack((chunk, padding))
+
+            analysis_data = pysms.SMS_Data()
+            pysms.sms_allocFrameH(sms_header, analysis_data)
+            status = pysms.sms_analyze(chunk, analysis_data, analysis_params)
+            # the frame delay is larger than num_frames, so sms_analyze
+            # should never reach partial tracking and should return 0
+            assert status == 0
+            pysms.sms_freeFrame(analysis_data)
+
+        pysms.sms_freeAnalysis(analysis_params)
+        pysms.sms_closeSF()
+        pysms.sms_free()
+
+        # --- same frames through SMSPeakDetection ---------------------
+        pd = SMSPeakDetection()
+        pd.hop_size = hop_size
+        pd.max_peaks = max_peaks
+        offset = 0
+
+        for expected in expected_sizes:
+            pd.frame_size = pd.next_frame_size()
+            assert expected == pd.frame_size,\
+                (expected, pd.frame_size)
+            frame = simpl.Frame()
+            frame.size = pd.frame_size
+            frame.audio = audio[offset:offset + pd.frame_size]
+            pd.find_peaks_in_frame(frame)
+            offset += pd.frame_size