diff options
-rw-r--r-- | setup.py | 69 | ||||
-rw-r--r-- | simpl/peak_detection.pxd | 5 | ||||
-rw-r--r-- | simpl/peak_detection.pyx | 7 | ||||
-rw-r--r-- | src/simpl/peak_detection.cpp | 120 | ||||
-rw-r--r-- | src/simpl/peak_detection.h | 50 | ||||
-rw-r--r-- | src/sms/sms.h | 4 | ||||
-rw-r--r-- | tests/test_peak_detection.py | 106 |
7 files changed, 306 insertions, 55 deletions
@@ -8,11 +8,10 @@ many of which have yet to be released in software. Simpl is primarily intended as a tool for other researchers in the field, allowing them to easily combine, compare and contrast many of the published analysis/synthesis algorithms. """ +import os from distutils.core import setup from distutils.extension import Extension from Cython.Distutils import build_ext -import os -from glob import glob # ----------------------------------------------------------------------------- # Global @@ -35,16 +34,11 @@ except ImportError: macros = [] link_args = [] swig_opts = ['-c++'] -include_dirs = [numpy_include, '/usr/local/include'] - -simpl_sources = glob('src/simpl/*.cpp') -simpl_include_dirs = ['src/simpl'] -simpl_include_dirs.extend(include_dirs) +include_dirs = ['simpl', 'src/simpl', 'src/sms', 'src/sndobj', + 'src/sndobj/rfftw', numpy_include, '/usr/local/include'] +libs = ['m', 'fftw3', 'gsl', 'gslcblas'] +sources = [] -base = Extension("simpl.base", - sources=["simpl/base.pyx", "src/simpl/base.cpp"], - include_dirs=["simpl"] + simpl_include_dirs, - language="c++") # ----------------------------------------------------------------------------- # SndObj Library @@ -80,6 +74,8 @@ fftw_sources = """ sndobj_sources = map(lambda x: 'src/sndobj/' + x, sndobj_sources) sndobj_sources.extend(map(lambda x: 'src/sndobj/rfftw/' + x, fftw_sources)) +# sources.extend(sndobj_sources) + sndobj_sources.append("simpl/sndobj.i") sndobj_macros = [('PYTHON_WRAP', None)] @@ -99,18 +95,18 @@ sndobj = Extension("simpl/_simplsndobj", # SMS # ----------------------------------------------------------------------------- -# sms_sources = """ -# OOURA.c cepstrum.c peakContinuation.c soundIO.c tables.c -# fileIO.c peakDetection.c spectralApprox.c transforms.c -# filters.c residual.c spectrum.c windows.c SFMT.c fixTracks.c -# sineSynth.c stocAnalysis.c harmDetection.c sms.c synthesis.c -# analysis.c modify.c -# """.split() +sms_sources = """ + OOURA.c cepstrum.c peakContinuation.c soundIO.c tables.c + fileIO.c peakDetection.c spectralApprox.c transforms.c + filters.c residual.c spectrum.c windows.c SFMT.c fixTracks.c + sineSynth.c stocAnalysis.c harmDetection.c sms.c synthesis.c + analysis.c modify.c + """.split() + +sms_sources = map(lambda x: 'src/sms/' + x, sms_sources) +sources.extend(sms_sources) -# sms_sources = map(lambda x: 'src/sms/' + x, sms_sources) -# sms_sources.append("simpl/sms.i") -# sms_include_dirs = ['src/sms'] -# sms_include_dirs.extend(include_dirs) +sms_sources.append("simpl/sms.i") # sms = Extension("simpl/_simplsms", # sources=sms_sources, @@ -118,20 +114,27 @@ sndobj = Extension("simpl/_simplsndobj", # libraries=['m', 'fftw3', 'gsl', 'gslcblas'], # extra_compile_args=['-DMERSENNE_TWISTER']) -# sms = Extension("simpl.sms", -# sources=["simpl/sms.pyx", "simpl/base.pyx", -# "src/simpl/simplsms.cpp", "src/simpl/base.cpp"], -# include_dirs=["simpl"] + simpl_include_dirs, -# language="c++") +# ----------------------------------------------------------------------------- +# Base +# ----------------------------------------------------------------------------- +base = Extension( + "simpl.base", + sources=["simpl/base.pyx", "src/simpl/base.cpp"], + include_dirs=include_dirs, + language="c++" +) # ----------------------------------------------------------------------------- # Peak Detection # ----------------------------------------------------------------------------- peak_detection = Extension( "simpl.peak_detection", - sources=["simpl/peak_detection.pyx", "src/simpl/peak_detection.cpp", - "src/simpl/base.cpp"], - include_dirs=["simpl"] + simpl_include_dirs, + sources=sources + ["simpl/peak_detection.pyx", + "src/simpl/peak_detection.cpp", + "src/simpl/base.cpp"], + include_dirs=include_dirs, + libraries=libs, + extra_compile_args=['-DMERSENNE_TWISTER'], language="c++" ) @@ -142,7 +145,7 @@ partial_tracking = Extension( "simpl.partial_tracking", sources=["simpl/partial_tracking.pyx", "src/simpl/partial_tracking.cpp", "src/simpl/base.cpp"], - include_dirs=["simpl"] + simpl_include_dirs, + include_dirs=include_dirs, language="c++" ) @@ -154,7 +157,7 @@ synthesis = Extension( "simpl.synthesis", sources=["simpl/synthesis.pyx", "src/simpl/synthesis.cpp", "src/simpl/base.cpp"], - include_dirs=["simpl"] + simpl_include_dirs, + include_dirs=include_dirs, language="c++" ) @@ -166,7 +169,7 @@ residual = Extension( "simpl.residual", sources=["simpl/residual.pyx", "src/simpl/residual.cpp", "src/simpl/base.cpp"], - include_dirs=["simpl"] + simpl_include_dirs, + include_dirs=include_dirs, language="c++" ) diff --git a/simpl/peak_detection.pxd b/simpl/peak_detection.pxd index 7ff9d09..867a89e 100644 --- a/simpl/peak_detection.pxd +++ b/simpl/peak_detection.pxd @@ -32,8 +32,13 @@ cdef extern from "../src/simpl/peak_detection.h" namespace "simpl": void min_peak_separation(double new_min_peak_separation) int num_frames() c_Frame* frame(int frame_number) + void frames(vector[c_Frame*] new_frames) vector[c_Peak*] find_peaks_in_frame(c_Frame* frame) vector[c_Frame*] find_peaks(int audio_size, double* audio) cdef cppclass c_SMSPeakDetection "simpl::SMSPeakDetection"(c_PeakDetection): c_SMSPeakDetection() + void hop_size(int new_hop_size) + void max_peaks(int new_max_peaks) + vector[c_Peak*] find_peaks_in_frame(c_Frame* frame) + vector[c_Frame*] find_peaks(int audio_size, double* audio) diff --git a/simpl/peak_detection.pyx b/simpl/peak_detection.pyx index a2fc8f5..12f47b2 100644 --- a/simpl/peak_detection.pyx +++ b/simpl/peak_detection.pyx @@ -63,8 +63,11 @@ cdef class PeakDetection: property frames: def __get__(self): return [self.frame(i) for i in range(self.thisptr.num_frames())] - def __set__(self, f): - raise Exception("NotImplemented") + def __set__(self, new_frames): + cdef vector[c_Frame*] c_frames + for f in new_frames: + c_frames.push_back((<Frame>f).thisptr) + self.thisptr.frames(c_frames) def find_peaks_in_frame(self, Frame frame not None): peaks = [] diff --git a/src/simpl/peak_detection.cpp b/src/simpl/peak_detection.cpp index 1d44b6f..7a31846 100644 --- a/src/simpl/peak_detection.cpp +++ b/src/simpl/peak_detection.cpp @@ -113,6 +113,10 @@ Frames PeakDetection::frames() { return _frames; } +void PeakDetection::frames(Frames new_frames) { + _frames = new_frames; +} + // Find and return all spectral peaks in a given frame of audio Peaks PeakDetection::find_peaks_in_frame(Frame* frame) { Peaks peaks; @@ -153,4 +157,120 @@ Frames PeakDetection::find_peaks(int audio_size, sample* audio) { // SMSPeakDetection // --------------------------------------------------------------------------- SMSPeakDetection::SMSPeakDetection() { + sms_init(); + + sms_initAnalParams(&_analysis_params); + _analysis_params.iSamplingRate = _sampling_rate; + _analysis_params.iFrameRate = _sampling_rate / _hop_size; + _analysis_params.iWindowType = SMS_WIN_HAMMING; + _analysis_params.fHighestFreq = 20000; + _analysis_params.iMaxDelayFrames = 4; + _analysis_params.analDelay = 0; + _analysis_params.minGoodFrames = 1; + _analysis_params.iCleanTracks = 0; + _analysis_params.iFormat = SMS_FORMAT_HP; + _analysis_params.nTracks = _max_peaks; + _analysis_params.maxPeaks = _max_peaks; + _analysis_params.nGuides = _max_peaks; + _analysis_params.preEmphasis = 0; + sms_initAnalysis(&_analysis_params); + + sms_initSpectralPeaks(&_peaks, _max_peaks); + + // By default, SMS will change the size of the frames being read + // depending on the detected fundamental frequency (if any) of the + // input sound. To prevent this behaviour (useful when comparing + // different analysis algorithms), set the + // _static_frame_size variable to True + _static_frame_size = false; +} + +SMSPeakDetection::~SMSPeakDetection() { + sms_freeAnalysis(&_analysis_params); + sms_freeSpectralPeaks(&_peaks); + sms_free(); +} + +int SMSPeakDetection::next_frame_size() { + return _analysis_params.sizeNextRead; +} + +void SMSPeakDetection::hop_size(int new_hop_size) { + _hop_size = new_hop_size; + sms_freeAnalysis(&_analysis_params); + _analysis_params.iFrameRate = _sampling_rate / _hop_size; + sms_initAnalysis(&_analysis_params); +} + +void SMSPeakDetection::max_peaks(int new_max_peaks) { + _max_peaks = new_max_peaks; + if(_max_peaks > SMS_MAX_NPEAKS) { + _max_peaks = SMS_MAX_NPEAKS; + } + + sms_freeAnalysis(&_analysis_params); + sms_freeSpectralPeaks(&_peaks); + + _analysis_params.nTracks = _max_peaks; + _analysis_params.maxPeaks = _max_peaks; + _analysis_params.nGuides = _max_peaks; + + sms_initAnalysis(&_analysis_params); + sms_initSpectralPeaks(&_peaks, _max_peaks); +} + +// Find and return all spectral peaks in a given frame of audio +Peaks SMSPeakDetection::find_peaks_in_frame(Frame* frame) { + Peaks peaks; + + int num_peaks = sms_findPeaks(frame->size(), frame->audio(), + &_analysis_params, &_peaks); + + for(int i = 0; i < num_peaks; i++) { + Peak* p = new Peak(); + p->amplitude = _peaks.pSpectralPeaks[i].fMag; + p->frequency = _peaks.pSpectralPeaks[i].fFreq; + p->phase = _peaks.pSpectralPeaks[i].fPhase; + peaks.push_back(p); + } + return peaks; +} + +// Find and return all spectral peaks in a given audio signal. +// If the signal contains more than 1 frame worth of audio, +// it will be broken up into separate frames, with a list of +// peaks returned for each frame. +// +// TODO: This hops by frame size rather than hop size in order to +// make sure the results are the same as with libsms. Make sure +// we have the same number of frames as the other algorithms. +Frames SMSPeakDetection::find_peaks(int audio_size, sample* audio) { + clear(); + + _analysis_params.iSizeSound = audio_size; + unsigned int pos = 0; + + // account for SMS analysis delay + // need an extra (max_frame_delay - 1) frames + int delay = (_analysis_params.iMaxDelayFrames - 1) & _hop_size; + + while(pos < ((audio_size - _hop_size) + delay)) { + // get the next frame size + if(!_static_frame_size) { + _frame_size = next_frame_size(); + } + + // get the next frame + Frame* f = new Frame(_frame_size); + f->audio(&audio[pos]); + + // find peaks + Peaks peaks = find_peaks_in_frame(f); + f->add_peaks(&peaks); + + _frames.push_back(f); + pos += _frame_size; + } + + return _frames; } diff --git a/src/simpl/peak_detection.h b/src/simpl/peak_detection.h index 0045762..577e0b5 100644 --- a/src/simpl/peak_detection.h +++ b/src/simpl/peak_detection.h @@ -3,8 +3,13 @@ #include "base.h" +extern "C" { + #include "sms.h" +} + using namespace std; + namespace simpl { @@ -16,7 +21,7 @@ namespace simpl // --------------------------------------------------------------------------- class PeakDetection { - private: + protected: int _sampling_rate; int _frame_size; bool _static_frame_size; @@ -32,26 +37,27 @@ class PeakDetection { virtual ~PeakDetection(); void clear(); - int sampling_rate(); - void sampling_rate(int new_sampling_rate); - int frame_size(); - void frame_size(int new_frame_size); - bool static_frame_size(); - void static_frame_size(bool new_static_frame_size); + virtual int sampling_rate(); + virtual void sampling_rate(int new_sampling_rate); + virtual int frame_size(); + virtual void frame_size(int new_frame_size); + virtual bool static_frame_size(); + virtual void static_frame_size(bool new_static_frame_size); virtual int next_frame_size(); - int hop_size(); - void hop_size(int new_hop_size); - int max_peaks(); - void max_peaks(int new_max_peaks); - std::string window_type(); - void window_type(std::string new_window_type); - int window_size(); - void window_size(int new_window_size); - sample min_peak_separation(); - void min_peak_separation(sample new_min_peak_separation); + virtual int hop_size(); + virtual void hop_size(int new_hop_size); + virtual int max_peaks(); + virtual void max_peaks(int new_max_peaks); + virtual std::string window_type(); + virtual void window_type(std::string new_window_type); + virtual int window_size(); + virtual void window_size(int new_window_size); + virtual sample min_peak_separation(); + virtual void min_peak_separation(sample new_min_peak_separation); int num_frames(); Frame* frame(int frame_number); Frames frames(); + void frames(Frames new_frames); // Find and return all spectral peaks in a given frame of audio virtual Peaks find_peaks_in_frame(Frame* frame); @@ -67,8 +73,18 @@ class PeakDetection { // SMSPeakDetection // --------------------------------------------------------------------------- class SMSPeakDetection : public PeakDetection { + private: + SMSAnalysisParams _analysis_params; + SMSSpectralPeaks _peaks; + public: SMSPeakDetection(); + ~SMSPeakDetection(); + int next_frame_size(); + void hop_size(int new_hop_size); + void max_peaks(int new_max_peaks); + Peaks find_peaks_in_frame(Frame* frame); + Frames find_peaks(int audio_size, sample* audio); }; diff --git a/src/sms/sms.h b/src/sms/sms.h index 0613ab3..4e6e5c1 100644 --- a/src/sms/sms.h +++ b/src/sms/sms.h @@ -132,7 +132,7 @@ typedef struct } SMS_Peak; /* a collection of spectral peaks */ -typedef struct +typedef struct SMSSpectralPeaks { SMS_Peak *pSpectralPeaks; int nPeaks; @@ -228,7 +228,7 @@ typedef struct * and peak detection/continuation process can be re-computed with more accuracy. * */ -typedef struct +typedef struct SMSAnalysisParams { int iDebugMode; /*!< debug codes enumerated by SMS_DBG \see SMS_DBG */ int iFormat; /*!< analysis format code defined by SMS_Format \see SMS_Format */ diff --git a/tests/test_peak_detection.py b/tests/test_peak_detection.py index b24b701..6d623b6 100644 --- a/tests/test_peak_detection.py +++ b/tests/test_peak_detection.py @@ -1,12 +1,20 @@ import os import numpy as np from nose.tools import assert_almost_equals +import pysms import simpl import simpl.peak_detection as peak_detection +PeakDetection = peak_detection.PeakDetection +SMSPeakDetection = peak_detection.SMSPeakDetection + float_precision = 5 frame_size = 512 hop_size = 512 +max_peaks = 10 +max_partials = 10 +num_frames = 30 +num_samples = num_frames * hop_size audio_path = os.path.join( os.path.dirname(__file__), 'audio/flute.wav' ) @@ -18,8 +26,104 @@ class TestPeakDetection(object): cls.audio = simpl.read_wav(audio_path)[0] def test_peak_detection(self): - pd = peak_detection.PeakDetection() + pd = PeakDetection() pd.find_peaks(self.audio) assert len(pd.frames) == len(self.audio) / hop_size assert len(pd.frames[0].peaks) == 0 + + +class TestSMSPeakDetection(object): + def _pysms_analysis_params(self, sampling_rate): + analysis_params = pysms.SMS_AnalParams() + pysms.sms_initAnalParams(analysis_params) + analysis_params.iSamplingRate = sampling_rate + analysis_params.iFrameRate = sampling_rate / hop_size + analysis_params.iWindowType = pysms.SMS_WIN_HAMMING + analysis_params.fDefaultFundamental = 100 + analysis_params.fHighestFreq = 20000 + analysis_params.iFormat = pysms.SMS_FORMAT_HP + analysis_params.nTracks = max_peaks + analysis_params.peakParams.iMaxPeaks = max_peaks + analysis_params.nGuides = max_peaks + analysis_params.iMaxDelayFrames = 4 + analysis_params.analDelay = 0 + analysis_params.minGoodFrames = 1 + analysis_params.iCleanTracks = 0 + analysis_params.iStochasticType = pysms.SMS_STOC_NONE + analysis_params.preEmphasis = 0 + return analysis_params + + def test_size_next_read(self): + """ + test_size_next_read + Make sure PeakDetection is calculating the correct value for the + size of the next frame. + """ + audio, sampling_rate = simpl.read_wav(audio_path) + pysms.sms_init() + snd_header = pysms.SMS_SndHeader() + + # Try to open the input file to fill snd_header + if(pysms.sms_openSF(audio_path, snd_header)): + raise NameError( + "error opening sound file: " + pysms.sms_errorString() + ) + + analysis_params = self._pysms_analysis_params(sampling_rate) + analysis_params.iMaxDelayFrames = num_frames + 1 + if pysms.sms_initAnalysis(analysis_params, snd_header) != 0: + raise Exception("Error allocating memory for analysis_params") + analysis_params.nFrames = num_frames + sms_header = pysms.SMS_Header() + pysms.sms_fillHeader(sms_header, analysis_params, "pysms") + + sample_offset = 0 + pysms_size_new_data = 0 + current_frame = 0 + sms_next_read_sizes = [] + + while current_frame < num_frames: + sms_next_read_sizes.append(analysis_params.sizeNextRead) + sample_offset += pysms_size_new_data + pysms_size_new_data = analysis_params.sizeNextRead + + # convert frame to floats for libsms + frame = audio[sample_offset:sample_offset + pysms_size_new_data] + frame = np.array(frame, dtype=np.float32) + if len(frame) < pysms_size_new_data: + frame = np.hstack(( + frame, np.zeros(pysms_size_new_data - len(frame), + dtype=np.float32) + )) + + analysis_data = pysms.SMS_Data() + pysms.sms_allocFrameH(sms_header, analysis_data) + status = pysms.sms_analyze(frame, analysis_data, analysis_params) + # as the no. of frames of delay is > num_frames, sms_analyze should + # never get around to performing partial tracking, and so the + # return value should be 0 + assert status == 0 + pysms.sms_freeFrame(analysis_data) + current_frame += 1 + + pysms.sms_freeAnalysis(analysis_params) + pysms.sms_closeSF() + pysms.sms_free() + + pd = SMSPeakDetection() + pd.hop_size = hop_size + pd.max_peaks = max_peaks + current_frame = 0 + sample_offset = 0 + + while current_frame < num_frames: + pd.frame_size = pd.next_frame_size() + assert sms_next_read_sizes[current_frame] == pd.frame_size,\ + (sms_next_read_sizes[current_frame], pd.frame_size) + frame = simpl.Frame() + frame.size = pd.frame_size + frame.audio = audio[sample_offset:sample_offset + pd.frame_size] + pd.find_peaks_in_frame(frame) + sample_offset += pd.frame_size + current_frame += 1 |