diff options
| -rw-r--r-- | setup.py | 69 | ||||
| -rw-r--r-- | simpl/peak_detection.pxd | 5 | ||||
| -rw-r--r-- | simpl/peak_detection.pyx | 7 | ||||
| -rw-r--r-- | src/simpl/peak_detection.cpp | 120 | ||||
| -rw-r--r-- | src/simpl/peak_detection.h | 50 | ||||
| -rw-r--r-- | src/sms/sms.h | 4 | ||||
| -rw-r--r-- | tests/test_peak_detection.py | 106 | 
7 files changed, 306 insertions, 55 deletions
@@ -8,11 +8,10 @@ many of which have yet to be released in software. Simpl is primarily intended  as a tool for other researchers in the field, allowing them to easily combine,  compare and contrast many of the published analysis/synthesis algorithms.  """ +import os  from distutils.core import setup  from distutils.extension import Extension  from Cython.Distutils import build_ext -import os -from glob import glob  # -----------------------------------------------------------------------------  # Global @@ -35,16 +34,11 @@ except ImportError:  macros = []  link_args = []  swig_opts = ['-c++'] -include_dirs = [numpy_include, '/usr/local/include'] - -simpl_sources = glob('src/simpl/*.cpp') -simpl_include_dirs = ['src/simpl'] -simpl_include_dirs.extend(include_dirs) +include_dirs = ['simpl', 'src/simpl', 'src/sms', 'src/sndobj', +                'src/sndobj/rfftw', numpy_include, '/usr/local/include'] +libs = ['m', 'fftw3', 'gsl', 'gslcblas'] +sources = [] -base = Extension("simpl.base", -                 sources=["simpl/base.pyx", "src/simpl/base.cpp"], -                 include_dirs=["simpl"] + simpl_include_dirs, -                 language="c++")  # -----------------------------------------------------------------------------  # SndObj Library @@ -80,6 +74,8 @@ fftw_sources = """  sndobj_sources = map(lambda x: 'src/sndobj/' + x, sndobj_sources)  sndobj_sources.extend(map(lambda x: 'src/sndobj/rfftw/' + x, fftw_sources)) +# sources.extend(sndobj_sources) +  sndobj_sources.append("simpl/sndobj.i")  sndobj_macros = [('PYTHON_WRAP', None)] @@ -99,18 +95,18 @@ sndobj = Extension("simpl/_simplsndobj",  # SMS  # ----------------------------------------------------------------------------- -# sms_sources = """ -#     OOURA.c cepstrum.c peakContinuation.c soundIO.c tables.c -#     fileIO.c peakDetection.c spectralApprox.c transforms.c -#     filters.c residual.c spectrum.c windows.c SFMT.c fixTracks.c -#     sineSynth.c stocAnalysis.c harmDetection.c sms.c synthesis.c -#     analysis.c modify.c -#     """.split() +sms_sources = """ +    OOURA.c cepstrum.c peakContinuation.c soundIO.c tables.c +    fileIO.c peakDetection.c spectralApprox.c transforms.c +    filters.c residual.c spectrum.c windows.c SFMT.c fixTracks.c +    sineSynth.c stocAnalysis.c harmDetection.c sms.c synthesis.c +    analysis.c modify.c +    """.split() + +sms_sources = map(lambda x: 'src/sms/' + x, sms_sources) +sources.extend(sms_sources) -# sms_sources = map(lambda x: 'src/sms/' + x, sms_sources) -# sms_sources.append("simpl/sms.i") -# sms_include_dirs = ['src/sms'] -# sms_include_dirs.extend(include_dirs) +sms_sources.append("simpl/sms.i")  # sms = Extension("simpl/_simplsms",  #                 sources=sms_sources, @@ -118,20 +114,27 @@ sndobj = Extension("simpl/_simplsndobj",  #                 libraries=['m', 'fftw3', 'gsl', 'gslcblas'],  #                 extra_compile_args=['-DMERSENNE_TWISTER']) -# sms = Extension("simpl.sms", -#                 sources=["simpl/sms.pyx", "simpl/base.pyx", -#                          "src/simpl/simplsms.cpp", "src/simpl/base.cpp"], -#                 include_dirs=["simpl"] + simpl_include_dirs, -#                 language="c++") +# ----------------------------------------------------------------------------- +# Base +# ----------------------------------------------------------------------------- +base = Extension( +    "simpl.base", +    sources=["simpl/base.pyx", "src/simpl/base.cpp"], +    include_dirs=include_dirs, +    language="c++" +)  # -----------------------------------------------------------------------------  # Peak Detection  # -----------------------------------------------------------------------------  peak_detection = Extension(      "simpl.peak_detection", -    sources=["simpl/peak_detection.pyx", "src/simpl/peak_detection.cpp", -             "src/simpl/base.cpp"], -    include_dirs=["simpl"] + simpl_include_dirs, +    sources=sources + ["simpl/peak_detection.pyx", +                       "src/simpl/peak_detection.cpp", +                       "src/simpl/base.cpp"], +    include_dirs=include_dirs, +    libraries=libs, +    extra_compile_args=['-DMERSENNE_TWISTER'],      language="c++"  ) @@ -142,7 +145,7 @@ partial_tracking = Extension(      "simpl.partial_tracking",      sources=["simpl/partial_tracking.pyx", "src/simpl/partial_tracking.cpp",               "src/simpl/base.cpp"], -    include_dirs=["simpl"] + simpl_include_dirs, +    include_dirs=include_dirs,      language="c++"  ) @@ -154,7 +157,7 @@ synthesis = Extension(      "simpl.synthesis",      sources=["simpl/synthesis.pyx", "src/simpl/synthesis.cpp",               "src/simpl/base.cpp"], -    include_dirs=["simpl"] + simpl_include_dirs, +    include_dirs=include_dirs,      language="c++"  ) @@ -166,7 +169,7 @@ residual = Extension(      "simpl.residual",      sources=["simpl/residual.pyx", "src/simpl/residual.cpp",               "src/simpl/base.cpp"], -    include_dirs=["simpl"] + simpl_include_dirs, +    include_dirs=include_dirs,      language="c++"  ) diff --git a/simpl/peak_detection.pxd b/simpl/peak_detection.pxd index 7ff9d09..867a89e 100644 --- a/simpl/peak_detection.pxd +++ b/simpl/peak_detection.pxd @@ -32,8 +32,13 @@ cdef extern from "../src/simpl/peak_detection.h" namespace "simpl":          void min_peak_separation(double new_min_peak_separation)          int num_frames()          c_Frame* frame(int frame_number) +        void frames(vector[c_Frame*] new_frames)          vector[c_Peak*] find_peaks_in_frame(c_Frame* frame)          vector[c_Frame*] find_peaks(int audio_size, double* audio)      cdef cppclass c_SMSPeakDetection "simpl::SMSPeakDetection"(c_PeakDetection):          c_SMSPeakDetection() +        void hop_size(int new_hop_size) +        void max_peaks(int new_max_peaks) +        vector[c_Peak*] find_peaks_in_frame(c_Frame* frame) +        vector[c_Frame*] find_peaks(int audio_size, double* audio) diff --git a/simpl/peak_detection.pyx b/simpl/peak_detection.pyx index a2fc8f5..12f47b2 100644 --- a/simpl/peak_detection.pyx +++ b/simpl/peak_detection.pyx @@ -63,8 +63,11 @@ cdef class PeakDetection:      property frames:          def __get__(self):              return [self.frame(i) for i in range(self.thisptr.num_frames())] -        def __set__(self, f): -            raise Exception("NotImplemented") +        def __set__(self, new_frames): +            cdef vector[c_Frame*] c_frames +            for f in new_frames: +                c_frames.push_back((<Frame>f).thisptr) +            self.thisptr.frames(c_frames)      def find_peaks_in_frame(self, Frame frame not None):          peaks = [] diff --git a/src/simpl/peak_detection.cpp b/src/simpl/peak_detection.cpp index 1d44b6f..7a31846 100644 --- a/src/simpl/peak_detection.cpp +++ b/src/simpl/peak_detection.cpp @@ -113,6 +113,10 @@ Frames PeakDetection::frames() {      return _frames;  } +void PeakDetection::frames(Frames new_frames) { +    _frames = new_frames; +} +  // Find and return all spectral peaks in a given frame of audio  Peaks PeakDetection::find_peaks_in_frame(Frame* frame) {      Peaks peaks; @@ -153,4 +157,120 @@ Frames PeakDetection::find_peaks(int audio_size, sample* audio) {  // SMSPeakDetection  // ---------------------------------------------------------------------------  SMSPeakDetection::SMSPeakDetection() { +    sms_init(); + +    sms_initAnalParams(&_analysis_params); +    _analysis_params.iSamplingRate = _sampling_rate; +    _analysis_params.iFrameRate = _sampling_rate / _hop_size; +    _analysis_params.iWindowType = SMS_WIN_HAMMING; +    _analysis_params.fHighestFreq = 20000; +    _analysis_params.iMaxDelayFrames = 4; +    _analysis_params.analDelay = 0; +    _analysis_params.minGoodFrames = 1; +    _analysis_params.iCleanTracks = 0; +    _analysis_params.iFormat = SMS_FORMAT_HP; +    _analysis_params.nTracks = _max_peaks; +    _analysis_params.maxPeaks = _max_peaks; +    _analysis_params.nGuides = _max_peaks; +    _analysis_params.preEmphasis = 0; +    sms_initAnalysis(&_analysis_params); + +    sms_initSpectralPeaks(&_peaks, _max_peaks); + +    // By default, SMS will change the size of the frames being read +    // depending on the detected fundamental frequency (if any) of the +    // input sound. To prevent this behaviour (useful when comparing +    // different analysis algorithms), set the +    // _static_frame_size variable to True +    _static_frame_size = false; +} + +SMSPeakDetection::~SMSPeakDetection() { +    sms_freeAnalysis(&_analysis_params); +    sms_freeSpectralPeaks(&_peaks); +    sms_free(); +} + +int SMSPeakDetection::next_frame_size() { +    return _analysis_params.sizeNextRead; +} + +void SMSPeakDetection::hop_size(int new_hop_size) { +    _hop_size = new_hop_size; +    sms_freeAnalysis(&_analysis_params); +    _analysis_params.iFrameRate = _sampling_rate / _hop_size; +    sms_initAnalysis(&_analysis_params); +} + +void SMSPeakDetection::max_peaks(int new_max_peaks) { +    _max_peaks = new_max_peaks; +    if(_max_peaks > SMS_MAX_NPEAKS) { +        _max_peaks = SMS_MAX_NPEAKS; +    } + +    sms_freeAnalysis(&_analysis_params); +    sms_freeSpectralPeaks(&_peaks); + +    _analysis_params.nTracks = _max_peaks; +    _analysis_params.maxPeaks = _max_peaks; +    _analysis_params.nGuides = _max_peaks; + +    sms_initAnalysis(&_analysis_params); +    sms_initSpectralPeaks(&_peaks, _max_peaks); +} + +// Find and return all spectral peaks in a given frame of audio +Peaks SMSPeakDetection::find_peaks_in_frame(Frame* frame) { +    Peaks peaks; + +    int num_peaks = sms_findPeaks(frame->size(), frame->audio(),  +                                  &_analysis_params, &_peaks); + +    for(int i = 0; i < num_peaks; i++) { +        Peak* p = new Peak(); +        p->amplitude = _peaks.pSpectralPeaks[i].fMag; +        p->frequency = _peaks.pSpectralPeaks[i].fFreq; +        p->phase = _peaks.pSpectralPeaks[i].fPhase; +        peaks.push_back(p); +    } +    return peaks; +} + +// Find and return all spectral peaks in a given audio signal. +// If the signal contains more than 1 frame worth of audio, +// it will be broken up into separate frames, with a list of +// peaks returned for each frame. +// +// TODO: This hops by frame size rather than hop size in order to +//       make sure the results are the same as with libsms. Make sure +//       we have the same number of frames as the other algorithms. +Frames SMSPeakDetection::find_peaks(int audio_size, sample* audio) { +    clear(); + +    _analysis_params.iSizeSound = audio_size; +    unsigned int pos = 0; + +    // account for SMS analysis delay +    // need an extra (max_frame_delay - 1) frames +    int delay = (_analysis_params.iMaxDelayFrames - 1) & _hop_size; + +    while(pos < ((audio_size - _hop_size) + delay)) { +        // get the next frame size +        if(!_static_frame_size) { +            _frame_size = next_frame_size(); +        } + +        // get the next frame +        Frame* f = new Frame(_frame_size); +        f->audio(&audio[pos]); + +        // find peaks +        Peaks peaks = find_peaks_in_frame(f); +        f->add_peaks(&peaks); + +        _frames.push_back(f); +        pos += _frame_size; +    } + +    return _frames;  } diff --git a/src/simpl/peak_detection.h b/src/simpl/peak_detection.h index 0045762..577e0b5 100644 --- a/src/simpl/peak_detection.h +++ b/src/simpl/peak_detection.h @@ -3,8 +3,13 @@  #include "base.h" +extern "C" { +    #include "sms.h" +} +  using namespace std; +  namespace simpl  { @@ -16,7 +21,7 @@ namespace simpl  // ---------------------------------------------------------------------------  class PeakDetection { -    private: +    protected:          int _sampling_rate;          int _frame_size;          bool _static_frame_size; @@ -32,26 +37,27 @@ class PeakDetection {          virtual ~PeakDetection();          void clear(); -        int sampling_rate(); -        void sampling_rate(int new_sampling_rate); -        int frame_size(); -        void frame_size(int new_frame_size); -        bool static_frame_size(); -        void static_frame_size(bool new_static_frame_size); +        virtual int sampling_rate(); +        virtual void sampling_rate(int new_sampling_rate); +        virtual int frame_size(); +        virtual void frame_size(int new_frame_size); +        virtual bool static_frame_size(); +        virtual void static_frame_size(bool new_static_frame_size);          virtual int next_frame_size(); -        int hop_size(); -        void hop_size(int new_hop_size); -        int max_peaks(); -        void max_peaks(int new_max_peaks); -        std::string window_type(); -        void window_type(std::string new_window_type); -        int window_size(); -        void window_size(int new_window_size); -        sample min_peak_separation(); -        void min_peak_separation(sample new_min_peak_separation); +        virtual int hop_size(); +        virtual void hop_size(int new_hop_size); +        virtual int max_peaks(); +        virtual void max_peaks(int new_max_peaks); +        virtual std::string window_type(); +        virtual void window_type(std::string new_window_type); +        virtual int window_size(); +        virtual void window_size(int new_window_size); +        virtual sample min_peak_separation(); +        virtual void min_peak_separation(sample new_min_peak_separation);          int num_frames();          Frame* frame(int frame_number);          Frames frames(); +        void frames(Frames new_frames);          // Find and return all spectral peaks in a given frame of audio          virtual Peaks find_peaks_in_frame(Frame* frame); @@ -67,8 +73,18 @@ class PeakDetection {  // SMSPeakDetection  // ---------------------------------------------------------------------------  class SMSPeakDetection : public PeakDetection { +    private: +        SMSAnalysisParams _analysis_params; +        SMSSpectralPeaks _peaks; +      public:          SMSPeakDetection(); +        ~SMSPeakDetection(); +        int next_frame_size(); +        void hop_size(int new_hop_size); +        void max_peaks(int new_max_peaks); +        Peaks find_peaks_in_frame(Frame* frame); +        Frames find_peaks(int audio_size, sample* audio);  }; diff --git a/src/sms/sms.h b/src/sms/sms.h index 0613ab3..4e6e5c1 100644 --- a/src/sms/sms.h +++ b/src/sms/sms.h @@ -132,7 +132,7 @@ typedef struct  } SMS_Peak;  /* a collection of spectral peaks */ -typedef struct +typedef struct SMSSpectralPeaks  {      SMS_Peak *pSpectralPeaks;      int nPeaks; @@ -228,7 +228,7 @@ typedef struct   * and peak detection/continuation process can be re-computed with more accuracy.   *    */ -typedef struct  +typedef struct SMSAnalysisParams  {      int iDebugMode;                  /*!< debug codes enumerated by SMS_DBG \see SMS_DBG */      int iFormat;                     /*!< analysis format code defined by SMS_Format \see SMS_Format */ diff --git a/tests/test_peak_detection.py b/tests/test_peak_detection.py index b24b701..6d623b6 100644 --- a/tests/test_peak_detection.py +++ b/tests/test_peak_detection.py @@ -1,12 +1,20 @@  import os  import numpy as np  from nose.tools import assert_almost_equals +import pysms  import simpl  import simpl.peak_detection as peak_detection +PeakDetection = peak_detection.PeakDetection +SMSPeakDetection = peak_detection.SMSPeakDetection +  float_precision = 5  frame_size = 512  hop_size = 512 +max_peaks = 10 +max_partials = 10 +num_frames = 30 +num_samples = num_frames * hop_size  audio_path = os.path.join(      os.path.dirname(__file__), 'audio/flute.wav'  ) @@ -18,8 +26,104 @@ class TestPeakDetection(object):          cls.audio = simpl.read_wav(audio_path)[0]      def test_peak_detection(self): -        pd = peak_detection.PeakDetection() +        pd = PeakDetection()          pd.find_peaks(self.audio)          assert len(pd.frames) == len(self.audio) / hop_size          assert len(pd.frames[0].peaks) == 0 + + +class TestSMSPeakDetection(object): +    def _pysms_analysis_params(self, sampling_rate): +        analysis_params = pysms.SMS_AnalParams() +        pysms.sms_initAnalParams(analysis_params) +        analysis_params.iSamplingRate = sampling_rate +        analysis_params.iFrameRate = sampling_rate / hop_size +        analysis_params.iWindowType = pysms.SMS_WIN_HAMMING +        analysis_params.fDefaultFundamental = 100 +        analysis_params.fHighestFreq = 20000 +        analysis_params.iFormat = pysms.SMS_FORMAT_HP +        analysis_params.nTracks = max_peaks +        analysis_params.peakParams.iMaxPeaks = max_peaks +        analysis_params.nGuides = max_peaks +        analysis_params.iMaxDelayFrames = 4 +        analysis_params.analDelay = 0 +        analysis_params.minGoodFrames = 1 +        analysis_params.iCleanTracks = 0 +        analysis_params.iStochasticType = pysms.SMS_STOC_NONE +        analysis_params.preEmphasis = 0 +        return analysis_params + +    def test_size_next_read(self): +        """ +        test_size_next_read +        Make sure PeakDetection is calculating the correct value for the +        size of the next frame. +        """ +        audio, sampling_rate = simpl.read_wav(audio_path) +        pysms.sms_init() +        snd_header = pysms.SMS_SndHeader() + +        # Try to open the input file to fill snd_header +        if(pysms.sms_openSF(audio_path, snd_header)): +            raise NameError( +                "error opening sound file: " + pysms.sms_errorString() +            ) + +        analysis_params = self._pysms_analysis_params(sampling_rate) +        analysis_params.iMaxDelayFrames = num_frames + 1 +        if pysms.sms_initAnalysis(analysis_params, snd_header) != 0: +            raise Exception("Error allocating memory for analysis_params") +        analysis_params.nFrames = num_frames +        sms_header = pysms.SMS_Header() +        pysms.sms_fillHeader(sms_header, analysis_params, "pysms") + +        sample_offset = 0 +        pysms_size_new_data = 0 +        current_frame = 0 +        sms_next_read_sizes = [] + +        while current_frame < num_frames: +            sms_next_read_sizes.append(analysis_params.sizeNextRead) +            sample_offset += pysms_size_new_data +            pysms_size_new_data = analysis_params.sizeNextRead + +            # convert frame to floats for libsms +            frame = audio[sample_offset:sample_offset + pysms_size_new_data] +            frame = np.array(frame, dtype=np.float32) +            if len(frame) < pysms_size_new_data: +                frame = np.hstack(( +                    frame, np.zeros(pysms_size_new_data - len(frame), +                                    dtype=np.float32) +                )) + +            analysis_data = pysms.SMS_Data() +            pysms.sms_allocFrameH(sms_header, analysis_data) +            status = pysms.sms_analyze(frame, analysis_data, analysis_params) +            # as the no. of frames of delay is > num_frames, sms_analyze should +            # never get around to performing partial tracking, and so the +            # return value should be 0 +            assert status == 0 +            pysms.sms_freeFrame(analysis_data) +            current_frame += 1 + +        pysms.sms_freeAnalysis(analysis_params) +        pysms.sms_closeSF() +        pysms.sms_free() + +        pd = SMSPeakDetection() +        pd.hop_size = hop_size +        pd.max_peaks = max_peaks +        current_frame = 0 +        sample_offset = 0 + +        while current_frame < num_frames: +            pd.frame_size = pd.next_frame_size() +            assert sms_next_read_sizes[current_frame] == pd.frame_size,\ +                (sms_next_read_sizes[current_frame], pd.frame_size) +            frame = simpl.Frame() +            frame.size = pd.frame_size +            frame.audio = audio[sample_offset:sample_offset + pd.frame_size] +            pd.find_peaks_in_frame(frame) +            sample_offset += pd.frame_size +            current_frame += 1  |