diff options
author | Richard <q@1bpm.net> | 2021-03-16 01:20:31 +0000 |
---|---|---|
committer | Richard <q@1bpm.net> | 2021-03-16 01:20:31 +0000 |
commit | 4e3f857cb84c2c2ba834d5ddd0c5e1d9375cd947 (patch) | |
tree | 4ffc6ef2c8db27689c1c230d1f1b93f365e03747 | |
download | csound-xtract-4e3f857cb84c2c2ba834d5ddd0c5e1d9375cd947.tar.gz csound-xtract-4e3f857cb84c2c2ba834d5ddd0c5e1d9375cd947.tar.bz2 csound-xtract-4e3f857cb84c2c2ba834d5ddd0c5e1d9375cd947.zip |
initial
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 147 | ||||
-rw-r--r-- | README.md | 112 | ||||
-rw-r--r-- | TODO.md | 8 | ||||
-rw-r--r-- | cmake/Modules/FindCsound.cmake | 29 | ||||
-rw-r--r-- | cmake/Modules/FindXtract.cmake | 19 | ||||
-rw-r--r-- | examples/csound-extract-demonstration.csd | 137 | ||||
-rw-r--r-- | examples/sounds/fox.wav | bin | 0 -> 243182 bytes | |||
-rw-r--r-- | examples/sounds/magicshop.wav | bin | 0 -> 9995924 bytes | |||
-rw-r--r-- | include/dtw.h | 79 | ||||
-rw-r--r-- | plugin.h.patch | 4 | ||||
-rw-r--r-- | src/dtw.cpp | 99 | ||||
-rw-r--r-- | src/opcodes.cpp | 735 |
13 files changed, 1370 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..df0c04f --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,147 @@ +project("csound_xtract") + +cmake_minimum_required(VERSION 3.8) + +set(APIVERSION "6.0") + +# Release or Debug +set(CMAKE_BUILD_TYPE "Release") + +# force make to print the command lines +set(CMAKE_VERBOSE_MAKEFILE on) + +# path to Csound cmake module +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") + +# set compilation flags +set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-std=c++11 -fpermissive -fPIC -w -DUSE_DOUBLE -DB64BIT") + +# options +option(USE_LIB64 "Set to on to set installation dir for libs to lib64" OFF) +option(USE_DOUBLE "Use doubles for audio calculations" ON) +option(CPP11 "c++11" ON) + +set(BUILDING_CSOUND_PLUGINS ON) + +# ---------------------------------------------- + +include(FindCsound) +include(FindXtract) + +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) + +# ----------------------------------------------- + +function(addflag flag flagname) + check_c_compiler_flag(${flag} ${flagname}) + if (${flagname}) + # message(STATUS "Setting C flag ${flag}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + endif() + check_cxx_compiler_flag(${flag} CXX_${flagname}) + if (CXX_${flagname}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) + endif() +endfunction(addflag) + + +# set optimization flags +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANG) + add_definitions(-fvisibility=hidden) + if(NATIVE) + add_definitions(-march=native) + endif() + + include(CheckCCompilerFlag) + include(CheckCXXCompilerFlag) + + addflag(-msse HAS_SSE) + addflag(-msse2 HAS_SSE2) + addflag(-mfgpath=sse HAS_FPMATH_SSE) + +endif() + +if(MINGW) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mstackrealign") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mstackrealign") +endif() + +addflag(-ftree-vectorize HAS_TREE_VECTORIZE) +addflag(-ffast-math HAS_FAST_MATH) +addflag(-fomit-frame-pointer HAS_OMIT_FRAME_POINTER) + + +# ------------------------------------------------------------------- + +set(CS_FRAMEWORK_DEST "~/Library/Frameworks") + + +if(USE_LIB64) + set(LIBRARY_INSTALL_DIR "lib64") + add_definitions("-DLIB64") +else() + set(LIBRARY_INSTALL_DIR "lib") +endif() + +message(STATUS "LIBRARY INSTALL DIR: ${LIBRARY_INSTALL_DIR}") + +# ------------------------------------------------------------------- + + +if(USE_DOUBLE) + message(STATUS ">>> using doubles") + + if(APPLE) + set(CSOUNDLIB "CsoundLib64") + set(PLUGIN_INSTALL_DIR "${CS_FRAMEWORK_DEST}/${CSOUNDLIB}.framework/Versions/${APIVERSION}/Resources/Opcodes64") + else() + set(CSOUNDLIB "csound64") + set(PLUGIN_INSTALL_DIR "${LIBRARY_INSTALL_DIR}/csound/plugins64-${APIVERSION}") + endif() +else() + message(STATUS ">>> not using doubles") + if(APPLE) + set(CSOUNDLIB "CsoundLib") + set(PLUGIN_INSTALL_DIR "${CS_FRAMEWORK_DEST}/${CSOUNDLIB}.framework/Versions/${APIVERSION}/Resources/Opcodes") + else() + set(CSOUNDLIB "csound") + set(PLUGIN_INSTALL_DIR "${LIBRARY_INSTALL_DIR}/csound/plugins-${APIVERSION}") + endif() +endif() + + +# ------------------------------------------------------------------- + +# Csound opcode build +find_package(Csound) + + +set(BUILD_PLUGINS_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +if(NOT CSOUND_FOUND) + message(FATAL_ERROR "Csound installation not found") +endif() + + +if(NOT XTRACT_FOUND) + message(FATAL_ERROR "libxtract could not be found") +endif() + +set(CPPFILES src/opcodes.cpp src/dtw.cpp) + + +include_directories(${CSOUND_INCLUDE_DIRS}) +link_libraries(${XTRACT_LIBRARIES}) +include_directories(${XTRACT_INCLUDE_DIRS}) +include_directories(include) + + +add_library(csxtract SHARED ${CPPFILES}) + +set_target_properties(csxtract PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${BUILD_PLUGINS_DIR} + LIBRARY_OUTPUT_DIRECTORY ${BUILD_PLUGINS_DIR}) + +install(TARGETS csxtract LIBRARY DESTINATION "${PLUGIN_INSTALL_DIR}" ) + diff --git a/README.md b/README.md new file mode 100644 index 0000000..3219ac6 --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# csound-xtract : Csound feature extraction using libXtract + +## Overview +csxtract is a plugin opcode which uses libXtract to perform feature extraction from within Csound. + +Development is still ongoing and subject to various research matters, thus is provided in an experimental/alpha state and may contain bugs. Parts of the code are due overhauls and refactoring, but the intention is for the opcodes and general operation to remain the same as presented here. + +The current source of Csound has the member function get_csound() in plugin.h, however this does not function correctly. As of writing there is an outstanding issue on github so it is anticipated to be fixed soon. In the meantime a single line needs to be changed in plugin.h. The patch file plugin.h.patch is provided as an interim solution. + + +## Requirements +* Cmake >= 3.8 +* Csound development libraries/headers (with patch applied as above or get_csound() fixed) +* [LibXtract](https://github.com/jamiebullock/LibXtract) + +Tested on Linux and Windows 7 with MSYS as of March 2021. + + +## Installation +Create a build directory at the top of the source tree, excute *cmake ..*, *make* and optionally *make install* as root. If the latter is not used/possible then the resulting libcsxtract library can be used with the *--opcode-lib* flag in Csound. +eg: + + mkdir build && cd build + cmake .. + make && sudo make install + +Cmake should find Csound and libXtract using the modules in the cmake/Modules directory and installation should be as simple as above. + +## Examples +Some examples are provided in the examples directory. + +## Bugs/suggestions etc +Please contact Richard at q@1bpm.net + +## Opcode reference + +### iprofile xtprofile [ibuffersize=4096, iblocksize=512, imfccs=1, icentroid=1, izerocrossings=1, irms=1, iflatness=1, iirregularity=1, ipower=1, isharpness=1, ismoothness=1] +Declare a feature extraction profile for use in extraction opcodes. The exact techniques for extraction of individual features can be found by examining the libXtract documentation and source code. + +* iprofile : the profile handle + +* ibuffersize : buffer size used in extraction +* iblocksize : block size for extraction +* imfccs : use MFCCs +* icentroid : use spectral centroid +* izerocrossings : use zero crossings +* irms : use RMS +* iflatness : use spectral flatness +* iirregularity : use spectral irregularity +* ipower : use spectral power +* isharpness : use spectral sharpnesss + + +### icorpus xtcorpus iprofile, ifn +Analyse sound contained in a f-table and store in a handle, for later use in comparison/matching opcodes. Done during init time. + +* icorpus : the corpus handle + +* iprofile : profile handle as created by xtprofile +* ifn : f-table containing the sound to be analysed, typically GEN1 + + +### ixtract, kdone xtractor iprofile, ain +Analyse a live sound. + +* ixtract : the extraction handle + +* iprofile : profile handle as created by xtprofile +* ain : sound to analyse + + +### kdistance xtdistance ixtract1, ixtract2, ktrigger, [idistancefunc=0] +Compare two extraction streams using a basic distance function between each frame of the analyses. The profiles used for the streams must be the same. + +* kdistance : the calculated distance, 0 should represent no difference between the analyses. + +* ixtract1 : analysis stream as created by xtractor +* ixtract2 : analysis stream as created by xtractor +* ktrigger : comparison is conducted when 1 +* idistancefunc : 0 for Euclidean distance, 1 for Manhattan distance + + +### kanalysis[] xtdump ixtract +Obtain the analysis data from an xtractor handle. The array length will depend on the number of features specified in the profile used (MFCC uses 13 indexes, all others use 1). The indexes are presented in the same order as declared in xtprofile. For example, a profile using MFCC and centroid would mean indexes 0 to 12 would be MFCCs, and 13 would be centroid. Similarly a profile using only centroid and flatness would imply index 0 would be centroid, and 1 would be flatness. + +* kanalysis[] : the analysed features + +* ixtract : analysis stream as created by xtractor + + +### kdone, kanalysis[] xtaccdump ixtract, ktrigger +Obtain the analysis data from an xtractor handle as with xtdump, but accumulate the analyses and output the mean when ktrigger is 1. + +* kdone : outputs 1 when new data is provided, 0 at all other times +* kanalysis[] : the analysed features as with xtdump. + +* ixtract : analysis stream as created by xtractor +* ktrigger : triggers the output of data when 1 + + +### kposition xtcorpusmatch icorpus, ixtract, ktrigger, [idistancefunc=0] +Obtain the nearest match in a corpus created with xtcorpus by comparing a live input stream created by xtractor. + +* kposition : position of nearest corpus region, in sample points + +* icorpus : corpus handle as created by xtcorpus +* ixtract : analysis stream as created by xtractor +* ktrigger : perform the comparison when 1 +* idistancefunc : 0 for Euclidean distance, 1 for Manhattan distance + + + @@ -0,0 +1,8 @@ +Add more/all extract features as in libXtract. +Refactor for dynamic libXtract feature types. +Refactor out into separate source/header files for analysis, and separate source file for Csound opcodes. +Test/compare against standalone libXtract implementation. + +Use DTW for accumulated corpus match. +Check if DTW is required in current places/implementation. + diff --git a/cmake/Modules/FindCsound.cmake b/cmake/Modules/FindCsound.cmake new file mode 100644 index 0000000..e55b269 --- /dev/null +++ b/cmake/Modules/FindCsound.cmake @@ -0,0 +1,29 @@ +# Try to find the Csound library. +# Once done this will define: +# CSOUND_FOUND - System has the Csound library +# CSOUND_INCLUDE_DIRS - The Csound include directories. +# CSOUND_LIBRARIES - The libraries needed to use the Csound library. + +if(APPLE) +find_path(CSOUND_INCLUDE_DIR csound.h HINTS /Library/Frameworks/CsoundLib64.framework/Headers +"$ENV{HOME}/Library/Frameworks/CsoundLib64.framework/Headers") +else() +find_path(CSOUND_INCLUDE_DIR csound.h PATH_SUFFIXES csound) +endif() + +if(APPLE) +find_library(CSOUND_LIBRARY NAMES CsoundLib64 HINTS /Library/Frameworks/CsoundLib64.framework/ +"$ENV{HOME}/Library/Frameworks/CsoundLib64.framework") +else() +find_library(CSOUND_LIBRARY NAMES csound64 csound) +endif() + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set CSOUND_FOUND to TRUE +# if all listed variables are TRUE +find_package_handle_standard_args(CSOUND + CSOUND_LIBRARY CSOUND_INCLUDE_DIR) +mark_as_advanced(CSOUND_INCLUDE_DIR CSOUND_LIBRARY) + +set(CSOUND_INCLUDE_DIRS ${CSOUND_INCLUDE_DIR}) +set(CSOUND_LIBRARIES ${CSOUND_LIBRARY} ) diff --git a/cmake/Modules/FindXtract.cmake b/cmake/Modules/FindXtract.cmake new file mode 100644 index 0000000..952c916 --- /dev/null +++ b/cmake/Modules/FindXtract.cmake @@ -0,0 +1,19 @@ +FIND_PATH(XTRACT_INCLUDE_DIR NAMES xtract/libxtract.h) +# Look for the library. +FIND_LIBRARY(XTRACT_LIBRARY NAMES xtract) +# Handle the QUIETLY and REQUIRED arguments and set XTRACT_FOUND to TRUE if all listed variables are TRUE. +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(XTRACT DEFAULT_MSG XTRACT_LIBRARY XTRACT_INCLUDE_DIR) + + + +# Copy the results to the output variables. +IF(XTRACT_FOUND) + SET(XTRACT_LIBRARIES ${XTRACT_LIBRARY}) + SET(XTRACT_INCLUDE_DIRS ${XTRACT_INCLUDE_DIR}) +ELSE(XTRACT_FOUND) + SET(XTRACT_LIBRARIES) + SET(XTRACT_INCLUDE_DIRS) +ENDIF(XTRACT_FOUND) + +MARK_AS_ADVANCED(XTRACT_INCLUDE_DIRS XTRACT_LIBRARIES) diff --git a/examples/csound-extract-demonstration.csd b/examples/csound-extract-demonstration.csd new file mode 100644 index 0000000..5368414 --- /dev/null +++ b/examples/csound-extract-demonstration.csd @@ -0,0 +1,137 @@ +/* + * + * csound-xtract demonstration + * By Richard Knight 2021 + * + * This demonstrates some of the csound-xtract plugin opcodes in three instruments: + * + * 1. test_dump prints selected analysed features of a sound + * 2. test_match uses feature matching to play a corpus based on analysis of an input sound + * 3. test_distance prints the distance between two streams of analysed features + * + * + * csound-xtract is current experimental. Using different buffer and block sizes for xtprofile may + * result in quite different feature results depending on the input sound. For certain opcodes, namely + * xtcorpusmatch, this require some fine-tuning to obtain the desired results based on the character of + * the input sound. + * + */ + +<CsoundSynthesizer> +<CsOptions> +-odac +</CsOptions> +<CsInstruments> +sr = 44100 +kr = 4410 +nchnls = 2 +0dbfs = 1 +seed 0 + + +; source and corpus sounds +gSsource = "sounds/fox.wav" +gScorpus = "sounds/magicshop.wav" + +; corpus f-table +gicorpuswave ftgen 0, 0, 0, 1, gScorpus, 0, 0, 1 + +; half size window for sndwarp reading +giwindow ftgen 0, 0, 512, 9, 0.5, 1, 0 + + + + +/* + play the source sound, dry, and print the extracted features +*/ +instr test_dump + ; use centroid, irregularity, sharpness and smoothness + iprofile xtprofile 2048, 1024, 0, 1, 0, 0, 0, 1, 0, 1, 1 + + ; read the source/driving sound + asource diskin gSsource + + ; perform feature extraction on source sound + ixtract, kdone xtractor iprofile, asource + + ; print the features + if (kdone == 1) then + kanalysis[] xtdump ixtract + printarray kanalysis + endif + + outs asource, asource +endin + + + +/* + match the source sound against the corpus and play the 'best match' +*/ +instr test_match + + ; use MFCC, rms, flatness, irregularity, spectral power, sharpness and smoothness + iprofile xtprofile 2048, 1024, 1, 0, 0, 1, 1, 1, 1, 1, 1 + + ; analyse the corpus wave + icorpus xtcorpus iprofile, gicorpuswave + + ; read the source/driving sound + asource diskin gSsource + + ; perform feature extraction on source sound and match with corpus + ixtract, kdone xtractor iprofile, asource + kdex xtcorpusmatch icorpus, ixtract, kdone + + ; if matched index != 0 then calculate the position to be used for sndwarp + if (kdex != 0) then + ilen = ftlen(gicorpuswave) + icsr = ftsr(gicorpuswave) + icduration = ilen / icsr + icps = 1/(icduration) + aphs, a_ syncphasor icps, a(kdone) + apos = (((aphs * ilen) + kdex) / ilen) * icduration + + endif + + ; play the position in the corpus wave + amatched sndwarp 0.7, apos, 1, gicorpuswave, 0, 1024, 512, 8, giwindow, 1 + outs amatched, amatched +endin + + + +/* + print the distance between two sound streams +*/ +instr test_distance + ; use all features + iprofile xtprofile 2048, 1024 + + ; read the source/driving sound + asource diskin gSsource + + ; modify + amodified = butterlp(asource, 10000) + + ; perform feature extraction on sounds + ixtract1, kdone1 xtractor iprofile, asource + ixtract2, kdone2 xtractor iprofile, amodified + + ; calculate the distance and print it + kdistance xtdistance ixtract1, ixtract2, 1 + printk2 kdistance +endin + + + +</CsInstruments> +<CsScore> + +i"test_dump" 0 3 +i"test_match" 3 3 +i"test_distance" 6 3 + +</CsScore> +</CsoundSynthesizer>
\ No newline at end of file diff --git a/examples/sounds/fox.wav b/examples/sounds/fox.wav Binary files differnew file mode 100644 index 0000000..f5eed30 --- /dev/null +++ b/examples/sounds/fox.wav diff --git a/examples/sounds/magicshop.wav b/examples/sounds/magicshop.wav Binary files differnew file mode 100644 index 0000000..915eb3d --- /dev/null +++ b/examples/sounds/magicshop.wav diff --git a/include/dtw.h b/include/dtw.h new file mode 100644 index 0000000..4c8064d --- /dev/null +++ b/include/dtw.h @@ -0,0 +1,79 @@ +/* +Copyright (c) 2014, Calder Phillips-Grafflin (calder.pg@gmail.com) +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <plugin.h> + +#ifndef DTW_H +#define DTW_H + +namespace DTW +{ + +class SimpleDTW +{ +private: + + double (*distance_fn_)(MYFLT* p1, MYFLT* p2, int bands); + MYFLT* data_; + size_t x_dim_; + size_t y_dim_; + int mfcc_bands; + + inline size_t GetDataIndex(size_t x, size_t y) + { + return (x * y_dim_) + y; + } + + inline double GetFromDTWMatrix(size_t x, size_t y) + { + return data_[GetDataIndex(x, y)]; + } + + inline void SetInDTWMatrix(size_t x, size_t y, MYFLT val) + { + data_[GetDataIndex(x, y)] = val; + } + +public: + + SimpleDTW (csnd::Csound* csound, size_t x_size, size_t y_size, int mfcc_bands, MYFLT (*distance_fn)(MYFLT* p1, MYFLT* p2, int bands)); + + ~SimpleDTW() {} + + double EvaluateWarpingCost( + MYFLT* sequence_1, + int seq1_items, + MYFLT* sequence_2, + int seq2_items + ); + +}; + +} + +#endif // DTW_H diff --git a/plugin.h.patch b/plugin.h.patch new file mode 100644 index 0000000..af4e621 --- /dev/null +++ b/plugin.h.patch @@ -0,0 +1,4 @@ +56c56 +< class Csound : CSOUND { +--- +> class Csound : public CSOUND { diff --git a/src/dtw.cpp b/src/dtw.cpp new file mode 100644 index 0000000..7497e9a --- /dev/null +++ b/src/dtw.cpp @@ -0,0 +1,99 @@ +/* +Copyright (c) 2014, Calder Phillips-Grafflin (calder.pg@gmail.com) +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include "math.h" +#include <vector> +#include <string> +#include <sstream> +#include "string.h" +#include <iostream> +#include <algorithm> +#include <stdexcept> +#include <dtw.h> +#include <plugin.h> + +using namespace DTW; + + + + +SimpleDTW::SimpleDTW (csnd::Csound* csound, size_t x_size, size_t y_size, int mfcc_bands, MYFLT (*distance_fn)(MYFLT* p1, MYFLT* p2, int bands)) +{ + distance_fn_ = distance_fn; + this->mfcc_bands = mfcc_bands; + x_dim_ = x_size + 1; + y_dim_ = y_size + 1; + // Resize the data + data_ = (MYFLT*) csound->malloc(sizeof(MYFLT) * x_dim_ * y_dim_); + //Populate matrix with starting values + SetInDTWMatrix(0, 0, 0.0); + for (size_t i = 1; i < x_dim_; i++) + { + SetInDTWMatrix(i, 0, INFINITY); + } + for (size_t i = 1; i < y_dim_; i++) + { + SetInDTWMatrix(0, i, INFINITY); + } +} + + +double SimpleDTW::EvaluateWarpingCost(MYFLT* sequence_1, int seq1_items, MYFLT* sequence_2, int seq2_items) +{ + + //Compute DTW cost for the two sequences + for (unsigned int i = 1; i <= seq1_items; i++) + { + for (unsigned int j = 1; j <= seq2_items; j++) + { + double index_cost = distance_fn_(sequence_1 + (i - 1), sequence_2 + (j - 1), mfcc_bands); + double prev_cost = 0.0; + // Get the three neighboring values from the matrix to use for the update + double im1j = GetFromDTWMatrix(i - 1, j); + double im1jm1 = GetFromDTWMatrix(i - 1, j - 1); + double ijm1 = GetFromDTWMatrix(i, j - 1); + // Start the update step + if (im1j < im1jm1 && im1j < ijm1) + { + prev_cost = im1j; + } + else if (ijm1 < im1j && ijm1 < im1jm1) + { + prev_cost = ijm1; + } + else + { + prev_cost = im1jm1; + } + // Update the value in the matrix + SetInDTWMatrix(i, j, index_cost + prev_cost); + } + } + //Return total path cost + return GetFromDTWMatrix(seq1_items, seq2_items); +} diff --git a/src/opcodes.cpp b/src/opcodes.cpp new file mode 100644 index 0000000..8878722 --- /dev/null +++ b/src/opcodes.cpp @@ -0,0 +1,735 @@ +#include "dtw.h" +#include <plugin.h> +#include "xtract/libxtract.h" +#include "xtract/xtract_stateful.h" +#include "xtract/xtract_scalar.h" +#include "xtract/xtract_helper.h" +#include <vector> +#include <stdexcept> +//#include "math.h" + +#define MFCC_BANDS 13 + +MYFLT euclidean_distance(MYFLT* p1, MYFLT* p2, int bands) { + MYFLT total = 0.0; + for (int i = 0; i < bands; i++) + { + total = total + pow((p1[i] - p2[i]), 2); + } + return sqrt(total); +} + +MYFLT manhattan_distance(MYFLT* p1, MYFLT* p2, int bands) { + MYFLT sum = 0.0; + for (int i = 0; i < bands; i++) { + for (int j = i + 1; j < bands; j++) { + sum += (abs(p1[i] - p1[j]) + abs(p2[i] - p2[j])); + } + } + return sum; +} + + + + +struct AnalysisProfile { + bool centroid; + bool mfccs; + bool rms; + bool zerocrossing; + bool flatness; + bool irregularity; + bool power; + bool sharpness; + bool smoothness; + int block_size; + int buffer_size; + + int bands() { + int num = 0; + if (centroid) num ++; + if (rms) num ++; + if (zerocrossing) num ++; + if (flatness) num ++; + if (irregularity) num ++; + if (power) num ++; + if (sharpness) num ++; + if (smoothness) num ++; + if (mfccs) num += MFCC_BANDS; + + return num; + } + + bool compatible(AnalysisProfile* t) { + return ( + centroid == t->centroid + && mfccs == t->mfccs + && rms == t->rms + && zerocrossing == t->zerocrossing + && flatness == t->flatness + && irregularity == t->irregularity + && power == t->power + && sharpness == t->sharpness + && smoothness == t->smoothness + ); + } +}; + +struct AnalysisData { + void* mutex; + MYFLT* data; + bool accumulated; + int itemsPerBufferPeriod; +// MYFLT timePerItem; + int position; + bool ready; + int sample_points; + int data_size; + AnalysisProfile* analysisProfile; + +}; + +struct CorpusData { + FUNC* input; + AnalysisData* analysisData; +}; + + +const char* badHandle = "cannot obtain data from handle"; + +template <typename T> +char* handleIdentifier() { + if (std::is_same<T, AnalysisData>::value) { // TODO not working?? + return "::xtA%d"; + } else if (std::is_same<T, CorpusData>::value) { + return "::xtC%d"; + } else if (std::is_same<T, AnalysisProfile>::value) { + return "::xtP%d"; + } + return "::xt%d"; +} + +/* + * Obtain global object of typename from global variables by handle + */ +template <typename T> +T* getHandle(csnd::Csound* csound, MYFLT handle) {csnd::AuxMem<MYFLT> ax; + char buffer[32]; + snprintf(buffer, 32, handleIdentifier<T>(), (int)handle); + return (T*) csound->query_global_variable(buffer); +} + + +/* + * Create global object of typename in global variables, returning handle + */ +template <typename T> +MYFLT createHandle(csnd::Csound* csound, T** data) { + char buffer[32]; + int handle = 0; + snprintf(buffer, 32, handleIdentifier<T>(), handle); + while ((*data = (T*) csound->query_global_variable(buffer)) != NULL) { + snprintf(buffer, 32, handleIdentifier<T>(), ++handle); + } + csound->create_global_variable(buffer, sizeof(T)); + *data = (T*) csound->query_global_variable(buffer); + + return FL(handle); +} + + +class Analyser { + csnd::Csound* csound; + xtract_mel_filter mel_filters; + MYFLT samplerate; + MYFLT* windowed; + MYFLT* spectrum; + MYFLT* window; + MYFLT* window_subframe; + MYFLT* specargs; + MYFLT* tempMfcc; + int block_size; + int buffer_size; + int half_blocksize; + AnalysisData* analysisOut; + int outPos; + bool firstIterationDone; + bool accumulate; + long sampling_length; + int sample_point; + int analysis_bands; + +public: + + void init( + csnd::Csound* csound, + AnalysisData* analysisOut, + bool accumulate=false, + long sampling_length=0 + ) { + + this->csound = csound; + this->sample_point = 0; + this->sampling_length = sampling_length; + this->accumulate = accumulate; + this->analysisOut = analysisOut; + + analysis_bands = analysisOut->analysisProfile->bands(); + + block_size = analysisOut->analysisProfile->block_size; + buffer_size = analysisOut->analysisProfile->buffer_size; + half_blocksize = block_size / 2; + + outPos = 0; + analysisOut->ready = false; + analysisOut->accumulated = accumulate; + + allocate_vectors(); + samplerate = csound->sr(); + windowed = (MYFLT*) csound->calloc(block_size * sizeof(MYFLT)); + spectrum = (MYFLT*) csound->calloc(block_size * sizeof(MYFLT)); + specargs = (MYFLT*) csound->calloc(4 * sizeof(MYFLT)); + tempMfcc = (MYFLT*) csound->calloc(MFCC_BANDS * sizeof(MYFLT)); + specargs[0] = samplerate / block_size; + specargs[1] = XTRACT_MAGNITUDE_SPECTRUM; // XTRACT_LOG_POWER_SPECTRUM + specargs[2] = 0; // DC component + specargs[3] = 0; // Normalisation = 1 + + window = xtract_init_window(block_size, XTRACT_HANN); + window_subframe = xtract_init_window(block_size >> 1, XTRACT_HANN); + xtract_init_wavelet_f0_state(); + + if (analysisOut->analysisProfile->mfccs) { + mel_filters.n_filters = MFCC_BANDS; + mel_filters.filters = (MYFLT**) csound->malloc(MFCC_BANDS * sizeof(MYFLT*)); + + for (int k = 0; k < MFCC_BANDS; k++) { + mel_filters.filters[k] = (MYFLT*) csound->malloc(block_size * sizeof(MYFLT)); + } + + xtract_init_mfcc( + block_size >> 1, + ((int)samplerate) >> 1, + XTRACT_EQUAL_GAIN, + 20, + 20000, + mel_filters.n_filters, + mel_filters.filters + ); + } + } + + void allocate_vectors() { + int items = 0; + // TODO: some reasonable calculation here instead of loop + for (int pos = 0; (pos + block_size) < buffer_size ; pos += half_blocksize) { + items += 1; + } + analysisOut->itemsPerBufferPeriod = items; + + int sample_points = 1; + if (accumulate) { + sample_points = (int) (sampling_length / buffer_size); + } + analysisOut->sample_points = sample_points; + analysisOut->data_size = sample_points * items * analysis_bands; + analysisOut->data = (MYFLT*) csound->calloc( + sizeof(MYFLT) * analysisOut->data_size + ); + + + } + + void deinit() { + //csound->free(windowed); + //csound->free(spectrum); + //csound->free(mfccargs); + if (analysisOut->analysisProfile->mfccs) { + for (int n = 0; n < MFCC_BANDS; ++n) { + csound->free(mel_filters.filters[n]); + } + csound->free(mel_filters.filters); + } + xtract_free_window(window); + xtract_free_window(window_subframe); + } + + + void analyse(MYFLT* buffer) { + int buffer_period = 0; + int data_pos; + + for (int pos = 0; (pos + block_size) < buffer_size ; pos += half_blocksize) { + MYFLT *now = &buffer[pos]; + xtract_windowed(now, block_size, window, windowed); + xtract_init_fft(block_size, XTRACT_SPECTRUM); + xtract[XTRACT_SPECTRUM](windowed, block_size, &specargs[0], spectrum); + xtract_free_fft(); + + + int data_pos; + if (accumulate) { + data_pos = analysisOut->itemsPerBufferPeriod * analysis_bands * sample_point + analysis_bands * buffer_period; + } else { + data_pos = analysisOut->itemsPerBufferPeriod * analysis_bands * 0 + analysis_bands * buffer_period; + } + + if (analysisOut->analysisProfile->mfccs) { + xtract_mfcc(spectrum, block_size >> 1, &mel_filters, tempMfcc); + xtract_dct(tempMfcc, MFCC_BANDS, NULL, analysisOut->data+data_pos); + data_pos += MFCC_BANDS; + } + + if (analysisOut->analysisProfile->centroid) { + xtract_spectral_centroid(spectrum, block_size, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->zerocrossing) { + xtract_zcr(now, block_size, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->rms) { + xtract_rms_amplitude(now, block_size, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->flatness) { + xtract_flatness(spectrum, block_size >> 1, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->irregularity) { + xtract_irregularity_j(spectrum, block_size >> 1, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->power) { + xtract_power(spectrum, block_size >> 1, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->sharpness) { + xtract_sharpness(spectrum, block_size >> 1, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + if (analysisOut->analysisProfile->smoothness) { + xtract_smoothness(spectrum, block_size >> 1, NULL, analysisOut->data+data_pos); + data_pos ++; + } + + + buffer_period++; + } + if (accumulate) { + sample_point++; + } + } + +}; + + +struct xtdump : csnd::Plugin<1, 1> { + static constexpr char const *otypes = "k[]"; + static constexpr char const *itypes = "i"; + AnalysisData* input; + int bands; + + int init() { + if (!(input = getHandle<AnalysisData>(csound, inargs[0]))) { + return csound->init_error("xtractor handle not valid"); + } + bands = input->analysisProfile->bands(); + csnd::Vector<MYFLT> &out = outargs.vector_data<MYFLT>(0); + out.init(csound, bands); + return OK; + } + + int kperf() { + csnd::Vector<MYFLT> &out = outargs.vector_data<MYFLT>(0); + for (int i=0; i < bands; i++) { + out[i] = *(input->data + i); + } + + return OK; + } +}; + +// kdone, kanalysis[] xtaccdump ixttractorhandle, koutputtrigger +struct xtaccdump : csnd::Plugin<2, 2> { + static constexpr char const *otypes = "kk[]"; + static constexpr char const *itypes = "ik"; + AnalysisData* input; + int bands; + int accumulated; + MYFLT* accumulation; + + int init() { + if (!(input = getHandle<AnalysisData>(csound, inargs[0]))) { + return csound->init_error("xtractor handle not valid"); + } + bands = input->analysisProfile->bands(); + accumulated = 0; + accumulation = (MYFLT*) csound->calloc(sizeof(MYFLT) * bands); + outargs[0] = FL(0); + csnd::Vector<MYFLT> &out = outargs.vector_data<MYFLT>(1); + out.init(csound, bands); + return OK; + } + + void clearBuffer() { + for (int i=0; i < bands; i++) { + *(accumulation + i) = FL(0); + } + } + + int kperf() { + csnd::Vector<MYFLT> &out = outargs.vector_data<MYFLT>(1); + for (int i=0; i < bands; i++) { + *(accumulation + i) += *(input->data + i); + } + accumulated++; + + if (inargs[1] == FL(1)) { + for (int i=0; i < bands; i++) { + out[i] = *(accumulation + i) / accumulated; + } + outargs[0] = FL(1); + accumulated = 0; + clearBuffer(); + } else { + outargs[0] = FL(0); + } + + return OK; + } +}; + + +struct xtcorpusmatch : csnd::Plugin<1, 4> { + static constexpr char const *otypes = "k"; + static constexpr char const *itypes = "iiko"; + CorpusData* corpus; + AnalysisData* input; + DTW::SimpleDTW evaluator; + int bands; + + int init() { + + if (!(corpus = getHandle<CorpusData>(csound, inargs[0]))) { + return csound->init_error("corpus handle not valid"); + } + + if (!corpus->analysisData->ready) { + return csound->init_error("corpus analysis not ready"); + } + + if (!(input = getHandle<AnalysisData>(csound, inargs[1]))) { + return csound->init_error("xtractor handle not valid"); + } + + if (!corpus->analysisData->analysisProfile->compatible(input->analysisProfile)) { + return csound->init_error("xtractor profiles incompatible"); + } + + bands = corpus->analysisData->analysisProfile->bands(); + + + MYFLT (*distance_function)(MYFLT* p1, MYFLT* p2, int bands); + switch ((int) inargs[3]) { + case 0: + distance_function = euclidean_distance; + break; + case 1: + distance_function = manhattan_distance; + break; + } + + evaluator = DTW::SimpleDTW( + csound, + corpus->analysisData->itemsPerBufferPeriod, + input->itemsPerBufferPeriod, + MFCC_BANDS, + distance_function + ); + + return OK; + } + + int nearest() { + MYFLT best = INFINITY; + MYFLT distance; + int bestIndex = 0; + + + for (int index = 0; index < corpus->analysisData->sample_points; index++) { + distance = evaluator.EvaluateWarpingCost( + input->data, + input->itemsPerBufferPeriod, + corpus->analysisData->data+(corpus->analysisData->itemsPerBufferPeriod * bands * index + bands * 0), + corpus->analysisData->itemsPerBufferPeriod + ); + + if (distance < best) {// && distance > 1) { + best = distance; + bestIndex = index; + } + } + return bestIndex * corpus->analysisData->analysisProfile->buffer_size; + } + + int kperf() { + if (inargs[2] == FL(1)) { + outargs[0] = FL(nearest()); + } else { + outargs[0] = 0; + } + return OK; + } +}; + + +struct xtprofile : csnd::Plugin<1, 11> { + static constexpr char const *otypes = "i"; + static constexpr char const *itypes = "ooppppppppp"; + + int init() { + AnalysisProfile* profile; + outargs[0] = createHandle<AnalysisProfile>(csound, &profile); + profile->buffer_size = (inargs[0] != FL(0))? (int) inargs[0]: 4096; + profile->block_size = (inargs[1] != FL(0))? (int) inargs[1]: 512; + + if (profile->block_size >= profile->buffer_size) { + return csound->init_error("block size must be smaller than buffer size"); + } + + profile->mfccs = (bool) inargs[2]; + profile->centroid = (bool) inargs[3]; + profile->zerocrossing = (bool) inargs[4]; + profile->rms = (bool) inargs[5]; + profile->flatness = (bool) inargs[6]; + profile->irregularity = (bool) inargs[7]; + profile->power = (bool) inargs[8]; + profile->sharpness = (bool) inargs[9]; + profile->smoothness = (bool) inargs[10]; + return OK; + } +}; + +/* +struct xtprofileprint : csnd::Plugin<1, 1> { + static constexpr char const *otypes = "S"; + static constexpr char const *itypes = "i"; + int init() { + AnalysisProfile* profile; + if (!(profile = getHandle<AnalysisProfile>(csound, inargs[0]))) { + return csound->init_error("profile handle invalid"); + } + + STRINGDAT* out = (STRINGDAT*) outargs(0); + + std::stringstream data; + data << "\nBuffer size:\t" << profile->buffer_size + << "\nBlock size:\t" << profile->block_size + << "\nMFCCS:\t\t" << profile->mfccs + << "\nCentroid:\t" << profile->centroid + << "\nRMS:\t\t" << profile->rms << "\n"; + out->size = data.str().length(); + out->data = data.str().c_str(); + + return OK; + } +}; +*/ + +// icorpushandle xtcorpus iprofilehandle, ifn +struct xtcorpus : csnd::Plugin<1, 2> { + static constexpr char const *otypes = "i"; + static constexpr char const *itypes = "ii"; + CorpusData* corpus; + + + int init() { + + outargs[0] = createHandle<CorpusData>(csound, &corpus); + + AnalysisProfile* corpusProfile; + if (!(corpusProfile = getHandle<AnalysisProfile>(csound, inargs[0]))) { + return csound->init_error("profile handle invalid"); + } + + if ((corpus->input = csound->get_csound()->FTnp2Find(csound, &inargs[1])) == NULL) { + return csound->init_error("cannot get function table specified"); + } + + corpus->analysisData = (AnalysisData*) csound->malloc(sizeof(AnalysisData)); + corpus->analysisData->analysisProfile = corpusProfile; + int buffer_size = corpusProfile->buffer_size; + MYFLT* buffer; + int buffer_position = 0; + Analyser analyser; + buffer = (MYFLT*) csound->malloc(sizeof(MYFLT) * buffer_size); + analyser.init(csound, corpus->analysisData, true, corpus->input->flen); + + for (int i = 0; i < corpus->input->flen; i++) { + buffer[buffer_position] = corpus->input->ftable[i]; + if (buffer_position == buffer_size -1) { + buffer_position = 0; + analyser.analyse(buffer); + } else { + buffer_position++; + } + } + + //if (buffer_position != 0) { + // analyser.analyse(buffer, buffer_size-buffer_position); + //} + analyser.deinit(); + + corpus->analysisData->ready = true; + + return OK; + } + +}; + + + +// ixthandle, kdone xtractor iprofile, ainput +struct xtractor : csnd::Plugin<2, 2> { + static constexpr char const *otypes = "ik"; + static constexpr char const *itypes = "ia"; // trigger on input???? + Analyser analyser; + AnalysisData* analysisData; + AnalysisProfile* profile; + int buffer_position; + int ksmps; + MYFLT* buffer; + + int init() { + csound->plugin_deinit(this); + buffer_position = 0; + + if (!(profile = getHandle<AnalysisProfile>(csound, inargs[0]))) { + return csound->init_error("profile handle invalid"); + } + + buffer = (MYFLT*) csound->calloc(sizeof(MYFLT) * profile->buffer_size); + ksmps = insdshead->ksmps; + outargs[0] = createHandle<AnalysisData>(csound, &analysisData); + + analysisData->analysisProfile = profile; + try { + analyser.init(csound, analysisData, false); + } catch (std::exception &exception) { + return csound->init_error(exception.what()); + } + return OK; + } + + int deinit() { + analyser.deinit(); + return OK; + } + + int aperf() { + outargs[1] = FL(0); + for (int i = 0; i < ksmps; i++) { + buffer[buffer_position] = inargs(1)[i]; + if (buffer_position == profile->buffer_size -1) { + break; + } + buffer_position += 1; + } + + if (buffer_position == profile->buffer_size - 1) { + analyser.analyse(buffer); + buffer_position = 0; + outargs[1] = FL(1); + } + + return OK; + } +}; + +struct xtdistance : csnd::Plugin<1, 4> { + static constexpr char const *otypes = "k"; + static constexpr char const *itypes = "iiko"; + AnalysisData* analysisData1; + AnalysisData* analysisData2; + DTW::SimpleDTW eval; + + int init() { + if (!(analysisData1 = getHandle<AnalysisData>(csound, inargs[0]))) { + return csound->init_error("xtractor handle 1 invalid"); + } + + if (!(analysisData2 = getHandle<AnalysisData>(csound, inargs[1]))) { + return csound->init_error("xtractor handle 2 invalid"); + } + + if (!analysisData1->analysisProfile->compatible(analysisData2->analysisProfile)) { + return csound->init_error("xtractor profiles incompatible"); + } + + MYFLT (*distance_function)(MYFLT* p1, MYFLT* p2, int bands); + switch ((int) inargs[3]) { + case 0: + distance_function = euclidean_distance; + break; + case 1: + distance_function = manhattan_distance; + break; + } + + eval = DTW::SimpleDTW( + csound, + analysisData1->itemsPerBufferPeriod, + analysisData2->itemsPerBufferPeriod, + MFCC_BANDS, + distance_function + ); + + outargs[0] = INFINITY; + return OK; + } + + int kperf() { + + // && analysisData1->ready && analysisData1->ready + if (inargs[2] == FL(1)) { + outargs[0] = eval.EvaluateWarpingCost( + analysisData1->data, + analysisData1->itemsPerBufferPeriod, + analysisData2->data, + analysisData2->itemsPerBufferPeriod + ); + } else { + outargs[0] = INFINITY; + } + + return OK; + } +}; + + + + + + + +#include <modload.h> + +void csnd::on_load(csnd::Csound *csound) { + csnd::plugin<xtprofile>(csound, "xtprofile", csnd::thread::i); + //csnd::plugin<xtprofileprint>(csound, "xtprofileprint", csnd::thread::i); + csnd::plugin<xtractor>(csound, "xtractor", csnd::thread::ia); + csnd::plugin<xtdistance>(csound, "xtdistance", csnd::thread::ik); + csnd::plugin<xtcorpus>(csound, "xtcorpus", csnd::thread::i); + csnd::plugin<xtcorpusmatch>(csound, "xtcorpusmatch", csnd::thread::ik); + csnd::plugin<xtdump>(csound, "xtdump", csnd::thread::ik); + csnd::plugin<xtaccdump>(csound, "xtaccdump", csnd::thread::ik); + +} |