aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/delta.c1
-rw-r--r--src/descriptors.c102
-rw-r--r--src/vector.c34
-rw-r--r--xtract/libxtract.h13
-rw-r--r--xtract/xtract_vector.h9
5 files changed, 69 insertions, 90 deletions
diff --git a/src/delta.c b/src/delta.c
index f1a5ee0..6ca3794 100644
--- a/src/delta.c
+++ b/src/delta.c
@@ -24,6 +24,7 @@
int xtract_flux(const float *data, const int N, const void *argv , float *result){
+
return XTRACT_FEATURE_NOT_IMPLEMENTED;
}
diff --git a/src/descriptors.c b/src/descriptors.c
index 3c43775..4584899 100644
--- a/src/descriptors.c
+++ b/src/descriptors.c
@@ -48,79 +48,6 @@ void *xtract_make_descriptors(){
argc = &d->argc;
argv_type = &d->argv.type;
- switch(f){
-
- case XTRACT_VARIANCE:
- case XTRACT_STANDARD_DEVIATION:
- case XTRACT_AVERAGE_DEVIATION:
- case XTRACT_SPECTRAL_VARIANCE:
- case XTRACT_SPECTRAL_STANDARD_DEVIATION:
- case XTRACT_SPECTRAL_AVERAGE_DEVIATION:
- case XTRACT_SPECTRAL_INHARMONICITY:
- case XTRACT_LOWEST_VALUE:
- case XTRACT_F0:
- case XTRACT_FAILSAFE_F0:
- case XTRACT_TONALITY:
- *argc = 1;
- *argv_type = XTRACT_FLOAT;
- break;
- case XTRACT_SKEWNESS:
- case XTRACT_KURTOSIS:
- case XTRACT_SPECTRAL_SKEWNESS:
- case XTRACT_SPECTRAL_KURTOSIS:
- case XTRACT_SPECTRUM:
- case XTRACT_PEAK_SPECTRUM:
- case XTRACT_HARMONIC_SPECTRUM:
- case XTRACT_NOISINESS:
- case XTRACT_CREST:
- case XTRACT_ROLLOFF:
- *argc = 2;
- *argv_type = XTRACT_FLOAT;
- break;
- case XTRACT_MFCC:
- *argc = 1;
- *argv_type = XTRACT_MEL_FILTER;
- break;
- case XTRACT_BARK_COEFFICIENTS:
- *argc = XTRACT_BARK_BANDS;
- *argv_type = XTRACT_INT;
- break;
- case XTRACT_MEAN:
- case XTRACT_SPECTRAL_MEAN:
- case XTRACT_SPECTRAL_CENTROID:
- case XTRACT_IRREGULARITY_K:
- case XTRACT_IRREGULARITY_J:
- case XTRACT_TRISTIMULUS_1:
- case XTRACT_TRISTIMULUS_2:
- case XTRACT_TRISTIMULUS_3:
- case XTRACT_SMOOTHNESS:
- case XTRACT_FLATNESS:
- case XTRACT_SPREAD:
- case XTRACT_ZCR:
- case XTRACT_LOUDNESS:
- case XTRACT_HIGHEST_VALUE:
- case XTRACT_SUM:
- case XTRACT_RMS_AMPLITUDE:
- case XTRACT_POWER:
- case XTRACT_SHARPNESS:
- case XTRACT_SPECTRAL_SLOPE:
- case XTRACT_HPS:
- case XTRACT_FLUX:
- case XTRACT_ATTACK_TIME:
- case XTRACT_DECAY_TIME:
- case XTRACT_DELTA_FEATURE:
- case XTRACT_AUTOCORRELATION_FFT:
- case XTRACT_DCT:
- case XTRACT_AUTOCORRELATION:
- case XTRACT_AMDF:
- case XTRACT_ASDF:
- case XTRACT_NONZERO_COUNT:
- case XTRACT_ODD_EVEN_RATIO:
- default:
- *argc = 0;
- break;
- }
-
argv_min = &d->argv.min[0];
argv_max = &d->argv.max[0];
argv_def = &d->argv.def[0];
@@ -169,14 +96,22 @@ void *xtract_make_descriptors(){
*(argv_unit + 1) = XTRACT_PERCENT;
break;
case XTRACT_SPECTRUM:
- *argv_min = XTRACT_SR_LOWER_LIMIT / 2;
- *argv_max = XTRACT_SR_UPPER_LIMIT / 2;
- *argv_def = XTRACT_SR_DEFAULT / 2;
+ *argv_min = XTRACT_SR_LOWER_LIMIT / XTRACT_FFT_BANDS_MIN;
+ *argv_max = XTRACT_SR_UPPER_LIMIT / XTRACT_FFT_BANDS_MAX;
+ *argv_def = XTRACT_SR_DEFAULT / XTRACT_FFT_BANDS_DEF;
*argv_unit = XTRACT_HERTZ;
*(argv_min + 1) = 0;
*(argv_max + 1) = 3 ;
*(argv_def + 1) = 0;
*(argv_unit + 1) = XTRACT_NONE;
+ *(argv_min + 2) = 0;
+ *(argv_max + 2) = 1;
+ *(argv_def + 2) = 0;
+ *(argv_unit + 2) = XTRACT_NONE;
+ *(argv_min + 3) = 0;
+ *(argv_max + 3) = 1;
+ *(argv_def + 3) = 0;
+ *(argv_unit + 3) = XTRACT_NONE;
break;
case XTRACT_PEAK_SPECTRUM:
*argv_min = XTRACT_SR_LOWER_LIMIT / 2;
@@ -260,7 +195,6 @@ void *xtract_make_descriptors(){
*argv_donor = XTRACT_INIT_MFCC;
break;
/* argc = 2 */;
- case XTRACT_SPECTRUM:
case XTRACT_ROLLOFF:
case XTRACT_PEAK_SPECTRUM:
*argv_donor = XTRACT_ANY;
@@ -288,7 +222,14 @@ void *xtract_make_descriptors(){
*argv_donor = XTRACT_HIGHEST_VALUE;
*(argv_donor + 1) = XTRACT_MEAN;
break;
- /* argc = BARK_BANDS */
+ /* argc = 4 */
+ case XTRACT_SPECTRUM:
+ *argv_donor = XTRACT_ANY;
+ *(argv_donor + 1) = XTRACT_ANY;
+ *(argv_donor + 2) = XTRACT_ANY;
+ *(argv_donor + 3) = XTRACT_ANY;
+ break;
+ /* BARK_BANDS */
case XTRACT_BARK_COEFFICIENTS:
*argv_donor = XTRACT_INIT_BARK;
break;
@@ -951,7 +892,6 @@ void *xtract_make_descriptors(){
case XTRACT_KURTOSIS:
case XTRACT_SPECTRAL_SKEWNESS:
case XTRACT_SPECTRAL_KURTOSIS:
- case XTRACT_SPECTRUM:
case XTRACT_PEAK_SPECTRUM:
case XTRACT_HARMONIC_SPECTRUM:
case XTRACT_NOISINESS:
@@ -960,6 +900,10 @@ void *xtract_make_descriptors(){
*argc = 2;
*argv_type = XTRACT_FLOAT;
break;
+ case XTRACT_SPECTRUM:
+ *argc = 4;
+ *argv_type = XTRACT_FLOAT;
+ break;
case XTRACT_MFCC:
*argc = 1;
*argv_type = XTRACT_MEL_FILTER;
diff --git a/src/vector.c b/src/vector.c
index 06fc281..d3fdd67 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -45,12 +45,17 @@
int xtract_spectrum(const float *data, const int N, const void *argv, float *result){
- float *input, *rfft, q, temp;
+ float *input, *rfft, q, temp, max;
size_t bytes;
- int n , NxN, M, vector, withDC, argc;
- //fftwf_plan plan;
+ int n,
+ NxN,
+ M,
+ vector,
+ withDC,
+ argc,
+ normalise;
- vector = argc = withDC = 0;
+ vector = argc = withDC = normalise = 0;
M = N >> 1;
NxN = XTRACT_SQ(N);
@@ -62,13 +67,14 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *res
q = *(float *)argv;
vector = (int)*((float *)argv+1);
withDC = (int)*((float *)argv+2);
+ normalise = (int)*((float *)argv+3);
+
+ temp = 0.f;
+ max = 0.f;
XTRACT_CHECK_q;
if(fft_plans.spectrum_plan == NULL){
- /* FIX: Not sure this should really be here. Might introduce
- * DEBUG_POST macro, or some kind of error handler, or leave it to the
- * caller... */
fprintf(stderr,
"libxtract: Error: xtract_spectrum() has uninitialised plan\n");
return XTRACT_NO_RESULT;
@@ -98,6 +104,7 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *res
XTRACT_DB_SCALE_OFFSET;
result[M + n - 1] = n * q;
}
+ max = result[n] > max ? result[n] : max;
}
break;
@@ -113,6 +120,7 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *res
(XTRACT_SQ(rfft[n]) + XTRACT_SQ(rfft[N - n])) / NxN;
result[M + n - 1] = n * q;
}
+ max = result[n] > max ? result[n] : max;
}
break;
@@ -133,6 +141,7 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *res
XTRACT_DB_SCALE_OFFSET;
result[M + n - 1] = n * q;
}
+ max = result[n] > max ? result[n] : max;
}
break;
@@ -149,6 +158,7 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *res
XTRACT_SQ(rfft[N - n])) / N;
result[M + n - 1] = n * q;
}
+ max = result[n] > max ? result[n] : max;
}
break;
}
@@ -157,16 +167,26 @@ int xtract_spectrum(const float *data, const int N, const void *argv, float *res
/* The DC component */
result[0] = XTRACT_SQ(rfft[0]);
result[M + 1] = 0.f;
+ max = result[0] > max ? result[0] : max;
/* The Nyquist */
result[M] = XTRACT_SQ(rfft[M]);
result[N + 1] = q * M;
+ max = result[M] > max ? result[M] : max;
+ M++; /* So we normalise the Nyquist (below) */
}
else {
/* The Nyquist */
result[M - 1] = (float)XTRACT_SQ(rfft[M]);
result[N - 1] = q * M;
+ max = result[M - 1] > max ? result[M - 1] : max;
}
+
+ if(normalise){
+ for(n = 0; n < M; n++)
+ result[n] /= max;
+ }
+
fftwf_free(rfft);
free(input);
diff --git a/xtract/libxtract.h b/xtract/libxtract.h
index 40512b0..4b948f1 100644
--- a/xtract/libxtract.h
+++ b/xtract/libxtract.h
@@ -25,8 +25,19 @@
*
* This philosophy of 'cascading' features is followed throughout the library, for example with features that operate on the magnitude spectrum of a signal vector (e.g. 'irregularity'), the magnitude spectrum is not calculated 'inside' the respective function, instead, a pointer to the first element in an array containing the magnitude spectrum is passed in as an argument.
*
- * Hopefully this not only makes the library more efficient when computing large numbers of features, but also makes it more flexible because extraction functions can be combined arbitrarily (one can take the irregularility of the Mel Frequency Cepstral Coefficients for example).
+ * Hopefully this not only makes the library more efficient when computing large numbers of features, but also makes it more flexible because extraction functions can be combined arbitrarily (one can take the irregularity of the Mel Frequency Cepstral Coefficients for example).
*
+ * All feature extraction functions follow the same prototype:
+ *
+int xtract_function_name(const float *data, const int N, const void *argv, float *result){
+ *
+ * \param const float *data points to an array of floats representing the input data
+ * \param const int N represents the number of elements from *data to be considered in the calculation
+ * \param const void *argv represents an arbitrary list of arguments. Used to pass in values required by the feature calculation
+ * \param float *result points to an array of floats, or a single float representing the result of the calculation
+ *
+ *
+ * It is up to the calling function to allocate enough memory for the *data, *argv, and *result, and to free it when required. Some feature extraction functions may also require an _init() function to be called in order to perform some initialisation. The struct xtract_function_descriptor_t is used to give an indication of recommended default values, and argc for the *argv array.
*
* LibXtract can be downloaded from http://www.sf.net/projects/libxtract
*
diff --git a/xtract/xtract_vector.h b/xtract/xtract_vector.h
index 7effac0..e758f10 100644
--- a/xtract/xtract_vector.h
+++ b/xtract/xtract_vector.h
@@ -34,12 +34,15 @@ extern "C" {
* @{
*/
-/** \brief Extract normalized (0-1) frequency domain spectrum from time domain signal
+/** \brief Extract frequency domain spectrum from time domain signal
*
* \param *data: a pointer to the first element in an array of floats representing an audio vector
* \param N: the number of array elements to be considered
- * \param *argv: a pointer to an array of floats, the first representing (samplerate / N), the second will be cast to an integer and determines the spectrum type (e.g. XTRACT_MAGNITUDE_SPECTRUM, XTRACT_LOG_POWER_SPECTRUM). The third argument determines whether or not the DC component is included in the output. If argv[2] == 1, then the DC component is included in which case the size of the array pointed to by *result must be N+2. For any further use of the array pointed to by *result, the value of N must reflect the (larger) array size.
- * \param *result: a pointer to an array of size N containing N/2 magnitude/power/log magnitude/log power coefficients and N/2 bin frequencies.
+ * \param *argv: a pointer to an array of floats, the first representing (samplerate / N), the second will be cast to an integer and determines the spectrum type (e.g. XTRACT_MAGNITUDE_SPECTRUM, XTRACT_LOG_POWER_SPECTRUM). The third argument determines whether or not the DC component is included in the output. If argv[2] == 1, then the DC component is included in which case the size of the array pointed to by *result must be N+2. For any further use of the array pointed to by *result, the value of N must reflect the (larger) array size. The fourth argument determines whether the magnitude/power coefficients are to be normalised. If argv[3] == 1, then the coefficients are normalised.
+ * \param *result: a pointer to an array of size N containing N/2 magnitude/power/log magnitude/log power coefficients and N/2 bin frequencies.
+ *
+ * If argv[3] == 1, the magnitude/power coefficients are scaled to the range 0-1 so that for a given coefficient x, 0 <= x <= 1
+ *
*/
int xtract_spectrum(const float *data, const int N, const void *argv, float *result);