1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
|
/*
* Copyright (c) 2008 MUSIC TECHNOLOGY GROUP (MTG)
* UNIVERSITAT POMPEU FABRA
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
/*! \file sms.h
* \brief header file to be included in all SMS applications
*/
#ifndef _SMS_H
#define _SMS_H
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <memory.h>
#include <strings.h>
#define SMS_VERSION        1.15   /*!< \brief version control number */
#define SMS_MAX_NPEAKS     400    /*!< \brief maximum number of peaks */
#define SMS_MAX_FRAME_SIZE 10000  /*!< \brief maximum size of input frame in samples */
#define SMS_MAX_SPEC       8192   /*!< \brief maximum size for magnitude spectrum */
#define sfloat             double /*!< \brief floating-point type used by the declarations in this header */
/*! \struct SMS_Header
 * \brief structure for the header of an SMS file
 *
 * This header contains all the information necessary to read an SMS
 * file and to prepare memory and synthesizer parameters.
 *
 * The header also contains variable components for additional information
 * that may be stored along with the analysis, such as descriptors or text.
 *
 * The first four members must stay in this order so that the .sms files
 * created by this library can be opened correctly.
 *
 * iSamplingRate contains the samplerate of the analysis signal because it is
 * necessary to know this information to recreate the residual spectrum.
 *
 * In the first release, the descriptors are not used, but are here because they
 * were implemented in previous versions of this code (in the 90's). With time,
 * the documentation will be updated to reflect which members of the header
 * are useful in manipulations, and what functions to use for these manipulations.
 */
typedef struct SMSHeader
{
    int    iSmsMagic;        /*!< identification constant */
    int    iHeadBSize;       /*!< size in bytes of header */
    int    nFrames;          /*!< number of data frames */
    int    iFrameBSize;      /*!< size in bytes of each data frame */
    int    iSamplingRate;    /*!< samplerate of analysis signal (necessary to recreate residual spectrum) */
    int    iFormat;          /*!< type of data format \see SMS_Format */
    int    nTracks;          /*!< number of sinusoidal tracks per frame */
    int    iFrameRate;       /*!< rate in Hz of data frames */
    int    iStochasticType;  /*!< type of stochastic representation */
    int    nStochasticCoeff; /*!< number of stochastic coefficients per frame */
    int    iEnvType;         /*!< type of envelope representation */
    int    nEnvCoeff;        /*!< number of cepstral coefficients per frame */
    int    iMaxFreq;         /*!< maximum frequency of peaks (also corresponds to the last bin of the specEnv) */
    sfloat fResidualPerc;    /*!< percentage of the residual to original */
} SMS_Header;
/*! \struct SMS_Data
 * \brief structure with SMS data
 *
 * This is where all the analysis data ends up. Once data is in here, it is
 * ready for synthesis.
 *
 * The data lives in one contiguous block (pSmsData); the other pointer
 * members point specifically to each component inside that block.
 *
 * pFSinPha is optional in the final output, but it is always used to
 * construct the residual signal.
 */
typedef struct SMSData
{
    sfloat *pSmsData;    /*!< pointer to all SMS data */
    int     sizeData;    /*!< size of all the data */
    sfloat *pFSinFreq;   /*!< frequency of sinusoids */
    sfloat *pFSinAmp;    /*!< magnitude of sinusoids (stored in dB) */
    sfloat *pFSinPha;    /*!< phase of sinusoids */
    int     nTracks;     /*!< number of sinusoidal tracks in frame */
    sfloat *pFStocGain;  /*!< gain of stochastic component */
    int     nCoeff;      /*!< number of filter coefficients */
    sfloat *pFStocCoeff; /*!< filter coefficients for stochastic component */
    sfloat *pResPhase;   /*!< residual phase spectrum */
    int     nEnvCoeff;   /*!< number of spectral envelope coefficients */
    sfloat *pSpecEnv;    /*!< presumably the spectral envelope coefficients counted by nEnvCoeff - TODO confirm */
} SMS_Data;
/*! \struct SMS_SndBuffer
 * \brief buffer for sound data
 *
 * Holds a buffer of audio data. iMarker is the sample number of the sound
 * source that corresponds to the first sample in the buffer.
 */
typedef struct
{
    sfloat *pFBuffer;   /*!< buffer for sound data */
    int     sizeBuffer; /*!< size of buffer */
    int     iMarker;    /*!< sample marker relating to sound source */
    int     iFirstGood; /*!< first sample in buffer that is a good one */
} SMS_SndBuffer;
/*! \struct SMS_Peak
 * \brief structure for a sinusoidal peak
 */
typedef struct
{
    sfloat fFreq;  /*!< frequency of peak */
    sfloat fMag;   /*!< magnitude of peak */
    sfloat fPhase; /*!< phase of peak */
} SMS_Peak;
/*! \struct SMS_SpectralPeaks
 * \brief a collection of spectral peaks
 */
typedef struct SMSSpectralPeaks
{
    SMS_Peak *pSpectralPeaks; /*!< array of peaks */
    int       nPeaks;         /*!< presumably the capacity of pSpectralPeaks - TODO confirm */
    int       nPeaksFound;    /*!< presumably the number of entries actually filled in - TODO confirm */
} SMS_SpectralPeaks;
/*! \struct SMS_AnalFrame
 * \brief structure to hold an analysis frame
 *
 * Carries extra information needed to continue the analysis; it can be
 * disregarded once the analysis is complete.
 */
typedef struct
{
    int       iFrameSample;   /*!< sample number of the middle of the frame */
    int       iFrameSize;     /*!< number of samples used in the frame */
    int       iFrameNum;      /*!< frame number */
    SMS_Peak *pSpectralPeaks; /*!< spectral peaks found in frame */
    int       nPeaks;         /*!< number of peaks found */
    sfloat    fFundamental;   /*!< fundamental frequency in frame */
    SMS_Data  deterministic;  /*!< deterministic data */
    int       iStatus;        /*!< status of frame, one of SMS_FRAME_STATUS \see SMS_FRAME_STATUS */
} SMS_AnalFrame;
/*! \struct SMS_SEnvParams
 * \brief structure with information and data for spectral enveloping
 */
typedef struct
{
    int    iType;    /*!< envelope type \see SMS_SpecEnvType */
    int    iOrder;   /*!< cepstrum order */
    int    iMaxFreq; /*!< maximum frequency covered by the envelope */
    sfloat fLambda;  /*!< regularization factor */
    int    nCoeff;   /*!< number of coefficients (bins) in the envelope */
    int    iAnchor;  /*!< whether to make anchor points at DC / Nyquist or not */
} SMS_SEnvParams;
/*! \struct SMS_Guide
 * \brief information attached to a guide
 *
 * Guides organize the detected peaks into time-varying trajectories, or
 * sinusoidal tracks. As the analysis progresses, previous guides may be
 * updated according to new information in the peak continuation of new
 * frames (two-way mismatch).
 */
typedef struct
{
    sfloat fFreq;       /*!< frequency of guide */
    sfloat fMag;        /*!< magnitude of guide */
    int    iStatus;     /*!< status of guide: DEAD, SLEEPING, ACTIVE */
    int    iPeakChosen; /*!< peak number chosen by the guide */
} SMS_Guide;
/*! \struct SMS_ResidualParams
 * \brief structure with information for residual functions
 *
 * Contains all the necessary settings and memory for residual synthesis.
 * The individual members carry no documentation in this header; how they
 * are used is defined by the residual functions that take this struct.
 */
typedef struct
{
    int     samplingRate;
    int     hopSize;
    int     residualSize;
    sfloat *residual;
    sfloat *fftWindow;
    sfloat *ifftWindow;
    sfloat  windowScale;
    sfloat  residualMag;
    sfloat  originalMag;
    int     nCoeffs;
    sfloat *stocCoeffs;
    int     sizeStocMagSpectrum;
    sfloat *stocMagSpectrum;
    sfloat *stocPhaseSpectrum;
    sfloat *approx;
    sfloat *approxEnvelope;
    sfloat  fftBuffer[SMS_MAX_SPEC * 2]; /* embedded array of 2 * SMS_MAX_SPEC values */
} SMS_ResidualParams;
/*! \struct SMS_AnalParams
 * \brief structure with useful information for analysis functions
 *
 * Each analysis needs one of these. It contains all settings, sound data,
 * deterministic synthesis data, and every other piece of data that needs
 * to be shared between functions.
 *
 * There is an array of already analyzed frames (hardcoded to 50 right now -
 * \todo make it variable) that are accumulated for good harmonic detection
 * and partial tracking. For instance, once the fundamental frequency of a
 * harmonic signal is located (after a few frames), the harmonic analysis
 * and peak detection/continuation process can be re-computed with more accuracy.
 */
typedef struct SMSAnalysisParams
{
    /* general analysis settings */
    int    iDebugMode;             /*!< debug codes enumerated by SMS_DBG \see SMS_DBG */
    int    iFormat;                /*!< analysis format code defined by SMS_Format \see SMS_Format */
    int    iSoundType;             /*!< type of sound to be analyzed \see SMS_SOUND_TYPE */
    int    iStochasticType;        /*!< type of stochastic model defined by SMS_StocSynthType \see SMS_StocSynthType */
    int    iFrameRate;             /*!< rate in Hz of data frames */
    int    nStochasticCoeff;       /*!< number of stochastic coefficients per frame */
    sfloat fLowestFundamental;     /*!< lowest fundamental frequency in Hz */
    sfloat fHighestFundamental;    /*!< highest fundamental frequency in Hz */
    sfloat fDefaultFundamental;    /*!< default fundamental in Hz */
    sfloat fPeakContToGuide;       /*!< contribution of previous peak to current guide (between 0 and 1) */
    sfloat fFundContToGuide;       /*!< contribution of current fundamental to current guide (between 0 and 1) */
    sfloat fFreqDeviation;         /*!< maximum deviation from peak to peak */
    int    iSamplingRate;          /*!< sampling rate of sound to be analyzed */
    int    iDefaultSizeWindow;     /*!< default size of analysis window in samples */
    int    windowSize;             /*!< the current window size */
    int    sizeHop;                /*!< hop size of analysis window in samples */
    sfloat fSizeWindow;            /*!< size of analysis window in number of periods */
    int    nTracks;                /*!< number of sinusoidal tracks in frame */
    int    maxPeaks;               /*!< maximum number of peaks in a frame */
    int    nGuides;                /*!< number of guides used for peak detection and continuation \see SMS_Guide */
    int    iCleanTracks;           /*!< whether or not to clean sinusoidal tracks */
    sfloat fMinRefHarmMag;         /*!< minimum magnitude in dB for reference peak */
    sfloat fRefHarmMagDiffFromMax; /*!< maximum magnitude difference from reference peak to highest peak */
    int    iRefHarmonic;           /*!< reference harmonic to use in the fundamental detection */
    int    iMinTrackLength;        /*!< minimum length in samples of a given track */
    int    iMaxSleepingTime;       /*!< maximum sleeping time for a track */
    sfloat fLowestFreq;            /*!< lowest frequency to be searched */
    sfloat fHighestFreq;           /*!< highest frequency to be searched */
    sfloat fMinPeakMag;            /*!< minimum magnitude in dB for a good peak */
    int    iAnalysisDirection;     /*!< analysis direction, direct or reverse */
    int    iSizeSound;             /*!< total size of sound to be analyzed in samples */
    int    nFrames;                /*!< total number of frames that will be analyzed */
    int    iWindowType;            /*!< type of FFT analysis window \see SMS_WINDOWS */
    int    iMaxDelayFrames;        /*!< maximum number of frames to delay before peak continuation */
    int    minGoodFrames;          /*!< minimum number of stable frames for backward search */
    sfloat maxDeviation;           /*!< maximum deviation allowed */
    int    analDelay;              /*!< number of frames in the past to be looked at for possible re-analysis */
    sfloat fResidualAccumPerc;     /*!< accumulative residual percentage */
    int    sizeNextRead;           /*!< size of samples to read from sound file next analysis */
    int    preEmphasis;            /*!< whether or not to perform pre-emphasis */
    sfloat preEmphasisLastValue;

    /* embedded state and buffers */
    SMS_Data           prevFrame;     /*!< the previous analysis frame */
    SMS_SEnvParams     specEnvParams; /*!< all data for spectral enveloping */
    SMS_SndBuffer      soundBuffer;   /*!< signal to be analyzed */
    SMS_SndBuffer      synthBuffer;   /*!< resynthesized signal used to create the residual */
    SMS_AnalFrame     *pFrames;       /*!< an array of frames that have already been analyzed */
    sfloat             magSpectrum[SMS_MAX_SPEC];
    sfloat             phaseSpectrum[SMS_MAX_SPEC];
    sfloat             spectrumWindow[SMS_MAX_SPEC];
    sfloat             fftBuffer[SMS_MAX_SPEC * 2];
    SMS_ResidualParams residualParams;
    int               *guideStates;
    SMS_Guide         *guides;
    sfloat             inputBuffer[SMS_MAX_FRAME_SIZE];
    int                sizeStocMagSpectrum;
    sfloat            *stocMagSpectrum;
    sfloat            *approxEnvelope; /*!< spectral approximation envelope */
    SMS_AnalFrame    **ppFrames;       /*!< pointers to the frames analyzed (it is circular-shifted once the array is full) */
} SMS_AnalParams;
/*! \struct SMS_ModifyParams
 *
 * \brief structure with parameters and data that will be used to modify an SMS_Data frame
 */
typedef struct
{
    int     ready;        /*!< a flag to know if the struct has been initialized */
    int     maxFreq;      /*!< maximum frequency component */
    int     doResGain;    /*!< whether or not to scale residual gain */
    sfloat  resGain;      /*!< residual scale factor */
    int     doTranspose;  /*!< whether or not to transpose */
    sfloat  transpose;    /*!< transposition factor */
    int     doSinEnv;     /*!< whether or not to apply a new spectral envelope to the sin component */
    sfloat  sinEnvInterp; /*!< value between 0 (use frame's env) and 1 (use *sinEnv). Interpolates in-between values */
    int     sizeSinEnv;   /*!< size of the envelope pointed to by sinEnv */
    sfloat *sinEnv;       /*!< sinusoidal spectral envelope */
    int     doResEnv;     /*!< whether or not to apply a new spectral envelope to the residual component */
    sfloat  resEnvInterp; /*!< value between 0 (use frame's env) and 1 (use *resEnv). Interpolates in-between values */
    int     sizeResEnv;   /*!< size of the envelope pointed to by resEnv */
    sfloat *resEnv;       /*!< residual spectral envelope */
} SMS_ModifyParams;
/*! \struct SMS_SynthParams
 * \brief structure with information for synthesis functions
 *
 * Contains all the necessary settings for the different types of synthesis.
 * It also holds arrays for windows and the inverse-FFT, as well as the
 * previously synthesized frame.
 */
typedef struct
{
    int     iStochasticType;     /*!< type of stochastic model defined by SMS_StocSynthType \see SMS_StocSynthType */
    int     iSynthesisType;      /*!< type of synthesis to perform \see SMS_SynthType */
    int     iDetSynthType;       /*!< method for synthesizing deterministic component \see SMS_DetSynthType */
    int     iOriginalSRate;      /*!< samplerate of the sound model source (for stochastic synthesis approximation) */
    int     iSamplingRate;       /*!< synthesis samplerate */
    int     sizeHop;             /*!< number of samples to synthesize for each frame */
    int     origSizeHop;         /*!< original number of samples used to create each analysis frame */
    int     nTracks;
    int     nStochasticCoeff;
    int     deEmphasis;          /*!< whether or not to perform de-emphasis */
    sfloat  deEmphasisLastValue;
    sfloat *pFDetWindow;         /*!< array to hold the window used for deterministic synthesis \see SMS_WIN_IFFT */
    sfloat *pFStocWindow;        /*!< array to hold the window used for stochastic synthesis (Hanning) */
    sfloat *pSynthBuff;          /*!< an array for keeping samples during overlap-add (2x sizeHop) */
    sfloat *pMagBuff;            /*!< an array for keeping magnitude spectrum for stochastic synthesis */
    sfloat *pPhaseBuff;          /*!< an array for keeping phase spectrum for stochastic synthesis */
    sfloat *pSpectra;            /*!< array for in-place FFT transform */
    SMS_Data         prevFrame;  /*!< previous data frame, for interpolation between frames */
    SMS_ModifyParams modParams;  /*!< modification parameters */
    sfloat *approxEnvelope;      /*!< spectral approximation envelope */
} SMS_SynthParams;
/*! \struct SMS_HarmCandidate
 * \brief structure to hold information about a harmonic candidate
 *
 * Provides storage for accumulated statistics when trying to decide which
 * track is the fundamental frequency, during harmonic detection.
 */
typedef struct
{
    sfloat fFreq;      /*!< frequency of harmonic */
    sfloat fMag;       /*!< magnitude of harmonic */
    sfloat fMagPerc;   /*!< percentage of magnitude */
    sfloat fFreqDev;   /*!< deviation from perfect harmonic */
    sfloat fHarmRatio; /*!< percentage of harmonics found */
} SMS_HarmCandidate;
/*! \struct SMS_ContCandidate
 * \brief structure to hold information about a continuation candidate
 *
 * Holds statistics about the guides, which are used to decide the status
 * of the guide.
 */
typedef struct
{
    sfloat fFreqDev; /*!< frequency deviation from guide */
    sfloat fMagDev;  /*!< magnitude deviation from guide */
    int    iPeak;    /*!< peak number (organized according to frequency) */
} SMS_ContCandidate;
/*! \brief analysis format
 *
 * If the signal is known to be harmonic, using a harmonic format (with or
 * without phase) will give more accuracy to the peak continuation algorithm.
 * If the signal is known to be inharmonic, then it is best to use one of the
 * inharmonic settings to tell the peak continuation algorithm to just look at
 * the peaks and connect them, instead of trying to look for peaks at specific
 * frequencies (harmonic partials).
 */
enum SMS_Format
{
    SMS_FORMAT_H   = 0, /*!< 0, format harmonic */
    SMS_FORMAT_IH  = 1, /*!< 1, format inharmonic */
    SMS_FORMAT_HP  = 2, /*!< 2, format harmonic with phase */
    SMS_FORMAT_IHP = 3  /*!< 3, format inharmonic with phase */
};
/*! \brief synthesis types
 *
 * These values select whether to synthesize the deterministic and
 * stochastic components together, the deterministic component alone,
 * or the stochastic component alone.
 */
enum SMS_SynthType
{
    SMS_STYPE_ALL  = 0, /*!< both components combined */
    SMS_STYPE_DET  = 1, /*!< deterministic component alone */
    SMS_STYPE_STOC = 2  /*!< stochastic component alone */
};
/*! \brief synthesis method for deterministic component
 *
 * There are two options for deterministic synthesis available to the
 * SMS synthesizer. The Inverse Fast Fourier Transform method (IFFT) is
 * more efficient for models with lots of partial tracks, but can possibly
 * smear transients. The Sinusoidal Table Lookup (SIN) can theoretically
 * support faster moving tracks at a higher fidelity, but can consume lots
 * of cpu at varying rates.
 */
enum SMS_DetSynthType
{
    SMS_DET_IFFT = 0, /*!< Inverse Fast Fourier Transform (IFFT) */
    SMS_DET_SIN  = 1  /*!< Sinusoidal Table Lookup (SIN) */
};
/*! \brief synthesis method for stochastic component
 *
 * Currently, Stochastic Approximation is the only reasonable choice
 * for stochastic synthesis: this method approximates the spectrum of
 * the stochastic component by a specified number of coefficients during
 * analysis, and then approximates another set of coefficients during
 * synthesis in order to fit the specified hopsize. The phases of the
 * coefficients are randomly generated, according to the theory that a
 * stochastic spectrum consists of random phases.
 *
 * The Inverse FFT method is not implemented, but is based on the idea of storing
 * the exact spectrum and phases of the residual component to file. Synthesis
 * could then be an exact reconstruction of the original signal, provided
 * interpolation is not necessary.
 *
 * No stochastic component can also be specified in order to skip this
 * time consuming process altogether. This is especially useful when
 * performing multiple analyses to fine tune parameters pertaining to the
 * deterministic component; once that is achieved, the stochastic component
 * will be much better as well.
 */
enum SMS_StocSynthType
{
    SMS_STOC_NONE   = 0, /*!< 0, no stochastic component */
    SMS_STOC_APPROX = 1, /*!< 1, Inverse FFT, magnitude approximation and generated phases */
    SMS_STOC_IFFT   = 2  /*!< 2, inverse FFT, interpolated spectrum (not used) */
};
/*! \brief spectral envelope representation
 *
 * Selects how a spectral envelope is represented: not at all, as cepstral
 * coefficients, or as frequency bins.
 */
enum SMS_SpecEnvType
{
    SMS_ENV_NONE  = 0, /*!< none */
    SMS_ENV_CEP   = 1, /*!< cepstral coefficients */
    SMS_ENV_FBINS = 2  /*!< frequency bins */
};
/*! \brief Error codes returned by SMS file functions */
/* \todo remove me */
enum SMS_ERRORS
{
    SMS_OK     = 0, /*!< 0, no error */
    SMS_NOPEN  = 1, /*!< 1, couldn't open file */
    SMS_NSMS   = 2, /*!< 2, not a SMS file */
    SMS_MALLOC = 3, /*!< 3, couldn't allocate memory */
    SMS_RDERR  = 4, /*!< 4, read error */
    SMS_WRERR  = 5, /*!< 5, write error */
    SMS_SNDERR = 6  /*!< 6, sound IO error */
};
/*! \brief debug modes
 *
 * \todo write details about debug files
 */
enum SMS_DBG
{
    SMS_DBG_NONE       = 0,  /*!< 0, no debugging */
    SMS_DBG_DET        = 1,  /*!< 1, not yet implemented \todo make this show main information to look at for discovering the correct deterministic parameters */
    SMS_DBG_PEAK_DET   = 2,  /*!< 2, peak detection function */
    SMS_DBG_HARM_DET   = 3,  /*!< 3, harmonic detection function */
    SMS_DBG_PEAK_CONT  = 4,  /*!< 4, peak continuation function */
    SMS_DBG_CLEAN_TRAJ = 5,  /*!< 5, clean tracks function */
    SMS_DBG_SINE_SYNTH = 6,  /*!< 6, sine synthesis function */
    SMS_DBG_STOC_ANAL  = 7,  /*!< 7, stochastic analysis function */
    SMS_DBG_STOC_SYNTH = 8,  /*!< 8, stochastic synthesis function */
    SMS_DBG_SMS_ANAL   = 9,  /*!< 9, top level analysis function */
    SMS_DBG_ALL        = 10, /*!< 10, everything */
    SMS_DBG_RESIDUAL   = 11, /*!< 11, write residual to file */
    SMS_DBG_SYNC       = 12  /*!< 12, write original, synthesis and residual to a text file */
};
/*! \brief maximum size for analysis window */
#define SMS_MAX_WINDOW 8190
/*! \brief type of sound to be analyzed
 *
 * \todo explain the differences between these two
 */
enum SMS_SOUND_TYPE
{
    SMS_SOUND_TYPE_MELODY = 0, /*!< 0, sound composed of several notes */
    SMS_SOUND_TYPE_NOTE   = 1  /*!< 1, sound composed of a single note */
};
/*! \brief direction of analysis
 *
 * Sometimes a signal can be clearer at the end than at the beginning.
 * If the signal is very harmonic at the end, then doing the analysis
 * in reverse could provide better results.
 */
enum SMS_DIRECTION
{
    SMS_DIR_FWD = 0, /*!< analysis from left to right */
    SMS_DIR_REV = 1  /*!< analysis from right to left */
};
/*! \brief window selection
 */
enum SMS_WINDOWS
{
    SMS_WIN_HAMMING = 0, /*!< 0: hamming */
    SMS_WIN_BH_62   = 1, /*!< 1: blackman-harris, 62dB cutoff */
    SMS_WIN_BH_70   = 2, /*!< 2: blackman-harris, 70dB cutoff */
    SMS_WIN_BH_74   = 3, /*!< 3: blackman-harris, 74dB cutoff */
    SMS_WIN_BH_92   = 4, /*!< 4: blackman-harris, 92dB cutoff */
    SMS_WIN_HANNING = 5, /*!< 5: hanning */
    SMS_WIN_IFFT    = 6  /*!< 6: window for deterministic synthesis based on the Inverse-FFT algorithm.
                              This is a combination of an inverse Blackman-Harris 92dB and a triangular window. */
};
/*!
 * \brief frame status
 *
 * Values are consecutive, starting at 0 for SMS_FRAME_EMPTY.
 */
enum SMS_FRAME_STATUS
{
    SMS_FRAME_EMPTY         = 0,
    SMS_FRAME_READY         = 1,
    SMS_FRAME_PEAKS_FOUND   = 2,
    SMS_FRAME_FUND_FOUND    = 3,
    SMS_FRAME_TRAJ_FOUND    = 4,
    SMS_FRAME_CLEANED       = 5,
    SMS_FRAME_RECOMPUTED    = 6,
    SMS_FRAME_DETER_SYNTH   = 7,
    SMS_FRAME_STOC_COMPUTED = 8,
    SMS_FRAME_DONE          = 9,
    SMS_FRAME_END           = 10
};
/*! \brief size of synthesis frame */
#define SMS_MIN_SIZE_FRAME 128
/*! \defgroup math_macros Math Macros
 * \brief mathematical operations and values needed for functions within
 * this library
 * \{
 */
#define PI         3.141592653589793238462643 /*!< pi */
#define TWO_PI     6.28318530717958647692     /*!< pi * 2 */
#define INV_TWO_PI (1 / TWO_PI)               /*!< 1 / (pi * 2) */
#define PI_2       1.57079632679489661923     /*!< pi / 2 */
#define LOG2       0.69314718055994529        /*!< natural logarithm of 2 */
#define LOG10      2.3025850929940459         /*!< natural logarithm of 10 */
#define EXP        2.7182818284590451         /*!< Euler's number */
/* Prototypes only: the definitions are not part of this header, so the notes
 * below describe the signatures rather than the behaviour. */
/* scalar helpers taking and returning a single sfloat */
sfloat sms_magToDB(sfloat x);
sfloat sms_dBToMag(sfloat x);
/* array variants: element count first, then the array (presumably modified in place - TODO confirm) */
void sms_arrayMagToDB(int sizeArray, sfloat *pArray);
void sms_arrayDBToMag(int sizeArray, sfloat *pArray);
void sms_setMagThresh(sfloat x);
sfloat sms_rms(int sizeArray, sfloat *pArray);
sfloat sms_sine(sfloat fTheta);
sfloat sms_sinc(sfloat fTheta);
sfloat sms_random(void);
int sms_power2(int n);
sfloat sms_scalarTempered(sfloat x);
void sms_arrayScalarTempered(int sizeArray, sfloat *pArray);
/* Both macros evaluate the selected argument twice, so do not pass
 * expressions with side effects. */
#ifndef MAX
/*! \brief returns the maximum of a and b */
#  define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

#ifndef MIN
/*! \brief returns the minimum of a and b */
#  define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
/*! \} */
/* function declarations */

/* peak, partial and residual analysis entry points */
void sms_setPeaks(SMS_AnalParams *pAnalParams,
                  int numamps, sfloat *amps,
                  int numfreqs, sfloat *freqs,
                  int numphases, sfloat *phases);
int sms_findPeaks(int sizeWaveform, sfloat *pWaveform,
                  SMS_AnalParams *pAnalParams,
                  SMS_SpectralPeaks *pSpectralPeaks);
int sms_findPartials(SMS_Data *pSmsFrame, SMS_AnalParams *pAnalParams);
int sms_findResidual(int sizeSynthesis, sfloat *pSynthesis,
                     int sizeOriginal, sfloat *pOriginal,
                     SMS_ResidualParams *residualParams);
void sms_approxResidual(int sizeResidual, sfloat *residual,
                        int sizeApprox, sfloat *approx,
                        SMS_ResidualParams *residualParams);
int sms_analyze(int sizeWaveform, sfloat *pWaveform,
                SMS_Data *pSmsData, SMS_AnalParams *pAnalParams);
void sms_analyzeFrame(int iCurrentFrame, SMS_AnalParams *pAnalParams,
                      sfloat fRefFundamental);
/* Library-wide setup and teardown (prototypes only; definitions live in the
 * implementation files).  Declared with (void) so they are real prototypes:
 * before C23 an empty () leaves the parameter list unspecified, and calls
 * passing stray arguments would compile silently. */
int sms_init(void);
void sms_free(void);
/* analysis parameter lifecycle */
void sms_initAnalParams(SMS_AnalParams *pAnalParams);
int sms_initAnalysis(SMS_AnalParams *pAnalParams);
void sms_freeAnalysis(SMS_AnalParams *pAnalParams);

/* synthesis parameter lifecycle */
void sms_initSynthParams(SMS_SynthParams *synthParams);
int sms_initSynth(SMS_SynthParams *pSynthParams);
void sms_freeSynth(SMS_SynthParams *pSynthParams);

/* spectral peak container lifecycle */
int sms_initSpectralPeaks(SMS_SpectralPeaks *peaks, int n);
void sms_freeSpectralPeaks(SMS_SpectralPeaks *peaks);
/* sound buffer and analysis windows */
void sms_fillSoundBuffer(int sizeWaveform, sfloat *pWaveform,
                         SMS_AnalParams *pAnalParams);
void sms_windowCentered(int sizeWindow, sfloat *pWaveform, sfloat *pWindow,
                        int sizeFft, sfloat *pFftBuffer);
void sms_getWindow(int sizeWindow, sfloat *pWindow, int iWindowType);
void sms_scaleWindow(int sizeWindow, sfloat *pWindow);

/* forward and inverse spectrum */
int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow,
                 int sizeMag, sfloat *pMag, sfloat *pPhase,
                 sfloat *pFftBuffer);
int sms_spectrumW(int sizeWindow, sfloat *pWaveform, sfloat *pWindow,
                  int sizeMag, sfloat *pMag, sfloat *pPhase,
                  sfloat *pFftBuffer);
int sms_invSpectrum(int sizeWaveform, sfloat *pWaveform, sfloat *pWindow,
                    int sizeMag, sfloat *pMag, sfloat *pPhase,
                    sfloat *pFftBuffer);
/* \todo remove this once invSpectrum is completely implemented */
int sms_invQuickSpectrumW(sfloat *pFMagSpectrum, sfloat *pFPhaseSpectrum,
                          int sizeFft,
                          sfloat *pFWaveform, int sizeWave,
                          sfloat *pFWindow, sfloat *pFftBuffer);
int sms_spectralApprox(sfloat *pSpec1, int sizeSpec1, int sizeSpec1Used,
                       sfloat *pSpec2, int sizeSpec2,
                       int nCoefficients, sfloat *envelope);
int sms_spectrumMag(int sizeWindow, sfloat *pWaveform, sfloat *pWindow,
                    int sizeMag, sfloat *pMag, sfloat *pFftBuffer);

/* cepstrum and spectral envelope */
void sms_dCepstrum(int sizeCepstrum, sfloat *pCepstrum,
                   int sizeFreq, sfloat *pFreq, sfloat *pMag,
                   sfloat fLambda, int iSamplingRate);
void sms_dCepstrumEnvelope(int sizeCepstrum, sfloat *pCepstrum,
                           int sizeEnv, sfloat *pEnv);
void sms_spectralEnvelope(SMS_Data *pSmsData, SMS_SEnvParams *pSpecEnvParams);
/* per-frame analysis helpers */
int sms_sizeNextWindow(int iCurrentFrame, SMS_AnalParams *pAnalParams);
sfloat sms_fundDeviation(SMS_AnalParams *pAnalParams, int iCurrentFrame);
int sms_detectPeaks(int sizeSpec, sfloat *pFMag, sfloat *pPhase,
                    SMS_Peak *pSpectralPeaks, SMS_AnalParams *pAnalParams);
sfloat sms_harmDetection(int numPeaks, SMS_Peak *spectralPeaks,
                         sfloat refFundamental, sfloat refHarmonic,
                         sfloat lowestFreq, sfloat highestFreq,
                         int soundType,
                         sfloat minRefHarmMag, sfloat refHarmMagDiffFromMax);
int sms_peakContinuation(int iFrame, SMS_AnalParams *pAnalParams);

/* pre-/de-emphasis: one sample in, one sample out */
sfloat sms_preEmphasis(sfloat fInput, SMS_AnalParams *pAnalParams);
sfloat sms_deEmphasis(sfloat fInput, SMS_SynthParams *pSynthParams);

void sms_cleanTracks(int iCurrentFrame, SMS_AnalParams *pAnalParams);
void sms_scaleDet(sfloat *pSynthBuffer, sfloat *pOriginalBuffer,
                  sfloat *pSinAmp, SMS_AnalParams *pAnalParams,
                  int nTracks);
/* Sine / sinc table setup and teardown (prototypes only).  The clear
 * functions take no arguments and are declared with (void) so that they are
 * real prototypes rather than unspecified-parameter declarations. */
int sms_prepSine(int nTableSize);
int sms_prepSinc(int nTableSize);
void sms_clearSine(void);
void sms_clearSinc(void);
/* synthesis entry points */
void sms_synthesize(SMS_Data *pSmsFrame, sfloat *pSynthesis,
                    SMS_SynthParams *pSynthParams);
void sms_sineSynthFrame(SMS_Data *pSmsFrame,
                        sfloat *pBuffer, int sizeBuffer,
                        SMS_Data *pLastFrame, int iSamplingRate);
/* SMS file header */
void sms_initHeader(SMS_Header *pSmsHeader);
int sms_getHeader(char *pChFileName, SMS_Header **ppSmsHeader,
                  FILE **ppInputFile);
void sms_fillHeader(SMS_Header *pSmsHeader, SMS_AnalParams *pAnalParams);
int sms_writeHeader(char *pFileName, SMS_Header *pSmsHeader,
                    FILE **ppOutSmsFile);
int sms_writeFile(FILE *pSmsFile, SMS_Header *pSmsHeader);

/* analysis frames */
int sms_initFrame(int iCurrentFrame, SMS_AnalParams *pAnalParams,
                  int sizeWindow);
int sms_clearAnalysisFrame(int iCurrentFrame, SMS_AnalParams *pAnalParams);

/* SMS_Data frames: allocation, file I/O, housekeeping */
int sms_allocFrame(SMS_Data *pSmsFrame, int nTracks, int nCoeff,
                   int iPhase, int stochType, int nEnvCoeff);
int sms_allocFrameH(SMS_Header *pSmsHeader, SMS_Data *pSmsFrame);
int sms_getFrame(FILE *pInputFile, SMS_Header *pSmsHeader, int iFrame,
                 SMS_Data *pSmsFrame);
int sms_writeFrame(FILE *pSmsFile, SMS_Header *pSmsHeader,
                   SMS_Data *pSmsFrame);
void sms_freeFrame(SMS_Data *pSmsFrame);
void sms_clearFrame(SMS_Data *pSmsFrame);
void sms_copyFrame(SMS_Data *pCopySmsFrame, SMS_Data *pOriginalSmsFrame);
int sms_frameSizeB(SMS_Header *pSmsHeader);
/* residual */
void sms_initResidualParams(SMS_ResidualParams *residualParams);
int sms_initResidual(SMS_ResidualParams *residualParams);
void sms_freeResidual(SMS_ResidualParams *residualParams);
int sms_residual(int sizeWindow, sfloat *pSynthesis, sfloat *pOriginal,
                 SMS_ResidualParams *residualParams);
void sms_filterHighPass(int sizeResidual, sfloat *pResidual,
                        int iSamplingRate);
int sms_stocAnalysis(int sizeWindow, sfloat *pResidual, sfloat *pWindow,
                     SMS_Data *pSmsFrame, SMS_AnalParams *pAnalParams);
void sms_interpolateFrames(SMS_Data *pSmsFrame1, SMS_Data *pSmsFrame2,
                           SMS_Data *pSmsFrameOut, sfloat fInterpFactor);

/* FFT and rectangular/polar conversion */
void sms_fft(int sizeFft, sfloat *pArray);
void sms_ifft(int sizeFft, sfloat *pArray);
void sms_RectToPolar(int sizeSpec, sfloat *pReal, sfloat *pMag,
                     sfloat *pPhase);
void sms_PolarToRect(int sizeSpec, sfloat *pReal, sfloat *pMag,
                     sfloat *pPhase);
void sms_spectrumRMS(int sizeMag, sfloat *pReal, sfloat *pMag);

/* frame modification */
void sms_initModify(SMS_Header *header, SMS_ModifyParams *params);
void sms_initModifyParams(SMS_ModifyParams *params);
void sms_freeModify(SMS_ModifyParams *params);
void sms_modify(SMS_Data *frame, SMS_ModifyParams *params);
/***********************************************************************************/
/************* debug functions: ******************************************************/
int sms_createDebugFile(SMS_AnalParams *pAnalParams);
void sms_writeDebugData(sfloat *pBuffer1,
                        sfloat *pBuffer2,
                        sfloat *pBuffer3,
                        int sizeBuffer);
/* Debug-file output and error reporting (prototypes only; behaviour is
 * defined in the implementation files).  The argument-less functions are
 * declared with (void) so they are real prototypes: before C23 an empty ()
 * leaves the parameter list unspecified. */
void sms_writeDebugFile(void);
void sms_error(char *pErrorMessage);
int sms_errorCheck(void);
char *sms_errorString(void);
#endif /* _SMS_H */
|