diff options
author | John Glover <glover.john@gmail.com> | 2011-01-06 11:54:26 +0000 |
---|---|---|
committer | John Glover <glover.john@gmail.com> | 2011-01-06 11:54:26 +0000 |
commit | 0c141d4c9a03d4839e2a8626961bd6bbdd3e7f26 (patch) | |
tree | 33c6f276f0b9ace8d96535f4afb89926ec33e7bc | |
parent | 17c5625449888117208447dd4f86504281357013 (diff) | |
download | simpl-0c141d4c9a03d4839e2a8626961bd6bbdd3e7f26.tar.gz simpl-0c141d4c9a03d4839e2a8626961bd6bbdd3e7f26.tar.bz2 simpl-0c141d4c9a03d4839e2a8626961bd6bbdd3e7f26.zip |
Fixed SMSResidual. Also removed the blank frames produced by SMS at the beginning of analysis, which were causing the synthesised audio to be out of sync with the original.
-rw-r--r-- | basetypes.py | 49 | ||||
-rw-r--r-- | sms.py | 105 | ||||
-rw-r--r-- | sms/analysis.c | 17 | ||||
-rw-r--r-- | sms/sineSynth.c | 40 | ||||
-rw-r--r-- | sms/sms.c | 72 | ||||
-rw-r--r-- | sms/sms.h | 19 | ||||
-rw-r--r-- | sms/sms.i | 21 | ||||
-rw-r--r-- | sms/spectrum.c | 59 | ||||
-rw-r--r-- | sms/synthesis.c | 113 | ||||
-rw-r--r-- | tests/sms.py | 111 |
10 files changed, 402 insertions, 204 deletions
diff --git a/basetypes.py b/basetypes.py index bcad0a0..c12091d 100644 --- a/basetypes.py +++ b/basetypes.py @@ -353,6 +353,23 @@ class Residual(object): self._hop_size = 512 self._frame_size = 512 + frame_size = property(lambda self: self.get_frame_size(), + lambda self, x: self.set_frame_size(x)) + hop_size = property(lambda self: self.get_hop_size(), + lambda self, x: self.set_hop_size(x)) + + def get_frame_size(self): + return self._frame_size + + def set_frame_size(self, frame_size): + self._frame_size = frame_size + + def get_hop_size(self): + return self._hop_size + + def set_hop_size(self, hop_size): + self._hop_size = hop_size + def residual_frame(self, synth, original): "Computes the residual signal for a frame of audio" raise Exception("NotYetImplemented") @@ -360,21 +377,21 @@ class Residual(object): def find_residual(self, synth, original): "Calculate and return the residual signal" # pad the signals if necessary - if len(synth) % self._hop_size != 0: - synth = np.hstack((synth, np.zeros(self._hop_size - (len(synth) % self._hop_size)))) - if len(original) % self._hop_size != 0: - original = np.hstack((original, np.zeros(self._hop_size - (len(original) % self._hop_size)))) + if len(synth) % self.hop_size != 0: + synth = np.hstack((synth, np.zeros(self.hop_size - (len(synth) % self.hop_size)))) + if len(original) % self.hop_size != 0: + original = np.hstack((original, np.zeros(self.hop_size - (len(original) % self.hop_size)))) - num_frames = len(original) / self._hop_size + num_frames = len(original) / self.hop_size residual = simpl.array([]) sample_offset = 0 for i in range(num_frames): - synth_frame = synth[sample_offset:sample_offset+self._hop_size] - original_frame = original[sample_offset:sample_offset+self._hop_size] + synth_frame = synth[sample_offset:sample_offset+self.hop_size] + original_frame = original[sample_offset:sample_offset+self.hop_size] residual = np.hstack((residual, self.residual_frame(synth_frame, original_frame))) - sample_offset 
+= self._hop_size + sample_offset += self.hop_size return residual def synth_frame(self, synth, original): @@ -384,20 +401,20 @@ class Residual(object): def synth(self, synth, original): "Calculate and return a synthesised residual signal" # pad the signals if necessary - if len(synth) % self._hop_size != 0: - synth = np.hstack((synth, np.zeros(self._hop_size - (len(synth) % self._hop_size)))) - if len(original) % self._hop_size != 0: - original = np.hstack((original, np.zeros(self._hop_size - (len(original) % self._hop_size)))) + if len(synth) % self.hop_size != 0: + synth = np.hstack((synth, np.zeros(self.hop_size - (len(synth) % self.hop_size)))) + if len(original) % self.hop_size != 0: + original = np.hstack((original, np.zeros(self.hop_size - (len(original) % self.hop_size)))) - num_frames = len(original) / self._hop_size + num_frames = len(original) / self.hop_size residual = simpl.array([]) sample_offset = 0 for i in range(num_frames): - synth_frame = synth[sample_offset:sample_offset+self._hop_size] - original_frame = original[sample_offset:sample_offset+self._hop_size] + synth_frame = synth[sample_offset:sample_offset+self.hop_size] + original_frame = original[sample_offset:sample_offset+self.hop_size] residual = np.hstack((residual, self.synth_frame(synth_frame, original_frame))) - sample_offset += self._hop_size + sample_offset += self.hop_size return residual @@ -39,6 +39,7 @@ class SMSPeakDetection(simpl.PeakDetection): self._analysis_params.nTracks = self._max_peaks self._analysis_params.maxPeaks = self._max_peaks self._analysis_params.nGuides = self._max_peaks + self._analysis_params.preEmphasis = 0 if simplsms.sms_initAnalysis(self._analysis_params) != 0: raise Exception("Error allocating memory for analysis_params") self._peaks = simplsms.SMS_SpectralPeaks(self._max_peaks) @@ -199,7 +200,10 @@ class SMSPeakDetection(simpl.PeakDetection): self._analysis_params.iSizeSound = len(audio) self.frames = [] pos = 0 - while pos < len(audio): + # account for 
SMS analysis delay + # need an extra (max_frame_delay - 1) frames + num_samples = (len(audio) - self.hop_size) + ((self.max_frame_delay -1) * self.hop_size) + while pos < num_samples: # get the next frame size if not self._static_frame_size: self.frame_size = self.get_next_frame_size() @@ -231,6 +235,7 @@ class SMSPartialTracking(simpl.PartialTracking): self._analysis_params.iFormat = simplsms.SMS_FORMAT_HP self._analysis_params.nTracks = self._max_partials self._analysis_params.nGuides = self._max_partials + self._analysis_params.preEmphasis = 0 if simplsms.sms_initAnalysis(self._analysis_params) != 0: raise Exception("Error allocating memory for analysis_params") self._sms_header = simplsms.SMS_Header() @@ -243,6 +248,36 @@ class SMSPartialTracking(simpl.PartialTracking): simplsms.sms_freeFrame(self._analysis_frame) simplsms.sms_free() + # properties + # TODO: make properties for the remaining analysis parameters + max_frequency = property(lambda self: self.get_max_frequency(), + lambda self, x: self.set_max_frequency(x)) + default_fundamental = property(lambda self: self.get_default_fundamental(), + lambda self, x: self.set_default_fundamental(x)) + max_frame_delay = property(lambda self: self.get_max_frame_delay(), + lambda self, x: self.set_max_frame_delay(x)) + + def get_max_frequency(self): + return self._analysis_params.fHighestFreq + + def set_max_frequency(self, max_frequency): + self._analysis_params.fHighestFreq = max_frequency + + def get_default_fundamental(self): + return self._analysis_params.fDefaultFundamental + + def set_default_fundamental(self, default_fundamental): + self._analysis_params.fDefaultFundamental = default_fundamental + + def get_max_frame_delay(self): + return self._analysis_params.iMaxDelayFrames + + def set_max_frame_delay(self, max_frame_delay): + simplsms.sms_freeAnalysis(self._analysis_params) + self._analysis_params.iMaxDelayFrames = max_frame_delay + if simplsms.sms_initAnalysis(self._analysis_params) != 0: + raise 
Exception("Error allocating memory for analysis_params") + def get_max_partials(self): return self._analysis_params.nTracks @@ -290,6 +325,17 @@ class SMSPartialTracking(simpl.PartialTracking): peaks.append(p) return peaks + def find_partials(self, frames): + """Find partials from the sinusoidal peaks in a list of Frames""" + self.frames = [] + for frame in frames: + frame.partials = self.update_partials(frame) + self.frames.append(frame) + # account for SMS analysis delay + # the first extra (max_frame_delay) frames are blank + if len(self.frames) > (self.max_frame_delay): + self.frames = self.frames[self.max_frame_delay:] + return self.frames class SMSSynthesis(simpl.Synthesis): "Sinusoidal resynthesis using SMS" @@ -299,13 +345,15 @@ class SMSSynthesis(simpl.Synthesis): simplsms.sms_init() self._synth_params = simplsms.SMS_SynthParams() simplsms.sms_initSynthParams(self._synth_params) - self._synth_params.iDetSynthType = simplsms.SMS_DET_IFFT + self._synth_params.iSamplingRate = self._sampling_rate + self._synth_params.iDetSynthType = simplsms.SMS_DET_SIN self._synth_params.iSynthesisType = simplsms.SMS_STYPE_DET self._synth_params.iStochasticType = simplsms.SMS_STOC_NONE - # use the default simpl hop size instead of the default SMS hop size self._synth_params.sizeHop = self._hop_size + self._synth_params.nTracks = self._max_partials + self._synth_params.deEmphasis = 0 simplsms.sms_initSynth(self._synth_params) - self._current_frame = simpl.zeros(self.hop_size) + self._current_frame = simpl.zeros(self._hop_size) self._analysis_frame = simplsms.SMS_Data() simplsms.sms_allocFrame(self._analysis_frame, self.max_partials, self.num_stochastic_coeffs, 1, self.stochastic_type, 0) @@ -336,7 +384,7 @@ class SMSSynthesis(simpl.Synthesis): simplsms.sms_freeSynth(self._synth_params) self._synth_params.sizeHop = hop_size simplsms.sms_initSynth(self._synth_params) - self._current_frame = simpl.zeros(self.hop_size) + self._current_frame = simpl.zeros(hop_size) def 
get_max_partials(self): return self._synth_params.nTracks @@ -365,7 +413,7 @@ class SMSSynthesis(simpl.Synthesis): return self._synth_params.iDetSynthesisType def set_det_synthesis_type(self, det_synthesis_type): - self._synth_params.iDetSynthesisType = det_synthesis_type + self._synth_params.iDetSynthType = det_synthesis_type def get_num_stochastic_coeffs(self): return self._synth_params.nStochasticCoeff @@ -404,7 +452,8 @@ class SMSSynthesis(simpl.Synthesis): amps = simpl.zeros(self.max_partials) freqs = simpl.zeros(self.max_partials) phases = simpl.zeros(self.max_partials) - for i in range(len(frame.partials)): + num_partials = min(self.max_partials, len(frame.partials)) + for i in range(num_partials): amps[i] = frame.partials[i].amplitude freqs[i] = frame.partials[i].frequency phases[i] = frame.partials[i].phase @@ -423,25 +472,53 @@ class SMSResidual(simpl.Residual): simplsms.sms_init() self._residual_params = simplsms.SMS_ResidualParams() simplsms.sms_initResidualParams(self._residual_params) - self._residual_params.residualSize = self._hop_size# * 2 + self._residual_params.hopSize = self._hop_size simplsms.sms_initResidual(self._residual_params) def __del__(self): simplsms.sms_freeResidual(self._residual_params) simplsms.sms_free() + + def get_hop_size(self): + return self._residual_params.hopSize + + def set_hop_size(self, hop_size): + simplsms.sms_freeResidual(self._residual_params) + self._residual_params.hopSize = hop_size + simplsms.sms_initResidual(self._residual_params) def residual_frame(self, synth, original): "Computes the residual signal for a frame of audio" simplsms.sms_findResidual(synth, original, self._residual_params) - residual = simpl.zeros(self._residual_params.residualSize) + residual = simpl.zeros(self._residual_params.hopSize) self._residual_params.getResidual(residual) return residual + def find_residual(self, synth, original): + "Calculate and return the residual signal" + import numpy as np + # pad the signals if necessary + if 
len(synth) % self.hop_size != 0: + synth = np.hstack((synth, np.zeros(self.hop_size - (len(synth) % self.hop_size)))) + if len(original) % self.hop_size != 0: + original = np.hstack((original, np.zeros(self.hop_size - (len(original) % self.hop_size)))) + + num_frames = len(original) / self.hop_size + residual = simpl.array([]) + sample_offset = 0 + + for i in range(num_frames): + synth_frame = synth[sample_offset:sample_offset+self.hop_size] + original_frame = original[sample_offset:sample_offset+self.hop_size] + residual = np.hstack((residual, + self.residual_frame(synth_frame, original_frame))) + sample_offset += self.hop_size + return residual + def synth_frame(self, synth, original): "Calculate and return one frame of the synthesised residual signal" - self.residual_frame(synth, original) - simplsms.sms_approxResidual(self._residual_params) - residual_approx = simpl.zeros(self._residual_params.residualSize) - self._residual_params.getApprox(residual_approx) - return residual_approx + residual = self.residual_frame(synth, original) + approx = simpl.zeros(self._residual_params.hopSize) + simplsms.sms_approxResidual(residual, approx, self._residual_params) + return approx diff --git a/sms/analysis.c b/sms/analysis.c index ad8420e..89d4b4a 100644 --- a/sms/analysis.c +++ b/sms/analysis.c @@ -210,7 +210,6 @@ int sms_findPeaks(int sizeWaveform, sfloat *pWaveform, SMS_AnalParams *pAnalPara pSpectralPeaks->pSpectralPeaks[i].fPhase = 0.0; } } - /*printf("\n");*/ return pSpectralPeaks->nPeaks; } else @@ -328,13 +327,16 @@ int sms_findResidual(int sizeSynthesis, sfloat* pSynthesis, int sizeOriginal, sfloat* pOriginal, SMS_ResidualParams *residualParams) { - if(residualParams->residualSize < sizeOriginal) + if(residualParams->hopSize < sizeOriginal) { sms_error("Residual signal length is smaller than the original signal length"); return -1; } - sms_residual(residualParams->residualSize, pSynthesis, pOriginal, residualParams); + sms_residual(residualParams->hopSize, 
pSynthesis, pOriginal, residualParams); + sms_filterHighPass(residualParams->hopSize, + residualParams->residual, + residualParams->samplingRate); return 0; } @@ -467,8 +469,9 @@ int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_Ana else if(sizeData < pAnalParams->residualParams.residualSize) { /* should only happen if we're at the end of a sound, unless hop size changes */ - sms_getWindow(sizeData, pAnalParams->residualParams.residualWindow, SMS_WIN_HAMMING); - sms_scaleWindow(sizeData, pAnalParams->residualParams.residualWindow); + /* TODO: should the window type be set to pAnalParams->iWindowType? */ + sms_getWindow(sizeData, pAnalParams->residualParams.fftWindow, SMS_WIN_HAMMING); + sms_scaleWindow(sizeData, pAnalParams->residualParams.fftWindow); } /* obtain residual sound from original and synthesized sounds. accumulate the residual percentage.*/ @@ -483,13 +486,13 @@ int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_Ana sms_filterHighPass(sizeData, pAnalParams->residualParams.residual, pAnalParams->iSamplingRate); /* approximate residual */ - sms_stocAnalysis(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.residualWindow, + sms_stocAnalysis(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.fftWindow, pSmsData, pAnalParams); } else if(pAnalParams->iStochasticType == SMS_STOC_IFFT) { int sizeMag = sms_power2(sizeData >> 1); - sms_spectrum(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.residualWindow, + sms_spectrum(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.fftWindow, sizeMag, pSmsData->pFStocCoeff, pSmsData->pResPhase, pAnalParams->fftBuffer); } diff --git a/sms/sineSynth.c b/sms/sineSynth.c index c91bf65..9882062 100644 --- a/sms/sineSynth.c +++ b/sms/sineSynth.c @@ -45,18 +45,18 @@ static void SinePhaSynth(sfloat fFreq, sfloat fMag, sfloat fPhase, sfloat fAlpha, fBeta, fTmp1, fTmp2; /* if 
no mag in last frame copy freq from current and make phase */ - if (pLastFrame->pFSinAmp[iTrack] <= 0) + if(pLastFrame->pFSinAmp[iTrack] <= 0) { pLastFrame->pFSinFreq[iTrack] = fFreq; fTmp = fPhase - (fFreq * sizeBuffer); pLastFrame->pFSinPha[iTrack] = fTmp - floor(fTmp / TWO_PI) * TWO_PI; } /* and the other way */ - else if (fMag <= 0) + else if(fMag <= 0) { fFreq = pLastFrame->pFSinFreq[iTrack]; fTmp = pLastFrame->pFSinPha[iTrack] + - (pLastFrame->pFSinFreq[iTrack] * sizeBuffer); + (pLastFrame->pFSinFreq[iTrack] * sizeBuffer); fPhase = fTmp - floor(fTmp / TWO_PI) * TWO_PI; } @@ -67,27 +67,27 @@ static void SinePhaSynth(sfloat fFreq, sfloat fMag, sfloat fPhase, /* create instantaneous phase from freq. and phase values */ fTmp1 = fFreq - pLastFrame->pFSinFreq[iTrack]; fTmp2 = ((pLastFrame->pFSinPha[iTrack] + - pLastFrame->pFSinFreq[iTrack] * sizeBuffer - fPhase) + - fTmp1 * sizeBuffer / 2.0) / TWO_PI; - iM = (int) (fTmp2 + .5); + pLastFrame->pFSinFreq[iTrack] * sizeBuffer - fPhase) + + fTmp1 * sizeBuffer / 2.0) / TWO_PI; + iM = (int)(fTmp2 + .5); fTmp2 = fPhase - pLastFrame->pFSinPha[iTrack] - - pLastFrame->pFSinFreq[iTrack] * sizeBuffer + - TWO_PI * iM; + pLastFrame->pFSinFreq[iTrack] * sizeBuffer + TWO_PI * iM; fAlpha = (3.0 / (sfloat)(sizeBuffer * sizeBuffer)) * - fTmp2 - fTmp1 / sizeBuffer; + fTmp2 - fTmp1 / sizeBuffer; fBeta = (-2.0 / ((sfloat) (sizeBuffer * sizeBuffer * sizeBuffer))) * - fTmp2 + fTmp1 / ((sfloat) (sizeBuffer * sizeBuffer)); + fTmp2 + fTmp1 / ((sfloat) (sizeBuffer * sizeBuffer)); for(i=0; i<sizeBuffer; i++) { fInstMag += fMagIncr; fInstPhase = pLastFrame->pFSinPha[iTrack] + - pLastFrame->pFSinFreq[iTrack] * i + - fAlpha * i * i + fBeta * i * i * i; + pLastFrame->pFSinFreq[iTrack] * i + + fAlpha * i * i + fBeta * i * i * i; - /* pFWaveform[i] += sms_dBToMag(fInstMag) * sms_sine(fInstPhase + PI_2); */ + /*pFWaveform[i] += sms_dBToMag(fInstMag) * sms_sine(fInstPhase + PI_2);*/ pFWaveform[i] += sms_dBToMag(fInstMag) * sinf(fInstPhase + PI_2); } + 
/* save current values into buffer */ pLastFrame->pFSinFreq[iTrack] = fFreq; pLastFrame->pFSinAmp[iTrack] = fMag; @@ -110,14 +110,13 @@ static void SineSynth(sfloat fFreq, sfloat fMag, SMS_Data *pLastFrame, int i; /* if no mag in last frame copy freq from current */ - if (pLastFrame->pFSinAmp[iTrack] <= 0) + if(pLastFrame->pFSinAmp[iTrack] <= 0) { pLastFrame->pFSinFreq[iTrack] = fFreq; - pLastFrame->pFSinPha[iTrack] = - TWO_PI * sms_random(); + pLastFrame->pFSinPha[iTrack] = TWO_PI * sms_random(); } /* and the other way */ - else if (fMag <= 0) + else if(fMag <= 0) fFreq = pLastFrame->pFSinFreq[iTrack]; /* calculate the instantaneous amplitude */ @@ -129,20 +128,18 @@ static void SineSynth(sfloat fFreq, sfloat fMag, SMS_Data *pLastFrame, fInstPhase = pLastFrame->pFSinPha[iTrack]; /* generate all the samples */ - for (i = 0; i < sizeBuffer; i++) + for(i = 0; i < sizeBuffer; i++) { fInstMag += fMagIncr; fInstFreq += fFreqIncr; fInstPhase += fInstFreq; - pFBuffer[i] += sms_dBToMag(fInstMag) * sms_sine(fInstPhase); } /* save current values into last values */ pLastFrame->pFSinFreq[iTrack] = fFreq; pLastFrame->pFSinAmp[iTrack] = fMag; - pLastFrame->pFSinPha[iTrack] = fInstPhase - - floor(fInstPhase / TWO_PI) * TWO_PI; + pLastFrame->pFSinPha[iTrack] = fInstPhase - floor(fInstPhase / TWO_PI) * TWO_PI; } /*! 
\brief generate all the sinusoids for a given frame @@ -191,4 +188,3 @@ void sms_sineSynthFrame(SMS_Data *pSmsData, sfloat *pFBuffer, } } } - @@ -319,7 +319,7 @@ int sms_initAnalysis(SMS_AnalParams *pAnalParams) } /* memory for residual */ - pAnalParams->residualParams.residualSize = pAnalParams->sizeHop * 2; + pAnalParams->residualParams.hopSize = pAnalParams->sizeHop; sms_initResidual(&pAnalParams->residualParams); /* memory for guide states */ @@ -467,15 +467,20 @@ int sms_initSynth(SMS_SynthParams *pSynthParams) void sms_initResidualParams(SMS_ResidualParams *residualParams) { residualParams->samplingRate = 44100; + residualParams->hopSize = 256; residualParams->residualSize = 0; residualParams->residual = NULL; - residualParams->residualWindow = NULL; + residualParams->fftWindow = NULL; + residualParams->ifftWindow = NULL; + residualParams->windowScale = 0.0; residualParams->residualMag = 0.0; residualParams->originalMag = 0.0; residualParams->nCoeffs = 128; residualParams->stocCoeffs = NULL; residualParams->sizeStocMagSpectrum = 0; residualParams->stocMagSpectrum = NULL; + residualParams->stocPhaseSpectrum = NULL; + residualParams->approx = NULL; residualParams->approxEnvelope = NULL; int i; for(i = 0; i < SMS_MAX_SPEC; i++) @@ -492,13 +497,14 @@ void sms_initResidualParams(SMS_ResidualParams *residualParams) */ int sms_initResidual(SMS_ResidualParams *residualParams) { - if(residualParams->residualSize <= 0) + if(residualParams->hopSize <= 0) { - sms_error("Residual size must be a positive integer"); + sms_error("Residual hop size must be a positive integer"); return -1; } /* residual signal */ + residualParams->residualSize = residualParams->hopSize * 2; residualParams->residual = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat)); if(residualParams->residual == NULL) { @@ -506,15 +512,34 @@ int sms_initResidual(SMS_ResidualParams *residualParams) return -1; } - /* residual window */ - residualParams->residualWindow = (sfloat 
*)calloc(residualParams->residualSize, sizeof(sfloat)); - if(residualParams->residualWindow == NULL) + /* residual fft/ifft windows */ + residualParams->fftWindow = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat)); + if(residualParams->fftWindow == NULL) { - sms_error("Could not allocate memory for residualWindow"); + sms_error("Could not allocate memory for residual FFT window"); return -1; } - sms_getWindow(residualParams->residualSize, residualParams->residualWindow, SMS_WIN_HAMMING); - sms_scaleWindow(residualParams->residualSize, residualParams->residualWindow); + sms_getWindow(residualParams->residualSize, residualParams->fftWindow, SMS_WIN_BH_70); + sms_scaleWindow(residualParams->residualSize, residualParams->fftWindow); + + residualParams->ifftWindow = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat)); + if(residualParams->ifftWindow == NULL) + { + sms_error("Could not allocate memory for residual IFFT window"); + return -1; + } + sms_getWindow(residualParams->residualSize, residualParams->ifftWindow, SMS_WIN_HANNING); + /* compute IFFT window scaling: + * windows per hop = hop size / window size = 0.5 + * overlap = 50% => 1 window total in each hop/frame + * => windowScale = window size / sum(window samples) = 1.85 + * for a 1024 sized hamming window + */ + int i; + sfloat sum = 0.0; + for(i = 0; i < residualParams->residualSize; i++) + sum += residualParams->ifftWindow[i]; + residualParams->windowScale = (sfloat)residualParams->residualSize / sum; /* stochastic analysis */ residualParams->stocCoeffs = (sfloat *)calloc(residualParams->nCoeffs, sizeof(sfloat)); @@ -531,7 +556,19 @@ int sms_initResidual(SMS_ResidualParams *residualParams) sms_error("Could not allocate memory for stochastic magnitude spectrum"); return -1; } + residualParams->stocPhaseSpectrum = (sfloat *)calloc(residualParams->sizeStocMagSpectrum, sizeof(sfloat)); + if(residualParams->stocPhaseSpectrum == NULL) + { + sms_error("Could not allocate memory for 
stochastic magnitude spectrum"); + return -1; + } + residualParams->approx = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat)); + if(residualParams->approx == NULL) + { + sms_error("Could not allocate memory for spectral approximation"); + return -1; + } residualParams->approxEnvelope = (sfloat *)calloc(residualParams->nCoeffs, sizeof(sfloat)); if(residualParams->approxEnvelope == NULL) { @@ -553,19 +590,28 @@ void sms_freeResidual(SMS_ResidualParams *residualParams) { if(residualParams->residual) free(residualParams->residual); - if(residualParams->residualWindow) - free(residualParams->residualWindow); + if(residualParams->fftWindow) + free(residualParams->fftWindow); + if(residualParams->ifftWindow) + free(residualParams->ifftWindow); if(residualParams->stocCoeffs) free(residualParams->stocCoeffs); if(residualParams->stocMagSpectrum) free(residualParams->stocMagSpectrum); + if(residualParams->stocPhaseSpectrum) + free(residualParams->stocPhaseSpectrum); + if(residualParams->approx) + free(residualParams->approx); if(residualParams->approxEnvelope) free(residualParams->approxEnvelope); residualParams->residual = NULL; - residualParams->residualWindow = NULL; + residualParams->fftWindow = NULL; + residualParams->ifftWindow = NULL; residualParams->stocCoeffs = NULL; residualParams->stocMagSpectrum = NULL; + residualParams->stocPhaseSpectrum = NULL; + residualParams->approx = NULL; residualParams->approxEnvelope = NULL; } @@ -196,15 +196,20 @@ typedef struct typedef struct { int samplingRate; + int hopSize; int residualSize; sfloat *residual; - sfloat *residualWindow; + sfloat *fftWindow; + sfloat *ifftWindow; + sfloat windowScale; sfloat residualMag; sfloat originalMag; int nCoeffs; sfloat *stocCoeffs; int sizeStocMagSpectrum; sfloat *stocMagSpectrum; + sfloat *stocPhaseSpectrum; + sfloat *approx; sfloat *approxEnvelope; sfloat fftBuffer[SMS_MAX_SPEC * 2]; } SMS_ResidualParams; @@ -589,13 +594,17 @@ void sms_arrayScalarTempered(int sizeArray, sfloat 
*pArray); /* function declarations */ void sms_setPeaks(SMS_AnalParams *pAnalParams, int numamps, sfloat* amps, int numfreqs, sfloat* freqs, int numphases, sfloat* phases); -int sms_findPeaks(int sizeWaveform, sfloat *pWaveform, SMS_AnalParams *pAnalParams, SMS_SpectralPeaks *pSpectralPeaks); +int sms_findPeaks(int sizeWaveform, sfloat *pWaveform, + SMS_AnalParams *pAnalParams, SMS_SpectralPeaks *pSpectralPeaks); int sms_findPartials(SMS_Data *pSmsFrame, SMS_AnalParams *pAnalParams); int sms_findResidual(int sizeSynthesis, sfloat* pSynthesis, int sizeOriginal, sfloat* pOriginal, SMS_ResidualParams *residualParams); -void sms_approxResidual(SMS_ResidualParams *residualParams); -int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_AnalParams *pAnalParams); +void sms_approxResidual(int sizeResidual, sfloat* residual, + int sizeApprox, sfloat* approx, + SMS_ResidualParams *residualParams); +int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, + SMS_AnalParams *pAnalParams); void sms_analyzeFrame(int iCurrentFrame, SMS_AnalParams *pAnalParams, sfloat fRefFundamental); int sms_init(); @@ -615,6 +624,8 @@ void sms_getWindow(int sizeWindow, sfloat *pWindow, int iWindowType); void sms_scaleWindow(int sizeWindow, sfloat *pWindow); int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag, sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer); +int sms_spectrumW(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag, + sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer); int sms_invSpectrum(int sizeWaveform, sfloat *pWaveform, sfloat *pWindow , int sizeMag, sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer); /* \todo remove this once invSpectrum is completely implemented */ @@ -31,7 +31,6 @@ %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeAmp, double* pAmp)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeMag, double* pMag)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizePhase, double* pPhase)}; 
-%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeRes, double* pRes)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeCepstrum, double* pCepstrum)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeEnv, double* pEnv)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeTrack, double* pTrack)}; @@ -39,6 +38,11 @@ %apply(int DIM1, double* IN_ARRAY1) {(int sizeInArray, double* pInArray)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeOutArray, double* pOutArray)}; %apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeHop, double* pSynthesis)}; +%apply(int DIM1, double* INPLACE_ARRAY1) +{ + (int sizeResidual, double* residual), + (int sizeApprox, double* approx) +} %apply(int DIM1, double* IN_ARRAY1) { (int numamps, double* amps), @@ -504,26 +508,15 @@ { void getResidual(int sizeArray, sfloat *pArray) { - if(sizeArray < $self->residualSize) + if(sizeArray < $self->hopSize) { sms_error("numpy array not big enough"); return; } int i; - for(i = 0; i < $self->residualSize; i++) + for(i = 0; i < $self->hopSize; i++) pArray[i] = $self->residual[i]; } - void getApprox(int sizeArray, sfloat *pArray) - { - if(sizeArray < $self->nCoeffs) - { - sms_error("numpy array not big enough"); - return; - } - int i; - for(i = 0; i < $self->nCoeffs; i++) - pArray[i] = $self->approxEnvelope[i]; - } } %extend SMS_ModifyParams diff --git a/sms/spectrum.c b/sms/spectrum.c index 666b42d..1b8e053 100644 --- a/sms/spectrum.c +++ b/sms/spectrum.c @@ -37,7 +37,6 @@ int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer) { int i, it2; - int err = 0; sfloat fReal, fImag; int sizeFft = sizeMag << 1; @@ -55,6 +54,38 @@ int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag fImag = pFftBuffer[it2 + 1]; /*even numbers 2->N+2 */ pMag[i] = sqrt(fReal * fReal + fImag * fImag); pPhase[i] = atan2(-fImag, fReal); /* \todo why is fImag negated? 
*/ + /*pPhase[i] = atan2(fImag, fReal);*/ + } + + return sizeFft; +} + +/* sms_spectrum, but without zero-phase windowing, and with phase calculated + * according by arctan(imag/real) instead of arctan2(-imag/real) + */ +int sms_spectrumW(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag, + sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer) +{ + int i, it2; + sfloat fReal, fImag; + + int sizeFft = sizeMag << 1; + memset(pFftBuffer, 0, sizeFft * sizeof(sfloat)); + + /* apply window to waveform */ + for(i = 0; i < sizeWindow; i++) + pFftBuffer[i] = pWaveform[i] * pWindow[i]; + + sms_fft(sizeFft, pFftBuffer); + + /* convert from rectangular to polar coordinates */ + for(i = 0; i < sizeMag; i++) + { + it2 = i << 1; //even numbers 0-N + fReal = pFftBuffer[it2]; /*odd numbers 1->N+1 */ + fImag = pFftBuffer[it2 + 1]; /*even numbers 2->N+2 */ + pMag[i] = sqrt(fReal * fReal + fImag * fImag); + pPhase[i] = atan2(fImag, fReal); } return sizeFft; @@ -81,8 +112,8 @@ int sms_spectrumMag(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, sfloat fReal, fImag; /* apply window to waveform, zero the rest of the array */ - for (i = 0; i < sizeWindow; i++) - pFftBuffer[i] = pWindow[i] * pWaveform[i]; + for(i = 0; i < sizeWindow; i++) + pFftBuffer[i] = pWaveform[i] * pWindow[i]; for(i = sizeWindow; i < sizeFft; i++) pFftBuffer[i] = 0.; @@ -90,7 +121,7 @@ int sms_spectrumMag(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, sms_fft(sizeFft, pFftBuffer); /* convert from rectangular to polar coordinates */ - for (i=0; i<sizeMag; i++) + for(i = 0; i < sizeMag; i++) { it2 = i << 1; fReal = pFftBuffer[it2]; @@ -123,7 +154,7 @@ int sms_invSpectrum(int sizeWaveform, sfloat *pWaveform, sfloat *pWindow, sms_PolarToRect(sizeMag, pFftBuffer, pMag, pPhase); sms_ifft(sizeFft, pFftBuffer); - /* assume the output array has been taken care off */ + /* assume that the output array does not need to be cleared */ /* before, this was multiplied by .5, why? 
*/ for(i = 0; i < sizeWaveform; i++) //pWaveform[i] += pFftBuffer[i] * pWindow[i]; @@ -145,25 +176,21 @@ int sms_invQuickSpectrumW(sfloat *pFMagSpectrum, sfloat *pFPhaseSpectrum, int sizeFft, sfloat *pFWaveform, int sizeWave, sfloat *pFWindow, sfloat* pFftBuffer) { - int sizeMag = sizeFft >> 1, i, it2; - sfloat fPower; + int i, it2; + int sizeMag = sizeFft >> 1; - /* convert from polar coordinates to rectangular */ - for(i = 0; i<sizeMag; i++) + /* convert from polar coordinates to rectangular */ + for(i = 0; i < sizeMag; i++) { it2 = i << 1; - fPower = pFMagSpectrum[i]; - pFftBuffer[it2] = fPower * cos (pFPhaseSpectrum[i]); - pFftBuffer[it2+1] = fPower * sin (pFPhaseSpectrum[i]); + pFftBuffer[it2] = pFMagSpectrum[i] * cos(pFPhaseSpectrum[i]); + pFftBuffer[it2+1] = pFMagSpectrum[i] * sin(pFPhaseSpectrum[i]); } /* compute IFFT */ sms_ifft(sizeFft, pFftBuffer); - /* assume the output array has been taken care off */ - /* \todo is a seperate pFftBuffer necessary here? - it seems like multiplying the window into the waveform - would be fine, without pFftBuffer */ + /* assume that the output array does not need to be cleared */ for(i = 0; i < sizeWave; i++) pFWaveform[i] += (pFftBuffer[i] * pFWindow[i] * .5); diff --git a/sms/synthesis.c b/sms/synthesis.c index 9523ee6..bc739d0 100644 --- a/sms/synthesis.c +++ b/sms/synthesis.c @@ -120,14 +120,12 @@ static int StocSynthApprox(SMS_Data *pSmsData, SMS_SynthParams *pSynthParams) int sizeSpec1 = pSmsData->nCoeff; int sizeSpec2 = pSynthParams->sizeHop; int sizeFft = pSynthParams->sizeHop << 1; /* 50% overlap, so sizeFft is 2x sizeHop */ - sfloat fStocGain; /* if no gain or no coefficients return */ - if (*(pSmsData->pFStocGain) <= 0) + if(*(pSmsData->pFStocGain) <= 0) return 0; - sizeSpec1Used = sizeSpec1 * pSynthParams->iSamplingRate / - pSynthParams->iOriginalSRate; + sizeSpec1Used = sizeSpec1 * pSynthParams->iSamplingRate / pSynthParams->iOriginalSRate; /* sizeSpec1Used cannot be more than what is available \todo check by 
graph */ if(sizeSpec1Used > sizeSpec1) sizeSpec1Used = sizeSpec1; @@ -150,74 +148,79 @@ static int StocSynthApprox(SMS_Data *pSmsData, SMS_SynthParams *pSynthParams) * * \param residualParams Parameters and memory for residual synthesis */ -void sms_approxResidual(SMS_ResidualParams *residualParams) +void sms_approxResidual(int sizeResidual, sfloat* residual, + int sizeApprox, sfloat* approx, + SMS_ResidualParams *residualParams) { - /* filter residual with a high pass filter */ - sms_filterHighPass(residualParams->residualSize, - residualParams->residual, - residualParams->samplingRate); - - sms_spectrumMag(residualParams->residualSize, - residualParams->residual, - residualParams->residualWindow, - residualParams->sizeStocMagSpectrum, - residualParams->stocMagSpectrum, - residualParams->fftBuffer); - - sms_spectralApprox(residualParams->stocMagSpectrum, - residualParams->sizeStocMagSpectrum, - residualParams->sizeStocMagSpectrum, - residualParams->stocCoeffs, - residualParams->nCoeffs, - residualParams->nCoeffs, - residualParams->approxEnvelope); - - /* get energy of spectrum */ int i; - sfloat fMag = 0.0; - for(i = 0; i < residualParams->sizeStocMagSpectrum; i++) - fMag += (residualParams->stocMagSpectrum[i] * pAnalParams->stocMagSpectrum[i]); - /* if no gain or no coefficients return */ - sfloat stocGain = fMag / residualParams->sizeStocMagSpectrum; - if(stocGain <= 0) - return; + /* shift buffers */ + memcpy(residualParams->residual, + residualParams->residual + residualParams->hopSize, + sizeof(sfloat) * residualParams->hopSize); + memcpy(residualParams->residual + residualParams->hopSize, residual, + sizeof(sfloat) * residualParams->hopSize); - int i, sizeSpec1Used; - int sizeSpec1 = residualParams->nCoeffs; - /*int sizeSpec2 = pSynthParams->sizeHop;*/ - int sizeSpec2 = residualParams->residualSize; - int sizeFft = sizeSpec2 << 1; /* 50% overlap, so sizeFft is 2x sizeHop */ + memcpy(residualParams->approx, + residualParams->approx + residualParams->hopSize, 
+ sizeof(sfloat) * residualParams->hopSize); + memset(residualParams->approx + residualParams->hopSize, 0, + sizeof(sfloat) * residualParams->hopSize); - /*sizeSpec1Used = sizeSpec1 * pSynthParams->iSamplingRate / pSynthParams->iOriginalSRate;*/ + sms_spectrumMag(residualParams->residualSize, + residualParams->residual, + residualParams->fftWindow, + residualParams->sizeStocMagSpectrum, + residualParams->stocMagSpectrum, + residualParams->fftBuffer); + + if(residualParams->sizeStocMagSpectrum != residualParams->nCoeffs) + { + sms_spectralApprox(residualParams->stocMagSpectrum, + residualParams->sizeStocMagSpectrum, + residualParams->sizeStocMagSpectrum, + residualParams->stocCoeffs, + residualParams->nCoeffs, + residualParams->nCoeffs, + residualParams->approxEnvelope); - /*[> sizeSpec1Used cannot be more than what is available \todo check by graph <]*/ - /*if(sizeSpec1Used > sizeSpec1) sizeSpec1Used = sizeSpec1;*/ + sms_spectralApprox(residualParams->stocCoeffs, + residualParams->nCoeffs, + residualParams->nCoeffs, + residualParams->stocMagSpectrum, + residualParams->sizeStocMagSpectrum, + residualParams->sizeStocMagSpectrum, + residualParams->approxEnvelope); + } - /*sms_spectralApprox(pSmsData->pFStocCoeff, sizeSpec1, sizeSpec1Used,*/ - /* pSynthParams->pMagBuff, sizeSpec2, sizeSpec1Used,*/ - /* pSynthParams->approxEnvelope);*/ + /* generate random phases */ + for(i = 0; i < residualParams->sizeStocMagSpectrum; i++) + residualParams->stocPhaseSpectrum[i] = TWO_PI * sms_random(); - /*[> generate random phases <]*/ - /*for(i = 0; i < sizeSpec2; i++)*/ - /* pSynthParams->pPhaseBuff[i] = TWO_PI * sms_random();*/ + /* IFFT with 50% overlap */ + sms_invQuickSpectrumW(residualParams->stocMagSpectrum, + residualParams->stocPhaseSpectrum, + residualParams->sizeStocMagSpectrum*2, + residualParams->approx, + residualParams->residualSize, + residualParams->ifftWindow, + residualParams->fftBuffer); - /*sms_invQuickSpectrumW(pSynthParams->pMagBuff, pSynthParams->pPhaseBuff,*/ 
- /* sizeFft, pSynthParams->pSynthBuff, sizeFft,*/ - /* pSynthParams->pFStocWindow, pSynthParams->pSpectra);*/ + /* output */ + for(i = 0; i < sizeApprox; i++) + approx[i] = residualParams->approx[i] * residualParams->windowScale; } /*! \brief synthesizes one frame of SMS data * - * \param pSmsData input SMS data - * \param pFSynthesis output sound buffer - * \param pSynthParams synthesis parameters + * \param pSmsData input SMS data + * \param pFSynthesis output sound buffer + * \param pSynthParams synthesis parameters */ void sms_synthesize(SMS_Data *pSmsData, sfloat *pFSynthesis, SMS_SynthParams *pSynthParams) { - int i, k; + int i; int sizeHop = pSynthParams->sizeHop; - int sizeFft = sizeHop << 1; memcpy(pSynthParams->pSynthBuff, (sfloat *)(pSynthParams->pSynthBuff+sizeHop), sizeof(sfloat) * sizeHop); diff --git a/tests/sms.py b/tests/sms.py index b09e692..ca51f3f 100644 --- a/tests/sms.py +++ b/tests/sms.py @@ -24,10 +24,8 @@ from nose.tools import assert_almost_equals class TestSimplSMS(object): FLOAT_PRECISION = 2 # number of decimal places to check for accuracy input_file = 'audio/flute.wav' - frame_size = 2048 hop_size = 512 num_frames = 50 - #num_samples = frame_size + ((num_frames - 1) * hop_size) num_samples = num_frames * hop_size max_peaks = 10 max_partials = 10 @@ -55,6 +53,7 @@ class TestSimplSMS(object): analysis_params.minGoodFrames = 1 analysis_params.iCleanTracks = 0 analysis_params.iStochasticType = pysms.SMS_STOC_NONE + analysis_params.preEmphasis = 0 return analysis_params def simplsms_analysis_params(self, sampling_rate): @@ -74,6 +73,7 @@ class TestSimplSMS(object): analysis_params.minGoodFrames = 1 analysis_params.iCleanTracks = 0 analysis_params.iStochasticType = simplsms.SMS_STOC_NONE + analysis_params.preEmphasis = 0 return analysis_params def pysms_synthesis_params(self, sampling_rate): @@ -84,6 +84,7 @@ class TestSimplSMS(object): synth_params.iStochasticType = pysms.SMS_STOC_NONE synth_params.sizeHop = self.hop_size 
synth_params.nTracks = self.max_peaks + synth_params.deEmphasis = 0 return synth_params def test_size_next_read(self): @@ -112,13 +113,13 @@ class TestSimplSMS(object): while current_frame < self.num_frames: sms_next_read_sizes.append(analysis_params.sizeNextRead) sample_offset += pysms_size_new_data - if((sample_offset + analysis_params.sizeNextRead) < self.num_samples): - pysms_size_new_data = analysis_params.sizeNextRead - else: - pysms_size_new_data = self.num_samples - sample_offset + pysms_size_new_data = analysis_params.sizeNextRead # convert frame to floats for libsms frame = audio[sample_offset:sample_offset + pysms_size_new_data] frame = np.array(frame, dtype=np.float32) + if len(frame) < pysms_size_new_data: + frame = np.hstack((frame, np.zeros(pysms_size_new_data - len(frame), + dtype=np.float32))) analysis_data = pysms.SMS_Data() pysms.sms_allocFrameH(sms_header, analysis_data) status = pysms.sms_analyze(frame, analysis_data, analysis_params) @@ -141,6 +142,7 @@ class TestSimplSMS(object): while current_frame < self.num_frames: pd.frame_size = pd.get_next_frame_size() + #print current_frame, sms_next_read_sizes[current_frame], pd.frame_size assert sms_next_read_sizes[current_frame] == pd.frame_size frame = simpl.Frame() frame.size = pd.frame_size @@ -181,6 +183,9 @@ class TestSimplSMS(object): frame.size = size_new_data frame.audio = np.array(audio[sample_offset:sample_offset + size_new_data], dtype=np.float32) + if len(frame.audio) < size_new_data: + frame.audio = np.hstack((frame.audio, np.zeros(size_new_data - len(frame.audio), + dtype=np.float32))) analysis_data = pysms.SMS_Data() pysms.sms_allocFrameH(sms_header, analysis_data) status = pysms.sms_analyze(frame.audio, analysis_data, analysis_params) @@ -242,6 +247,8 @@ class TestSimplSMS(object): frame = simpl.Frame() frame.size = size_new_data frame.audio = audio[sample_offset:sample_offset + size_new_data] + if len(frame.audio) < size_new_data: + frame.audio = np.hstack((frame.audio, 
simpl.zeros(size_new_data - len(frame.audio)))) analysis_data = simplsms.SMS_Data() simplsms.sms_allocFrameH(simpl_sms_header, analysis_data) status = simplsms.sms_analyze(frame.audio, analysis_data, simpl_analysis_params) @@ -483,6 +490,8 @@ class TestSimplSMS(object): sample_offset += size_new_data size_new_data = analysis_params.sizeNextRead frame = audio[sample_offset:sample_offset + size_new_data] + if len(frame) < size_new_data: + frame = np.hstack((frame, simpl.zeros(size_new_data - len(frame)))) analysis_data = simplsms.SMS_Data() simplsms.sms_allocFrameH(sms_header, analysis_data) status = simplsms.sms_analyze(frame, analysis_data, analysis_params) @@ -526,6 +535,8 @@ class TestSimplSMS(object): frame = simpl.Frame() frame.size = pd.frame_size frame.audio = audio[sample_offset:sample_offset + pd.frame_size] + if len(frame.audio) < pd.frame_size: + frame.audio = np.hstack((frame.audio, simpl.zeros(pd.frame_size - len(frame.audio)))) simpl_peaks.append(pd.find_peaks_in_frame(frame)) sample_offset += pd.frame_size current_frame += 1 @@ -764,30 +775,27 @@ class TestSimplSMS(object): p.frequency = sms_freqs[i] p.phase = sms_phases[i] peaks.append(p) - else: - for i in range(num_partials): - p = simpl.Peak() - p.amplitude = 0.0 - p.frequency = 0.0 - p.phase = 0.0 - peaks.append(p) + frame.partials = peaks + sms_frames.append(frame) + current_frame += 1 if status == -1: do_analysis = False - frame.partials = peaks - sms_frames.append(frame) pysms.sms_freeFrame(analysis_data) - current_frame += 1 + # first frame is blank + sms_frames = sms_frames[1:] + + # free sms memory pysms.sms_freeAnalysis(analysis_params) pysms.sms_closeSF() pysms.sms_free() pd = simpl.SMSPeakDetection() pd.max_peaks = self.max_peaks - pd.hop_size = self.hop_size - peaks = pd.find_peaks(audio)[0:self.num_frames] + pd.hop_size = self.hop_size + peaks = pd.find_peaks(audio) pt = simpl.SMSPartialTracking() pt.max_partials = self.max_partials simpl_frames = pt.find_partials(peaks) @@ -1023,10 
+1031,19 @@ class TestSimplSMS(object): analysis_data = pysms.SMS_Data() pysms.sms_allocFrameH(sms_header, analysis_data) status = pysms.sms_analyze(frame, analysis_data, analysis_params) - analysis_frames.append(analysis_data) - if status == -1: + if status == 1: + analysis_frames.append(analysis_data) + current_frame += 1 + elif status == 0: + pysms.sms_freeFrame(analysis_data) + elif status == -1: do_analysis = False - current_frame += 1 + pysms.sms_freeFrame(analysis_data) + + # remove the first frame, it's blank + blank_frame = analysis_frames[0] + analysis_frames = analysis_frames[1:] + pysms.sms_freeFrame(blank_frame) synth_params = self.pysms_synthesis_params(sampling_rate) pysms.sms_initSynth(sms_header, synth_params) @@ -1050,15 +1067,14 @@ class TestSimplSMS(object): pd = simpl.SMSPeakDetection() pd.max_peaks = self.max_peaks pd.hop_size = self.hop_size - peaks = pd.find_peaks(audio)[0:self.num_frames] + peaks = pd.find_peaks(audio) pt = simpl.SMSPartialTracking() pt.max_partials = self.max_partials partials = pt.find_partials(peaks) synth = simpl.SMSSynthesis() synth.hop_size = self.hop_size synth.max_partials = self.max_partials - synth.stochastic_type = simplsms.SMS_STOC_NONE - synth.synthesis_type = simplsms.SMS_STYPE_DET + synth.det_synthesis_type = simplsms.SMS_DET_IFFT simpl_audio = synth.synth(partials) assert len(sms_audio) == len(simpl_audio) @@ -1066,7 +1082,7 @@ class TestSimplSMS(object): assert_almost_equals(sms_audio[i], simpl_audio[i], self.FLOAT_PRECISION) def test_harmonic_synthesis_sin(self): - """test_harmonic_synthesis + """test_harmonic_synthesis_sin Compare pysms synthesised harmonic component with SMS synthesised harmonic component.""" audio, sampling_rate = self.get_audio() @@ -1076,10 +1092,10 @@ class TestSimplSMS(object): if(pysms.sms_openSF(self.input_file, snd_header)): raise NameError("error opening sound file: " + pysms.sms_errorString()) analysis_params = self.pysms_analysis_params(sampling_rate) - analysis_params.nFrames 
= self.num_frames if pysms.sms_initAnalysis(analysis_params, snd_header) != 0: raise Exception("Error allocating memory for analysis_params") analysis_params.iSizeSound = self.num_samples + analysis_params.nFrames = self.num_frames sms_header = pysms.SMS_Header() pysms.sms_fillHeader(sms_header, analysis_params, "pysms") @@ -1095,16 +1111,28 @@ class TestSimplSMS(object): frame = audio[sample_offset:sample_offset + size_new_data] # convert frame to floats for libsms frame = np.array(frame, dtype=np.float32) + if len(frame) < size_new_data: + frame = np.hstack((frame, np.zeros(size_new_data - len(frame), + dtype=np.float32))) analysis_data = pysms.SMS_Data() pysms.sms_allocFrameH(sms_header, analysis_data) status = pysms.sms_analyze(frame, analysis_data, analysis_params) - analysis_frames.append(analysis_data) - if status == -1: + if status == 1: + analysis_frames.append(analysis_data) + current_frame += 1 + elif status == 0: + pysms.sms_freeFrame(analysis_data) + elif status == -1: do_analysis = False - current_frame += 1 + pysms.sms_freeFrame(analysis_data) + + # remove the first frame, it's blank + blank_frame = analysis_frames[0] + analysis_frames = analysis_frames[1:] + pysms.sms_freeFrame(blank_frame) synth_params = self.pysms_synthesis_params(sampling_rate) - synth_params.iDetSynthesisType = pysms.SMS_DET_SIN + synth_params.iDetSynthType = pysms.SMS_DET_SIN pysms.sms_initSynth(sms_header, synth_params) synth_samples = np.zeros(synth_params.sizeHop, dtype=np.float32) @@ -1126,14 +1154,13 @@ class TestSimplSMS(object): pd = simpl.SMSPeakDetection() pd.max_peaks = self.max_peaks pd.hop_size = self.hop_size - peaks = pd.find_peaks(audio)[0:self.num_frames] + peaks = pd.find_peaks(audio) pt = simpl.SMSPartialTracking() pt.max_partials = self.max_partials partials = pt.find_partials(peaks) synth = simpl.SMSSynthesis() synth.hop_size = self.hop_size synth.max_partials = self.max_partials - synth.stochastic_type = simplsms.SMS_STOC_NONE synth.det_synthesis_type = 
simplsms.SMS_DET_SIN simpl_audio = synth.synth(partials) @@ -1144,6 +1171,13 @@ class TestSimplSMS(object): def test_residual_synthesis(self): """test_residual_synthesis Compare pysms residual signal with SMS residual""" + + # ------------------------------------------- + # This test is not finished yet. Skip for now + from nose.plugins.skip import SkipTest + raise SkipTest + # ------------------------------------------- + audio, sampling_rate = self.get_audio() pysms.sms_init() snd_header = pysms.SMS_SndHeader() @@ -1154,7 +1188,6 @@ class TestSimplSMS(object): analysis_params.nFrames = self.num_frames analysis_params.nStochasticCoeff = 128 analysis_params.iStochasticType = pysms.SMS_STOC_APPROX - analysis_params.preEmphasis = 0 if pysms.sms_initAnalysis(analysis_params, snd_header) != 0: raise Exception("Error allocating memory for analysis_params") analysis_params.iSizeSound = self.num_samples @@ -1176,13 +1209,6 @@ class TestSimplSMS(object): analysis_data = pysms.SMS_Data() pysms.sms_allocFrameH(sms_header, analysis_data) status = pysms.sms_analyze(frame, analysis_data, analysis_params) - #if status == 1: - # analysis_frames.append(analysis_data) - #elif status == -1: - # do_analysis = False - # pysms.sms_freeFrame(analysis_data) - #else: - # pysms.sms_freeFrame(analysis_data) analysis_frames.append(analysis_data) if status == -1: do_analysis = False @@ -1192,7 +1218,6 @@ class TestSimplSMS(object): synth_params = self.pysms_synthesis_params(sampling_rate) synth_params.iStochasticType = pysms.SMS_STOC_APPROX synth_params.iSynthesisType = pysms.SMS_STYPE_STOC - synth_params.deEmphasis = 0 pysms.sms_initSynth(sms_header, synth_params) synth_samples = np.zeros(synth_params.sizeHop, dtype=np.float32) sms_residual = np.array([], dtype=np.float32) @@ -1235,7 +1260,7 @@ if __name__ == "__main__": # useful for debugging, particularly with GDB import nose argv = [__file__, + "--nocapture", #__file__ + ":TestSimplSMS.test_residual_synthesis"] - __file__ + 
":TestSimplSMS.test_sms_analyze"] + __file__ + ":TestSimplSMS.test_harmonic_synthesis_sin"] nose.run(argv=argv) - |